diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..5408215 --- /dev/null +++ b/AUTHORS @@ -0,0 +1 @@ +See README.txt diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..805d7df --- /dev/null +++ b/COPYING @@ -0,0 +1 @@ +See LICENSE.txt diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..5408215 --- /dev/null +++ b/ChangeLog @@ -0,0 +1 @@ +See README.txt diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..2ee1151 --- /dev/null +++ b/INSTALL @@ -0,0 +1,9 @@ + +It is advised to run ./autogen.sh before ./configure (autoconf and automake +need to be installed on your system for autogen.sh to work) + +./configure has an option named --with-cuda that allows you to specify +where your CUDA 5.5 toolkit is installed (usually /usr/local/cuda-5.5, +but some distros may have a different default location) + +See README.txt diff --git a/LICENSE b/LICENSE index 70566f2..2d7f3b9 100644 --- a/LICENSE +++ b/LICENSE @@ -1,674 +1,3 @@ -GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. 
Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. 
If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. 
Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. 
- - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. 
Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. 
- - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. 
- - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. 
If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. 
- - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. 
- - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the 
material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. 
- - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. 
If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. 
- - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. - - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - {one line to give the program's name and a brief idea of what it does.} - Copyright (C) {year} {name of author} - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - {project} Copyright (C) {year} {fullname} - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -<http://www.gnu.org/licenses/>. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -<http://www.gnu.org/philosophy/why-not-lgpl.html>.
\ No newline at end of file +ccminer is available under the terms of the GNU General Public License version 3. + +See LICENSE.txt for details. diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..8e70fb3 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,885 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others.
+ + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. 
+ + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. 
If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. 
For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. 
Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. 
This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<http://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>. 
+ + + + + + + + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..26f8d7e --- /dev/null +++ b/Makefile.am @@ -0,0 +1,45 @@ + +if WANT_JANSSON +JANSSON_INCLUDES= -I$(top_srcdir)/compat/jansson +else +JANSSON_INCLUDES= +endif + +EXTRA_DIST = autogen.sh README.txt LICENSE.txt \ + cudaminer.sln cudaminer.vcxproj cudaminer.vcxproj.filters \ + compat/gettimeofday.c compat/getopt/getopt_long.c cpuminer-config.h.in + +SUBDIRS = compat + +bin_PROGRAMS = ccminer + +ccminer_SOURCES = elist.h miner.h compat.h \ + compat/inttypes.h compat/stdbool.h compat/unistd.h \ + compat/sys/time.h compat/getopt/getopt.h \ + cpu-miner.c util.c blake.c groestl.c hefty1.c keccak.c scrypt.c sha2.c \ + sph_blake.h sph_groestl.h sph_keccak.h sph_types.h \ + heavy.cu \ + cuda_blake512.cu cuda_blake512.h \ + cuda_combine.cu cuda_combine.h \ + cuda_groestl512.cu cuda_groestl512.h \ + cuda_hefty1.cu cuda_hefty1.h \ + cuda_keccak512.cu cuda_keccak512.h \ + cuda_sha256.cu cuda_sha256.h \ + cuda_fugue256.cu \ + fuguecoin.cpp fugue.c sph_fugue.h uint256.h + + +ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@ +ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@ +ccminer_CPPFLAGS = -msse2 @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME + +.cu.o: + $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< + +## Thrust needs Compute 2.0 minimum +#heavy.o: heavy.cu +# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< +# +#cuda_hefty1.o: cuda_hefty1.cu +# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< + diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 0000000..963c70d --- /dev/null +++ b/Makefile.in @@ -0,0 
+1,1107 @@ +# Makefile.in generated by automake 1.13.3 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = 
$(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +bin_PROGRAMS = ccminer$(EXEEXT) +subdir = . +DIST_COMMON = INSTALL NEWS README AUTHORS ChangeLog \ + $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/configure $(am__configure_deps) \ + $(srcdir)/cpuminer-config.h.in depcomp COPYING compile \ + config.guess config.sub install-sh missing +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ + configure.lineno config.status.lineno +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = cpuminer-config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__installdirs = "$(DESTDIR)$(bindir)" +PROGRAMS = $(bin_PROGRAMS) +am_ccminer_OBJECTS = ccminer-cpu-miner.$(OBJEXT) \ + ccminer-util.$(OBJEXT) ccminer-blake.$(OBJEXT) \ + ccminer-groestl.$(OBJEXT) ccminer-hefty1.$(OBJEXT) \ + ccminer-keccak.$(OBJEXT) ccminer-scrypt.$(OBJEXT) \ + ccminer-sha2.$(OBJEXT) heavy.$(OBJEXT) cuda_blake512.$(OBJEXT) \ + cuda_combine.$(OBJEXT) cuda_groestl512.$(OBJEXT) \ + cuda_hefty1.$(OBJEXT) cuda_keccak512.$(OBJEXT) \ + cuda_sha256.$(OBJEXT) cuda_fugue256.$(OBJEXT) \ + ccminer-fuguecoin.$(OBJEXT) ccminer-fugue.$(OBJEXT) +ccminer_OBJECTS = $(am_ccminer_OBJECTS) +ccminer_DEPENDENCIES = +ccminer_LINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(ccminer_LDFLAGS) \ + $(LDFLAGS) -o 
$@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +AM_V_CXX = $(am__v_CXX_@AM_V@) +am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@) +am__v_CXX_0 = @echo " CXX " $@; +am__v_CXX_1 = +CXXLD = $(CXX) +CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +AM_V_CXXLD = $(am__v_CXXLD_@AM_V@) +am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@) +am__v_CXXLD_0 = @echo " CXXLD " $@; +am__v_CXXLD_1 = +SOURCES = $(ccminer_SOURCES) +DIST_SOURCES = $(ccminer_SOURCES) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive 
+am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + cscope distdir dist dist-all distcheck +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) \ + $(LISP)cpuminer-config.h.in +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +CSCOPE = cscope +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +distdir = $(PACKAGE)-$(VERSION) +top_distdir = $(distdir) +am__remove_distdir = \ + if test -d "$(distdir)"; then \ + find "$(distdir)" -type d ! 
-perm -200 -exec chmod u+w {} ';' \ + && rm -rf "$(distdir)" \ + || { sleep 5 && rm -rf "$(distdir)"; }; \ + else :; fi +am__post_remove_distdir = $(am__remove_distdir) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +DIST_ARCHIVES = $(distdir).tar.gz +GZIP_ENV = --best +DIST_TARGETS = dist-gzip +distuninstallcheck_listfiles = find . -type f -print +am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \ + | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$' +distcleancheck_listfiles = find . 
-type f -print +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CUDA_CFLAGS = @CUDA_CFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CUDA_LIBS = @CUDA_LIBS@ +CXX = @CXX@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JANSSON_LIBS = @JANSSON_LIBS@ +LDFLAGS = @LDFLAGS@ +LIBCURL = @LIBCURL@ +LIBCURL_CPPFLAGS = @LIBCURL_CPPFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +NVCC = @NVCC@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PTHREAD_FLAGS = @PTHREAD_FLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +WS2_LIBS = @WS2_LIBS@ +_libcurl_config = @_libcurl_config@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ 
+build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +@WANT_JANSSON_FALSE@JANSSON_INCLUDES = +@WANT_JANSSON_TRUE@JANSSON_INCLUDES = -I$(top_srcdir)/compat/jansson +EXTRA_DIST = autogen.sh README.txt LICENSE.txt \ + cudaminer.sln cudaminer.vcxproj cudaminer.vcxproj.filters \ + compat/gettimeofday.c compat/getopt/getopt_long.c cpuminer-config.h.in + +SUBDIRS = compat +ccminer_SOURCES = elist.h miner.h compat.h \ + compat/inttypes.h compat/stdbool.h compat/unistd.h \ + compat/sys/time.h compat/getopt/getopt.h \ + cpu-miner.c util.c blake.c groestl.c hefty1.c keccak.c scrypt.c sha2.c \ + sph_blake.h sph_groestl.h sph_keccak.h sph_types.h \ + heavy.cu \ + cuda_blake512.cu cuda_blake512.h \ + cuda_combine.cu cuda_combine.h \ + cuda_groestl512.cu cuda_groestl512.h \ + cuda_hefty1.cu cuda_hefty1.h \ + cuda_keccak512.cu cuda_keccak512.h \ + cuda_sha256.cu cuda_sha256.h \ + cuda_fugue256.cu \ + fuguecoin.cpp fugue.c sph_fugue.h uint256.h + +ccminer_LDFLAGS = 
$(PTHREAD_FLAGS) @CUDA_LDFLAGS@ +ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@ +ccminer_CPPFLAGS = -msse2 @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME +all: cpuminer-config.h + $(MAKE) $(AM_MAKEFLAGS) all-recursive + +.SUFFIXES: +.SUFFIXES: .c .cpp .cu .o .obj +am--refresh: Makefile + @: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + echo ' cd $(srcdir) && $(AUTOMAKE) --gnu'; \ + $(am__cd) $(srcdir) && $(AUTOMAKE) --gnu \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + echo ' $(SHELL) ./config.status'; \ + $(SHELL) ./config.status;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + $(SHELL) ./config.status --recheck + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + $(am__cd) $(srcdir) && $(AUTOCONF) +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) +$(am__aclocal_m4_deps): + +cpuminer-config.h: stamp-h1 + @if test ! -f $@; then rm -f stamp-h1; else :; fi + @if test ! 
-f $@; then $(MAKE) $(AM_MAKEFLAGS) stamp-h1; else :; fi + +stamp-h1: $(srcdir)/cpuminer-config.h.in $(top_builddir)/config.status + @rm -f stamp-h1 + cd $(top_builddir) && $(SHELL) ./config.status cpuminer-config.h +$(srcdir)/cpuminer-config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) + rm -f stamp-h1 + touch $@ + +distclean-hdr: + -rm -f cpuminer-config.h stamp-h1 +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(bindir)" && rm -f $$files + +clean-binPROGRAMS: + -test -z "$(bin_PROGRAMS)" || rm 
-f $(bin_PROGRAMS) + +ccminer$(EXEEXT): $(ccminer_OBJECTS) $(ccminer_DEPENDENCIES) $(EXTRA_ccminer_DEPENDENCIES) + @rm -f ccminer$(EXEEXT) + $(AM_V_CXXLD)$(ccminer_LINK) $(ccminer_OBJECTS) $(ccminer_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-blake.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-cpu-miner.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-fugue.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-fuguecoin.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-groestl.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-hefty1.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-keccak.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-scrypt.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-sha2.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-util.Po@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c `$(CYGPATH_W) '$<'` + 
+ccminer-cpu-miner.o: cpu-miner.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-cpu-miner.o -MD -MP -MF $(DEPDIR)/ccminer-cpu-miner.Tpo -c -o ccminer-cpu-miner.o `test -f 'cpu-miner.c' || echo '$(srcdir)/'`cpu-miner.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-cpu-miner.Tpo $(DEPDIR)/ccminer-cpu-miner.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cpu-miner.c' object='ccminer-cpu-miner.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-cpu-miner.o `test -f 'cpu-miner.c' || echo '$(srcdir)/'`cpu-miner.c + +ccminer-cpu-miner.obj: cpu-miner.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-cpu-miner.obj -MD -MP -MF $(DEPDIR)/ccminer-cpu-miner.Tpo -c -o ccminer-cpu-miner.obj `if test -f 'cpu-miner.c'; then $(CYGPATH_W) 'cpu-miner.c'; else $(CYGPATH_W) '$(srcdir)/cpu-miner.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-cpu-miner.Tpo $(DEPDIR)/ccminer-cpu-miner.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cpu-miner.c' object='ccminer-cpu-miner.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-cpu-miner.obj `if test -f 'cpu-miner.c'; then $(CYGPATH_W) 'cpu-miner.c'; else $(CYGPATH_W) '$(srcdir)/cpu-miner.c'; fi` + +ccminer-util.o: util.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) 
$(CFLAGS) -MT ccminer-util.o -MD -MP -MF $(DEPDIR)/ccminer-util.Tpo -c -o ccminer-util.o `test -f 'util.c' || echo '$(srcdir)/'`util.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-util.Tpo $(DEPDIR)/ccminer-util.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='util.c' object='ccminer-util.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-util.o `test -f 'util.c' || echo '$(srcdir)/'`util.c + +ccminer-util.obj: util.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-util.obj -MD -MP -MF $(DEPDIR)/ccminer-util.Tpo -c -o ccminer-util.obj `if test -f 'util.c'; then $(CYGPATH_W) 'util.c'; else $(CYGPATH_W) '$(srcdir)/util.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-util.Tpo $(DEPDIR)/ccminer-util.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='util.c' object='ccminer-util.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-util.obj `if test -f 'util.c'; then $(CYGPATH_W) 'util.c'; else $(CYGPATH_W) '$(srcdir)/util.c'; fi` + +ccminer-blake.o: blake.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-blake.o -MD -MP -MF $(DEPDIR)/ccminer-blake.Tpo -c -o ccminer-blake.o `test -f 'blake.c' || echo '$(srcdir)/'`blake.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-blake.Tpo $(DEPDIR)/ccminer-blake.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='blake.c' 
object='ccminer-blake.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-blake.o `test -f 'blake.c' || echo '$(srcdir)/'`blake.c + +ccminer-blake.obj: blake.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-blake.obj -MD -MP -MF $(DEPDIR)/ccminer-blake.Tpo -c -o ccminer-blake.obj `if test -f 'blake.c'; then $(CYGPATH_W) 'blake.c'; else $(CYGPATH_W) '$(srcdir)/blake.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-blake.Tpo $(DEPDIR)/ccminer-blake.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='blake.c' object='ccminer-blake.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-blake.obj `if test -f 'blake.c'; then $(CYGPATH_W) 'blake.c'; else $(CYGPATH_W) '$(srcdir)/blake.c'; fi` + +ccminer-groestl.o: groestl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-groestl.o -MD -MP -MF $(DEPDIR)/ccminer-groestl.Tpo -c -o ccminer-groestl.o `test -f 'groestl.c' || echo '$(srcdir)/'`groestl.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-groestl.Tpo $(DEPDIR)/ccminer-groestl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='groestl.c' object='ccminer-groestl.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-groestl.o `test -f 'groestl.c' || echo '$(srcdir)/'`groestl.c + +ccminer-groestl.obj: groestl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-groestl.obj -MD -MP -MF $(DEPDIR)/ccminer-groestl.Tpo -c -o ccminer-groestl.obj `if test -f 'groestl.c'; then $(CYGPATH_W) 'groestl.c'; else $(CYGPATH_W) '$(srcdir)/groestl.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-groestl.Tpo $(DEPDIR)/ccminer-groestl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='groestl.c' object='ccminer-groestl.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-groestl.obj `if test -f 'groestl.c'; then $(CYGPATH_W) 'groestl.c'; else $(CYGPATH_W) '$(srcdir)/groestl.c'; fi` + +ccminer-hefty1.o: hefty1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-hefty1.o -MD -MP -MF $(DEPDIR)/ccminer-hefty1.Tpo -c -o ccminer-hefty1.o `test -f 'hefty1.c' || echo '$(srcdir)/'`hefty1.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-hefty1.Tpo $(DEPDIR)/ccminer-hefty1.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='hefty1.c' object='ccminer-hefty1.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-hefty1.o `test -f 'hefty1.c' || echo '$(srcdir)/'`hefty1.c + +ccminer-hefty1.obj: hefty1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-hefty1.obj -MD -MP -MF $(DEPDIR)/ccminer-hefty1.Tpo -c -o ccminer-hefty1.obj `if test -f 'hefty1.c'; then $(CYGPATH_W) 'hefty1.c'; else $(CYGPATH_W) '$(srcdir)/hefty1.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-hefty1.Tpo $(DEPDIR)/ccminer-hefty1.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='hefty1.c' object='ccminer-hefty1.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-hefty1.obj `if test -f 'hefty1.c'; then $(CYGPATH_W) 'hefty1.c'; else $(CYGPATH_W) '$(srcdir)/hefty1.c'; fi` + +ccminer-keccak.o: keccak.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-keccak.o -MD -MP -MF $(DEPDIR)/ccminer-keccak.Tpo -c -o ccminer-keccak.o `test -f 'keccak.c' || echo '$(srcdir)/'`keccak.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-keccak.Tpo $(DEPDIR)/ccminer-keccak.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='keccak.c' object='ccminer-keccak.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-keccak.o `test -f 'keccak.c' || echo '$(srcdir)/'`keccak.c + +ccminer-keccak.obj: keccak.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-keccak.obj -MD -MP -MF $(DEPDIR)/ccminer-keccak.Tpo -c -o ccminer-keccak.obj `if test -f 'keccak.c'; then $(CYGPATH_W) 'keccak.c'; else 
$(CYGPATH_W) '$(srcdir)/keccak.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-keccak.Tpo $(DEPDIR)/ccminer-keccak.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='keccak.c' object='ccminer-keccak.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-keccak.obj `if test -f 'keccak.c'; then $(CYGPATH_W) 'keccak.c'; else $(CYGPATH_W) '$(srcdir)/keccak.c'; fi` + +ccminer-scrypt.o: scrypt.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-scrypt.o -MD -MP -MF $(DEPDIR)/ccminer-scrypt.Tpo -c -o ccminer-scrypt.o `test -f 'scrypt.c' || echo '$(srcdir)/'`scrypt.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-scrypt.Tpo $(DEPDIR)/ccminer-scrypt.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='scrypt.c' object='ccminer-scrypt.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-scrypt.o `test -f 'scrypt.c' || echo '$(srcdir)/'`scrypt.c + +ccminer-scrypt.obj: scrypt.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-scrypt.obj -MD -MP -MF $(DEPDIR)/ccminer-scrypt.Tpo -c -o ccminer-scrypt.obj `if test -f 'scrypt.c'; then $(CYGPATH_W) 'scrypt.c'; else $(CYGPATH_W) '$(srcdir)/scrypt.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-scrypt.Tpo $(DEPDIR)/ccminer-scrypt.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='scrypt.c' object='ccminer-scrypt.obj' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-scrypt.obj `if test -f 'scrypt.c'; then $(CYGPATH_W) 'scrypt.c'; else $(CYGPATH_W) '$(srcdir)/scrypt.c'; fi` + +ccminer-sha2.o: sha2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-sha2.o -MD -MP -MF $(DEPDIR)/ccminer-sha2.Tpo -c -o ccminer-sha2.o `test -f 'sha2.c' || echo '$(srcdir)/'`sha2.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-sha2.Tpo $(DEPDIR)/ccminer-sha2.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sha2.c' object='ccminer-sha2.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-sha2.o `test -f 'sha2.c' || echo '$(srcdir)/'`sha2.c + +ccminer-sha2.obj: sha2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-sha2.obj -MD -MP -MF $(DEPDIR)/ccminer-sha2.Tpo -c -o ccminer-sha2.obj `if test -f 'sha2.c'; then $(CYGPATH_W) 'sha2.c'; else $(CYGPATH_W) '$(srcdir)/sha2.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-sha2.Tpo $(DEPDIR)/ccminer-sha2.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sha2.c' object='ccminer-sha2.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-sha2.obj `if test -f 
'sha2.c'; then $(CYGPATH_W) 'sha2.c'; else $(CYGPATH_W) '$(srcdir)/sha2.c'; fi` + +ccminer-fugue.o: fugue.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-fugue.o -MD -MP -MF $(DEPDIR)/ccminer-fugue.Tpo -c -o ccminer-fugue.o `test -f 'fugue.c' || echo '$(srcdir)/'`fugue.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-fugue.Tpo $(DEPDIR)/ccminer-fugue.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='fugue.c' object='ccminer-fugue.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-fugue.o `test -f 'fugue.c' || echo '$(srcdir)/'`fugue.c + +ccminer-fugue.obj: fugue.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-fugue.obj -MD -MP -MF $(DEPDIR)/ccminer-fugue.Tpo -c -o ccminer-fugue.obj `if test -f 'fugue.c'; then $(CYGPATH_W) 'fugue.c'; else $(CYGPATH_W) '$(srcdir)/fugue.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-fugue.Tpo $(DEPDIR)/ccminer-fugue.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='fugue.c' object='ccminer-fugue.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-fugue.obj `if test -f 'fugue.c'; then $(CYGPATH_W) 'fugue.c'; else $(CYGPATH_W) '$(srcdir)/fugue.c'; fi` + +.cpp.o: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po 
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $< + +.cpp.obj: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +ccminer-fuguecoin.o: fuguecoin.cpp +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT ccminer-fuguecoin.o -MD -MP -MF $(DEPDIR)/ccminer-fuguecoin.Tpo -c -o ccminer-fuguecoin.o `test -f 'fuguecoin.cpp' || echo '$(srcdir)/'`fuguecoin.cpp +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-fuguecoin.Tpo $(DEPDIR)/ccminer-fuguecoin.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='fuguecoin.cpp' object='ccminer-fuguecoin.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o ccminer-fuguecoin.o `test -f 'fuguecoin.cpp' || echo '$(srcdir)/'`fuguecoin.cpp + +ccminer-fuguecoin.obj: fuguecoin.cpp +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT ccminer-fuguecoin.obj -MD -MP -MF $(DEPDIR)/ccminer-fuguecoin.Tpo -c -o ccminer-fuguecoin.obj `if test -f 'fuguecoin.cpp'; then $(CYGPATH_W) 'fuguecoin.cpp'; else 
$(CYGPATH_W) '$(srcdir)/fuguecoin.cpp'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ccminer-fuguecoin.Tpo $(DEPDIR)/ccminer-fuguecoin.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='fuguecoin.cpp' object='ccminer-fuguecoin.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o ccminer-fuguecoin.obj `if test -f 'fuguecoin.cpp'; then $(CYGPATH_W) 'fuguecoin.cpp'; else $(CYGPATH_W) '$(srcdir)/fuguecoin.cpp'; fi` + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscope: cscope.files + test ! -s cscope.files \ + || $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS) +clean-cscope: + -rm -f cscope.files +cscope.files: clean-cscope cscopelist +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + -rm -f cscope.out cscope.in.out cscope.po.out cscope.files + +distdir: $(DISTFILES) + $(am__remove_distdir) + test -d "$(distdir)" || mkdir "$(distdir)" + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed 
'/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done + -test -n "$(am__skip_mode_fix)" \ + || find "$(distdir)" -type d ! -perm -755 \ + -exec chmod u+rwx,go+rx {} \; -o \ + ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ + ! -type d ! 
-perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ + || chmod -R a+r "$(distdir)" +dist-gzip: distdir + tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz + $(am__post_remove_distdir) + +dist-bzip2: distdir + tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 + $(am__post_remove_distdir) + +dist-lzip: distdir + tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz + $(am__post_remove_distdir) + +dist-xz: distdir + tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz + $(am__post_remove_distdir) + +dist-tarZ: distdir + tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z + $(am__post_remove_distdir) + +dist-shar: distdir + shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz + $(am__post_remove_distdir) + +dist-zip: distdir + -rm -f $(distdir).zip + zip -rq $(distdir).zip $(distdir) + $(am__post_remove_distdir) + +dist dist-all: + $(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:' + $(am__post_remove_distdir) + +# This target untars the dist file and tries a VPATH configuration. Then +# it guarantees that the distribution is self-contained by making another +# tarfile. 
+distcheck: dist + case '$(DIST_ARCHIVES)' in \ + *.tar.gz*) \ + GZIP=$(GZIP_ENV) gzip -dc $(distdir).tar.gz | $(am__untar) ;;\ + *.tar.bz2*) \ + bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\ + *.tar.lz*) \ + lzip -dc $(distdir).tar.lz | $(am__untar) ;;\ + *.tar.xz*) \ + xz -dc $(distdir).tar.xz | $(am__untar) ;;\ + *.tar.Z*) \ + uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ + *.shar.gz*) \ + GZIP=$(GZIP_ENV) gzip -dc $(distdir).shar.gz | unshar ;;\ + *.zip*) \ + unzip $(distdir).zip ;;\ + esac + chmod -R a-w $(distdir) + chmod u+w $(distdir) + mkdir $(distdir)/_build $(distdir)/_inst + chmod a-w $(distdir) + test -d $(distdir)/_build || exit 0; \ + dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ + && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ + && am__cwd=`pwd` \ + && $(am__cd) $(distdir)/_build \ + && ../configure --srcdir=.. --prefix="$$dc_install_base" \ + $(AM_DISTCHECK_CONFIGURE_FLAGS) \ + $(DISTCHECK_CONFIGURE_FLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) dvi \ + && $(MAKE) $(AM_MAKEFLAGS) check \ + && $(MAKE) $(AM_MAKEFLAGS) install \ + && $(MAKE) $(AM_MAKEFLAGS) installcheck \ + && $(MAKE) $(AM_MAKEFLAGS) uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ + distuninstallcheck \ + && chmod -R a-w "$$dc_install_base" \ + && ({ \ + (cd ../.. 
&& umask 077 && mkdir "$$dc_destdir") \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ + distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ + } || { rm -rf "$$dc_destdir"; exit 1; }) \ + && rm -rf "$$dc_destdir" \ + && $(MAKE) $(AM_MAKEFLAGS) dist \ + && rm -rf $(DIST_ARCHIVES) \ + && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \ + && cd "$$am__cwd" \ + || exit 1 + $(am__post_remove_distdir) + @(echo "$(distdir) archives ready for distribution: "; \ + list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ + sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' +distuninstallcheck: + @test -n '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: trying to run $@ with an empty' \ + '$$(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + $(am__cd) '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left after uninstall:" ; \ + if test -n "$(DESTDIR)"; then \ + echo " (check DESTDIR support)"; \ + fi ; \ + $(distuninstallcheck_listfiles) ; \ + exit 1; } >&2 +distcleancheck: distclean + @if test '$(srcdir)' = . 
; then \ + echo "ERROR: distcleancheck can only run from a VPATH build" ; \ + exit 1 ; \ + fi + @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left in build directory after distclean:" ; \ + $(distcleancheck_listfiles) ; \ + exit 1; } >&2 +check-am: all-am +check: check-recursive +all-am: Makefile $(PROGRAMS) cpuminer-config.h +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(bindir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-recursive + +clean-am: clean-binPROGRAMS clean-generic mostlyclean-am + +distclean: distclean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-hdr distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: install-binPROGRAMS + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -rf $(top_srcdir)/autom4te.cache + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-binPROGRAMS + +.MAKE: $(am__recursive_targets) all install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ + am--refresh check check-am clean clean-binPROGRAMS \ + clean-cscope clean-generic cscope cscopelist-am ctags ctags-am \ + dist dist-all dist-bzip2 dist-gzip dist-lzip dist-shar \ + dist-tarZ dist-xz dist-zip distcheck distclean \ + distclean-compile distclean-generic distclean-hdr \ + distclean-tags distcleancheck distdir distuninstallcheck dvi \ + dvi-am html html-am info info-am install install-am \ + install-binPROGRAMS install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip 
installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic pdf pdf-am \ + ps ps-am tags tags-am uninstall uninstall-am \ + uninstall-binPROGRAMS + + +.cu.o: + $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< + +#heavy.o: heavy.cu +# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< +# +#cuda_hefty1.o: cuda_hefty1.cu +# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..d3f5a12 --- /dev/null +++ b/NEWS @@ -0,0 +1 @@ + diff --git a/README b/README new file mode 100644 index 0000000..f5c26fc --- /dev/null +++ b/README @@ -0,0 +1,3 @@ + +A CUDA based miner for Heavycoin and Fuguecoin. For more information, take a look at README.txt + diff --git a/README.txt b/README.txt new file mode 100644 index 0000000..1298197 --- /dev/null +++ b/README.txt @@ -0,0 +1,117 @@ + +HeavyCUDA release Mar 18th 2014 - Initial Release +------------------------------------------------------------- + +*************************************************************** +If you find this tool useful and like to support its continued + development, then consider a donation. 
+ + LTC donation address: LKS1WDKGED647msBQfLBHV3Ls8sveGncnm + BTC donation address: 16hJF5mceSojnTD3ZTUDqdRhDyPJzoRakM + YAC donation address: Y87sptDEcpLkLeAuex6qZioDbvy1qXZEj4 + VTC donation address: VrjeFzMgvteCGarLw85KivBzmsiH9fqp4a + MAX donation address: mHrhQP9EFArechWxTFJ97s9D3jvcCvEEnt + DOGE donation address: DT9ghsGmez6ojVdEZgvaZbT2Z3TruXG6yP + PANDA donation address: PvgtxJ2ZKaudRogCXfUMLXVaWUMcKQgRed + MRC donation address: 1Lxc4JPDpQRJB8BN4YwhmSQ3Rcu8gjj2Kd + HVC donation address: HNN3PyyTMkDo4RkEjkWSGMwqia1yD8mwJN +*************************************************************** + +>>> Introduction <<< + +This is a CUDA accelerated mining application for use with +HeavyCoin and FugueCoin. We did not put much effort into usability, +so please set your parameters carefully. + +THIS PROGRAM IS PROVIDED "AS-IS", USE IT AT YOUR OWN RISK! + +If you're interested and read the source code, please excuse +that most of our comments are in German. + +>>> Command Line Interface <<< + +This code is based on the pooler cpuminer 2.3.2 release and inherits +its command line interface and options. 
+ + -a, --algo=ALGO specify the algorithm to use + heavy use to mine Heavycoin + fugue256 use to mine Fuguecoin + + -o, --url=URL URL of mining server (default: " DEF_RPC_URL ") + -O, --userpass=U:P username:password pair for mining server + -u, --user=USERNAME username for mining server + -p, --pass=PASSWORD password for mining server + -v, --vote Heavycoin block vote (default: 512) + --cert=FILE certificate for mining server using SSL + -x, --proxy=[PROTOCOL://]HOST[:PORT] connect through a proxy + -t, --threads=N number of miner threads (default: number of nVidia GPUs in your system) + -r, --retries=N number of times to retry if a network call fails + (default: retry indefinitely) + -R, --retry-pause=N time to pause between retries, in seconds (default: 15) + -T, --timeout=N network timeout, in seconds (default: 270) + -s, --scantime=N upper bound on time spent scanning current work when + long polling is unavailable, in seconds (default: 5) + --no-longpoll disable X-Long-Polling support + --no-stratum disable X-Stratum support + -q, --quiet disable per-thread hashmeter output + -D, --debug enable debug output + -P, --protocol-dump verbose dump of protocol-level activities + -B, --background run the miner in the background + --benchmark run in offline benchmark mode + -c, --config=FILE load a JSON-format configuration file + -V, --version display version information and exit + -h, --help display this help text and exit + +>>> Examples <<< + +Example for Heavycoin mining on heavycoinpool.com with a single GPU in your system + +cudaminer.exe -t 1 -a heavy -o stratum+tcp://stratum01.heavycoinpool.com:5333 -u <username> -p <password> -v 512 + + + +Example for Heavycoin mining on hvc.1gh.com with two GPUs in your system + +cudaminer.exe -t 2 -a heavy -o stratum+tcp://hvcpool.1gh.com:5333 -u <username> -p x -v 512 + + + +Example for Fuguecoin solo-mining with 4 GPUs in your system and a Fuguecoin wallet running on localhost + +cudaminer.exe -q -s 1 -t 4 -a fugue256 -o 
http://localhost:9089 -u <username> -p <password> + +For solo-mining you typically use -o 127.0.0.1:xxxx where xxxx represents +the RPC port number specified in your wallet's .conf file and you have to +pass the same username and password with -O as specified in the wallet's +.conf file. The wallet must also be started with the -server option and +the server flag in the wallet's .conf file set to 1. + + +>>> Additional Notes <<< + +This code should run on nVidia GPUs ranging from compute capability +2.0 up to compute capability 3.5. Just don't expect any hashing miracles +from your old clunkers. + +>>> RELEASE HISTORY <<< + + March 18, 2014 initial release. + + +>>> AUTHORS <<< + +Notable contributors to this application are: + +Christian Buchner, Christian H. (Germany): CUDA implementation + +and also many thanks to anyone else who contributed to the original +cpuminer application (Jeff Garzik, pooler), its original HVC-fork +and the HVC-fork available at hvc.1gh.com + +Source code is included to satisfy GNU GPL V2 requirements. + + +With kind regards, + + Christian Buchner ( Christian.Buchner@gmail.com ) + Christian H. ( Chris84 ) diff --git a/aclocal.m4 b/aclocal.m4 new file mode 100644 index 0000000..1b2c558 --- /dev/null +++ b/aclocal.m4 @@ -0,0 +1,1379 @@ +# generated automatically by aclocal 1.13.3 -*- Autoconf -*- + +# Copyright (C) 1996-2013 Free Software Foundation, Inc. + +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])]) +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],, +[m4_warning([this file was generated for autoconf 2.69. +You have another version of autoconf. It may work, but is not guaranteed to. +If you have problems, you may need to regenerate the build system entirely. +To do so, use the procedure documented by the package, typically 'autoreconf'.])]) + +# LIBCURL_CHECK_CONFIG ([DEFAULT-ACTION], [MINIMUM-VERSION], +# [ACTION-IF-YES], [ACTION-IF-NO]) +# ---------------------------------------------------------- +# David Shaw May-09-2006 +# +# Checks for libcurl. DEFAULT-ACTION is the string yes or no to +# specify whether to default to --with-libcurl or --without-libcurl. +# If not supplied, DEFAULT-ACTION is yes. MINIMUM-VERSION is the +# minimum version of libcurl to accept. Pass the version as a regular +# version number like 7.10.1. If not supplied, any version is +# accepted. ACTION-IF-YES is a list of shell commands to run if +# libcurl was successfully found and passed the various tests. +# ACTION-IF-NO is a list of shell commands that are run otherwise. +# Note that using --without-libcurl does run ACTION-IF-NO. +# +# This macro #defines HAVE_LIBCURL if a working libcurl setup is +# found, and sets @LIBCURL@ and @LIBCURL_CPPFLAGS@ to the necessary +# values. Other useful defines are LIBCURL_FEATURE_xxx where xxx are +# the various features supported by libcurl, and LIBCURL_PROTOCOL_yyy +# where yyy are the various protocols supported by libcurl. Both xxx +# and yyy are capitalized. See the list of AH_TEMPLATEs at the top of +# the macro for the complete list of possible defines. Shell +# variables $libcurl_feature_xxx and $libcurl_protocol_yyy are also +# defined to 'yes' for those features and protocols that were found. 
+# Note that xxx and yyy keep the same capitalization as in the +# curl-config list (e.g. it's "HTTP" and not "http"). +# +# Users may override the detected values by doing something like: +# LIBCURL="-lcurl" LIBCURL_CPPFLAGS="-I/usr/myinclude" ./configure +# +# For the sake of sanity, this macro assumes that any libcurl that is +# found is after version 7.7.2, the first version that included the +# curl-config script. Note that it is very important for people +# packaging binary versions of libcurl to include this script! +# Without curl-config, we can only guess what protocols are available, +# or use curl_version_info to figure it out at runtime. + +AC_DEFUN([LIBCURL_CHECK_CONFIG], +[ + AH_TEMPLATE([LIBCURL_FEATURE_SSL],[Defined if libcurl supports SSL]) + AH_TEMPLATE([LIBCURL_FEATURE_KRB4],[Defined if libcurl supports KRB4]) + AH_TEMPLATE([LIBCURL_FEATURE_IPV6],[Defined if libcurl supports IPv6]) + AH_TEMPLATE([LIBCURL_FEATURE_LIBZ],[Defined if libcurl supports libz]) + AH_TEMPLATE([LIBCURL_FEATURE_ASYNCHDNS],[Defined if libcurl supports AsynchDNS]) + AH_TEMPLATE([LIBCURL_FEATURE_IDN],[Defined if libcurl supports IDN]) + AH_TEMPLATE([LIBCURL_FEATURE_SSPI],[Defined if libcurl supports SSPI]) + AH_TEMPLATE([LIBCURL_FEATURE_NTLM],[Defined if libcurl supports NTLM]) + + AH_TEMPLATE([LIBCURL_PROTOCOL_HTTP],[Defined if libcurl supports HTTP]) + AH_TEMPLATE([LIBCURL_PROTOCOL_HTTPS],[Defined if libcurl supports HTTPS]) + AH_TEMPLATE([LIBCURL_PROTOCOL_FTP],[Defined if libcurl supports FTP]) + AH_TEMPLATE([LIBCURL_PROTOCOL_FTPS],[Defined if libcurl supports FTPS]) + AH_TEMPLATE([LIBCURL_PROTOCOL_FILE],[Defined if libcurl supports FILE]) + AH_TEMPLATE([LIBCURL_PROTOCOL_TELNET],[Defined if libcurl supports TELNET]) + AH_TEMPLATE([LIBCURL_PROTOCOL_LDAP],[Defined if libcurl supports LDAP]) + AH_TEMPLATE([LIBCURL_PROTOCOL_DICT],[Defined if libcurl supports DICT]) + AH_TEMPLATE([LIBCURL_PROTOCOL_TFTP],[Defined if libcurl supports TFTP]) + 
AH_TEMPLATE([LIBCURL_PROTOCOL_RTSP],[Defined if libcurl supports RTSP]) + AH_TEMPLATE([LIBCURL_PROTOCOL_POP3],[Defined if libcurl supports POP3]) + AH_TEMPLATE([LIBCURL_PROTOCOL_IMAP],[Defined if libcurl supports IMAP]) + AH_TEMPLATE([LIBCURL_PROTOCOL_SMTP],[Defined if libcurl supports SMTP]) + + AC_ARG_WITH(libcurl, + AC_HELP_STRING([--with-libcurl=PREFIX],[look for the curl library in PREFIX/lib and headers in PREFIX/include]), + [_libcurl_with=$withval],[_libcurl_with=ifelse([$1],,[yes],[$1])]) + + if test "$_libcurl_with" != "no" ; then + + AC_PROG_AWK + + _libcurl_version_parse="eval $AWK '{split(\$NF,A,\".\"); X=256*256*A[[1]]+256*A[[2]]+A[[3]]; print X;}'" + + _libcurl_try_link=yes + + if test -d "$_libcurl_with" ; then + LIBCURL_CPPFLAGS="-I$withval/include" + _libcurl_ldflags="-L$withval/lib" + AC_PATH_PROG([_libcurl_config],[curl-config],[], + ["$withval/bin"]) + else + AC_PATH_PROG([_libcurl_config],[curl-config],[],[$PATH]) + fi + + if test x$_libcurl_config != "x" ; then + AC_CACHE_CHECK([for the version of libcurl], + [libcurl_cv_lib_curl_version], + [libcurl_cv_lib_curl_version=`$_libcurl_config --version | $AWK '{print $[]2}'`]) + + _libcurl_version=`echo $libcurl_cv_lib_curl_version | $_libcurl_version_parse` + _libcurl_wanted=`echo ifelse([$2],,[0],[$2]) | $_libcurl_version_parse` + + if test $_libcurl_wanted -gt 0 ; then + AC_CACHE_CHECK([for libcurl >= version $2], + [libcurl_cv_lib_version_ok], + [ + if test $_libcurl_version -ge $_libcurl_wanted ; then + libcurl_cv_lib_version_ok=yes + else + libcurl_cv_lib_version_ok=no + fi + ]) + fi + + if test $_libcurl_wanted -eq 0 || test x$libcurl_cv_lib_version_ok = xyes ; then + if test x"$LIBCURL_CPPFLAGS" = "x" ; then + LIBCURL_CPPFLAGS=`$_libcurl_config --cflags` + fi + if test x"$LIBCURL" = "x" ; then + LIBCURL=`$_libcurl_config --libs` + + # This is so silly, but Apple actually has a bug in their + # curl-config script. Fixed in Tiger, but there are still + # lots of Panther installs around. 
+ case "${host}" in + powerpc-apple-darwin7*) + LIBCURL=`echo $LIBCURL | sed -e 's|-arch i386||g'` + ;; + esac + fi + + # All curl-config scripts support --feature + _libcurl_features=`$_libcurl_config --feature` + + # Is it modern enough to have --protocols? (7.12.4) + if test $_libcurl_version -ge 461828 ; then + _libcurl_protocols=`$_libcurl_config --protocols` + fi + else + _libcurl_try_link=no + fi + + unset _libcurl_wanted + fi + + if test $_libcurl_try_link = yes ; then + + # we didn't find curl-config, so let's see if the user-supplied + # link line (or failing that, "-lcurl") is enough. + LIBCURL=${LIBCURL-"$_libcurl_ldflags -lcurl"} + + AC_CACHE_CHECK([whether libcurl is usable], + [libcurl_cv_lib_curl_usable], + [ + _libcurl_save_cppflags=$CPPFLAGS + CPPFLAGS="$LIBCURL_CPPFLAGS $CPPFLAGS" + _libcurl_save_libs=$LIBS + LIBS="$LIBCURL $LIBS" + + AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <curl/curl.h>]],[[ +/* Try and use a few common options to force a failure if we are + missing symbols or can't link. */ +int x; +curl_easy_setopt(NULL,CURLOPT_URL,NULL); +x=CURL_ERROR_SIZE; +x=CURLOPT_WRITEFUNCTION; +x=CURLOPT_FILE; +x=CURLOPT_ERRORBUFFER; +x=CURLOPT_STDERR; +x=CURLOPT_VERBOSE; +if (x) ; +]])],libcurl_cv_lib_curl_usable=yes,libcurl_cv_lib_curl_usable=no) + + CPPFLAGS=$_libcurl_save_cppflags + LIBS=$_libcurl_save_libs + unset _libcurl_save_cppflags + unset _libcurl_save_libs + ]) + + if test $libcurl_cv_lib_curl_usable = yes ; then + + # Does curl_free() exist in this version of libcurl? 
+ # If not, fake it with free() + + _libcurl_save_cppflags=$CPPFLAGS + CPPFLAGS="$CPPFLAGS $LIBCURL_CPPFLAGS" + _libcurl_save_libs=$LIBS + LIBS="$LIBS $LIBCURL" + + AC_CHECK_FUNC(curl_free,, + AC_DEFINE(curl_free,free, + [Define curl_free() as free() if our version of curl lacks curl_free.])) + + CPPFLAGS=$_libcurl_save_cppflags + LIBS=$_libcurl_save_libs + unset _libcurl_save_cppflags + unset _libcurl_save_libs + + AC_DEFINE(HAVE_LIBCURL,1, + [Define to 1 if you have a functional curl library.]) + AC_SUBST(LIBCURL_CPPFLAGS) + AC_SUBST(LIBCURL) + + for _libcurl_feature in $_libcurl_features ; do + AC_DEFINE_UNQUOTED(AS_TR_CPP(libcurl_feature_$_libcurl_feature),[1]) + eval AS_TR_SH(libcurl_feature_$_libcurl_feature)=yes + done + + if test "x$_libcurl_protocols" = "x" ; then + + # We don't have --protocols, so just assume that all + # protocols are available + _libcurl_protocols="HTTP FTP FILE TELNET LDAP DICT TFTP" + + if test x$libcurl_feature_SSL = xyes ; then + _libcurl_protocols="$_libcurl_protocols HTTPS" + + # FTPS wasn't standards-compliant until version + # 7.11.0 (0x070b00 == 461568) + if test $_libcurl_version -ge 461568; then + _libcurl_protocols="$_libcurl_protocols FTPS" + fi + fi + + # RTSP, IMAP, POP3 and SMTP were added in + # 7.20.0 (0x071400 == 463872) + if test $_libcurl_version -ge 463872; then + _libcurl_protocols="$_libcurl_protocols RTSP IMAP POP3 SMTP" + fi + fi + + for _libcurl_protocol in $_libcurl_protocols ; do + AC_DEFINE_UNQUOTED(AS_TR_CPP(libcurl_protocol_$_libcurl_protocol),[1]) + eval AS_TR_SH(libcurl_protocol_$_libcurl_protocol)=yes + done + else + unset LIBCURL + unset LIBCURL_CPPFLAGS + fi + fi + + unset _libcurl_try_link + unset _libcurl_version_parse + unset _libcurl_config + unset _libcurl_feature + unset _libcurl_features + unset _libcurl_protocol + unset _libcurl_protocols + unset _libcurl_version + unset _libcurl_ldflags + fi + + if test x$_libcurl_with = xno || test x$libcurl_cv_lib_curl_usable != xyes ; then + # This is 
the IF-NO path + ifelse([$4],,:,[$4]) + else + # This is the IF-YES path + ifelse([$3],,:,[$3]) + fi + + unset _libcurl_with +])dnl + +# Copyright (C) 2002-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_AUTOMAKE_VERSION(VERSION) +# ---------------------------- +# Automake X.Y traces this macro to ensure aclocal.m4 has been +# generated from the m4 files accompanying Automake X.Y. +# (This private macro should not be called outside this file.) +AC_DEFUN([AM_AUTOMAKE_VERSION], +[am__api_version='1.13' +dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to +dnl require some minimum version. Point them to the right macro. +m4_if([$1], [1.13.3], [], + [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl +]) + +# _AM_AUTOCONF_VERSION(VERSION) +# ----------------------------- +# aclocal traces this macro to find the Autoconf version. +# This is a private macro too. Using m4_define simplifies +# the logic in aclocal, which can simply ignore this definition. +m4_define([_AM_AUTOCONF_VERSION], []) + +# AM_SET_CURRENT_AUTOMAKE_VERSION +# ------------------------------- +# Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced. +# This function is AC_REQUIREd by AM_INIT_AUTOMAKE. +AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], +[AM_AUTOMAKE_VERSION([1.13.3])dnl +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) + +# Figure out how to run the assembler. -*- Autoconf -*- + +# Copyright (C) 2001-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# AM_PROG_AS +# ---------- +AC_DEFUN([AM_PROG_AS], +[# By default we simply use the C compiler to build assembly code. +AC_REQUIRE([AC_PROG_CC]) +test "${CCAS+set}" = set || CCAS=$CC +test "${CCASFLAGS+set}" = set || CCASFLAGS=$CFLAGS +AC_ARG_VAR([CCAS], [assembler compiler command (defaults to CC)]) +AC_ARG_VAR([CCASFLAGS], [assembler compiler flags (defaults to CFLAGS)]) +_AM_IF_OPTION([no-dependencies],, [_AM_DEPENDENCIES([CCAS])])dnl +]) + +# AM_AUX_DIR_EXPAND -*- Autoconf -*- + +# Copyright (C) 2001-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets +# $ac_aux_dir to '$srcdir/foo'. In other projects, it is set to +# '$srcdir', '$srcdir/..', or '$srcdir/../..'. +# +# Of course, Automake must honor this variable whenever it calls a +# tool from the auxiliary directory. The problem is that $srcdir (and +# therefore $ac_aux_dir as well) can be either absolute or relative, +# depending on how configure is run. This is pretty annoying, since +# it makes $ac_aux_dir quite unusable in subdirectories: in the top +# source directory, any form will work fine, but in subdirectories a +# relative path needs to be adjusted first. +# +# $ac_aux_dir/missing +# fails when called from a subdirectory if $ac_aux_dir is relative +# $top_srcdir/$ac_aux_dir/missing +# fails if $ac_aux_dir is absolute, +# fails when called from a subdirectory in a VPATH build with +# a relative $ac_aux_dir +# +# The reason for the latter failure is that $top_srcdir and $ac_aux_dir +# are both prefixed by $srcdir. In an in-source build this is usually +# harmless because $srcdir is '.', but things will break when you +# start a VPATH build or use an absolute $srcdir. 
+# +# So we could use something similar to $top_srcdir/$ac_aux_dir/missing, +# iff we strip the leading $srcdir from $ac_aux_dir. That would be: +# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` +# and then we would define $MISSING as +# MISSING="\${SHELL} $am_aux_dir/missing" +# This will work as long as MISSING is not called from configure, because +# unfortunately $(top_srcdir) has no meaning in configure. +# However there are other variables, like CC, which are often used in +# configure, and could therefore not use this "fixed" $ac_aux_dir. +# +# Another solution, used here, is to always expand $ac_aux_dir to an +# absolute PATH. The drawback is that using absolute paths prevent a +# configured tree to be moved without reconfiguration. + +AC_DEFUN([AM_AUX_DIR_EXPAND], +[dnl Rely on autoconf to set up CDPATH properly. +AC_PREREQ([2.50])dnl +# expand $ac_aux_dir to an absolute path +am_aux_dir=`cd $ac_aux_dir && pwd` +]) + +# AM_CONDITIONAL -*- Autoconf -*- + +# Copyright (C) 1997-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_CONDITIONAL(NAME, SHELL-CONDITION) +# ------------------------------------- +# Define a conditional. +AC_DEFUN([AM_CONDITIONAL], +[AC_PREREQ([2.52])dnl + m4_if([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], + [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl +AC_SUBST([$1_TRUE])dnl +AC_SUBST([$1_FALSE])dnl +_AM_SUBST_NOTMAKE([$1_TRUE])dnl +_AM_SUBST_NOTMAKE([$1_FALSE])dnl +m4_define([_AM_COND_VALUE_$1], [$2])dnl +if $2; then + $1_TRUE= + $1_FALSE='#' +else + $1_TRUE='#' + $1_FALSE= +fi +AC_CONFIG_COMMANDS_PRE( +[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then + AC_MSG_ERROR([[conditional "$1" was never defined. 
+Usually this means the macro was only invoked conditionally.]]) +fi])]) + +# Copyright (C) 1999-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + + +# There are a few dirty hacks below to avoid letting 'AC_PROG_CC' be +# written in clear, in which case automake, when reading aclocal.m4, +# will think it sees a *use*, and therefore will trigger all its +# C support machinery. Also note that it means that autoscan, seeing +# CC etc. in the Makefile, will ask for an AC_PROG_CC use... + + +# _AM_DEPENDENCIES(NAME) +# ---------------------- +# See how the compiler implements dependency checking. +# NAME is "CC", "CXX", "OBJC", "OBJCXX", "UPC", or "GCJ". +# We try a few techniques and use that to set a single cache variable. +# +# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was +# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular +# dependency, and given that the user is not expected to run this macro, +# just rely on AC_PROG_CC. +AC_DEFUN([_AM_DEPENDENCIES], +[AC_REQUIRE([AM_SET_DEPDIR])dnl +AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl +AC_REQUIRE([AM_MAKE_INCLUDE])dnl +AC_REQUIRE([AM_DEP_TRACK])dnl + +m4_if([$1], [CC], [depcc="$CC" am_compiler_list=], + [$1], [CXX], [depcc="$CXX" am_compiler_list=], + [$1], [OBJC], [depcc="$OBJC" am_compiler_list='gcc3 gcc'], + [$1], [OBJCXX], [depcc="$OBJCXX" am_compiler_list='gcc3 gcc'], + [$1], [UPC], [depcc="$UPC" am_compiler_list=], + [$1], [GCJ], [depcc="$GCJ" am_compiler_list='gcc3 gcc'], + [depcc="$$1" am_compiler_list=]) + +AC_CACHE_CHECK([dependency style of $depcc], + [am_cv_$1_dependencies_compiler_type], +[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. 
For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_$1_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` + fi + am__universal=false + m4_case([$1], [CC], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac], + [CXX], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac]) + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. 
It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_$1_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_$1_dependencies_compiler_type=none +fi +]) +AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) +AM_CONDITIONAL([am__fastdep$1], [ + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) +]) + + +# AM_SET_DEPDIR +# ------------- +# Choose a directory name for dependency files. +# This macro is AC_REQUIREd in _AM_DEPENDENCIES. +AC_DEFUN([AM_SET_DEPDIR], +[AC_REQUIRE([AM_SET_LEADING_DOT])dnl +AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl +]) + + +# AM_DEP_TRACK +# ------------ +AC_DEFUN([AM_DEP_TRACK], +[AC_ARG_ENABLE([dependency-tracking], [dnl +AS_HELP_STRING( + [--enable-dependency-tracking], + [do not reject slow dependency extractors]) +AS_HELP_STRING( + [--disable-dependency-tracking], + [speeds up one-time build])]) +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi +AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) +AC_SUBST([AMDEPBACKSLASH])dnl +_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl +AC_SUBST([am__nodep])dnl +_AM_SUBST_NOTMAKE([am__nodep])dnl +]) + +# Generate code to set up dependency tracking. -*- Autoconf -*- + +# Copyright (C) 1999-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + + +# _AM_OUTPUT_DEPENDENCY_COMMANDS +# ------------------------------ +AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], +[{ + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + case $CONFIG_FILES in + *\'*) eval set x "$CONFIG_FILES" ;; + *) set x $CONFIG_FILES ;; + esac + shift + for mf + do + # Strip MF so we end up with the name of the file. 
+ mf=`echo "$mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile or not. + # We used to match only the files named 'Makefile.in', but + # some people rename them; so instead we look at the file content. + # Grep'ing the first line is not enough: some people post-process + # each Makefile.in and add a new line on top of each file to say so. + # Grep'ing the whole file is not good either: AIX grep has a line + # limit of 2048, but all sed's we know have understand at least 4000. + if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then + dirpart=`AS_DIRNAME("$mf")` + else + continue + fi + # Extract the definition of DEPDIR, am__include, and am__quote + # from the Makefile without running 'make'. + DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` + test -z "$DEPDIR" && continue + am__include=`sed -n 's/^am__include = //p' < "$mf"` + test -z "$am__include" && continue + am__quote=`sed -n 's/^am__quote = //p' < "$mf"` + # Find all dependency output files, they are included files with + # $(DEPDIR) in their names. We invoke sed twice because it is the + # simplest approach to changing $(DEPDIR) to its actual value in the + # expansion. + for file in `sed -n " + s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ + sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do + # Make sure the directory exists. + test -f "$dirpart/$file" && continue + fdir=`AS_DIRNAME(["$file"])` + AS_MKDIR_P([$dirpart/$fdir]) + # echo "creating $dirpart/$file" + echo '# dummy' > "$dirpart/$file" + done + done +} +])# _AM_OUTPUT_DEPENDENCY_COMMANDS + + +# AM_OUTPUT_DEPENDENCY_COMMANDS +# ----------------------------- +# This macro should only be invoked once -- use via AC_REQUIRE. +# +# This code is only required when automatic dependency tracking +# is enabled. FIXME. This creates each '.P' file that we will +# need in order to bootstrap the dependency handling code. 
+AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], +[AC_CONFIG_COMMANDS([depfiles], + [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], + [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"]) +]) + +# Do all the work for Automake. -*- Autoconf -*- + +# Copyright (C) 1996-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This macro actually does too much. Some checks are only needed if +# your package does certain things. But this isn't really a big deal. + +# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) +# AM_INIT_AUTOMAKE([OPTIONS]) +# ----------------------------------------------- +# The call with PACKAGE and VERSION arguments is the old style +# call (pre autoconf-2.50), which is being phased out. PACKAGE +# and VERSION should now be passed to AC_INIT and removed from +# the call to AM_INIT_AUTOMAKE. +# We support both call styles for the transition. After +# the next Automake release, Autoconf can make the AC_INIT +# arguments mandatory, and then we can depend on a new Autoconf +# release and drop the old call support. +AC_DEFUN([AM_INIT_AUTOMAKE], +[AC_PREREQ([2.65])dnl +dnl Autoconf wants to disallow AM_ names. We explicitly allow +dnl the ones we care about. +m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl +AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl +AC_REQUIRE([AC_PROG_INSTALL])dnl +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." 
+ AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi +AC_SUBST([CYGPATH_W]) + +# Define the identity of the package. +dnl Distinguish between old-style and new-style calls. +m4_ifval([$2], +[AC_DIAGNOSE([obsolete], + [$0: two- and three-arguments forms are deprecated.]) +m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl + AC_SUBST([PACKAGE], [$1])dnl + AC_SUBST([VERSION], [$2])], +[_AM_SET_OPTIONS([$1])dnl +dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT. +m4_if( + m4_ifdef([AC_PACKAGE_NAME], [ok]):m4_ifdef([AC_PACKAGE_VERSION], [ok]), + [ok:ok],, + [m4_fatal([AC_INIT should be called with package and version arguments])])dnl + AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl + AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl + +_AM_IF_OPTION([no-define],, +[AC_DEFINE_UNQUOTED([PACKAGE], ["$PACKAGE"], [Name of package]) + AC_DEFINE_UNQUOTED([VERSION], ["$VERSION"], [Version number of package])])dnl + +# Some tools Automake needs. +AC_REQUIRE([AM_SANITY_CHECK])dnl +AC_REQUIRE([AC_ARG_PROGRAM])dnl +AM_MISSING_PROG([ACLOCAL], [aclocal-${am__api_version}]) +AM_MISSING_PROG([AUTOCONF], [autoconf]) +AM_MISSING_PROG([AUTOMAKE], [automake-${am__api_version}]) +AM_MISSING_PROG([AUTOHEADER], [autoheader]) +AM_MISSING_PROG([MAKEINFO], [makeinfo]) +AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl +AC_REQUIRE([AC_PROG_MKDIR_P])dnl +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. For more background, see: +# +# +AC_SUBST([mkdir_p], ['$(MKDIR_P)']) +# We need awk for the "check" target. 
The system "awk" is bad on +# some platforms. +AC_REQUIRE([AC_PROG_AWK])dnl +AC_REQUIRE([AC_PROG_MAKE_SET])dnl +AC_REQUIRE([AM_SET_LEADING_DOT])dnl +_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], + [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], + [_AM_PROG_TAR([v7])])]) +_AM_IF_OPTION([no-dependencies],, +[AC_PROVIDE_IFELSE([AC_PROG_CC], + [_AM_DEPENDENCIES([CC])], + [m4_define([AC_PROG_CC], + m4_defn([AC_PROG_CC])[_AM_DEPENDENCIES([CC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_CXX], + [_AM_DEPENDENCIES([CXX])], + [m4_define([AC_PROG_CXX], + m4_defn([AC_PROG_CXX])[_AM_DEPENDENCIES([CXX])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJC], + [_AM_DEPENDENCIES([OBJC])], + [m4_define([AC_PROG_OBJC], + m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJCXX], + [_AM_DEPENDENCIES([OBJCXX])], + [m4_define([AC_PROG_OBJCXX], + m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl +]) +AC_REQUIRE([AM_SILENT_RULES])dnl +dnl The testsuite driver may need to know about EXEEXT, so add the +dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This +dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below. +AC_CONFIG_COMMANDS_PRE(dnl +[m4_provide_if([_AM_COMPILER_EXEEXT], + [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl +]) + +dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not +dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further +dnl mangled by Autoconf and run in a shell conditional statement. +m4_define([_AC_COMPILER_EXEEXT], +m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])]) + + +# When config.status generates a header, we must update the stamp-h file. +# This file resides in the same directory as the config header +# that is generated. The stamp files are numbered to have different names. + +# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the +# loop where config.status creates the headers, so we can generate +# our stamp files there. 
+AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK],
+[# Compute $1's index in $config_headers.
+_am_arg=$1
+_am_stamp_count=1
+for _am_header in $config_headers :; do
+  case $_am_header in
+    $_am_arg | $_am_arg:* )
+      break ;;
+    * )
+      _am_stamp_count=`expr $_am_stamp_count + 1` ;;
+  esac
+done
+echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
+
+# Copyright (C) 2001-2013 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PROG_INSTALL_SH
+# ------------------
+# Define $install_sh unless it is already set (tested with '+set', so even an empty value is kept).
+AC_DEFUN([AM_PROG_INSTALL_SH],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+if test x"${install_sh+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\ *)
+    install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
+  *)
+    install_sh="\${SHELL} $am_aux_dir/install-sh"
+  esac
+fi
+AC_SUBST([install_sh])])
+
+# Copyright (C) 2003-2013 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# Check whether the underlying file-system supports filenames
+# with a leading dot.  For instance MS-DOS doesn't.
+AC_DEFUN([AM_SET_LEADING_DOT],
+[rm -rf .tst 2>/dev/null
+mkdir .tst 2>/dev/null
+if test -d .tst; then
+  am__leading_dot=.
+else
+  am__leading_dot=_
+fi
+rmdir .tst 2>/dev/null
+AC_SUBST([am__leading_dot])])
+
+# Add --enable-maintainer-mode option to configure.         -*- Autoconf -*-
+# From Jim Meyering
+
+# Copyright (C) 1996-2013 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+ +# AM_MAINTAINER_MODE([DEFAULT-MODE]) +# ---------------------------------- +# Control maintainer-specific portions of Makefiles. +# Default is to disable them, unless 'enable' is passed literally. +# For symmetry, 'disable' may be passed as well. Anyway, the user +# can override the default with the --enable/--disable switch. +AC_DEFUN([AM_MAINTAINER_MODE], +[m4_case(m4_default([$1], [disable]), + [enable], [m4_define([am_maintainer_other], [disable])], + [disable], [m4_define([am_maintainer_other], [enable])], + [m4_define([am_maintainer_other], [enable]) + m4_warn([syntax], [unexpected argument to AM@&t@_MAINTAINER_MODE: $1])]) +AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles]) + dnl maintainer-mode's default is 'disable' unless 'enable' is passed + AC_ARG_ENABLE([maintainer-mode], + [AS_HELP_STRING([--]am_maintainer_other[-maintainer-mode], + am_maintainer_other[ make rules and dependencies not useful + (and sometimes confusing) to the casual installer])], + [USE_MAINTAINER_MODE=$enableval], + [USE_MAINTAINER_MODE=]m4_if(am_maintainer_other, [enable], [no], [yes])) + AC_MSG_RESULT([$USE_MAINTAINER_MODE]) + AM_CONDITIONAL([MAINTAINER_MODE], [test $USE_MAINTAINER_MODE = yes]) + MAINT=$MAINTAINER_MODE_TRUE + AC_SUBST([MAINT])dnl +] +) + +# Check to see how 'make' treats includes. -*- Autoconf -*- + +# Copyright (C) 2001-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MAKE_INCLUDE() +# ----------------- +# Check to see how make treats includes. +AC_DEFUN([AM_MAKE_INCLUDE], +[am_make=${MAKE-make} +cat > confinc << 'END' +am__doit: + @echo this is the am__doit target +.PHONY: am__doit +END +# If we don't find an include directive, just comment out the code. 
+AC_MSG_CHECKING([for style of include used by $am_make]) +am__include="#" +am__quote= +_am_result=none +# First try GNU make style include. +echo "include confinc" > confmf +# Ignore all kinds of additional output from 'make'. +case `$am_make -s -f confmf 2> /dev/null` in #( +*the\ am__doit\ target*) + am__include=include + am__quote= + _am_result=GNU + ;; +esac +# Now try BSD make style include. +if test "$am__include" = "#"; then + echo '.include "confinc"' > confmf + case `$am_make -s -f confmf 2> /dev/null` in #( + *the\ am__doit\ target*) + am__include=.include + am__quote="\"" + _am_result=BSD + ;; + esac +fi +AC_SUBST([am__include]) +AC_SUBST([am__quote]) +AC_MSG_RESULT([$_am_result]) +rm -f confinc confmf +]) + +# Copyright (C) 1999-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_CC_C_O +# -------------- +# Like AC_PROG_CC_C_O, but changed for automake. +AC_DEFUN([AM_PROG_CC_C_O], +[AC_REQUIRE([AC_PROG_CC_C_O])dnl +AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([compile])dnl +# FIXME: we rely on the cache variable name because +# there is no other way. +set dummy $CC +am_cc=`echo $[2] | sed ['s/[^a-zA-Z0-9_]/_/g;s/^[0-9]/_/']` +eval am_t=\$ac_cv_prog_cc_${am_cc}_c_o +if test "$am_t" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +dnl Make sure AC_PROG_CC is never called again, or it will override our +dnl setting of CC. 
+m4_define([AC_PROG_CC], + [m4_fatal([AC_PROG_CC cannot be called after AM_PROG_CC_C_O])]) +]) + +# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- + +# Copyright (C) 1997-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MISSING_PROG(NAME, PROGRAM) +# ------------------------------ +AC_DEFUN([AM_MISSING_PROG], +[AC_REQUIRE([AM_MISSING_HAS_RUN]) +$1=${$1-"${am_missing_run}$2"} +AC_SUBST($1)]) + +# AM_MISSING_HAS_RUN +# ------------------ +# Define MISSING if not defined so far and test if it is modern enough. +# If it is, set am_missing_run to use it, otherwise, to nothing. +AC_DEFUN([AM_MISSING_HAS_RUN], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([missing])dnl +if test x"${MISSING+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; + *) + MISSING="\${SHELL} $am_aux_dir/missing" ;; + esac +fi +# Use eval to expand $SHELL +if eval "$MISSING --is-lightweight"; then + am_missing_run="$MISSING " +else + am_missing_run= + AC_MSG_WARN(['missing' script is too old or missing]) +fi +]) + +# Helper functions for option handling. -*- Autoconf -*- + +# Copyright (C) 2001-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_MANGLE_OPTION(NAME) +# ----------------------- +AC_DEFUN([_AM_MANGLE_OPTION], +[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])]) + +# _AM_SET_OPTION(NAME) +# -------------------- +# Set option NAME. Presently that only means defining a flag for this option. 
+AC_DEFUN([_AM_SET_OPTION], +[m4_define(_AM_MANGLE_OPTION([$1]), [1])]) + +# _AM_SET_OPTIONS(OPTIONS) +# ------------------------ +# OPTIONS is a space-separated list of Automake options. +AC_DEFUN([_AM_SET_OPTIONS], +[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])]) + +# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET]) +# ------------------------------------------- +# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. +AC_DEFUN([_AM_IF_OPTION], +[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])]) + +# Check to make sure that the build environment is sane. -*- Autoconf -*- + +# Copyright (C) 1996-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_SANITY_CHECK +# --------------- +AC_DEFUN([AM_SANITY_CHECK], +[AC_MSG_CHECKING([whether build environment is sane]) +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. +am_lf=' +' +case `pwd` in + *[[\\\"\#\$\&\'\`$am_lf]]*) + AC_MSG_ERROR([unsafe absolute working directory name]);; +esac +case $srcdir in + *[[\\\"\#\$\&\'\`$am_lf\ \ ]]*) + AC_MSG_ERROR([unsafe srcdir value: '$srcdir']);; +esac + +# Do 'set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + am_has_slept=no + for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$[*]" = "X"; then + # -L didn't work. 
+ set X `ls -t "$srcdir/configure" conftest.file` + fi + if test "$[*]" != "X $srcdir/configure conftest.file" \ + && test "$[*]" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken + alias in your environment]) + fi + if test "$[2]" = conftest.file || test $am_try -eq 2; then + break + fi + # Just in case. + sleep 1 + am_has_slept=yes + done + test "$[2]" = conftest.file + ) +then + # Ok. + : +else + AC_MSG_ERROR([newly created file is older than distributed files! +Check your system clock]) +fi +AC_MSG_RESULT([yes]) +# If we didn't sleep, we still need to ensure time stamps of config.status and +# generated files are strictly newer. +am_sleep_pid= +if grep 'slept: no' conftest.file >/dev/null 2>&1; then + ( sleep 1 ) & + am_sleep_pid=$! +fi +AC_CONFIG_COMMANDS_PRE( + [AC_MSG_CHECKING([that generated files are newer than configure]) + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. + wait $am_sleep_pid 2>/dev/null + fi + AC_MSG_RESULT([done])]) +rm -f conftest.file +]) + +# Copyright (C) 2009-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_SILENT_RULES([DEFAULT]) +# -------------------------- +# Enable less verbose build rules; with the default set to DEFAULT +# ("yes" being less verbose, "no" or empty being verbose). 
+AC_DEFUN([AM_SILENT_RULES], +[AC_ARG_ENABLE([silent-rules], [dnl +AS_HELP_STRING( + [--enable-silent-rules], + [less verbose build output (undo: "make V=1")]) +AS_HELP_STRING( + [--disable-silent-rules], + [verbose build output (undo: "make V=0")])dnl +]) +case $enable_silent_rules in @%:@ ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=m4_if([$1], [yes], [0], [1]);; +esac +dnl +dnl A few 'make' implementations (e.g., NonStop OS and NextStep) +dnl do not support nested variable expansions. +dnl See automake bug#9928 and bug#10237. +am_make=${MAKE-make} +AC_CACHE_CHECK([whether $am_make supports nested variables], + [am_cv_make_support_nested_variables], + [if AS_ECHO([['TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit']]) | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi]) +if test $am_cv_make_support_nested_variables = yes; then + dnl Using '$V' instead of '$(V)' breaks IRIX make. + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AC_SUBST([AM_V])dnl +AM_SUBST_NOTMAKE([AM_V])dnl +AC_SUBST([AM_DEFAULT_V])dnl +AM_SUBST_NOTMAKE([AM_DEFAULT_V])dnl +AC_SUBST([AM_DEFAULT_VERBOSITY])dnl +AM_BACKSLASH='\' +AC_SUBST([AM_BACKSLASH])dnl +_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl +]) + +# Copyright (C) 2001-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_STRIP +# --------------------- +# One issue with vendor 'install' (even GNU) is that you can't +# specify the program used to strip binaries. This is especially +# annoying in cross-compiling environments, where the build's strip +# is unlikely to handle the host's binaries. 
+# Fortunately install-sh will honor a STRIPPROG variable, so we +# always use install-sh in "make install-strip", and initialize +# STRIPPROG with the value of the STRIP variable (set by the user). +AC_DEFUN([AM_PROG_INSTALL_STRIP], +[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +# Installed binaries are usually stripped using 'strip' when the user +# run "make install-strip". However 'strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the 'STRIP' environment variable to overrule this program. +dnl Don't test for $cross_compiling = yes, because it might be 'maybe'. +if test "$cross_compiling" != no; then + AC_CHECK_TOOL([STRIP], [strip], :) +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" +AC_SUBST([INSTALL_STRIP_PROGRAM])]) + +# Copyright (C) 2006-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_SUBST_NOTMAKE(VARIABLE) +# --------------------------- +# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in. +# This macro is traced by Automake. +AC_DEFUN([_AM_SUBST_NOTMAKE]) + +# AM_SUBST_NOTMAKE(VARIABLE) +# -------------------------- +# Public sister of _AM_SUBST_NOTMAKE. +AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)]) + +# Check how to create a tarball. -*- Autoconf -*- + +# Copyright (C) 2004-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_PROG_TAR(FORMAT) +# -------------------- +# Check how to create a tarball in format FORMAT. +# FORMAT should be one of 'v7', 'ustar', or 'pax'. +# +# Substitute a variable $(am__tar) that is a command +# writing to stdout a FORMAT-tarball containing the directory +# $tardir. 
+#     tardir=directory && $(am__tar) > result.tar
+#
+# Substitute a variable $(am__untar) that extracts such
+# a tarball read from stdin.
+#     $(am__untar) < result.tar
+#
+AC_DEFUN([_AM_PROG_TAR],
+[# Always define AMTAR for backward compatibility.  Yes, it's still used
+# in the wild :-(  We should find a proper way to deprecate it ...
+AC_SUBST([AMTAR], ['$${TAR-tar}'])
+
+# We'll loop over all known methods to create a tar archive until one works.
+_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
+
+m4_if([$1], [v7],
+  [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'],
+
+  [m4_case([$1],
+    [ustar],
+     [# The POSIX 1988 'ustar' format is defined with fixed-size fields.
+      # There is notably a 21 bits limit for the UID and the GID.  In fact,
+      # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343
+      # and bug#13588).
+      am_max_uid=2097151 # 2^21 - 1
+      am_max_gid=$am_max_uid
+      # The $UID and $GID variables are not portable, so we need to resort
+      # to the POSIX-mandated id(1) utility.  Errors in the 'id' calls
+      # below are definitely unexpected, so allow the users to see them
+      # (that is, avoid stderr redirection).
+      am_uid=`id -u || echo unknown`
+      am_gid=`id -g || echo unknown`
+      AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format])
+      if test $am_uid -le $am_max_uid; then
+         AC_MSG_RESULT([yes])
+      else
+         AC_MSG_RESULT([no])
+         _am_tools=none
+      fi
+      AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format])
+      if test $am_gid -le $am_max_gid; then
+         AC_MSG_RESULT([yes])
+      else
+        AC_MSG_RESULT([no])
+        _am_tools=none
+      fi],
+
+  [pax],
+    [],
+
+  [m4_fatal([Unknown tar format])])
+
+  AC_MSG_CHECKING([how to create a $1 tar archive])
+
+  # Go ahead even if we have the value already cached.  We do so because we
+  # need to set the values for the 'am__tar' and 'am__untar' variables.
+  _am_tools=${am_cv_prog_tar_$1-$_am_tools}
+
+  for _am_tool in $_am_tools; do
+    case $_am_tool in
+    gnutar)
+      for _am_tar in tar gnutar gtar; do
+        AM_RUN_LOG([$_am_tar --version]) && break
+      done
+      am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
+      am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
+      am__untar="$_am_tar -xf -"
+      ;;
+    plaintar)
+      # Must skip GNU tar: if it does not support --format= it doesn't create
+      # ustar tarball either.
+      (tar --version) >/dev/null 2>&1 && continue
+      am__tar='tar chf - "$$tardir"'
+      am__tar_='tar chf - "$tardir"'
+      am__untar='tar xf -'
+      ;;
+    pax)
+      am__tar='pax -L -x $1 -w "$$tardir"'
+      am__tar_='pax -L -x $1 -w "$tardir"'
+      am__untar='pax -r'
+      ;;
+    cpio)
+      am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
+      am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
+      am__untar='cpio -i -H $1 -d'
+      ;;
+    none)
+      am__tar=false
+      am__tar_=false
+      am__untar=false
+      ;;
+    esac
+
+    # If the value was cached, stop now.  We just wanted to have am__tar
+    # and am__untar set.
+    test -n "${am_cv_prog_tar_$1}" && break
+
+    # tar/untar a dummy directory, and stop if the command works.
+    rm -rf conftest.dir
+    mkdir conftest.dir
+    echo GrepMe > conftest.dir/file
+    AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
+    rm -rf conftest.dir
+    if test -s conftest.tar; then
+      AM_RUN_LOG([$am__untar <conftest.tar])
+      AM_RUN_LOG([cat conftest.dir/file])
+      grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
+    fi
+  done
+  rm -rf conftest.dir
+
+  AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
+  AC_MSG_RESULT([$am_cv_prog_tar_$1])])
+
+AC_SUBST([am__tar])
+AC_SUBST([am__untar])
+]) # _AM_PROG_TAR
+
diff --git a/autogen.sh b/autogen.sh
new file mode 100755
index 0000000..8261a2c
--- /dev/null
+++ b/autogen.sh
@@ -0,0 +1 @@
+aclocal && autoheader && automake --add-missing --gnu --copy && autoconf
diff --git a/blake.c b/blake.c
new file mode 100644
index 0000000..a9043e9
--- /dev/null
+++ b/blake.c
@@ -0,0 +1,1120 @@
+/* $Id: blake.c 252 2011-06-07 17:55:14Z tp $ */
+/*
+ * BLAKE implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <limits.h>
+
+#include "sph_blake.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_BLAKE
+#define SPH_SMALL_FOOTPRINT_BLAKE   1
+#endif
+
+#if SPH_SMALL_FOOTPRINT_BLAKE
+#define SPH_COMPACT_BLAKE_32   1
+#endif
+
+#if SPH_64 && (SPH_SMALL_FOOTPRINT_BLAKE || !SPH_64_TRUE)
+#define SPH_COMPACT_BLAKE_64   1
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4146)
+#endif
+
+static const sph_u32 IV224[8] = {
+	SPH_C32(0xC1059ED8), SPH_C32(0x367CD507),
+	SPH_C32(0x3070DD17), SPH_C32(0xF70E5939),
+	SPH_C32(0xFFC00B31), SPH_C32(0x68581511),
+	SPH_C32(0x64F98FA7), SPH_C32(0xBEFA4FA4)
+};
+
+static const sph_u32 IV256[8] = {
+	SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85),
+	SPH_C32(0x3C6EF372), SPH_C32(0xA54FF53A),
+	SPH_C32(0x510E527F), SPH_C32(0x9B05688C),
+	SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19)
+};
+
+#if SPH_64
+
+static const sph_u64 IV384[8] = {
+	SPH_C64(0xCBBB9D5DC1059ED8), SPH_C64(0x629A292A367CD507),
+	SPH_C64(0x9159015A3070DD17), SPH_C64(0x152FECD8F70E5939),
+	SPH_C64(0x67332667FFC00B31), SPH_C64(0x8EB44A8768581511),
+	SPH_C64(0xDB0C2E0D64F98FA7), SPH_C64(0x47B5481DBEFA4FA4)
+};
+
+static const sph_u64 IV512[8] = {
+	SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),
+	SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1),
+	SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F),
+	SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179)
+};
+
+#endif
+
+#if SPH_COMPACT_BLAKE_32 || SPH_COMPACT_BLAKE_64
+
+static const unsigned sigma[16][16] = { /* indexed sigma[round][i]; rows 10..15 repeat rows 0..5 */
+	{  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
+	{ 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
+	{ 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
+	{  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
+	{  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
+	{  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
+	{ 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
+	{ 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
+	{  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
+	{ 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 },
+	{  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
+	{ 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
+	{ 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
+	{  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
+	{  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
+	{  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 }
+};
+
+/*
+  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
+ 14 10  4  8  9 15 13  6  1 12  0  2 11  7  5  3
+ 11  8 12  0  5  2 15 13 10 14  3  6  7  1  9  4
+  7  9  3  1 13 12 11 14  2  6  5 10  4  0 15  8
+  9  0  5  7  2  4 10 15 14  1 11 12  6  8  3 13
+  2 12  6 10  0 11  8  3  4 13  7  5 15 14  1  9
+ 12  5  1 15 14 13  4 10  0  7  6  3  9  2  8 11
+ 13 11  7 14 12  1  3  9  5  0 15  4  8  6  2 10
+  6 15 14  9 11  3  0  8 12  2 13  7  1  4 10  5
+ 10  2  8  4  7  6  1  5 15 11  9 14  3 12 13  0
+*/
+#endif
+
+#define Z00   0
+#define Z01   1
+#define Z02   2
+#define Z03   3
+#define Z04   4
+#define Z05   5
+#define Z06   6
+#define Z07   7
+#define Z08   8
+#define Z09   9
+#define Z0A   A
+#define Z0B   B
+#define Z0C   C
+#define Z0D   D
+#define Z0E   E
+#define Z0F   F
+
+#define Z10   E
+#define Z11   A
+#define Z12   4
+#define Z13   8
+#define Z14   9
+#define Z15   F
+#define Z16   D
+#define Z17   6
+#define Z18   1
+#define Z19   C
+#define Z1A   0
+#define Z1B   2
+#define Z1C   B
+#define Z1D   7
+#define Z1E   5
+#define Z1F   3
+
+#define Z20   B
+#define Z21   8
+#define Z22   C
+#define Z23   0
+#define Z24   5
+#define Z25   2
+#define Z26   F
+#define Z27   D
+#define Z28   A
+#define Z29   E
+#define Z2A   3
+#define Z2B   6
+#define Z2C 7 +#define Z2D 1 +#define Z2E 9 +#define Z2F 4 + +#define Z30 7 +#define Z31 9 +#define Z32 3 +#define Z33 1 +#define Z34 D +#define Z35 C +#define Z36 B +#define Z37 E +#define Z38 2 +#define Z39 6 +#define Z3A 5 +#define Z3B A +#define Z3C 4 +#define Z3D 0 +#define Z3E F +#define Z3F 8 + +#define Z40 9 +#define Z41 0 +#define Z42 5 +#define Z43 7 +#define Z44 2 +#define Z45 4 +#define Z46 A +#define Z47 F +#define Z48 E +#define Z49 1 +#define Z4A B +#define Z4B C +#define Z4C 6 +#define Z4D 8 +#define Z4E 3 +#define Z4F D + +#define Z50 2 +#define Z51 C +#define Z52 6 +#define Z53 A +#define Z54 0 +#define Z55 B +#define Z56 8 +#define Z57 3 +#define Z58 4 +#define Z59 D +#define Z5A 7 +#define Z5B 5 +#define Z5C F +#define Z5D E +#define Z5E 1 +#define Z5F 9 + +#define Z60 C +#define Z61 5 +#define Z62 1 +#define Z63 F +#define Z64 E +#define Z65 D +#define Z66 4 +#define Z67 A +#define Z68 0 +#define Z69 7 +#define Z6A 6 +#define Z6B 3 +#define Z6C 9 +#define Z6D 2 +#define Z6E 8 +#define Z6F B + +#define Z70 D +#define Z71 B +#define Z72 7 +#define Z73 E +#define Z74 C +#define Z75 1 +#define Z76 3 +#define Z77 9 +#define Z78 5 +#define Z79 0 +#define Z7A F +#define Z7B 4 +#define Z7C 8 +#define Z7D 6 +#define Z7E 2 +#define Z7F A + +#define Z80 6 +#define Z81 F +#define Z82 E +#define Z83 9 +#define Z84 B +#define Z85 3 +#define Z86 0 +#define Z87 8 +#define Z88 C +#define Z89 2 +#define Z8A D +#define Z8B 7 +#define Z8C 1 +#define Z8D 4 +#define Z8E A +#define Z8F 5 + +#define Z90 A +#define Z91 2 +#define Z92 8 +#define Z93 4 +#define Z94 7 +#define Z95 6 +#define Z96 1 +#define Z97 5 +#define Z98 F +#define Z99 B +#define Z9A 9 +#define Z9B E +#define Z9C 3 +#define Z9D C +#define Z9E D +#define Z9F 0 + +#define Mx(r, i) Mx_(Z ## r ## i) +#define Mx_(n) Mx__(n) +#define Mx__(n) M ## n + +#define CSx(r, i) CSx_(Z ## r ## i) +#define CSx_(n) CSx__(n) +#define CSx__(n) CS ## n + +#define CS0 SPH_C32(0x243F6A88) +#define CS1 SPH_C32(0x85A308D3) 
+#define CS2 SPH_C32(0x13198A2E) +#define CS3 SPH_C32(0x03707344) +#define CS4 SPH_C32(0xA4093822) +#define CS5 SPH_C32(0x299F31D0) +#define CS6 SPH_C32(0x082EFA98) +#define CS7 SPH_C32(0xEC4E6C89) +#define CS8 SPH_C32(0x452821E6) +#define CS9 SPH_C32(0x38D01377) +#define CSA SPH_C32(0xBE5466CF) +#define CSB SPH_C32(0x34E90C6C) +#define CSC SPH_C32(0xC0AC29B7) +#define CSD SPH_C32(0xC97C50DD) +#define CSE SPH_C32(0x3F84D5B5) +#define CSF SPH_C32(0xB5470917) + +#if SPH_COMPACT_BLAKE_32 + +static const sph_u32 CS[16] = { + SPH_C32(0x243F6A88), SPH_C32(0x85A308D3), + SPH_C32(0x13198A2E), SPH_C32(0x03707344), + SPH_C32(0xA4093822), SPH_C32(0x299F31D0), + SPH_C32(0x082EFA98), SPH_C32(0xEC4E6C89), + SPH_C32(0x452821E6), SPH_C32(0x38D01377), + SPH_C32(0xBE5466CF), SPH_C32(0x34E90C6C), + SPH_C32(0xC0AC29B7), SPH_C32(0xC97C50DD), + SPH_C32(0x3F84D5B5), SPH_C32(0xB5470917) +}; + +#endif + +#if SPH_64 + +#define CBx(r, i) CBx_(Z ## r ## i) +#define CBx_(n) CBx__(n) +#define CBx__(n) CB ## n + +#define CB0 SPH_C64(0x243F6A8885A308D3) +#define CB1 SPH_C64(0x13198A2E03707344) +#define CB2 SPH_C64(0xA4093822299F31D0) +#define CB3 SPH_C64(0x082EFA98EC4E6C89) +#define CB4 SPH_C64(0x452821E638D01377) +#define CB5 SPH_C64(0xBE5466CF34E90C6C) +#define CB6 SPH_C64(0xC0AC29B7C97C50DD) +#define CB7 SPH_C64(0x3F84D5B5B5470917) +#define CB8 SPH_C64(0x9216D5D98979FB1B) +#define CB9 SPH_C64(0xD1310BA698DFB5AC) +#define CBA SPH_C64(0x2FFD72DBD01ADFB7) +#define CBB SPH_C64(0xB8E1AFED6A267E96) +#define CBC SPH_C64(0xBA7C9045F12C7F99) +#define CBD SPH_C64(0x24A19947B3916CF7) +#define CBE SPH_C64(0x0801F2E2858EFC16) +#define CBF SPH_C64(0x636920D871574E69) + +#if SPH_COMPACT_BLAKE_64 + +static const sph_u64 CB[16] = { + SPH_C64(0x243F6A8885A308D3), SPH_C64(0x13198A2E03707344), + SPH_C64(0xA4093822299F31D0), SPH_C64(0x082EFA98EC4E6C89), + SPH_C64(0x452821E638D01377), SPH_C64(0xBE5466CF34E90C6C), + SPH_C64(0xC0AC29B7C97C50DD), SPH_C64(0x3F84D5B5B5470917), + SPH_C64(0x9216D5D98979FB1B), 
SPH_C64(0xD1310BA698DFB5AC), + SPH_C64(0x2FFD72DBD01ADFB7), SPH_C64(0xB8E1AFED6A267E96), + SPH_C64(0xBA7C9045F12C7F99), SPH_C64(0x24A19947B3916CF7), + SPH_C64(0x0801F2E2858EFC16), SPH_C64(0x636920D871574E69) +}; + +#endif /* SPH_COMPACT_BLAKE_64 */ + +#endif /* SPH_64 */ +/* GS: 32-bit mixing step on the four state variables (a, b, c, d). It adds (m0 ^ c1) into a in the first half and (m1 ^ c0) in the second, and applies SPH_ROTR32 with amounts 16, 12, 8 and 7. Each argument is evaluated several times, so pass plain variables only. */ +#define GS(m0, m1, c0, c1, a, b, c, d) do { \ + a = SPH_T32(a + b + (m0 ^ c1)); \ + d = SPH_ROTR32(d ^ a, 16); \ + c = SPH_T32(c + d); \ + b = SPH_ROTR32(b ^ c, 12); \ + a = SPH_T32(a + b + (m1 ^ c0)); \ + d = SPH_ROTR32(d ^ a, 8); \ + c = SPH_T32(c + d); \ + b = SPH_ROTR32(b ^ c, 7); \ + } while (0) + +#if SPH_COMPACT_BLAKE_32 +/* Compact ROUND_S: eight GS steps whose message and constant indices are read from sigma[r][] at run time. Expects M[], CS[], sigma and V0..VF in scope. */ +#define ROUND_S(r) do { \ + GS(M[sigma[r][0x0]], M[sigma[r][0x1]], \ + CS[sigma[r][0x0]], CS[sigma[r][0x1]], V0, V4, V8, VC); \ + GS(M[sigma[r][0x2]], M[sigma[r][0x3]], \ + CS[sigma[r][0x2]], CS[sigma[r][0x3]], V1, V5, V9, VD); \ + GS(M[sigma[r][0x4]], M[sigma[r][0x5]], \ + CS[sigma[r][0x4]], CS[sigma[r][0x5]], V2, V6, VA, VE); \ + GS(M[sigma[r][0x6]], M[sigma[r][0x7]], \ + CS[sigma[r][0x6]], CS[sigma[r][0x7]], V3, V7, VB, VF); \ + GS(M[sigma[r][0x8]], M[sigma[r][0x9]], \ + CS[sigma[r][0x8]], CS[sigma[r][0x9]], V0, V5, VA, VF); \ + GS(M[sigma[r][0xA]], M[sigma[r][0xB]], \ + CS[sigma[r][0xA]], CS[sigma[r][0xB]], V1, V6, VB, VC); \ + GS(M[sigma[r][0xC]], M[sigma[r][0xD]], \ + CS[sigma[r][0xC]], CS[sigma[r][0xD]], V2, V7, V8, VD); \ + GS(M[sigma[r][0xE]], M[sigma[r][0xF]], \ + CS[sigma[r][0xE]], CS[sigma[r][0xF]], V3, V4, V9, VE); \ + } while (0) + +#else +/* Unrolled ROUND_S: the same eight GS steps with operands named at preprocessing time through Mx() and CSx(); r must therefore be a literal digit, not a variable. */ +#define ROUND_S(r) do { \ + GS(Mx(r, 0), Mx(r, 1), CSx(r, 0), CSx(r, 1), V0, V4, V8, VC); \ + GS(Mx(r, 2), Mx(r, 3), CSx(r, 2), CSx(r, 3), V1, V5, V9, VD); \ + GS(Mx(r, 4), Mx(r, 5), CSx(r, 4), CSx(r, 5), V2, V6, VA, VE); \ + GS(Mx(r, 6), Mx(r, 7), CSx(r, 6), CSx(r, 7), V3, V7, VB, VF); \ + GS(Mx(r, 8), Mx(r, 9), CSx(r, 8), CSx(r, 9), V0, V5, VA, VF); \ + GS(Mx(r, A), Mx(r, B), CSx(r, A), CSx(r, B), V1, V6, VB, VC); \ + GS(Mx(r, C), Mx(r, D), CSx(r, C), CSx(r, D), V2, V7, V8, VD); \ + GS(Mx(r, E), Mx(r, F), CSx(r, E), CSx(r, F), V3, V4, V9, VE); \ + } while 
(0) + +#endif /* SPH_COMPACT_BLAKE_32 */ + +#if SPH_64 +/* GB: 64-bit mixing step with the same structure as GS, using SPH_T64 and SPH_ROTR64 with amounts 32, 25, 16 and 11. Arguments are evaluated several times. */ +#define GB(m0, m1, c0, c1, a, b, c, d) do { \ + a = SPH_T64(a + b + (m0 ^ c1)); \ + d = SPH_ROTR64(d ^ a, 32); \ + c = SPH_T64(c + d); \ + b = SPH_ROTR64(b ^ c, 25); \ + a = SPH_T64(a + b + (m1 ^ c0)); \ + d = SPH_ROTR64(d ^ a, 16); \ + c = SPH_T64(c + d); \ + b = SPH_ROTR64(b ^ c, 11); \ + } while (0) + +#if SPH_COMPACT_BLAKE_64 +/* Compact ROUND_B: eight GB steps with indices read from sigma[r][] at run time. Expects M[], CB[], sigma and V0..VF in scope. */ +#define ROUND_B(r) do { \ + GB(M[sigma[r][0x0]], M[sigma[r][0x1]], \ + CB[sigma[r][0x0]], CB[sigma[r][0x1]], V0, V4, V8, VC); \ + GB(M[sigma[r][0x2]], M[sigma[r][0x3]], \ + CB[sigma[r][0x2]], CB[sigma[r][0x3]], V1, V5, V9, VD); \ + GB(M[sigma[r][0x4]], M[sigma[r][0x5]], \ + CB[sigma[r][0x4]], CB[sigma[r][0x5]], V2, V6, VA, VE); \ + GB(M[sigma[r][0x6]], M[sigma[r][0x7]], \ + CB[sigma[r][0x6]], CB[sigma[r][0x7]], V3, V7, VB, VF); \ + GB(M[sigma[r][0x8]], M[sigma[r][0x9]], \ + CB[sigma[r][0x8]], CB[sigma[r][0x9]], V0, V5, VA, VF); \ + GB(M[sigma[r][0xA]], M[sigma[r][0xB]], \ + CB[sigma[r][0xA]], CB[sigma[r][0xB]], V1, V6, VB, VC); \ + GB(M[sigma[r][0xC]], M[sigma[r][0xD]], \ + CB[sigma[r][0xC]], CB[sigma[r][0xD]], V2, V7, V8, VD); \ + GB(M[sigma[r][0xE]], M[sigma[r][0xF]], \ + CB[sigma[r][0xE]], CB[sigma[r][0xF]], V3, V4, V9, VE); \ + } while (0) + +#else +/* Unrolled ROUND_B: operands are named at preprocessing time through Mx() and CBx(), so r must be a literal digit. */ +#define ROUND_B(r) do { \ + GB(Mx(r, 0), Mx(r, 1), CBx(r, 0), CBx(r, 1), V0, V4, V8, VC); \ + GB(Mx(r, 2), Mx(r, 3), CBx(r, 2), CBx(r, 3), V1, V5, V9, VD); \ + GB(Mx(r, 4), Mx(r, 5), CBx(r, 4), CBx(r, 5), V2, V6, VA, VE); \ + GB(Mx(r, 6), Mx(r, 7), CBx(r, 6), CBx(r, 7), V3, V7, VB, VF); \ + GB(Mx(r, 8), Mx(r, 9), CBx(r, 8), CBx(r, 9), V0, V5, VA, VF); \ + GB(Mx(r, A), Mx(r, B), CBx(r, A), CBx(r, B), V1, V6, VB, VC); \ + GB(Mx(r, C), Mx(r, D), CBx(r, C), CBx(r, D), V2, V7, V8, VD); \ + GB(Mx(r, E), Mx(r, F), CBx(r, E), CBx(r, F), V3, V4, V9, VE); \ + } while (0) + +#endif /* SPH_COMPACT_BLAKE_64 */ + +#endif /* SPH_64 */ +/* DECL_STATE32: declares local copies of the 32-bit context fields H[0..7], S[0..3], T0 and T1. The macro ends with a semicolon, so it is used without one. */ +#define DECL_STATE32 \ + sph_u32 H0, H1, H2, H3, H4, H5, H6, H7; \ + sph_u32 S0, S1, S2, S3, T0, T1; +/* READ_STATE32: load those locals from *state. */ +#define READ_STATE32(state) do { \ + H0 = (state)->H[0]; \ + H1 = 
(state)->H[1]; \ + H2 = (state)->H[2]; \ + H3 = (state)->H[3]; \ + H4 = (state)->H[4]; \ + H5 = (state)->H[5]; \ + H6 = (state)->H[6]; \ + H7 = (state)->H[7]; \ + S0 = (state)->S[0]; \ + S1 = (state)->S[1]; \ + S2 = (state)->S[2]; \ + S3 = (state)->S[3]; \ + T0 = (state)->T0; \ + T1 = (state)->T1; \ + } while (0) +/* WRITE_STATE32: store the H, S and T locals back into *state. */ +#define WRITE_STATE32(state) do { \ + (state)->H[0] = H0; \ + (state)->H[1] = H1; \ + (state)->H[2] = H2; \ + (state)->H[3] = H3; \ + (state)->H[4] = H4; \ + (state)->H[5] = H5; \ + (state)->H[6] = H6; \ + (state)->H[7] = H7; \ + (state)->S[0] = S0; \ + (state)->S[1] = S1; \ + (state)->S[2] = S2; \ + (state)->S[3] = S3; \ + (state)->T0 = T0; \ + (state)->T1 = T1; \ + } while (0) + +#if SPH_COMPACT_BLAKE_32 +/* COMPRESS32 (compact form): processes the 64 bytes at buf. V0..V7 start as H0..H7, V8..VF as salt and counter words XORed with CS0..CS7; sixteen words are decoded from buf, 14 rounds run in a loop, and the result is folded back into H0..H7. Needs buf and the DECL_STATE32 locals in scope. */ +#define COMPRESS32 do { \ + sph_u32 M[16]; \ + sph_u32 V0, V1, V2, V3, V4, V5, V6, V7; \ + sph_u32 V8, V9, VA, VB, VC, VD, VE, VF; \ + unsigned r; \ + V0 = H0; \ + V1 = H1; \ + V2 = H2; \ + V3 = H3; \ + V4 = H4; \ + V5 = H5; \ + V6 = H6; \ + V7 = H7; \ + V8 = S0 ^ CS0; \ + V9 = S1 ^ CS1; \ + VA = S2 ^ CS2; \ + VB = S3 ^ CS3; \ + VC = T0 ^ CS4; \ + VD = T0 ^ CS5; \ + VE = T1 ^ CS6; \ + VF = T1 ^ CS7; \ + M[0x0] = sph_dec32be_aligned(buf + 0); \ + M[0x1] = sph_dec32be_aligned(buf + 4); \ + M[0x2] = sph_dec32be_aligned(buf + 8); \ + M[0x3] = sph_dec32be_aligned(buf + 12); \ + M[0x4] = sph_dec32be_aligned(buf + 16); \ + M[0x5] = sph_dec32be_aligned(buf + 20); \ + M[0x6] = sph_dec32be_aligned(buf + 24); \ + M[0x7] = sph_dec32be_aligned(buf + 28); \ + M[0x8] = sph_dec32be_aligned(buf + 32); \ + M[0x9] = sph_dec32be_aligned(buf + 36); \ + M[0xA] = sph_dec32be_aligned(buf + 40); \ + M[0xB] = sph_dec32be_aligned(buf + 44); \ + M[0xC] = sph_dec32be_aligned(buf + 48); \ + M[0xD] = sph_dec32be_aligned(buf + 52); \ + M[0xE] = sph_dec32be_aligned(buf + 56); \ + M[0xF] = sph_dec32be_aligned(buf + 60); \ + for (r = 0; r < 14; r ++) \ + ROUND_S(r); \ + H0 ^= S0 ^ V0 ^ V8; \ + H1 ^= S1 ^ V1 ^ V9; \ + H2 ^= S2 ^ V2 ^ VA; \ + H3 ^= S3 ^ V3 ^ VB; \ + H4 ^= S0 ^ V4 ^ 
VC; \ + H5 ^= S1 ^ V5 ^ VD; \ + H6 ^= S2 ^ V6 ^ VE; \ + H7 ^= S3 ^ V7 ^ VF; \ + } while (0) + +#else +/* COMPRESS32 (unrolled form): same setup and final fold, with the message words held in scalars M0..MF and the 14 rounds written out as ROUND_S(0..9) followed by ROUND_S(0..3). Needs buf and the DECL_STATE32 locals in scope. */ +#define COMPRESS32 do { \ + sph_u32 M0, M1, M2, M3, M4, M5, M6, M7; \ + sph_u32 M8, M9, MA, MB, MC, MD, ME, MF; \ + sph_u32 V0, V1, V2, V3, V4, V5, V6, V7; \ + sph_u32 V8, V9, VA, VB, VC, VD, VE, VF; \ + V0 = H0; \ + V1 = H1; \ + V2 = H2; \ + V3 = H3; \ + V4 = H4; \ + V5 = H5; \ + V6 = H6; \ + V7 = H7; \ + V8 = S0 ^ CS0; \ + V9 = S1 ^ CS1; \ + VA = S2 ^ CS2; \ + VB = S3 ^ CS3; \ + VC = T0 ^ CS4; \ + VD = T0 ^ CS5; \ + VE = T1 ^ CS6; \ + VF = T1 ^ CS7; \ + M0 = sph_dec32be_aligned(buf + 0); \ + M1 = sph_dec32be_aligned(buf + 4); \ + M2 = sph_dec32be_aligned(buf + 8); \ + M3 = sph_dec32be_aligned(buf + 12); \ + M4 = sph_dec32be_aligned(buf + 16); \ + M5 = sph_dec32be_aligned(buf + 20); \ + M6 = sph_dec32be_aligned(buf + 24); \ + M7 = sph_dec32be_aligned(buf + 28); \ + M8 = sph_dec32be_aligned(buf + 32); \ + M9 = sph_dec32be_aligned(buf + 36); \ + MA = sph_dec32be_aligned(buf + 40); \ + MB = sph_dec32be_aligned(buf + 44); \ + MC = sph_dec32be_aligned(buf + 48); \ + MD = sph_dec32be_aligned(buf + 52); \ + ME = sph_dec32be_aligned(buf + 56); \ + MF = sph_dec32be_aligned(buf + 60); \ + ROUND_S(0); \ + ROUND_S(1); \ + ROUND_S(2); \ + ROUND_S(3); \ + ROUND_S(4); \ + ROUND_S(5); \ + ROUND_S(6); \ + ROUND_S(7); \ + ROUND_S(8); \ + ROUND_S(9); \ + ROUND_S(0); /* rounds 10..13 reuse Z rows 0..3 */ \ + ROUND_S(1); \ + ROUND_S(2); \ + ROUND_S(3); \ + H0 ^= S0 ^ V0 ^ V8; \ + H1 ^= S1 ^ V1 ^ V9; \ + H2 ^= S2 ^ V2 ^ VA; \ + H3 ^= S3 ^ V3 ^ VB; \ + H4 ^= S0 ^ V4 ^ VC; \ + H5 ^= S1 ^ V5 ^ VD; \ + H6 ^= S2 ^ V6 ^ VE; \ + H7 ^= S3 ^ V7 ^ VF; \ + } while (0) + +#endif /* SPH_COMPACT_BLAKE_32 */ + +#if SPH_64 +/* DECL_STATE64: 64-bit counterpart of DECL_STATE32; declares the same local names with type sph_u64. */ +#define DECL_STATE64 \ + sph_u64 H0, H1, H2, H3, H4, H5, H6, H7; \ + sph_u64 S0, S1, S2, S3, T0, T1; +/* READ_STATE64: load those locals from *state. */ +#define READ_STATE64(state) do { \ + H0 = (state)->H[0]; \ + H1 = (state)->H[1]; \ + H2 = (state)->H[2]; \ + H3 = (state)->H[3]; \ + H4 = (state)->H[4]; \ + H5 = (state)->H[5]; \ + H6 = (state)->H[6]; \ + H7 = (state)->H[7]; \ + S0 = 
(state)->S[0]; \ + S1 = (state)->S[1]; \ + S2 = (state)->S[2]; \ + S3 = (state)->S[3]; \ + T0 = (state)->T0; \ + T1 = (state)->T1; \ + } while (0) +/* WRITE_STATE64: store the H, S and T locals back into *state. */ +#define WRITE_STATE64(state) do { \ + (state)->H[0] = H0; \ + (state)->H[1] = H1; \ + (state)->H[2] = H2; \ + (state)->H[3] = H3; \ + (state)->H[4] = H4; \ + (state)->H[5] = H5; \ + (state)->H[6] = H6; \ + (state)->H[7] = H7; \ + (state)->S[0] = S0; \ + (state)->S[1] = S1; \ + (state)->S[2] = S2; \ + (state)->S[3] = S3; \ + (state)->T0 = T0; \ + (state)->T1 = T1; \ + } while (0) + +#if SPH_COMPACT_BLAKE_64 +/* COMPRESS64 (compact form): processes the 128 bytes at buf. V0..V7 start as H0..H7, V8..VF as salt and counter words XORed with CB0..CB7; sixteen 64-bit words are decoded from buf, 16 rounds run in a loop, and the result is folded back into H0..H7. Needs buf and the DECL_STATE64 locals in scope. */ +#define COMPRESS64 do { \ + sph_u64 M[16]; \ + sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; \ + sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; \ + unsigned r; \ + V0 = H0; \ + V1 = H1; \ + V2 = H2; \ + V3 = H3; \ + V4 = H4; \ + V5 = H5; \ + V6 = H6; \ + V7 = H7; \ + V8 = S0 ^ CB0; \ + V9 = S1 ^ CB1; \ + VA = S2 ^ CB2; \ + VB = S3 ^ CB3; \ + VC = T0 ^ CB4; \ + VD = T0 ^ CB5; \ + VE = T1 ^ CB6; \ + VF = T1 ^ CB7; \ + M[0x0] = sph_dec64be_aligned(buf + 0); \ + M[0x1] = sph_dec64be_aligned(buf + 8); \ + M[0x2] = sph_dec64be_aligned(buf + 16); \ + M[0x3] = sph_dec64be_aligned(buf + 24); \ + M[0x4] = sph_dec64be_aligned(buf + 32); \ + M[0x5] = sph_dec64be_aligned(buf + 40); \ + M[0x6] = sph_dec64be_aligned(buf + 48); \ + M[0x7] = sph_dec64be_aligned(buf + 56); \ + M[0x8] = sph_dec64be_aligned(buf + 64); \ + M[0x9] = sph_dec64be_aligned(buf + 72); \ + M[0xA] = sph_dec64be_aligned(buf + 80); \ + M[0xB] = sph_dec64be_aligned(buf + 88); \ + M[0xC] = sph_dec64be_aligned(buf + 96); \ + M[0xD] = sph_dec64be_aligned(buf + 104); \ + M[0xE] = sph_dec64be_aligned(buf + 112); \ + M[0xF] = sph_dec64be_aligned(buf + 120); \ + for (r = 0; r < 16; r ++) \ + ROUND_B(r); \ + H0 ^= S0 ^ V0 ^ V8; \ + H1 ^= S1 ^ V1 ^ V9; \ + H2 ^= S2 ^ V2 ^ VA; \ + H3 ^= S3 ^ V3 ^ VB; \ + H4 ^= S0 ^ V4 ^ VC; \ + H5 ^= S1 ^ V5 ^ VD; \ + H6 ^= S2 ^ V6 ^ VE; \ + H7 ^= S3 ^ V7 ^ VF; \ + } while (0) + +#else +/* COMPRESS64 (unrolled form): message words are held in scalars M0..MF and the rounds are written out one by one. */ +#define COMPRESS64 do { \ + sph_u64 M0, M1, M2, M3, M4, M5, 
M6, M7; \ + sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; \ + sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; \ + sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; \ + V0 = H0; \ + V1 = H1; \ + V2 = H2; \ + V3 = H3; \ + V4 = H4; \ + V5 = H5; \ + V6 = H6; \ + V7 = H7; \ + V8 = S0 ^ CB0; \ + V9 = S1 ^ CB1; \ + VA = S2 ^ CB2; \ + VB = S3 ^ CB3; \ + VC = T0 ^ CB4; \ + VD = T0 ^ CB5; \ + VE = T1 ^ CB6; \ + VF = T1 ^ CB7; \ + M0 = sph_dec64be_aligned(buf + 0); \ + M1 = sph_dec64be_aligned(buf + 8); \ + M2 = sph_dec64be_aligned(buf + 16); \ + M3 = sph_dec64be_aligned(buf + 24); \ + M4 = sph_dec64be_aligned(buf + 32); \ + M5 = sph_dec64be_aligned(buf + 40); \ + M6 = sph_dec64be_aligned(buf + 48); \ + M7 = sph_dec64be_aligned(buf + 56); \ + M8 = sph_dec64be_aligned(buf + 64); \ + M9 = sph_dec64be_aligned(buf + 72); \ + MA = sph_dec64be_aligned(buf + 80); \ + MB = sph_dec64be_aligned(buf + 88); \ + MC = sph_dec64be_aligned(buf + 96); \ + MD = sph_dec64be_aligned(buf + 104); \ + ME = sph_dec64be_aligned(buf + 112); \ + MF = sph_dec64be_aligned(buf + 120); \ + ROUND_B(0); \ + ROUND_B(1); \ + ROUND_B(2); \ + ROUND_B(3); \ + ROUND_B(4); \ + ROUND_B(5); \ + ROUND_B(6); \ + ROUND_B(7); \ + ROUND_B(8); \ + ROUND_B(9); \ + ROUND_B(0); /* rounds 10..15 reuse Z rows 0..5 */ \ + ROUND_B(1); \ + ROUND_B(2); \ + ROUND_B(3); \ + ROUND_B(4); \ + ROUND_B(5); \ + H0 ^= S0 ^ V0 ^ V8; \ + H1 ^= S1 ^ V1 ^ V9; \ + H2 ^= S2 ^ V2 ^ VA; \ + H3 ^= S3 ^ V3 ^ VB; \ + H4 ^= S0 ^ V4 ^ VC; \ + H5 ^= S1 ^ V5 ^ VD; \ + H6 ^= S2 ^ V6 ^ VE; \ + H7 ^= S3 ^ V7 ^ VF; \ + } while (0) + +#endif /* SPH_COMPACT_BLAKE_64 */ + +#endif /* SPH_64 */ +/* All-zero salt handed to blake32_init() by the public 224/256 init functions. */ +static const sph_u32 salt_zero_small[4] = { 0, 0, 0, 0 }; +/* blake32_init: reset a 32-bit context. Copies 8 words from iv into sc->H and 4 words from salt into sc->S, then clears the counter (T0, T1) and the buffer fill position. */ +static void +blake32_init(sph_blake_small_context *sc, + const sph_u32 *iv, const sph_u32 *salt) +{ + memcpy(sc->H, iv, 8 * sizeof(sph_u32)); + memcpy(sc->S, salt, 4 * sizeof(sph_u32)); + sc->T0 = sc->T1 = 0; + sc->ptr = 0; +} +/* blake32: absorb len bytes from data into the context. Input that does not fill sc->buf is only buffered; each time the buffer becomes full the counter advances by 512 and COMPRESS32 runs on it. */ +static void +blake32(sph_blake_small_context *sc, const void *data, size_t len) +{ + unsigned char *buf; + size_t ptr; + DECL_STATE32 + + buf = sc->buf; + ptr = sc->ptr; + if (len < 
(sizeof sc->buf) - ptr) { + memcpy(buf + ptr, data, len); + ptr += len; + sc->ptr = ptr; + return; /* buffer still not full: nothing to compress yet */ + } + + READ_STATE32(sc); + while (len > 0) { + size_t clen; + + clen = (sizeof sc->buf) - ptr; + if (clen > len) + clen = len; + memcpy(buf + ptr, data, clen); + ptr += clen; + data = (const unsigned char *)data + clen; + len -= clen; + if (ptr == sizeof sc->buf) { + if ((T0 = SPH_T32(T0 + 512)) < 512) /* T0 wrapped: carry into T1 */ + T1 = SPH_T32(T1 + 1); + COMPRESS32; + ptr = 0; + } + } + WRITE_STATE32(sc); + sc->ptr = ptr; +} +/* blake32_close: pad, process the final block(s) and write the digest. ub supplies n extra message bits (its top n bits); out_size_w32 is the number of 32-bit words of sc->H written to dst. The padding is fed through blake32(), which adds 512 to the counter per block, so T0/T1 are pre-adjusted here to make the counter seen by the compression come out right. */ +static void +blake32_close(sph_blake_small_context *sc, + unsigned ub, unsigned n, void *dst, size_t out_size_w32) +{ + union { + unsigned char buf[64]; + sph_u32 dummy; + } u; + size_t ptr, k; + unsigned bit_len; + unsigned z; + sph_u32 th, tl; + unsigned char *out; + /* bit_len: message bits in the last, partial block. The first padding byte keeps the top n bits of ub and sets the next bit to 1. th:tl is the total message length in bits. */ + ptr = sc->ptr; + bit_len = ((unsigned)ptr << 3) + n; + z = 0x80 >> n; + u.buf[ptr] = ((ub & -z) | z) & 0xFF; + tl = sc->T0 + bit_len; + th = sc->T1; + if (ptr == 0 && n == 0) { /* no message bits in this block: preset so that adding 512 wraps the counter to zero (assuming SPH_T32 truncates to 32 bits) */ + sc->T0 = SPH_C32(0xFFFFFE00); + sc->T1 = SPH_C32(0xFFFFFFFF); + } else if (sc->T0 == 0) { /* subtracting from T0 would borrow: take the borrow from T1 */ + sc->T0 = SPH_C32(0xFFFFFE00) + bit_len; + sc->T1 = SPH_T32(sc->T1 - 1); + } else { + sc->T0 -= 512 - bit_len; /* after blake32() adds 512 the counter equals old T0 plus bit_len */ + } + if (bit_len <= 446) { /* padding, marker bit and 64-bit length fit in this block */ + memset(u.buf + ptr + 1, 0, 55 - ptr); + if (out_size_w32 == 8) + u.buf[55] |= 1; + sph_enc32be_aligned(u.buf + 56, th); + sph_enc32be_aligned(u.buf + 60, tl); + blake32(sc, u.buf + ptr, 64 - ptr); + } else { /* not enough room: finish this block with zeros, then emit a second block that carries only padding and the length */ + memset(u.buf + ptr + 1, 0, 63 - ptr); + blake32(sc, u.buf + ptr, 64 - ptr); + sc->T0 = SPH_C32(0xFFFFFE00); + sc->T1 = SPH_C32(0xFFFFFFFF); + memset(u.buf, 0, 56); + if (out_size_w32 == 8) + u.buf[55] = 1; + sph_enc32be_aligned(u.buf + 56, th); + sph_enc32be_aligned(u.buf + 60, tl); + blake32(sc, u.buf, 64); + } + out = dst; + for (k = 0; k < out_size_w32; k ++) + sph_enc32be(out + (k << 2), sc->H[k]); +} + +#if SPH_64 +/* All-zero salt handed to blake64_init() by the public 384/512 init functions. */ +static const sph_u64 salt_zero_big[4] = { 0, 0, 0, 0 }; +/* blake64_init: reset a 64-bit context from iv and salt; counter and buffer position are cleared. */ +static void +blake64_init(sph_blake_big_context *sc, + const sph_u64 *iv, const sph_u64 *salt) +{ + memcpy(sc->H, 
iv, 8 * sizeof(sph_u64)); + memcpy(sc->S, salt, 4 * sizeof(sph_u64)); + sc->T0 = sc->T1 = 0; + sc->ptr = 0; +} +/* blake64: absorb len bytes from data into the 64-bit context. Input that does not fill sc->buf is only buffered; each time the buffer becomes full the counter advances by 1024 and COMPRESS64 runs on it. */ +static void +blake64(sph_blake_big_context *sc, const void *data, size_t len) +{ + unsigned char *buf; + size_t ptr; + DECL_STATE64 + + buf = sc->buf; + ptr = sc->ptr; + if (len < (sizeof sc->buf) - ptr) { + memcpy(buf + ptr, data, len); + ptr += len; + sc->ptr = ptr; + return; /* buffer still not full: nothing to compress yet */ + } + + READ_STATE64(sc); + while (len > 0) { + size_t clen; + + clen = (sizeof sc->buf) - ptr; + if (clen > len) + clen = len; + memcpy(buf + ptr, data, clen); + ptr += clen; + data = (const unsigned char *)data + clen; + len -= clen; + if (ptr == sizeof sc->buf) { + if ((T0 = SPH_T64(T0 + 1024)) < 1024) /* T0 wrapped: carry into T1 */ + T1 = SPH_T64(T1 + 1); + COMPRESS64; + ptr = 0; + } + } + WRITE_STATE64(sc); + sc->ptr = ptr; +} +/* blake64_close: 64-bit counterpart of blake32_close, with 128-byte blocks and a 128-bit length field at bytes 112..127. ub supplies n extra message bits; out_size_w64 is the number of 64-bit words of sc->H written to dst. T0/T1 are pre-adjusted because blake64() adds 1024 per block. */ +static void +blake64_close(sph_blake_big_context *sc, + unsigned ub, unsigned n, void *dst, size_t out_size_w64) +{ + union { + unsigned char buf[128]; + sph_u64 dummy; + } u; + size_t ptr, k; + unsigned bit_len; + unsigned z; + sph_u64 th, tl; + unsigned char *out; + /* bit_len: message bits in the last, partial block. The first padding byte keeps the top n bits of ub and sets the next bit to 1. */ + ptr = sc->ptr; + bit_len = ((unsigned)ptr << 3) + n; + z = 0x80 >> n; + u.buf[ptr] = ((ub & -z) | z) & 0xFF; + tl = sc->T0 + bit_len; + th = sc->T1; + if (ptr == 0 && n == 0) { /* no message bits in this block: preset so that adding 1024 wraps the counter to zero (assuming SPH_T64 truncates to 64 bits) */ + sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00); + sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFF); + } else if (sc->T0 == 0) { /* subtracting from T0 would borrow: take the borrow from T1 */ + sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + bit_len; + sc->T1 = SPH_T64(sc->T1 - 1); + } else { + sc->T0 -= 1024 - bit_len; /* after blake64() adds 1024 the counter equals old T0 plus bit_len */ + } + if (bit_len <= 894) { /* padding, marker bit and length fit in this block */ + memset(u.buf + ptr + 1, 0, 111 - ptr); + if (out_size_w64 == 8) + u.buf[111] |= 1; + sph_enc64be_aligned(u.buf + 112, th); + sph_enc64be_aligned(u.buf + 120, tl); + blake64(sc, u.buf + ptr, 128 - ptr); + } else { /* not enough room: finish this block with zeros, then emit a second block that carries only padding and the length */ + memset(u.buf + ptr + 1, 0, 127 - ptr); + blake64(sc, u.buf + ptr, 128 - ptr); + sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00); + sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFF); + memset(u.buf, 0, 112); + if (out_size_w64 == 8) + u.buf[111] = 1; + sph_enc64be_aligned(u.buf + 112, th); + 
sph_enc64be_aligned(u.buf + 120, tl); + blake64(sc, u.buf, 128); + } + out = dst; + for (k = 0; k < out_size_w64; k ++) + sph_enc64be(out + (k << 3), sc->H[k]); +} + +#endif /* SPH_64 */ + +/* see sph_blake.h; resets cc with IV224 and the all-zero salt */ +void +sph_blake224_init(void *cc) +{ + blake32_init(cc, IV224, salt_zero_small); +} + +/* see sph_blake.h; forwards to the shared 32-bit update routine */ +void +sph_blake224(void *cc, const void *data, size_t len) +{ + blake32(cc, data, len); +} + +/* see sph_blake.h; closes with no extra bits (ub = 0, n = 0) */ +void +sph_blake224_close(void *cc, void *dst) +{ + sph_blake224_addbits_and_close(cc, 0, 0, dst); +} + +/* see sph_blake.h; writes 7 32-bit words (28 bytes) to dst, then re-initializes cc */ +void +sph_blake224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + blake32_close(cc, ub, n, dst, 7); + sph_blake224_init(cc); +} + +/* see sph_blake.h; resets cc with IV256 and the all-zero salt */ +void +sph_blake256_init(void *cc) +{ + blake32_init(cc, IV256, salt_zero_small); +} + +/* see sph_blake.h; forwards to the shared 32-bit update routine */ +void +sph_blake256(void *cc, const void *data, size_t len) +{ + blake32(cc, data, len); +} + +/* see sph_blake.h; closes with no extra bits (ub = 0, n = 0) */ +void +sph_blake256_close(void *cc, void *dst) +{ + sph_blake256_addbits_and_close(cc, 0, 0, dst); +} + +/* see sph_blake.h; writes 8 32-bit words (32 bytes) to dst, then re-initializes cc */ +void +sph_blake256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + blake32_close(cc, ub, n, dst, 8); + sph_blake256_init(cc); +} + +#if SPH_64 + +/* see sph_blake.h; resets cc with IV384 and the all-zero salt */ +void +sph_blake384_init(void *cc) +{ + blake64_init(cc, IV384, salt_zero_big); +} + +/* see sph_blake.h; forwards to the shared 64-bit update routine */ +void +sph_blake384(void *cc, const void *data, size_t len) +{ + blake64(cc, data, len); +} + +/* see sph_blake.h; closes with no extra bits (ub = 0, n = 0) */ +void +sph_blake384_close(void *cc, void *dst) +{ + sph_blake384_addbits_and_close(cc, 0, 0, dst); +} + +/* see sph_blake.h; writes 6 64-bit words (48 bytes) to dst, then re-initializes cc */ +void +sph_blake384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + blake64_close(cc, ub, n, dst, 6); + sph_blake384_init(cc); +} + +/* see sph_blake.h; resets cc with IV512 and the all-zero salt */ +void +sph_blake512_init(void *cc) +{ + blake64_init(cc, IV512, salt_zero_big); +} + +/* see sph_blake.h; forwards to the shared 64-bit update routine */ +void +sph_blake512(void *cc, const void *data, size_t len) +{ + blake64(cc, data, len); 
+} + +/* see sph_blake.h; closes with no extra bits (ub = 0, n = 0) */ +void +sph_blake512_close(void *cc, void *dst) +{ + sph_blake512_addbits_and_close(cc, 0, 0, dst); +} + +/* see sph_blake.h; writes 8 64-bit words (64 bytes) to dst, then re-initializes cc */ +void +sph_blake512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + blake64_close(cc, ub, n, dst, 8); + sph_blake512_init(cc); +} + +#endif /* SPH_64 */ + +#ifdef __cplusplus +} +#endif diff --git a/ccminer.sln b/ccminer.sln new file mode 100644 index 0000000..a28278f --- /dev/null +++ b/ccminer.sln @@ -0,0 +1,26 @@ + +Microsoft Visual Studio Solution File, Format Version 11.00 +# Visual Studio 2010 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ccminer", "ccminer.vcxproj", "{36DC07F9-A4A6-4877-A146-1B960083CF6F}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Debug|x64 = Debug|x64 + Release|Win32 = Release|Win32 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {36DC07F9-A4A6-4877-A146-1B960083CF6F}.Debug|Win32.ActiveCfg = Debug|Win32 + {36DC07F9-A4A6-4877-A146-1B960083CF6F}.Debug|Win32.Build.0 = Debug|Win32 + {36DC07F9-A4A6-4877-A146-1B960083CF6F}.Debug|x64.ActiveCfg = Debug|x64 + {36DC07F9-A4A6-4877-A146-1B960083CF6F}.Debug|x64.Build.0 = Debug|x64 + {36DC07F9-A4A6-4877-A146-1B960083CF6F}.Release|Win32.ActiveCfg = Release|Win32 + {36DC07F9-A4A6-4877-A146-1B960083CF6F}.Release|Win32.Build.0 = Release|Win32 + {36DC07F9-A4A6-4877-A146-1B960083CF6F}.Release|x64.ActiveCfg = Release|x64 + {36DC07F9-A4A6-4877-A146-1B960083CF6F}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/ccminer.vcxproj b/ccminer.vcxproj new file mode 100644 index 0000000..b721b6c --- /dev/null +++ b/ccminer.vcxproj @@ -0,0 +1,291 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {36DC07F9-A4A6-4877-A146-1B960083CF6F} + ccminer + + + + 
Application + true + MultiByte + + + Application + true + MultiByte + + + Application + false + true + MultiByte + + + Application + false + true + MultiByte + + + + + + + + + + + + + + + + + + + + true + + + true + + + true + + + true + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions) + .;compat;compat\jansson;compat\getopt;..\pthreads\Pre-built.2\include;..\curl-7.29.0\include;..\OpenSSL-Win32\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir) + + + true + Console + cudart.lib;cuda.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;pthreadVC2.lib;libcurl.lib;ws2_32.lib;Wldap32.lib;libeay32MTd.lib;ssleay32MTd.lib;%(AdditionalDependencies) + ..\pthreads\Pre-built.2\lib\x86;..\curl-7.29.0\build\lib\Debug;..\OpenSSL-Win32\lib\VC\static;%(AdditionalLibraryDirectories);$(CudaToolkitLibDir) + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + true + + + 128 + + + true + true + compute_35,sm_35 + + + -Xptxas "-abi=no -v" %(AdditionalOptions) + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions) + .;compat;compat\jansson;compat\getopt;..\pthreads\Pre-built.2\include;..\curl-7.29.0_x64\include;..\OpenSSL-Win64\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir) + + + true + Console + cudart.lib;cuda.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;pthreadVC2.lib;libcurl.lib;ws2_32.lib;Wldap32.lib;libeay32MTd.lib;ssleay32MTd.lib;%(AdditionalDependencies) + 
..\pthreads\Pre-built.2\lib\x64;..\curl-7.29.0_x64\build\lib\Debug;..\OpenSSL-Win64\lib\VC\static;%(AdditionalLibraryDirectories);$(CudaToolkitLibDir) + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + true + + + 128 + + + true + true + compute_35,sm_35 + + + -Xptxas "-abi=no -v" %(AdditionalOptions) + + + + + Level3 + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions) + .;compat;compat\jansson;compat\getopt;..\pthreads\Pre-built.2\include;..\curl-7.29.0\include;..\OpenSSL-Win32\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir) + + + true + true + true + Console + cudart.lib;cuda.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;pthreadVC2.lib;libcurl.lib;ws2_32.lib;Wldap32.lib;libeay32MT.lib;ssleay32MT.lib;%(AdditionalDependencies) + ..\pthreads\Pre-built.2\lib\x86;..\curl-7.29.0\build\lib\Release;..\OpenSSL-Win32\lib\VC\static;%(AdditionalLibraryDirectories);$(CudaToolkitLibDir) + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + true + + + 128 + + + true + true + compute_35,sm_35 + + + -Xptxas "-abi=no -v" %(AdditionalOptions) + + + + + Level3 + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions) + .;compat;compat\jansson;compat\getopt;..\pthreads\Pre-built.2\include;..\curl-7.29.0_x64\include;..\OpenSSL-Win64\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir) + + + true + true + true + Console + 
cudart.lib;cuda.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;pthreadVC2.lib;libcurl.lib;ws2_32.lib;Wldap32.lib;libeay32MT.lib;ssleay32MT.lib;%(AdditionalDependencies) + ..\pthreads\Pre-built.2\lib\x64;..\curl-7.29.0_x64\build\lib\Release;..\OpenSSL-Win64\lib\VC\static;%(AdditionalLibraryDirectories);$(CudaToolkitLibDir) + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + true + + + 128 + + + true + true + compute_35,sm_35 + + + -Xptxas "-abi=no -v" %(AdditionalOptions) + + + + + + + + + + + + + + /TP %(AdditionalOptions) + /TP %(AdditionalOptions) + /TP %(AdditionalOptions) + /TP %(AdditionalOptions) + + + + + + + + + /TP %(AdditionalOptions) + /TP %(AdditionalOptions) + /TP %(AdditionalOptions) + /TP %(AdditionalOptions) + + + /TP %(AdditionalOptions) + /TP %(AdditionalOptions) + /TP %(AdditionalOptions) + /TP %(AdditionalOptions) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + compute_10,sm_10 + compute_10,sm_10 + compute_10,sm_10 + compute_10,sm_10 + + + + + + + + + + \ No newline at end of file diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters new file mode 100644 index 0000000..f3b22ce --- /dev/null +++ b/ccminer.vcxproj.filters @@ -0,0 +1,188 @@ + + + + + {2450a9c7-a97a-49e1-ba19-c8dbc5a4e3e7} + + + {c53ce808-c5c5-4c6c-99a2-3947090c62f1} + + + {5a45c1bf-81d2-4bc6-97b5-714e34f51a82} + + + {431cec61-9376-4de9-aae9-04c4250652e7} + + + {cc8bb259-5332-4a45-ba81-f4840a55b604} + + + {89362bd8-4690-4f0c-a4f7-6b2fa67a1f34} + + + {6c3cd392-b6b8-424c-87d2-10e33dbd4b41} + + + {5a31b6f4-4943-4b22-b69a-230f3cc96269} + + + {a0f072d0-a831-4c23-8d64-7a026521df9c} + + + {fe39ded0-754b-415f-a284-038a15a0aa55} + + + {17b56151-79ec-4a32-bac3-9d94ae7f68fe} + + + + + Source Files\CUDA\jansson + + + Source Files\CUDA\jansson + + + Source Files\CUDA\jansson + + + Source 
Files\CUDA\jansson + + + Source Files\CUDA\jansson + + + Source Files\CUDA\jansson + + + Source Files\getopt + + + Source Files\gettimeofday + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files\compat\sys + + + Header Files\compat + + + Header Files\compat + + + Header Files\compat\getopt + + + Header Files\compat + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files\CUDA + + + Header Files\CUDA + + + Header Files\CUDA + + + Header Files\CUDA + + + Header Files\CUDA + + + Header Files\CUDA + + + Header Files + + + Header Files + + + + + Source Files\CUDA + + + Source Files\CUDA + + + Source Files\CUDA + + + Source Files\CUDA + + + Source Files\CUDA + + + Source Files\CUDA + + + Source Files\CUDA + + + Source Files\CUDA + + + \ No newline at end of file diff --git a/compat.h b/compat.h new file mode 100644 index 0000000..ac7b8b9 --- /dev/null +++ b/compat.h @@ -0,0 +1,24 @@ +#ifndef __COMPAT_H__ +#define __COMPAT_H__ + +#ifdef WIN32 + +#include <windows.h> + +static __inline void sleep(int secs) +{ + Sleep(secs * 1000); +} + +enum { + PRIO_PROCESS = 0, +}; + +static __inline int setpriority(int which, int who, int prio) +{ + return -!SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_IDLE /*THREAD_PRIORITY_TIME_CRITICAL*/); +} + +#endif /* WIN32 */ + +#endif /* __COMPAT_H__ */ diff --git a/compat/Makefile.am b/compat/Makefile.am new file mode 100644 index 0000000..9401c8e --- /dev/null +++ b/compat/Makefile.am @@ -0,0 +1,7 @@ + +if WANT_JANSSON +SUBDIRS = jansson +else +SUBDIRS = +endif + diff --git a/compat/Makefile.in b/compat/Makefile.in new file mode 100644 index 0000000..d1d76d9 --- /dev/null +++ b/compat/Makefile.in @@ -0,0 +1,601 @@ +# Makefile.in generated by automake 1.13.3 
from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ 
+pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = compat +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/cpuminer-config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + 
$(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = jansson +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCAS = 
@CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CUDA_CFLAGS = @CUDA_CFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CUDA_LIBS = @CUDA_LIBS@ +CXX = @CXX@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JANSSON_LIBS = @JANSSON_LIBS@ +LDFLAGS = @LDFLAGS@ +LIBCURL = @LIBCURL@ +LIBCURL_CPPFLAGS = @LIBCURL_CPPFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +NVCC = @NVCC@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PTHREAD_FLAGS = @PTHREAD_FLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +WS2_LIBS = @WS2_LIBS@ +_libcurl_config = @_libcurl_config@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ 
+exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +@WANT_JANSSON_FALSE@SUBDIRS = +@WANT_JANSSON_TRUE@SUBDIRS = jansson +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu compat/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu compat/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + 
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic cscopelist-am ctags ctags-am \ + distclean distclean-generic distclean-tags distdir dvi dvi-am \ + html html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + 
maintainer-clean-generic mostlyclean mostlyclean-generic pdf \ + pdf-am ps ps-am tags tags-am uninstall uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/compat/getopt/getopt.h b/compat/getopt/getopt.h new file mode 100644 index 0000000..068cc24 --- /dev/null +++ b/compat/getopt/getopt.h @@ -0,0 +1,93 @@ +/* $Id: getopt.h,v 1.1 2009/10/16 19:50:28 rodney Exp rodney $ */ +/* $OpenBSD: getopt.h,v 1.1 2002/12/03 20:24:29 millert Exp $ */ +/* $NetBSD: getopt.h,v 1.4 2000/07/07 10:43:54 ad Exp $ */ + +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Dieter Baron and Thomas Klausner. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _GETOPT_H_
+#define _GETOPT_H_
+
+#if 0
+#include
+#endif
+
+/*
+ * GNU-like getopt_long() and 4.4BSD getsubopt()/optreset extensions.
+ * The three macros below are the values stored in struct option.has_arg.
+ */
+#define no_argument        0	/* option takes no argument */
+#define required_argument  1	/* option must be given an argument */
+#define optional_argument  2	/* option may be given an argument */
+
+struct option {
+	/* name of long option */
+	const char *name;
+	/*
+	 * one of no_argument, required_argument, and optional_argument:
+	 * whether option takes an argument
+	 */
+	int has_arg;
+	/* if not NULL, set *flag to val when option found */
+	int *flag;
+	/* if flag not NULL, value to set *flag to; else return value */
+	int val;
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int getopt_long(int, char * const *, const char *,
+    const struct option *, int *);	/* last arg: if not NULL, receives the index of the matched long option */
+int getopt_long_only(int, char * const *, const char *,
+    const struct option *, int *);	/* same parameters as getopt_long() */
+#ifndef _GETOPT_DEFINED	/* the declarations below are emitted only while this macro is undefined */
+#define _GETOPT_DEFINED
+int getopt(int, char * const *, const char *);
+int getsubopt(char **, char * const *, char **);
+
+extern char *optarg; /* getopt(3) external variables */
+extern int opterr;	/* nonzero: error messages should be printed */
+extern int optind;	/* index into the argv vector being parsed */
+extern int optopt;	/* character checked for validity */
+extern int optreset;	/* set nonzero to reset getopt */
+extern char *suboptarg; /* getsubopt(3) external variable */
+#endif /* _GETOPT_DEFINED */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !_GETOPT_H_ */
diff --git
a/compat/getopt/getopt_long.c b/compat/getopt/getopt_long.c new file mode 100644 index 0000000..90fc0cf --- /dev/null +++ b/compat/getopt/getopt_long.c @@ -0,0 +1,554 @@ +/* $Id: getopt_long.c,v 1.1 2009/10/16 19:50:28 rodney Exp rodney $ */ +/* $OpenBSD: getopt_long.c,v 1.23 2007/10/31 12:34:57 chl Exp $ */ +/* $NetBSD: getopt_long.c,v 1.15 2002/01/31 22:43:40 tv Exp $ */ + +/* + * Copyright (c) 2002 Todd C. Miller + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + */ + +#ifndef lint +static const char rcsid[]="$Id: getopt_long.c,v 1.1 2009/10/16 19:50:28 rodney Exp rodney $"; +#endif /* lint */ +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Dieter Baron and Thomas Klausner. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#if 0
+#include <err.h>
+#endif
+#include <errno.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _WIN32
+
+/* Windows has no warnx(), so a local one is defined. It differs from BSD's:
+ * 1. the message is also stored, NUL-terminated, in the global opterrmsg
+ * 2. errors are always printed out on stderr w/o the program name
+ * Callers in this file only invoke warnx() when PRINT_ERROR is true, so with
+ * opterr == 0 nothing is printed and opterrmsg is left unchanged.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <windows.h>	/* NOTE(review): confirm this third header against upstream */
+
+char opterrmsg[128]; /* last error message is stored here */
+
+static void warnx(const char *fmt, ...)
+{
+	va_list ap;
+	va_start(ap, fmt);
+	if (fmt != NULL)	/* leave room: _vsnprintf does not terminate on truncation */
+		_vsnprintf(opterrmsg, sizeof(opterrmsg) - 1, fmt, ap);
+	else
+		opterrmsg[0]='\0';
+	va_end(ap);
+	opterrmsg[sizeof(opterrmsg) - 1] = '\0';	/* terminate even when the message was cut */
+	fprintf(stderr, "%s\n", opterrmsg);	/* the message is data, never a format string */
+}
+
+#endif /*_WIN32*/
+
+#define	REPLACE_GETOPT		/* use this getopt as the system getopt(3) */
+
+#ifdef REPLACE_GETOPT
+int	opterr = 1;		/* if error message should be printed */
+int	optind = 1;		/* index into parent argv vector */
+int	optopt = '?';		/* character checked for validity */
+int	optreset;		/* reset getopt */
+char    *optarg;		/* argument associated with option */
+#endif
+
+#define PRINT_ERROR	((opterr) && (*options != ':'))
+
+#define FLAG_PERMUTE	0x01	/* permute non-options to the end of argv */
+#define FLAG_ALLARGS	0x02	/* treat non-options as args to option "-1" */
+#define FLAG_LONGONLY	0x04	/* operate as getopt_long_only */
+
+/* return values */
+#define	BADCH		(int)'?'
+#define	BADARG		((*options == ':') ? (int)':' : (int)'?')
+#define	INORDER 	(int)1
+
+#define	EMSG		""
+
+static int getopt_internal(int, char * const *, const char *,
+    const struct option *, int *, int);
+static int parse_long_options(char * const *, const char *,
+    const struct option *, int *, int);
+static int gcd(int, int);
+static void permute_args(int, int, int, char * const *);
+
+static char *place = EMSG; /* option letter processing */
+
+/* XXX: set optreset to 1 rather than these two */
+static int nonopt_start = -1; /* first non option argument (for permute) */
+static int nonopt_end = -1;   /* first option after non options (for permute) */
+
+/* Error messages */
+static const char recargchar[] = "option requires an argument -- %c";
+static const char recargstring[] = "option requires an argument -- %s";
+static const char ambig[] = "ambiguous option -- %.*s";
+static const char noarg[] = "option doesn't take an argument -- %.*s";
+static const char illoptchar[] = "unknown option -- %c";
+static const char illoptstring[] = "unknown option -- %s";
+
+/*
+ * Compute the greatest
common divisor of a and b.
+ */
+static int
+gcd(int a, int b)
+{
+	int c;
+
+	c = a % b;	/* Euclid's algorithm; b must be nonzero for this division */
+	while (c != 0) {
+		a = b;
+		b = c;
+		c = a % b;
+	}
+
+	return (b);	/* last divisor that left a zero remainder */
+}
+
+/*
+ * Exchange the block [panonopt_start, panonopt_end) of nargv with the block
+ * [panonopt_end, opt_end) that follows it, in place, keeping the order of
+ * the arguments inside each block.
+ */
+static void
+permute_args(int panonopt_start, int panonopt_end, int opt_end,
+	char * const *nargv)
+{
+	int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos;
+	char *swap;
+
+	/*
+	 * compute lengths of blocks and number and size of cycles
+	 */
+	nnonopts = panonopt_end - panonopt_start;
+	nopts = opt_end - panonopt_end;
+	ncycle = gcd(nnonopts, nopts);
+	cyclelen = (opt_end - panonopt_start) / ncycle;
+
+	for (i = 0; i < ncycle; i++) {
+		cstart = panonopt_end+i;	/* each cycle starts in the second block */
+		pos = cstart;
+		for (j = 0; j < cyclelen; j++) {
+			if (pos >= panonopt_end)	/* slot in second block: step left */
+				pos -= nnonopts;
+			else				/* slot in first block: step right */
+				pos += nopts;
+			swap = nargv[pos];	/* nargv[cstart] carries the element in transit */
+			/* LINTED const cast */
+			((char **) nargv)[pos] = nargv[cstart];
+			/* LINTED const cast */
+			((char **)nargv)[cstart] = swap;
+		}
+	}
+}
+
+/*
+ * parse_long_options --
+ *	Parse long options in argc/argv argument vector.
+ * Returns -1 if short_too is set and the option does not match long_options.
+ */
+static int
+parse_long_options(char * const *nargv, const char *options,
+	const struct option *long_options, int *idx, int short_too)
+{
+	char *current_argv, *has_equal;
+	size_t current_argv_len;
+	int i, match;
+
+	current_argv = place;	/* option text as positioned by the caller */
+	match = -1;	/* index of the best candidate so far, -1 if none */
+
+	optind++;	/* the current argv word is consumed up front */
+
+	if ((has_equal = strchr(current_argv, '=')) != NULL) {
+		/* argument found (--option=arg) */
+		current_argv_len = has_equal - current_argv;
+		has_equal++;	/* now points at the argument text */
+	} else
+		current_argv_len = strlen(current_argv);
+
+	for (i = 0; long_options[i].name; i++) {
+		/* find matching long option */
+		if (strncmp(current_argv, long_options[i].name,
+		    current_argv_len))
+			continue;
+
+		if (strlen(long_options[i].name) == current_argv_len) {
+			/* exact match */
+			match = i;
+			break;
+		}
+		/*
+		 * If this is a known short option, don't allow
+		 * a partial match of a single character.
+		 */
+		if (short_too && current_argv_len == 1)
+			continue;
+
+		if (match == -1)	/* partial match */
+			match = i;
+		else {
+			/* ambiguous abbreviation */
+			if (PRINT_ERROR)
+				warnx(ambig, (int)current_argv_len,
+				     current_argv);
+			optopt = 0;
+			return (BADCH);
+		}
+	}
+	if (match != -1) {		/* option found */
+		if (long_options[match].has_arg == no_argument
+		    && has_equal) {
+			if (PRINT_ERROR)
+				warnx(noarg, (int)current_argv_len,
+				     current_argv);
+			/*
+			 * XXX: GNU sets optopt to val regardless of flag
+			 */
+			if (long_options[match].flag == NULL)
+				optopt = long_options[match].val;
+			else
+				optopt = 0;
+			return (BADARG);
+		}
+		if (long_options[match].has_arg == required_argument ||
+		    long_options[match].has_arg == optional_argument) {
+			if (has_equal)
+				optarg = has_equal;
+			else if (long_options[match].has_arg ==
+			    required_argument) {
+				/*
+				 * optional argument doesn't use next nargv
+				 */
+				optarg = nargv[optind++];
+			}
+		}
+		if ((long_options[match].has_arg == required_argument)
+		    && (optarg == NULL)) {
+			/*
+			 * Missing argument; leading ':' indicates no error
+			 * should be generated.
+			 */
+			if (PRINT_ERROR)
+				warnx(recargstring,
+				    current_argv);
+			/*
+			 * XXX: GNU sets optopt to val regardless of flag
+			 */
+			if (long_options[match].flag == NULL)
+				optopt = long_options[match].val;
+			else
+				optopt = 0;
+			--optind;	/* undo the optind++ that fetched the missing argument */
+			return (BADARG);
+		}
+	} else {			/* unknown option */
+		if (short_too) {
+			--optind;	/* give the word back so the caller can try it as short options */
+			return (-1);
+		}
+		if (PRINT_ERROR)
+			warnx(illoptstring, current_argv);
+		optopt = 0;
+		return (BADCH);
+	}
+	if (idx)
+		*idx = match;	/* report which long_options[] entry matched */
+	if (long_options[match].flag) {
+		*long_options[match].flag = long_options[match].val;
+		return (0);	/* val was delivered through flag */
+	} else
+		return (long_options[match].val);
+}
+
+/*
+ * getopt_internal --
+ *	Parse argc/argv argument vector.  Called by user level routines.
+ */
+static int
+getopt_internal(int nargc, char * const *nargv, const char *options,
+	const struct option *long_options, int *idx, int flags)
+{
+	char *oli;				/* option letter list index */
+	int optchar, short_too;
+	static int posixly_correct = -1;	/* -1: environment not examined yet */
+
+	if (options == NULL)
+		return (-1);
+
+	/*
+	 * Disable GNU extensions if POSIXLY_CORRECT is set or options
+	 * string begins with a '+'.
+	 */
+	if (posixly_correct == -1)
+		posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
+	if (posixly_correct || *options == '+')
+		flags &= ~FLAG_PERMUTE;
+	else if (*options == '-')
+		flags |= FLAG_ALLARGS;
+	if (*options == '+' || *options == '-')
+		options++;	/* skip the mode character itself */
+
+	/*
+	 * XXX Some GNU programs (like cvs) set optind to 0 instead of
+	 * XXX using optreset.  Work around this braindamage.
+	 */
+	if (optind == 0)
+		optind = optreset = 1;
+
+	optarg = NULL;
+	if (optreset)
+		nonopt_start = nonopt_end = -1;
+start:
+	if (optreset || !*place) {		/* update scanning pointer */
+		optreset = 0;
+		if (optind >= nargc) {          /* end of argument vector */
+			place = EMSG;
+			if (nonopt_end != -1) {
+				/* do permutation, if we have to */
+				permute_args(nonopt_start, nonopt_end,
+				    optind, nargv);
+				optind -= nonopt_end - nonopt_start;
+			}
+			else if (nonopt_start != -1) {
+				/*
+				 * If we skipped non-options, set optind
+				 * to the first of them.
+				 */
+				optind = nonopt_start;
+			}
+			nonopt_start = nonopt_end = -1;
+			return (-1);
+		}
+		if (*(place = nargv[optind]) != '-' ||
+		    (place[1] == '\0' && strchr(options, '-') == NULL)) {
+			place = EMSG;		/* found non-option */
+			if (flags & FLAG_ALLARGS) {
+				/*
+				 * GNU extension:
+				 * return non-option as argument to option 1
+				 */
+				optarg = nargv[optind++];
+				return (INORDER);
+			}
+			if (!(flags & FLAG_PERMUTE)) {
+				/*
+				 * If no permutation wanted, stop parsing
+				 * at first non-option.
+				 */
+				return (-1);
+			}
+			/* do permutation */
+			if (nonopt_start == -1)
+				nonopt_start = optind;
+			else if (nonopt_end != -1) {
+				permute_args(nonopt_start, nonopt_end,
+				    optind, nargv);
+				nonopt_start = optind -
+				    (nonopt_end - nonopt_start);
+				nonopt_end = -1;
+			}
+			optind++;
+			/* process next argument */
+			goto start;
+		}
+		if (nonopt_start != -1 && nonopt_end == -1)
+			nonopt_end = optind;	/* first option after the skipped non-options */
+
+		/*
+		 * If we have "-" do nothing, if "--" we are done.
+		 */
+		if (place[1] != '\0' && *++place == '-' && place[1] == '\0') {
+			optind++;
+			place = EMSG;
+			/*
+			 * We found an option (--), so if we skipped
+			 * non-options, we have to permute.
+			 */
+			if (nonopt_end != -1) {
+				permute_args(nonopt_start, nonopt_end,
+				    optind, nargv);
+				optind -= nonopt_end - nonopt_start;
+			}
+			nonopt_start = nonopt_end = -1;
+			return (-1);
+		}
+	}
+
+	/*
+	 * Check long options if:
+	 *  1) we were passed some
+	 *  2) the arg is not just "-"
+	 *  3) either the arg starts with -- or we are getopt_long_only()
+	 */
+	if (long_options != NULL && place != nargv[optind] &&
+	    (*place == '-' || (flags & FLAG_LONGONLY))) {
+		short_too = 0;
+		if (*place == '-')
+			place++;		/* --foo long option */
+		else if (*place != ':' && strchr(options, *place) != NULL)
+			short_too = 1;		/* could be short option too */
+
+		optchar = parse_long_options(nargv, options, long_options,
+		    idx, short_too);
+		if (optchar != -1) {	/* -1 means: fall through and parse as short options */
+			place = EMSG;
+			return (optchar);
+		}
+	}
+
+	if ((optchar = (int)*place++) == (int)':' ||
+	    (optchar == (int)'-' && *place != '\0') ||
+	    (oli = strchr(options, optchar)) == NULL) {
+		/*
+		 * If the user specified "-" and '-' isn't listed in
+		 * options, return -1 (non-option) as per POSIX.
+		 * Otherwise, it is an unknown option character (or ':').
+		 */
+		if (optchar == (int)'-' && *place == '\0')
+			return (-1);
+		if (!*place)
+			++optind;	/* this argv word is exhausted */
+		if (PRINT_ERROR)
+			warnx(illoptchar, optchar);
+		optopt = optchar;
+		return (BADCH);
+	}
+	if (long_options != NULL && optchar == 'W' && oli[1] == ';') {
+		/* -W long-option */
+		if (*place)			/* no space */
+			/* NOTHING */;
+		else if (++optind >= nargc) {	/* no arg */
+			place = EMSG;
+			if (PRINT_ERROR)
+				warnx(recargchar, optchar);
+			optopt = optchar;
+			return (BADARG);
+		} else				/* white space */
+			place = nargv[optind];
+		optchar = parse_long_options(nargv, options, long_options,
+		    idx, 0);
+		place = EMSG;
+		return (optchar);
+	}
+	if (*++oli != ':') {			/* doesn't take argument */
+		if (!*place)
+			++optind;
+	} else {				/* takes (optional) argument */
+		optarg = NULL;
+		if (*place)			/* no white space */
+			optarg = place;
+		else if (oli[1] != ':') {	/* arg not optional */
+			if (++optind >= nargc) {	/* no arg */
+				place = EMSG;
+				if (PRINT_ERROR)
+					warnx(recargchar, optchar);
+				optopt = optchar;
+				return (BADARG);
+			} else
+				optarg = nargv[optind];
+		}
+		place = EMSG;
+		++optind;
+	}
+	/* dump back option letter */
+	return (optchar);
+}
+
+#ifdef REPLACE_GETOPT
+/*
+ * getopt --
+ *	Parse argc/argv argument vector.
+ *
+ * [eventually this will replace the BSD getopt]
+ */
+int
+getopt(int nargc, char * const *nargv, const char *options)
+{
+
+	/*
+	 * We don't pass FLAG_PERMUTE to getopt_internal() since
+	 * the BSD getopt(3) (unlike GNU) has never done this.
+	 *
+	 * Furthermore, since many privileged programs call getopt()
+	 * before dropping privileges it makes sense to keep things
+	 * as simple (and bug-free) as possible.
+	 */
+	return (getopt_internal(nargc, nargv, options, NULL, NULL, 0));
+}
+#endif /* REPLACE_GETOPT */
+
+/*
+ * getopt_long --
+ *	Parse argc/argv argument vector.
+ */
+int
+getopt_long(int nargc, char * const *nargv, const char *options,
+    const struct option *long_options, int *idx)
+{
+
+	return (getopt_internal(nargc, nargv, options, long_options, idx,
+	    FLAG_PERMUTE));
+}
+
+/*
+ * getopt_long_only --
+ *	Like getopt_long(), but also passes FLAG_LONGONLY to getopt_internal().
+ */
+int
+getopt_long_only(int nargc, char * const *nargv, const char *options,
+    const struct option *long_options, int *idx)
+{
+
+	return (getopt_internal(nargc, nargv, options, long_options, idx,
+	    FLAG_PERMUTE|FLAG_LONGONLY));
+}
diff --git a/compat/gettimeofday.c b/compat/gettimeofday.c
new file mode 100644
index 0000000..da17893
--- /dev/null
+++ b/compat/gettimeofday.c
@@ -0,0 +1,102 @@
+#include <time.h>
+#include <windows.h> /* FILETIME, waitable timers, performance counter */
+#if defined(_MSC_VER) || defined(_MSC_EXTENSIONS)
+  #define DELTA_EPOCH_IN_MICROSECS  11644473600000000Ui64
+#else
+  #define DELTA_EPOCH_IN_MICROSECS  11644473600000000ULL
+#endif
+
+struct timezone
+{
+  int  tz_minuteswest; /* minutes W of Greenwich */
+  int  tz_dsttime;     /* type of dst correction */
+};
+
+/*
+ * Minimal gettimeofday() replacement for Windows.
+ *
+ * If tv is not NULL it receives the current system time as seconds and
+ * microseconds since the Unix epoch (the FILETIME value is rebased by
+ * DELTA_EPOCH_IN_MICROSECS). If tz is not NULL it receives _timezone / 60
+ * and _daylight from the C runtime. Always returns 0.
+ */
+int gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+  FILETIME ft;
+  unsigned __int64 tmpres = 0;
+  static int tzflag;
+
+  if (NULL != tv)
+  {
+    GetSystemTimeAsFileTime(&ft);
+
+    /* assemble the 64-bit tick count from its two 32-bit halves */
+    tmpres |= ft.dwHighDateTime;
+    tmpres <<= 32;
+    tmpres |= ft.dwLowDateTime;
+
+    /*converting file time to unix epoch*/
+    tmpres /= 10;  /*convert into microseconds*/
+    tmpres -= DELTA_EPOCH_IN_MICROSECS;
+    tv->tv_sec = (long)(tmpres / 1000000UL);
+    tv->tv_usec = (long)(tmpres % 1000000UL);
+  }
+
+  if (NULL != tz)
+  {
+    if (!tzflag)
+    {
+      _tzset(); /* done once, before _timezone and _daylight are read */
+      tzflag++;
+    }
+    tz->tz_minuteswest = _timezone / 60;
+    tz->tz_dsttime = _daylight;
+  }
+
+  return 0;
+}
+
+/*
+ * Block the calling thread for roughly waitTime microseconds; values <= 0
+ * return immediately.
+ */
+void usleep(__int64 waitTime)
+{
+  if (waitTime > 0)
+  {
+    if (waitTime > 100)
+    {
+      // use a waitable timer for larger intervals > 0.1ms
+
+      HANDLE timer;
+      LARGE_INTEGER ft;
+
+      ft.QuadPart = -(10*waitTime); // Convert to 100 nanosecond interval, negative value indicates relative time
+
+      timer = CreateWaitableTimer(NULL, TRUE, NULL);
+      if (timer != NULL && SetWaitableTimer(timer, &ft, 0, NULL, NULL, 0))
+        WaitForSingleObject(timer, INFINITE);
+      else
+        Sleep((DWORD)((waitTime + 999) / 1000)); // timer unavailable: never wait on it, fall back to millisecond sleep
+      if (timer != NULL)
+        CloseHandle(timer);
+    }
+    else
+    {
+      // use a polling loop for short intervals <= 0.1ms
+
+      LARGE_INTEGER perfCnt, start, now;
+      __int64 elapsed;
+
+      // without a usable counter frequency the wait cannot be measured
+      if (!QueryPerformanceFrequency(&perfCnt) || perfCnt.QuadPart <= 0)
+        return;
+      QueryPerformanceCounter(&start);
+      do {
+        QueryPerformanceCounter(&now);
+        // ticks * 1000000 / ticks-per-second = elapsed microseconds
+        elapsed = (now.QuadPart - start.QuadPart) * 1000000 / perfCnt.QuadPart;
+      } while ( elapsed < waitTime );
+    }
+  }
+}
diff --git a/compat/inttypes.h b/compat/inttypes.h
new file mode 100644
index 0000000..f07d50f
--- /dev/null
+++ b/compat/inttypes.h
@@ -0,0 +1,2 @@
+#pragma once
+#include <stdint.h>
diff --git a/compat/jansson/Makefile.am b/compat/jansson/Makefile.am
new file mode 100644
index 0000000..ff38e51
--- /dev/null
+++ b/compat/jansson/Makefile.am
@@ -0,0 +1,18 @@
+
+noinst_LIBRARIES	= libjansson.a
+
+libjansson_a_SOURCES	= \
+	config.h		\
+	dump.c			\
+	hashtable.c		\
+	hashtable.h		\
+	jansson.h		\
+	jansson_private.h	\
+	load.c			\
+	strbuffer.c		\
+	strbuffer.h		\
+	utf.c			\
+	utf.h			\
+	util.h			\
+	value.c
+
diff --git a/compat/jansson/Makefile.in b/compat/jansson/Makefile.in
new file mode 100644
index 0000000..f5e30ac
--- /dev/null
+++ b/compat/jansson/Makefile.in
@@ -0,0 +1,571 @@
+# Makefile.in generated by automake 1.13.3 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2013 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+ +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = 
compat/jansson +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/depcomp +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/cpuminer-config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LIBRARIES = $(noinst_LIBRARIES) +AR = ar +ARFLAGS = cru +AM_V_AR = $(am__v_AR_@AM_V@) +am__v_AR_ = $(am__v_AR_@AM_DEFAULT_V@) +am__v_AR_0 = @echo " AR " $@; +am__v_AR_1 = +libjansson_a_AR = $(AR) $(ARFLAGS) +libjansson_a_LIBADD = +am_libjansson_a_OBJECTS = dump.$(OBJEXT) hashtable.$(OBJEXT) \ + load.$(OBJEXT) strbuffer.$(OBJEXT) utf.$(OBJEXT) \ + value.$(OBJEXT) +libjansson_a_OBJECTS = $(am_libjansson_a_OBJECTS) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libjansson_a_SOURCES) +DIST_SOURCES = $(libjansson_a_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) 
$(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CUDA_CFLAGS = @CUDA_CFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CUDA_LIBS = @CUDA_LIBS@ +CXX = @CXX@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JANSSON_LIBS = @JANSSON_LIBS@ +LDFLAGS = @LDFLAGS@ +LIBCURL = @LIBCURL@ +LIBCURL_CPPFLAGS = @LIBCURL_CPPFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +NVCC = @NVCC@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = 
@PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PTHREAD_FLAGS = @PTHREAD_FLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +WS2_LIBS = @WS2_LIBS@ +_libcurl_config = @_libcurl_config@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +noinst_LIBRARIES = libjansson.a +libjansson_a_SOURCES = \ + config.h \ + dump.c \ + hashtable.c \ + hashtable.h \ + 
jansson.h \ + jansson_private.h \ + load.c \ + strbuffer.c \ + strbuffer.h \ + utf.c \ + utf.h \ + util.h \ + value.c + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu compat/jansson/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu compat/jansson/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLIBRARIES: + -test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES) + +libjansson.a: $(libjansson_a_OBJECTS) $(libjansson_a_DEPENDENCIES) $(EXTRA_libjansson_a_DEPENDENCIES) + $(AM_V_at)-rm -f libjansson.a + $(AM_V_AR)$(libjansson_a_AR) libjansson.a $(libjansson_a_OBJECTS) $(libjansson_a_LIBADD) + $(AM_V_at)$(RANLIB) libjansson.a + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dump.Po@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/hashtable.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/load.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strbuffer.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utf.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/value.Po@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c `$(CYGPATH_W) '$<'` + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + 
here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LIBRARIES) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-noinstLIBRARIES mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-noinstLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/compat/jansson/config.h b/compat/jansson/config.h new file mode 100644 index 0000000..f11075a --- /dev/null +++ b/compat/jansson/config.h @@ -0,0 +1,73 @@ +/* config.h. Generated from config.h.in by configure. */ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Define to 1 if you have the header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#define LT_OBJDIR ".libs/" + +/* Name of package */ +#define PACKAGE "jansson" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "petri@digip.org" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "jansson" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "jansson 1.3" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "jansson" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "1.3" + +/* Define to 1 if you have the ANSI C header files. 
*/ +#define STDC_HEADERS 1 + +/* Version number of package */ +#define VERSION "1.3" + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +/* #undef inline */ +#endif + +/* Define to the type of a signed integer type of width exactly 32 bits if + such a type exists and the standard includes do not define it. */ +/* #undef int32_t */ diff --git a/compat/jansson/dump.c b/compat/jansson/dump.c new file mode 100644 index 0000000..a8c9cc6 --- /dev/null +++ b/compat/jansson/dump.c @@ -0,0 +1,460 @@ +/* + * Copyright (c) 2009, 2010 Petri Lehtinen + * + * Jansson is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include + +#include +#include "jansson_private.h" +#include "strbuffer.h" +#include "utf.h" + +#define MAX_INTEGER_STR_LENGTH 100 +#define MAX_REAL_STR_LENGTH 100 + +typedef int (*dump_func)(const char *buffer, int size, void *data); + +struct string +{ + char *buffer; + int length; + int size; +}; + +static int dump_to_strbuffer(const char *buffer, int size, void *data) +{ + return strbuffer_append_bytes((strbuffer_t *)data, buffer, size); +} + +static int dump_to_file(const char *buffer, int size, void *data) +{ + FILE *dest = (FILE *)data; + if(fwrite(buffer, size, 1, dest) != 1) + return -1; + return 0; +} + +/* 256 spaces (the maximum indentation size) */ +static char whitespace[] = " "; + +static int dump_indent(unsigned long flags, int depth, int space, dump_func dump, void *data) +{ + if(JSON_INDENT(flags) > 0) + { + int i, ws_count = JSON_INDENT(flags); + + if(dump("\n", 1, data)) + return -1; + + for(i = 0; i < depth; i++) + { + if(dump(whitespace, ws_count, data)) + return -1; + } + } + else if(space && !(flags & JSON_COMPACT)) + { + return dump(" ", 1, data); + } + return 0; +} + +static int dump_string(const 
char *str, int ascii, dump_func dump, void *data) +{ + const char *pos, *end; + int32_t codepoint; + + if(dump("\"", 1, data)) + return -1; + + end = pos = str; + while(1) + { + const char *text; + char seq[13]; + int length; + + while(*end) + { + end = utf8_iterate(pos, &codepoint); + if(!end) + return -1; + + /* mandatory escape or control char */ + if(codepoint == '\\' || codepoint == '"' || codepoint < 0x20) + break; + + /* non-ASCII */ + if(ascii && codepoint > 0x7F) + break; + + pos = end; + } + + if(pos != str) { + if(dump(str, pos - str, data)) + return -1; + } + + if(end == pos) + break; + + /* handle \, ", and control codes */ + length = 2; + switch(codepoint) + { + case '\\': text = "\\\\"; break; + case '\"': text = "\\\""; break; + case '\b': text = "\\b"; break; + case '\f': text = "\\f"; break; + case '\n': text = "\\n"; break; + case '\r': text = "\\r"; break; + case '\t': text = "\\t"; break; + default: + { + /* codepoint is in BMP */ + if(codepoint < 0x10000) + { + sprintf(seq, "\\u%04x", codepoint); + length = 6; + } + + /* not in BMP -> construct a UTF-16 surrogate pair */ + else + { + int32_t first, last; + + codepoint -= 0x10000; + first = 0xD800 | ((codepoint & 0xffc00) >> 10); + last = 0xDC00 | (codepoint & 0x003ff); + + sprintf(seq, "\\u%04x\\u%04x", first, last); + length = 12; + } + + text = seq; + break; + } + } + + if(dump(text, length, data)) + return -1; + + str = pos = end; + } + + return dump("\"", 1, data); +} + +static int object_key_compare_keys(const void *key1, const void *key2) +{ + return strcmp((*(const object_key_t **)key1)->key, + (*(const object_key_t **)key2)->key); +} + +static int object_key_compare_serials(const void *key1, const void *key2) +{ + return (*(const object_key_t **)key1)->serial - + (*(const object_key_t **)key2)->serial; +} + +static int do_dump(const json_t *json, unsigned long flags, int depth, + dump_func dump, void *data) +{ + int ascii = flags & JSON_ENSURE_ASCII ? 
1 : 0; + + switch(json_typeof(json)) { + case JSON_NULL: + return dump("null", 4, data); + + case JSON_TRUE: + return dump("true", 4, data); + + case JSON_FALSE: + return dump("false", 5, data); + + case JSON_INTEGER: + { + char buffer[MAX_INTEGER_STR_LENGTH]; + int size; + + size = snprintf(buffer, MAX_INTEGER_STR_LENGTH, "%d", json_integer_value(json)); + if(size >= MAX_INTEGER_STR_LENGTH) + return -1; + + return dump(buffer, size, data); + } + + case JSON_REAL: + { + char buffer[MAX_REAL_STR_LENGTH]; + int size; + + size = snprintf(buffer, MAX_REAL_STR_LENGTH, "%.17g", + json_real_value(json)); + if(size >= MAX_REAL_STR_LENGTH) + return -1; + + /* Make sure there's a dot or 'e' in the output. Otherwise + a real is converted to an integer when decoding */ + if(strchr(buffer, '.') == NULL && + strchr(buffer, 'e') == NULL) + { + if(size + 2 >= MAX_REAL_STR_LENGTH) { + /* No space to append ".0" */ + return -1; + } + buffer[size] = '.'; + buffer[size + 1] = '0'; + size += 2; + } + + return dump(buffer, size, data); + } + + case JSON_STRING: + return dump_string(json_string_value(json), ascii, dump, data); + + case JSON_ARRAY: + { + int i; + int n; + json_array_t *array; + + /* detect circular references */ + array = json_to_array(json); + if(array->visited) + goto array_error; + array->visited = 1; + + n = json_array_size(json); + + if(dump("[", 1, data)) + goto array_error; + if(n == 0) { + array->visited = 0; + return dump("]", 1, data); + } + if(dump_indent(flags, depth + 1, 0, dump, data)) + goto array_error; + + for(i = 0; i < n; ++i) { + if(do_dump(json_array_get(json, i), flags, depth + 1, + dump, data)) + goto array_error; + + if(i < n - 1) + { + if(dump(",", 1, data) || + dump_indent(flags, depth + 1, 1, dump, data)) + goto array_error; + } + else + { + if(dump_indent(flags, depth, 0, dump, data)) + goto array_error; + } + } + + array->visited = 0; + return dump("]", 1, data); + + array_error: + array->visited = 0; + return -1; + } + + case JSON_OBJECT: + { 
+ json_object_t *object; + void *iter; + const char *separator; + int separator_length; + + if(flags & JSON_COMPACT) { + separator = ":"; + separator_length = 1; + } + else { + separator = ": "; + separator_length = 2; + } + + /* detect circular references */ + object = json_to_object(json); + if(object->visited) + goto object_error; + object->visited = 1; + + iter = json_object_iter((json_t *)json); + + if(dump("{", 1, data)) + goto object_error; + if(!iter) { + object->visited = 0; + return dump("}", 1, data); + } + if(dump_indent(flags, depth + 1, 0, dump, data)) + goto object_error; + + if(flags & JSON_SORT_KEYS || flags & JSON_PRESERVE_ORDER) + { + const object_key_t **keys; + unsigned int size; + unsigned int i; + int (*cmp_func)(const void *, const void *); + + size = json_object_size(json); + keys = malloc(size * sizeof(object_key_t *)); + if(!keys) + goto object_error; + + i = 0; + while(iter) + { + keys[i] = jsonp_object_iter_fullkey(iter); + iter = json_object_iter_next((json_t *)json, iter); + i++; + } + assert(i == size); + + if(flags & JSON_SORT_KEYS) + cmp_func = object_key_compare_keys; + else + cmp_func = object_key_compare_serials; + + qsort((void*)keys, size, sizeof(object_key_t *), cmp_func); + + for(i = 0; i < size; i++) + { + const char *key; + json_t *value; + + key = keys[i]->key; + value = json_object_get(json, key); + assert(value); + + dump_string(key, ascii, dump, data); + if(dump(separator, separator_length, data) || + do_dump(value, flags, depth + 1, dump, data)) + { + free((void*)keys); + goto object_error; + } + + if(i < size - 1) + { + if(dump(",", 1, data) || + dump_indent(flags, depth + 1, 1, dump, data)) + { + free((void*)keys); + goto object_error; + } + } + else + { + if(dump_indent(flags, depth, 0, dump, data)) + { + free((void*)keys); + goto object_error; + } + } + } + + free((void*)keys); + } + else + { + /* Don't sort keys */ + + while(iter) + { + void *next = json_object_iter_next((json_t *)json, iter); + + 
dump_string(json_object_iter_key(iter), ascii, dump, data); + if(dump(separator, separator_length, data) || + do_dump(json_object_iter_value(iter), flags, depth + 1, + dump, data)) + goto object_error; + + if(next) + { + if(dump(",", 1, data) || + dump_indent(flags, depth + 1, 1, dump, data)) + goto object_error; + } + else + { + if(dump_indent(flags, depth, 0, dump, data)) + goto object_error; + } + + iter = next; + } + } + + object->visited = 0; + return dump("}", 1, data); + + object_error: + object->visited = 0; + return -1; + } + + default: + /* not reached */ + return -1; + } +} + + +char *json_dumps(const json_t *json, unsigned long flags) +{ + strbuffer_t strbuff; + char *result; + + if(!json_is_array(json) && !json_is_object(json)) + return NULL; + + if(strbuffer_init(&strbuff)) + return NULL; + + if(do_dump(json, flags, 0, dump_to_strbuffer, (void *)&strbuff)) { + strbuffer_close(&strbuff); + return NULL; + } + + result = strdup(strbuffer_value(&strbuff)); + strbuffer_close(&strbuff); + + return result; +} + +int json_dumpf(const json_t *json, FILE *output, unsigned long flags) +{ + if(!json_is_array(json) && !json_is_object(json)) + return -1; + + return do_dump(json, flags, 0, dump_to_file, (void *)output); +} + +int json_dump_file(const json_t *json, const char *path, unsigned long flags) +{ + int result; + + FILE *output = fopen(path, "w"); + if(!output) + return -1; + + result = json_dumpf(json, output, flags); + + fclose(output); + return result; +} diff --git a/compat/jansson/hashtable.c b/compat/jansson/hashtable.c new file mode 100644 index 0000000..791f9ac --- /dev/null +++ b/compat/jansson/hashtable.c @@ -0,0 +1,379 @@ +/* + * Copyright (c) 2009, 2010 Petri Lehtinen + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. 
+ */ + +#include + +#include +#include "hashtable.h" + +#ifdef WIN32 +#define inline __inline +#endif + +typedef struct hashtable_list list_t; +typedef struct hashtable_pair pair_t; +typedef struct hashtable_bucket bucket_t; + +#define container_of(ptr_, type_, member_) \ + ((type_ *)((char *)ptr_ - (size_t)&((type_ *)0)->member_)) + +#define list_to_pair(list_) container_of(list_, pair_t, list) + +static inline void list_init(list_t *list) +{ + list->next = list; + list->prev = list; +} + +static inline void list_insert(list_t *list, list_t *node) +{ + node->next = list; + node->prev = list->prev; + list->prev->next = node; + list->prev = node; +} + +static inline void list_remove(list_t *list) +{ + list->prev->next = list->next; + list->next->prev = list->prev; +} + +static inline int bucket_is_empty(hashtable_t *hashtable, bucket_t *bucket) +{ + return bucket->first == &hashtable->list && bucket->first == bucket->last; +} + +static void insert_to_bucket(hashtable_t *hashtable, bucket_t *bucket, + list_t *list) +{ + if(bucket_is_empty(hashtable, bucket)) + { + list_insert(&hashtable->list, list); + bucket->first = bucket->last = list; + } + else + { + list_insert(bucket->first, list); + bucket->first = list; + } +} + +static unsigned int primes[] = { + 5, 13, 23, 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, + 49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469, + 12582917, 25165843, 50331653, 100663319, 201326611, 402653189, + 805306457, 1610612741 +}; +static const unsigned int num_primes = sizeof(primes) / sizeof(unsigned int); + +static inline unsigned int num_buckets(hashtable_t *hashtable) +{ + return primes[hashtable->num_buckets]; +} + + +static pair_t *hashtable_find_pair(hashtable_t *hashtable, bucket_t *bucket, + const void *key, unsigned int hash) +{ + list_t *list; + pair_t *pair; + + if(bucket_is_empty(hashtable, bucket)) + return NULL; + + list = bucket->first; + while(1) + { + pair = list_to_pair(list); + if(pair->hash == hash 
&& hashtable->cmp_keys(pair->key, key)) + return pair; + + if(list == bucket->last) + break; + + list = list->next; + } + + return NULL; +} + +/* returns 0 on success, -1 if key was not found */ +static int hashtable_do_del(hashtable_t *hashtable, + const void *key, unsigned int hash) +{ + pair_t *pair; + bucket_t *bucket; + unsigned int index; + + index = hash % num_buckets(hashtable); + bucket = &hashtable->buckets[index]; + + pair = hashtable_find_pair(hashtable, bucket, key, hash); + if(!pair) + return -1; + + if(&pair->list == bucket->first && &pair->list == bucket->last) + bucket->first = bucket->last = &hashtable->list; + + else if(&pair->list == bucket->first) + bucket->first = pair->list.next; + + else if(&pair->list == bucket->last) + bucket->last = pair->list.prev; + + list_remove(&pair->list); + + if(hashtable->free_key) + hashtable->free_key(pair->key); + if(hashtable->free_value) + hashtable->free_value(pair->value); + + free(pair); + hashtable->size--; + + return 0; +} + +static void hashtable_do_clear(hashtable_t *hashtable) +{ + list_t *list, *next; + pair_t *pair; + + for(list = hashtable->list.next; list != &hashtable->list; list = next) + { + next = list->next; + pair = list_to_pair(list); + if(hashtable->free_key) + hashtable->free_key(pair->key); + if(hashtable->free_value) + hashtable->free_value(pair->value); + free(pair); + } +} + +/* + * Grows the bucket array to the next size in primes[] and re-links every + * pair into its new bucket. + * + * Returns 0 on success, -1 if the table is already at the largest size or + * the new bucket array cannot be allocated; on failure the hashtable is + * left unmodified and remains usable. + */ +static int hashtable_do_rehash(hashtable_t *hashtable) +{ + list_t *list, *next; + pair_t *pair; + bucket_t *new_buckets; + unsigned int i, index, new_size; + + /* primes[] is finite: never index past its last entry */ + if(hashtable->num_buckets + 1 >= num_primes) + return -1; + + new_size = primes[hashtable->num_buckets + 1]; + + /* guard the size computation against wrapping size_t */ + if(new_size > (size_t)-1 / sizeof(bucket_t)) + return -1; + + /* allocate before freeing the old array so that a failed malloc + does not leave hashtable->buckets NULL */ + new_buckets = malloc(new_size * sizeof(bucket_t)); + if(!new_buckets) + return -1; + + free(hashtable->buckets); + hashtable->buckets = new_buckets; + hashtable->num_buckets++; + + for(i = 0; i < new_size; i++) + { + hashtable->buckets[i].first = hashtable->buckets[i].last = + &hashtable->list; + } + + /* detach the chain of pairs, then re-insert each one; the last + pair still points at &hashtable->list, which ends the loop */ + list = hashtable->list.next; + list_init(&hashtable->list); + + for(; list != &hashtable->list; list = next) { + next = list->next; + pair = 
list_to_pair(list); + index = pair->hash % new_size; + insert_to_bucket(hashtable, &hashtable->buckets[index], &pair->list); + } + + return 0; +} + + +hashtable_t *hashtable_create(key_hash_fn hash_key, key_cmp_fn cmp_keys, + free_fn free_key, free_fn free_value) +{ + hashtable_t *hashtable = malloc(sizeof(hashtable_t)); + if(!hashtable) + return NULL; + + if(hashtable_init(hashtable, hash_key, cmp_keys, free_key, free_value)) + { + free(hashtable); + return NULL; + } + + return hashtable; +} + +void hashtable_destroy(hashtable_t *hashtable) +{ + hashtable_close(hashtable); + free(hashtable); +} + +int hashtable_init(hashtable_t *hashtable, + key_hash_fn hash_key, key_cmp_fn cmp_keys, + free_fn free_key, free_fn free_value) +{ + unsigned int i; + + hashtable->size = 0; + hashtable->num_buckets = 0; /* index to primes[] */ + hashtable->buckets = malloc(num_buckets(hashtable) * sizeof(bucket_t)); + if(!hashtable->buckets) + return -1; + + list_init(&hashtable->list); + + hashtable->hash_key = hash_key; + hashtable->cmp_keys = cmp_keys; + hashtable->free_key = free_key; + hashtable->free_value = free_value; + + for(i = 0; i < num_buckets(hashtable); i++) + { + hashtable->buckets[i].first = hashtable->buckets[i].last = + &hashtable->list; + } + + return 0; +} + +void hashtable_close(hashtable_t *hashtable) +{ + hashtable_do_clear(hashtable); + free(hashtable->buckets); +} + +int hashtable_set(hashtable_t *hashtable, void *key, void *value) +{ + pair_t *pair; + bucket_t *bucket; + unsigned int hash, index; + + /* rehash if the load ratio exceeds 1 */ + if(hashtable->size >= num_buckets(hashtable)) + if(hashtable_do_rehash(hashtable)) + return -1; + + hash = hashtable->hash_key(key); + index = hash % num_buckets(hashtable); + bucket = &hashtable->buckets[index]; + pair = hashtable_find_pair(hashtable, bucket, key, hash); + + if(pair) + { + if(hashtable->free_key) + hashtable->free_key(key); + if(hashtable->free_value) + hashtable->free_value(pair->value); + pair->value = 
value; + } + else + { + pair = malloc(sizeof(pair_t)); + if(!pair) + return -1; + + pair->key = key; + pair->value = value; + pair->hash = hash; + list_init(&pair->list); + + insert_to_bucket(hashtable, bucket, &pair->list); + + hashtable->size++; + } + return 0; +} + +void *hashtable_get(hashtable_t *hashtable, const void *key) +{ + pair_t *pair; + unsigned int hash; + bucket_t *bucket; + + hash = hashtable->hash_key(key); + bucket = &hashtable->buckets[hash % num_buckets(hashtable)]; + + pair = hashtable_find_pair(hashtable, bucket, key, hash); + if(!pair) + return NULL; + + return pair->value; +} + +int hashtable_del(hashtable_t *hashtable, const void *key) +{ + unsigned int hash = hashtable->hash_key(key); + return hashtable_do_del(hashtable, key, hash); +} + +void hashtable_clear(hashtable_t *hashtable) +{ + unsigned int i; + + hashtable_do_clear(hashtable); + + for(i = 0; i < num_buckets(hashtable); i++) + { + hashtable->buckets[i].first = hashtable->buckets[i].last = + &hashtable->list; + } + + list_init(&hashtable->list); + hashtable->size = 0; +} + +void *hashtable_iter(hashtable_t *hashtable) +{ + return hashtable_iter_next(hashtable, &hashtable->list); +} + +void *hashtable_iter_at(hashtable_t *hashtable, const void *key) +{ + pair_t *pair; + unsigned int hash; + bucket_t *bucket; + + hash = hashtable->hash_key(key); + bucket = &hashtable->buckets[hash % num_buckets(hashtable)]; + + pair = hashtable_find_pair(hashtable, bucket, key, hash); + if(!pair) + return NULL; + + return &pair->list; +} + +void *hashtable_iter_next(hashtable_t *hashtable, void *iter) +{ + list_t *list = (list_t *)iter; + if(list->next == &hashtable->list) + return NULL; + return list->next; +} + +void *hashtable_iter_key(void *iter) +{ + pair_t *pair = list_to_pair((list_t *)iter); + return pair->key; +} + +void *hashtable_iter_value(void *iter) +{ + pair_t *pair = list_to_pair((list_t *)iter); + return pair->value; +} + +void hashtable_iter_set(hashtable_t *hashtable, void *iter, 
void *value) +{ + pair_t *pair = list_to_pair((list_t *)iter); + + if(hashtable->free_value) + hashtable->free_value(pair->value); + + pair->value = value; +} diff --git a/compat/jansson/hashtable.h b/compat/jansson/hashtable.h new file mode 100644 index 0000000..52f8549 --- /dev/null +++ b/compat/jansson/hashtable.h @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2009, 2010 Petri Lehtinen + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + +#ifndef HASHTABLE_H +#define HASHTABLE_H + +typedef unsigned int (*key_hash_fn)(const void *key); +typedef int (*key_cmp_fn)(const void *key1, const void *key2); +typedef void (*free_fn)(void *key); + +struct hashtable_list { + struct hashtable_list *prev; + struct hashtable_list *next; +}; + +struct hashtable_pair { + void *key; + void *value; + unsigned int hash; + struct hashtable_list list; +}; + +struct hashtable_bucket { + struct hashtable_list *first; + struct hashtable_list *last; +}; + +typedef struct hashtable { + unsigned int size; + struct hashtable_bucket *buckets; + unsigned int num_buckets; /* index to primes[] */ + struct hashtable_list list; + + key_hash_fn hash_key; + key_cmp_fn cmp_keys; /* returns non-zero for equal keys */ + free_fn free_key; + free_fn free_value; +} hashtable_t; + +/** + * hashtable_create - Create a hashtable object + * + * @hash_key: The key hashing function + * @cmp_keys: The key compare function. Returns non-zero for equal and + * zero for unequal unequal keys + * @free_key: If non-NULL, called for a key that is no longer referenced. + * @free_value: If non-NULL, called for a value that is no longer referenced. + * + * Returns a new hashtable object that should be freed with + * hashtable_destroy when it's no longer used, or NULL on failure (out + * of memory). 
+ */ +hashtable_t *hashtable_create(key_hash_fn hash_key, key_cmp_fn cmp_keys, + free_fn free_key, free_fn free_value); + +/** + * hashtable_destroy - Destroy a hashtable object + * + * @hashtable: The hashtable + * + * Destroys a hashtable created with hashtable_create(). + */ +void hashtable_destroy(hashtable_t *hashtable); + +/** + * hashtable_init - Initialize a hashtable object + * + * @hashtable: The (statically allocated) hashtable object + * @hash_key: The key hashing function + * @cmp_keys: The key compare function. Returns non-zero for equal and + * zero for unequal keys + * @free_key: If non-NULL, called for a key that is no longer referenced. + * @free_value: If non-NULL, called for a value that is no longer referenced. + * + * Initializes a statically allocated hashtable object. The object + * should be cleared with hashtable_close when it's no longer used. + * + * Returns 0 on success, -1 on error (out of memory). + */ +int hashtable_init(hashtable_t *hashtable, + key_hash_fn hash_key, key_cmp_fn cmp_keys, + free_fn free_key, free_fn free_value); + +/** + * hashtable_close - Release all resources used by a hashtable object + * + * @hashtable: The hashtable + * + * Destroys a statically allocated hashtable object. + */ +void hashtable_close(hashtable_t *hashtable); + +/** + * hashtable_set - Add/modify value in hashtable + * + * @hashtable: The hashtable object + * @key: The key + * @value: The value + * + * If a value with the given key already exists, its value is replaced + * with the new value. + * + * Key and value are "stolen" in the sense that hashtable frees them + * automatically when they are no longer used. The freeing is + * accomplished by calling free_key and free_value functions that were + * supplied to hashtable_create or hashtable_init. In case one or both + * of the free functions is NULL, the corresponding item is not "stolen". + * + * Returns 0 on success, -1 on failure (out of memory). 
+ */ +int hashtable_set(hashtable_t *hashtable, void *key, void *value); + +/** + * hashtable_get - Get a value associated with a key + * + * @hashtable: The hashtable object + * @key: The key + * + * Returns value if it is found, or NULL otherwise. + */ +void *hashtable_get(hashtable_t *hashtable, const void *key); + +/** + * hashtable_del - Remove a value from the hashtable + * + * @hashtable: The hashtable object + * @key: The key + * + * Returns 0 on success, or -1 if the key was not found. + */ +int hashtable_del(hashtable_t *hashtable, const void *key); + +/** + * hashtable_clear - Clear hashtable + * + * @hashtable: The hashtable object + * + * Removes all items from the hashtable. + */ +void hashtable_clear(hashtable_t *hashtable); + +/** + * hashtable_iter - Iterate over hashtable + * + * @hashtable: The hashtable object + * + * Returns an opaque iterator to the first element in the hashtable. + * The iterator should be passed to hashtable_iter_* functions. + * The hashtable items are not iterated over in any particular order. + * + * There's no need to free the iterator in any way. The iterator is + * valid as long as the item that is referenced by the iterator is not + * deleted. Other values may be added or deleted. In particular, + * hashtable_iter_next() may be called on an iterator, and after that + * the key/value pair pointed by the old iterator may be deleted. + */ +void *hashtable_iter(hashtable_t *hashtable); + +/** + * hashtable_iter_at - Return an iterator at a specific key + * + * @hashtable: The hashtable object + * @key: The key that the iterator should point to + * + * Like hashtable_iter() but returns an iterator pointing to a + * specific key. 
+ */ +void *hashtable_iter_at(hashtable_t *hashtable, const void *key); + +/** + * hashtable_iter_next - Advance an iterator + * + * @hashtable: The hashtable object + * @iter: The iterator + * + * Returns a new iterator pointing to the next element in the + * hashtable or NULL if the whole hashtable has been iterated over. + */ +void *hashtable_iter_next(hashtable_t *hashtable, void *iter); + +/** + * hashtable_iter_key - Retrieve the key pointed by an iterator + * + * @iter: The iterator + */ +void *hashtable_iter_key(void *iter); + +/** + * hashtable_iter_value - Retrieve the value pointed by an iterator + * + * @iter: The iterator + */ +void *hashtable_iter_value(void *iter); + +/** + * hashtable_iter_set - Set the value pointed by an iterator + * + * @hashtable: The hashtable object + * @iter: The iterator + * @value: The value to set + */ +void hashtable_iter_set(hashtable_t *hashtable, void *iter, void *value); + +#endif diff --git a/compat/jansson/jansson.h b/compat/jansson/jansson.h new file mode 100644 index 0000000..781896c --- /dev/null +++ b/compat/jansson/jansson.h @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2009, 2010 Petri Lehtinen + * + * Jansson is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + +#ifndef JANSSON_H +#define JANSSON_H + +#include + +#ifdef WIN32 +#define snprintf(...) 
_snprintf(__VA_ARGS__) +#define strdup(x) _strdup(x) +#endif + +#ifdef WIN32 +#define inline __inline +#endif + +#ifndef __cplusplus +#define JSON_INLINE inline +#else +#define JSON_INLINE inline +extern "C" { +#endif + +/* types */ + +typedef enum { + JSON_OBJECT, + JSON_ARRAY, + JSON_STRING, + JSON_INTEGER, + JSON_REAL, + JSON_TRUE, + JSON_FALSE, + JSON_NULL +} json_type; + +typedef struct { + json_type type; + unsigned long refcount; +} json_t; + +#define json_typeof(json) ((json)->type) +#define json_is_object(json) (json && json_typeof(json) == JSON_OBJECT) +#define json_is_array(json) (json && json_typeof(json) == JSON_ARRAY) +#define json_is_string(json) (json && json_typeof(json) == JSON_STRING) +#define json_is_integer(json) (json && json_typeof(json) == JSON_INTEGER) +#define json_is_real(json) (json && json_typeof(json) == JSON_REAL) +#define json_is_number(json) (json_is_integer(json) || json_is_real(json)) +#define json_is_true(json) (json && json_typeof(json) == JSON_TRUE) +#define json_is_false(json) (json && json_typeof(json) == JSON_FALSE) +#define json_is_boolean(json) (json_is_true(json) || json_is_false(json)) +#define json_is_null(json) (json && json_typeof(json) == JSON_NULL) + +/* construction, destruction, reference counting */ + +json_t *json_object(void); +json_t *json_array(void); +json_t *json_string(const char *value); +json_t *json_string_nocheck(const char *value); +json_t *json_integer(int value); +json_t *json_real(double value); +json_t *json_true(void); +json_t *json_false(void); +json_t *json_null(void); + +static JSON_INLINE +json_t *json_incref(json_t *json) +{ + if(json && json->refcount != (unsigned int)-1) + ++json->refcount; + return json; +} + +/* do not call json_delete directly */ +void json_delete(json_t *json); + +static JSON_INLINE +void json_decref(json_t *json) +{ + if(json && json->refcount != (unsigned int)-1 && --json->refcount == 0) + json_delete(json); +} + + +/* getters, setters, manipulation */ + +unsigned 
int json_object_size(const json_t *object); +json_t *json_object_get(const json_t *object, const char *key); +int json_object_set_new(json_t *object, const char *key, json_t *value); +int json_object_set_new_nocheck(json_t *object, const char *key, json_t *value); +int json_object_del(json_t *object, const char *key); +int json_object_clear(json_t *object); +int json_object_update(json_t *object, json_t *other); +void *json_object_iter(json_t *object); +void *json_object_iter_at(json_t *object, const char *key); +void *json_object_iter_next(json_t *object, void *iter); +const char *json_object_iter_key(void *iter); +json_t *json_object_iter_value(void *iter); +int json_object_iter_set_new(json_t *object, void *iter, json_t *value); + +static JSON_INLINE +int json_object_set(json_t *object, const char *key, json_t *value) +{ + return json_object_set_new(object, key, json_incref(value)); +} + +static JSON_INLINE +int json_object_set_nocheck(json_t *object, const char *key, json_t *value) +{ + return json_object_set_new_nocheck(object, key, json_incref(value)); +} + +static inline +int json_object_iter_set(json_t *object, void *iter, json_t *value) +{ + return json_object_iter_set_new(object, iter, json_incref(value)); +} + +unsigned int json_array_size(const json_t *array); +json_t *json_array_get(const json_t *array, unsigned int index); +int json_array_set_new(json_t *array, unsigned int index, json_t *value); +int json_array_append_new(json_t *array, json_t *value); +int json_array_insert_new(json_t *array, unsigned int index, json_t *value); +int json_array_remove(json_t *array, unsigned int index); +int json_array_clear(json_t *array); +int json_array_extend(json_t *array, json_t *other); + +static JSON_INLINE +int json_array_set(json_t *array, unsigned int index, json_t *value) +{ + return json_array_set_new(array, index, json_incref(value)); +} + +static JSON_INLINE +int json_array_append(json_t *array, json_t *value) +{ + return json_array_append_new(array, 
json_incref(value)); +} + +static JSON_INLINE +int json_array_insert(json_t *array, unsigned int index, json_t *value) +{ + return json_array_insert_new(array, index, json_incref(value)); +} + +const char *json_string_value(const json_t *string); +int json_integer_value(const json_t *integer); +double json_real_value(const json_t *real); +double json_number_value(const json_t *json); + +int json_string_set(json_t *string, const char *value); +int json_string_set_nocheck(json_t *string, const char *value); +int json_integer_set(json_t *integer, int value); +int json_real_set(json_t *real, double value); + + +/* equality */ + +int json_equal(json_t *value1, json_t *value2); + + +/* copying */ + +json_t *json_copy(json_t *value); +json_t *json_deep_copy(json_t *value); + + +/* loading, printing */ + +#define JSON_ERROR_TEXT_LENGTH 160 + +typedef struct { + char text[JSON_ERROR_TEXT_LENGTH]; + int line; +} json_error_t; + +json_t *json_loads(const char *input, json_error_t *error); +json_t *json_loadf(FILE *input, json_error_t *error); +json_t *json_load_file(const char *path, json_error_t *error); + +#define JSON_INDENT(n) (n & 0xFF) +#define JSON_COMPACT 0x100 +#define JSON_ENSURE_ASCII 0x200 +#define JSON_SORT_KEYS 0x400 +#define JSON_PRESERVE_ORDER 0x800 + +char *json_dumps(const json_t *json, unsigned long flags); +int json_dumpf(const json_t *json, FILE *output, unsigned long flags); +int json_dump_file(const json_t *json, const char *path, unsigned long flags); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/compat/jansson/jansson_private.h b/compat/jansson/jansson_private.h new file mode 100644 index 0000000..3a3ed75 --- /dev/null +++ b/compat/jansson/jansson_private.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2009, 2010 Petri Lehtinen + * + * Jansson is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. 
+ */ + +#ifndef JANSSON_PRIVATE_H +#define JANSSON_PRIVATE_H + +#include "jansson.h" +#include "hashtable.h" + +#define container_of(ptr_, type_, member_) \ + ((type_ *)((char *)ptr_ - (size_t)&((type_ *)0)->member_)) + +typedef struct { + json_t json; + hashtable_t hashtable; + unsigned long serial; + int visited; +} json_object_t; + +typedef struct { + json_t json; + unsigned int size; + unsigned int entries; + json_t **table; + int visited; +} json_array_t; + +typedef struct { + json_t json; + char *value; +} json_string_t; + +typedef struct { + json_t json; + double value; +} json_real_t; + +typedef struct { + json_t json; + int value; +} json_integer_t; + +#define json_to_object(json_) container_of(json_, json_object_t, json) +#define json_to_array(json_) container_of(json_, json_array_t, json) +#define json_to_string(json_) container_of(json_, json_string_t, json) +#define json_to_real(json_) container_of(json_, json_real_t, json) +#define json_to_integer(json_) container_of(json_, json_integer_t, json) + +typedef struct { + unsigned long serial; + char key[]; +} object_key_t; + +const object_key_t *jsonp_object_iter_fullkey(void *iter); + +#endif diff --git a/compat/jansson/load.c b/compat/jansson/load.c new file mode 100644 index 0000000..ee56fbe --- /dev/null +++ b/compat/jansson/load.c @@ -0,0 +1,879 @@ +/* + * Copyright (c) 2009, 2010 Petri Lehtinen + * + * Jansson is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. 
+ */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "jansson_private.h" +#include "strbuffer.h" +#include "utf.h" + +#define TOKEN_INVALID -1 +#define TOKEN_EOF 0 +#define TOKEN_STRING 256 +#define TOKEN_INTEGER 257 +#define TOKEN_REAL 258 +#define TOKEN_TRUE 259 +#define TOKEN_FALSE 260 +#define TOKEN_NULL 261 + +/* read one byte from stream, return EOF on end of file */ +typedef int (*get_func)(void *data); + +/* return non-zero if end of file has been reached */ +typedef int (*eof_func)(void *data); + +typedef struct { + get_func get; + eof_func eof; + void *data; + int stream_pos; + char buffer[5]; + int buffer_pos; +} stream_t; + + +typedef struct { + stream_t stream; + strbuffer_t saved_text; + int token; + int line, column; + union { + char *string; + int integer; + double real; + } value; +} lex_t; + + +/*** error reporting ***/ + +static void error_init(json_error_t *error) +{ + if(error) + { + error->text[0] = '\0'; + error->line = -1; + } +} + +static void error_set(json_error_t *error, const lex_t *lex, + const char *msg, ...) 
+{ + va_list ap; + char text[JSON_ERROR_TEXT_LENGTH]; + + if(!error || error->text[0] != '\0') { + /* error already set */ + return; + } + + va_start(ap, msg); + vsnprintf(text, JSON_ERROR_TEXT_LENGTH, msg, ap); + va_end(ap); + + if(lex) + { + const char *saved_text = strbuffer_value(&lex->saved_text); + error->line = lex->line; + if(saved_text && saved_text[0]) + { + if(lex->saved_text.length <= 20) { + snprintf(error->text, JSON_ERROR_TEXT_LENGTH, + "%s near '%s'", text, saved_text); + } + else + snprintf(error->text, JSON_ERROR_TEXT_LENGTH, "%s", text); + } + else + { + snprintf(error->text, JSON_ERROR_TEXT_LENGTH, + "%s near end of file", text); + } + } + else + { + error->line = -1; + snprintf(error->text, JSON_ERROR_TEXT_LENGTH, "%s", text); + } +} + + +/*** lexical analyzer ***/ + +static void +stream_init(stream_t *stream, get_func get, eof_func eof, void *data) +{ + stream->get = get; + stream->eof = eof; + stream->data = data; + stream->stream_pos = 0; + stream->buffer[0] = '\0'; + stream->buffer_pos = 0; +} + +static char stream_get(stream_t *stream, json_error_t *error) +{ + char c; + + if(!stream->buffer[stream->buffer_pos]) + { + stream->buffer[0] = stream->get(stream->data); + stream->buffer_pos = 0; + + c = stream->buffer[0]; + + if((unsigned char)c >= 0x80 && c != (char)EOF) + { + /* multi-byte UTF-8 sequence */ + int i, count; + + count = utf8_check_first(c); + if(!count) + goto out; + + assert(count >= 2); + + for(i = 1; i < count; i++) + stream->buffer[i] = stream->get(stream->data); + + if(!utf8_check_full(stream->buffer, count, NULL)) + goto out; + + stream->stream_pos += count; + stream->buffer[count] = '\0'; + } + else { + stream->buffer[1] = '\0'; + stream->stream_pos++; + } + } + + return stream->buffer[stream->buffer_pos++]; + +out: + error_set(error, NULL, "unable to decode byte 0x%x at position %d", + (unsigned char)c, stream->stream_pos); + + stream->buffer[0] = EOF; + stream->buffer[1] = '\0'; + stream->buffer_pos = 1; + + return EOF; 
+} + +static void stream_unget(stream_t *stream, char c) +{ + assert(stream->buffer_pos > 0); + stream->buffer_pos--; + assert(stream->buffer[stream->buffer_pos] == c); +} + + +static int lex_get(lex_t *lex, json_error_t *error) +{ + return stream_get(&lex->stream, error); +} + +static int lex_eof(lex_t *lex) +{ + return lex->stream.eof(lex->stream.data); +} + +static void lex_save(lex_t *lex, char c) +{ + strbuffer_append_byte(&lex->saved_text, c); +} + +static int lex_get_save(lex_t *lex, json_error_t *error) +{ + char c = stream_get(&lex->stream, error); + lex_save(lex, c); + return c; +} + +static void lex_unget_unsave(lex_t *lex, char c) +{ + char d; + stream_unget(&lex->stream, c); + d = strbuffer_pop(&lex->saved_text); + assert(c == d); +} + +static void lex_save_cached(lex_t *lex) +{ + while(lex->stream.buffer[lex->stream.buffer_pos] != '\0') + { + lex_save(lex, lex->stream.buffer[lex->stream.buffer_pos]); + lex->stream.buffer_pos++; + } +} + +/* assumes that str points to 'u' plus at least 4 valid hex digits */ +static int32_t decode_unicode_escape(const char *str) +{ + int i; + int32_t value = 0; + + assert(str[0] == 'u'); + + for(i = 1; i <= 4; i++) { + char c = str[i]; + value <<= 4; + if(isdigit(c)) + value += c - '0'; + else if(islower(c)) + value += c - 'a' + 10; + else if(isupper(c)) + value += c - 'A' + 10; + else + assert(0); + } + + return value; +} + +static void lex_scan_string(lex_t *lex, json_error_t *error) +{ + char c; + const char *p; + char *t; + int i; + + lex->value.string = NULL; + lex->token = TOKEN_INVALID; + + c = lex_get_save(lex, error); + + while(c != '"') { + if(c == (char)EOF) { + lex_unget_unsave(lex, c); + if(lex_eof(lex)) + error_set(error, lex, "premature end of input"); + goto out; + } + + else if((unsigned char)c <= 0x1F) { + /* control character */ + lex_unget_unsave(lex, c); + if(c == '\n') + error_set(error, lex, "unexpected newline", c); + else + error_set(error, lex, "control character 0x%x", c); + goto out; + } + + 
else if(c == '\\') { + c = lex_get_save(lex, error); + if(c == 'u') { + c = lex_get_save(lex, error); + for(i = 0; i < 4; i++) { + if(!isxdigit(c)) { + lex_unget_unsave(lex, c); + error_set(error, lex, "invalid escape"); + goto out; + } + c = lex_get_save(lex, error); + } + } + else if(c == '"' || c == '\\' || c == '/' || c == 'b' || + c == 'f' || c == 'n' || c == 'r' || c == 't') + c = lex_get_save(lex, error); + else { + lex_unget_unsave(lex, c); + error_set(error, lex, "invalid escape"); + goto out; + } + } + else + c = lex_get_save(lex, error); + } + + /* the actual value is at most of the same length as the source + string, because: + - shortcut escapes (e.g. "\t") (length 2) are converted to 1 byte + - a single \uXXXX escape (length 6) is converted to at most 3 bytes + - two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair + are converted to 4 bytes + */ + lex->value.string = malloc(lex->saved_text.length + 1); + if(!lex->value.string) { + /* this is not very nice, since TOKEN_INVALID is returned */ + goto out; + } + + /* the target */ + t = lex->value.string; + + /* + 1 to skip the " */ + p = strbuffer_value(&lex->saved_text) + 1; + + while(*p != '"') { + if(*p == '\\') { + p++; + if(*p == 'u') { + char buffer[4]; + int length; + int32_t value; + + value = decode_unicode_escape(p); + p += 5; + + if(0xD800 <= value && value <= 0xDBFF) { + /* surrogate pair */ + if(*p == '\\' && *(p + 1) == 'u') { + int32_t value2 = decode_unicode_escape(++p); + p += 5; + + if(0xDC00 <= value2 && value2 <= 0xDFFF) { + /* valid second surrogate */ + value = + ((value - 0xD800) << 10) + + (value2 - 0xDC00) + + 0x10000; + } + else { + /* invalid second surrogate */ + error_set(error, lex, + "invalid Unicode '\\u%04X\\u%04X'", + value, value2); + goto out; + } + } + else { + /* no second surrogate */ + error_set(error, lex, "invalid Unicode '\\u%04X'", + value); + goto out; + } + } + else if(0xDC00 <= value && value <= 0xDFFF) { + error_set(error, lex, "invalid Unicode 
'\\u%04X'", value); + goto out; + } + else if(value == 0) + { + error_set(error, lex, "\\u0000 is not allowed"); + goto out; + } + + if(utf8_encode(value, buffer, &length)) + assert(0); + + memcpy(t, buffer, length); + t += length; + } + else { + switch(*p) { + case '"': case '\\': case '/': + *t = *p; break; + case 'b': *t = '\b'; break; + case 'f': *t = '\f'; break; + case 'n': *t = '\n'; break; + case 'r': *t = '\r'; break; + case 't': *t = '\t'; break; + default: assert(0); + } + t++; + p++; + } + } + else + *(t++) = *(p++); + } + *t = '\0'; + lex->token = TOKEN_STRING; + return; + +out: + free(lex->value.string); +} + +static int lex_scan_number(lex_t *lex, char c, json_error_t *error) +{ + const char *saved_text; + char *end; + double value; + + lex->token = TOKEN_INVALID; + + if(c == '-') + c = lex_get_save(lex, error); + + if(c == '0') { + c = lex_get_save(lex, error); + if(isdigit(c)) { + lex_unget_unsave(lex, c); + goto out; + } + } + else if(isdigit(c)) { + c = lex_get_save(lex, error); + while(isdigit(c)) + c = lex_get_save(lex, error); + } + else { + lex_unget_unsave(lex, c); + goto out; + } + + if(c != '.' 
&& c != 'E' && c != 'e') { + long value; + + lex_unget_unsave(lex, c); + + saved_text = strbuffer_value(&lex->saved_text); + value = strtol(saved_text, &end, 10); + assert(end == saved_text + lex->saved_text.length); + + if((value == LONG_MAX && errno == ERANGE) || value > INT_MAX) { + error_set(error, lex, "too big integer"); + goto out; + } + else if((value == LONG_MIN && errno == ERANGE) || value < INT_MIN) { + error_set(error, lex, "too big negative integer"); + goto out; + } + + lex->token = TOKEN_INTEGER; + lex->value.integer = (int)value; + return 0; + } + + if(c == '.') { + c = lex_get(lex, error); + if(!isdigit(c)) + goto out; + lex_save(lex, c); + + c = lex_get_save(lex, error); + while(isdigit(c)) + c = lex_get_save(lex, error); + } + + if(c == 'E' || c == 'e') { + c = lex_get_save(lex, error); + if(c == '+' || c == '-') + c = lex_get_save(lex, error); + + if(!isdigit(c)) { + lex_unget_unsave(lex, c); + goto out; + } + + c = lex_get_save(lex, error); + while(isdigit(c)) + c = lex_get_save(lex, error); + } + + lex_unget_unsave(lex, c); + + saved_text = strbuffer_value(&lex->saved_text); + value = strtod(saved_text, &end); + assert(end == saved_text + lex->saved_text.length); + + if(errno == ERANGE && value != 0) { + error_set(error, lex, "real number overflow"); + goto out; + } + + lex->token = TOKEN_REAL; + lex->value.real = value; + return 0; + +out: + return -1; +} + +static int lex_scan(lex_t *lex, json_error_t *error) +{ + char c; + + strbuffer_clear(&lex->saved_text); + + if(lex->token == TOKEN_STRING) { + free(lex->value.string); + lex->value.string = NULL; + } + + c = lex_get(lex, error); + while(c == ' ' || c == '\t' || c == '\n' || c == '\r') + { + if(c == '\n') + lex->line++; + + c = lex_get(lex, error); + } + + if(c == (char)EOF) { + if(lex_eof(lex)) + lex->token = TOKEN_EOF; + else + lex->token = TOKEN_INVALID; + goto out; + } + + lex_save(lex, c); + + if(c == '{' || c == '}' || c == '[' || c == ']' || c == ':' || c == ',') + lex->token = c; 
+ + else if(c == '"') + lex_scan_string(lex, error); + + else if(isdigit(c) || c == '-') { + if(lex_scan_number(lex, c, error)) + goto out; + } + + else if(isupper(c) || islower(c)) { + /* eat up the whole identifier for clearer error messages */ + const char *saved_text; + + c = lex_get_save(lex, error); + while(isupper(c) || islower(c)) + c = lex_get_save(lex, error); + lex_unget_unsave(lex, c); + + saved_text = strbuffer_value(&lex->saved_text); + + if(strcmp(saved_text, "true") == 0) + lex->token = TOKEN_TRUE; + else if(strcmp(saved_text, "false") == 0) + lex->token = TOKEN_FALSE; + else if(strcmp(saved_text, "null") == 0) + lex->token = TOKEN_NULL; + else + lex->token = TOKEN_INVALID; + } + + else { + /* save the rest of the input UTF-8 sequence to get an error + message of valid UTF-8 */ + lex_save_cached(lex); + lex->token = TOKEN_INVALID; + } + +out: + return lex->token; +} + +static char *lex_steal_string(lex_t *lex) +{ + char *result = NULL; + if(lex->token == TOKEN_STRING) + { + result = lex->value.string; + lex->value.string = NULL; + } + return result; +} + +static int lex_init(lex_t *lex, get_func get, eof_func eof, void *data) +{ + stream_init(&lex->stream, get, eof, data); + if(strbuffer_init(&lex->saved_text)) + return -1; + + lex->token = TOKEN_INVALID; + lex->line = 1; + + return 0; +} + +static void lex_close(lex_t *lex) +{ + if(lex->token == TOKEN_STRING) + free(lex->value.string); + strbuffer_close(&lex->saved_text); +} + + +/*** parser ***/ + +static json_t *parse_value(lex_t *lex, json_error_t *error); + +static json_t *parse_object(lex_t *lex, json_error_t *error) +{ + json_t *object = json_object(); + if(!object) + return NULL; + + lex_scan(lex, error); + if(lex->token == '}') + return object; + + while(1) { + char *key; + json_t *value; + + if(lex->token != TOKEN_STRING) { + error_set(error, lex, "string or '}' expected"); + goto error; + } + + key = lex_steal_string(lex); + if(!key) + return NULL; + + lex_scan(lex, error); + 
if(lex->token != ':') { + free(key); + error_set(error, lex, "':' expected"); + goto error; + } + + lex_scan(lex, error); + value = parse_value(lex, error); + if(!value) { + free(key); + goto error; + } + + if(json_object_set_nocheck(object, key, value)) { + free(key); + json_decref(value); + goto error; + } + + json_decref(value); + free(key); + + lex_scan(lex, error); + if(lex->token != ',') + break; + + lex_scan(lex, error); + } + + if(lex->token != '}') { + error_set(error, lex, "'}' expected"); + goto error; + } + + return object; + +error: + json_decref(object); + return NULL; +} + +static json_t *parse_array(lex_t *lex, json_error_t *error) +{ + json_t *array = json_array(); + if(!array) + return NULL; + + lex_scan(lex, error); + if(lex->token == ']') + return array; + + while(lex->token) { + json_t *elem = parse_value(lex, error); + if(!elem) + goto error; + + if(json_array_append(array, elem)) { + json_decref(elem); + goto error; + } + json_decref(elem); + + lex_scan(lex, error); + if(lex->token != ',') + break; + + lex_scan(lex, error); + } + + if(lex->token != ']') { + error_set(error, lex, "']' expected"); + goto error; + } + + return array; + +error: + json_decref(array); + return NULL; +} + +static json_t *parse_value(lex_t *lex, json_error_t *error) +{ + json_t *json; + + switch(lex->token) { + case TOKEN_STRING: { + json = json_string_nocheck(lex->value.string); + break; + } + + case TOKEN_INTEGER: { + json = json_integer(lex->value.integer); + break; + } + + case TOKEN_REAL: { + json = json_real(lex->value.real); + break; + } + + case TOKEN_TRUE: + json = json_true(); + break; + + case TOKEN_FALSE: + json = json_false(); + break; + + case TOKEN_NULL: + json = json_null(); + break; + + case '{': + json = parse_object(lex, error); + break; + + case '[': + json = parse_array(lex, error); + break; + + case TOKEN_INVALID: + error_set(error, lex, "invalid token"); + return NULL; + + default: + error_set(error, lex, "unexpected token"); + return NULL; + } 
+ + if(!json) + return NULL; + + return json; +} + +static json_t *parse_json(lex_t *lex, json_error_t *error) +{ + error_init(error); + + lex_scan(lex, error); + if(lex->token != '[' && lex->token != '{') { + error_set(error, lex, "'[' or '{' expected"); + return NULL; + } + + return parse_value(lex, error); +} + +typedef struct +{ + const char *data; + int pos; +} string_data_t; + +static int string_get(void *data) +{ + char c; + string_data_t *stream = (string_data_t *)data; + c = stream->data[stream->pos]; + if(c == '\0') + return EOF; + else + { + stream->pos++; + return c; + } +} + +static int string_eof(void *data) +{ + string_data_t *stream = (string_data_t *)data; + return (stream->data[stream->pos] == '\0'); +} + +json_t *json_loads(const char *string, json_error_t *error) +{ + lex_t lex; + json_t *result; + + string_data_t stream_data = { + string, + 0 + }; + + if(lex_init(&lex, string_get, string_eof, (void *)&stream_data)) + return NULL; + + result = parse_json(&lex, error); + if(!result) + goto out; + + lex_scan(&lex, error); + if(lex.token != TOKEN_EOF) { + error_set(error, &lex, "end of file expected"); + json_decref(result); + result = NULL; + } + +out: + lex_close(&lex); + return result; +} + +json_t *json_loadf(FILE *input, json_error_t *error) +{ + lex_t lex; + json_t *result; + + if(lex_init(&lex, (get_func)fgetc, (eof_func)feof, input)) + return NULL; + + result = parse_json(&lex, error); + if(!result) + goto out; + + lex_scan(&lex, error); + if(lex.token != TOKEN_EOF) { + error_set(error, &lex, "end of file expected"); + json_decref(result); + result = NULL; + } + +out: + lex_close(&lex); + return result; +} + +json_t *json_load_file(const char *path, json_error_t *error) +{ + json_t *result; + FILE *fp; + + error_init(error); + + fp = fopen(path, "r"); + if(!fp) + { + error_set(error, NULL, "unable to open %s: %s", + path, strerror(errno)); + return NULL; + } + + result = json_loadf(fp, error); + + fclose(fp); + return result; +} diff --git 
a/compat/jansson/strbuffer.c b/compat/jansson/strbuffer.c new file mode 100644 index 0000000..0019645 --- /dev/null +++ b/compat/jansson/strbuffer.c @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2009, 2010 Petri Lehtinen + * + * Jansson is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + +#define _GNU_SOURCE +#include +#include +#include "strbuffer.h" +#include "util.h" + +#define STRBUFFER_MIN_SIZE 16 +#define STRBUFFER_FACTOR 2 + +int strbuffer_init(strbuffer_t *strbuff) +{ + strbuff->size = STRBUFFER_MIN_SIZE; + strbuff->length = 0; + + strbuff->value = malloc(strbuff->size); + if(!strbuff->value) + return -1; + + /* initialize to empty */ + strbuff->value[0] = '\0'; + return 0; +} + +void strbuffer_close(strbuffer_t *strbuff) +{ + free(strbuff->value); + strbuff->size = 0; + strbuff->length = 0; + strbuff->value = NULL; +} + +void strbuffer_clear(strbuffer_t *strbuff) +{ + strbuff->length = 0; + strbuff->value[0] = '\0'; +} + +const char *strbuffer_value(const strbuffer_t *strbuff) +{ + return strbuff->value; +} + +char *strbuffer_steal_value(strbuffer_t *strbuff) +{ + char *result = strbuff->value; + strbuffer_init(strbuff); + return result; +} + +int strbuffer_append(strbuffer_t *strbuff, const char *string) +{ + return strbuffer_append_bytes(strbuff, string, strlen(string)); +} + +int strbuffer_append_byte(strbuffer_t *strbuff, char byte) +{ + return strbuffer_append_bytes(strbuff, &byte, 1); +} + +int strbuffer_append_bytes(strbuffer_t *strbuff, const char *data, int size) +{ + if(strbuff->length + size >= strbuff->size) + { + strbuff->size = max(strbuff->size * STRBUFFER_FACTOR, + strbuff->length + size + 1); + + strbuff->value = realloc(strbuff->value, strbuff->size); + if(!strbuff->value) + return -1; + } + + memcpy(strbuff->value + strbuff->length, data, size); + strbuff->length += size; + strbuff->value[strbuff->length] = '\0'; + + return 0; +} + +char strbuffer_pop(strbuffer_t 
*strbuff) +{ + if(strbuff->length > 0) { + char c = strbuff->value[--strbuff->length]; + strbuff->value[strbuff->length] = '\0'; + return c; + } + else + return '\0'; +} diff --git a/compat/jansson/strbuffer.h b/compat/jansson/strbuffer.h new file mode 100644 index 0000000..816594a --- /dev/null +++ b/compat/jansson/strbuffer.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2009, 2010 Petri Lehtinen + * + * Jansson is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + +#ifndef STRBUFFER_H +#define STRBUFFER_H + +typedef struct { + char *value; + int length; /* bytes used */ + int size; /* bytes allocated */ +} strbuffer_t; + +int strbuffer_init(strbuffer_t *strbuff); +void strbuffer_close(strbuffer_t *strbuff); + +void strbuffer_clear(strbuffer_t *strbuff); + +const char *strbuffer_value(const strbuffer_t *strbuff); +char *strbuffer_steal_value(strbuffer_t *strbuff); + +int strbuffer_append(strbuffer_t *strbuff, const char *string); +int strbuffer_append_byte(strbuffer_t *strbuff, char byte); +int strbuffer_append_bytes(strbuffer_t *strbuff, const char *data, int size); + +char strbuffer_pop(strbuffer_t *strbuff); + +#endif diff --git a/compat/jansson/utf.c b/compat/jansson/utf.c new file mode 100644 index 0000000..2b64450 --- /dev/null +++ b/compat/jansson/utf.c @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2009, 2010 Petri Lehtinen + * + * Jansson is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. 
+ */ + +#include +#include "utf.h" + +int utf8_encode(int32_t codepoint, char *buffer, int *size) +{ + if(codepoint < 0) + return -1; + else if(codepoint < 0x80) + { + buffer[0] = (char)codepoint; + *size = 1; + } + else if(codepoint < 0x800) + { + buffer[0] = 0xC0 + ((codepoint & 0x7C0) >> 6); + buffer[1] = 0x80 + ((codepoint & 0x03F)); + *size = 2; + } + else if(codepoint < 0x10000) + { + buffer[0] = 0xE0 + ((codepoint & 0xF000) >> 12); + buffer[1] = 0x80 + ((codepoint & 0x0FC0) >> 6); + buffer[2] = 0x80 + ((codepoint & 0x003F)); + *size = 3; + } + else if(codepoint <= 0x10FFFF) + { + buffer[0] = 0xF0 + ((codepoint & 0x1C0000) >> 18); + buffer[1] = 0x80 + ((codepoint & 0x03F000) >> 12); + buffer[2] = 0x80 + ((codepoint & 0x000FC0) >> 6); + buffer[3] = 0x80 + ((codepoint & 0x00003F)); + *size = 4; + } + else + return -1; + + return 0; +} + +int utf8_check_first(char byte) +{ + unsigned char u = (unsigned char)byte; + + if(u < 0x80) + return 1; + + if(0x80 <= u && u <= 0xBF) { + /* second, third or fourth byte of a multi-byte + sequence, i.e. 
a "continuation byte" */ + return 0; + } + else if(u == 0xC0 || u == 0xC1) { + /* overlong encoding of an ASCII byte */ + return 0; + } + else if(0xC2 <= u && u <= 0xDF) { + /* 2-byte sequence */ + return 2; + } + + else if(0xE0 <= u && u <= 0xEF) { + /* 3-byte sequence */ + return 3; + } + else if(0xF0 <= u && u <= 0xF4) { + /* 4-byte sequence */ + return 4; + } + else { /* u >= 0xF5 */ + /* Restricted (start of 4-, 5- or 6-byte sequence) or invalid + UTF-8 */ + return 0; + } +} + +int utf8_check_full(const char *buffer, int size, int32_t *codepoint) +{ + int i; + int32_t value = 0; + unsigned char u = (unsigned char)buffer[0]; + + if(size == 2) + { + value = u & 0x1F; + } + else if(size == 3) + { + value = u & 0xF; + } + else if(size == 4) + { + value = u & 0x7; + } + else + return 0; + + for(i = 1; i < size; i++) + { + u = (unsigned char)buffer[i]; + + if(u < 0x80 || u > 0xBF) { + /* not a continuation byte */ + return 0; + } + + value = (value << 6) + (u & 0x3F); + } + + if(value > 0x10FFFF) { + /* not in Unicode range */ + return 0; + } + + else if(0xD800 <= value && value <= 0xDFFF) { + /* invalid code point (UTF-16 surrogate halves) */ + return 0; + } + + else if((size == 2 && value < 0x80) || + (size == 3 && value < 0x800) || + (size == 4 && value < 0x10000)) { + /* overlong encoding */ + return 0; + } + + if(codepoint) + *codepoint = value; + + return 1; +} + +const char *utf8_iterate(const char *buffer, int32_t *codepoint) +{ + int count; + int32_t value; + + if(!*buffer) + return buffer; + + count = utf8_check_first(buffer[0]); + if(count <= 0) + return NULL; + + if(count == 1) + value = (unsigned char)buffer[0]; + else + { + if(!utf8_check_full(buffer, count, &value)) + return NULL; + } + + if(codepoint) + *codepoint = value; + + return buffer + count; +} + +int utf8_check_string(const char *string, int length) +{ + int i; + + if(length == -1) + length = strlen(string); + + for(i = 0; i < length; i++) + { + int count = utf8_check_first(string[i]); + 
if(count == 0) + return 0; + else if(count > 1) + { + if(i + count > length) + return 0; + + if(!utf8_check_full(&string[i], count, NULL)) + return 0; + + i += count - 1; + } + } + + return 1; +} diff --git a/compat/jansson/utf.h b/compat/jansson/utf.h new file mode 100644 index 0000000..8e95296 --- /dev/null +++ b/compat/jansson/utf.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2009, 2010 Petri Lehtinen + * + * Jansson is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + +#ifndef UTF_H +#define UTF_H + +#include + +#ifdef HAVE_INTTYPES_H +/* inttypes.h includes stdint.h in a standard environment, so there's +no need to include stdint.h separately. If inttypes.h doesn't define +int32_t, it's defined in config.h. */ +#include +#endif + +int utf8_encode(int codepoint, char *buffer, int *size); + +int utf8_check_first(char byte); +int utf8_check_full(const char *buffer, int size, int32_t *codepoint); +const char *utf8_iterate(const char *buffer, int32_t *codepoint); + +int utf8_check_string(const char *string, int length); + +#endif diff --git a/compat/jansson/util.h b/compat/jansson/util.h new file mode 100644 index 0000000..33e5d62 --- /dev/null +++ b/compat/jansson/util.h @@ -0,0 +1,15 @@ +/* + * Copyright (c) 2009, 2010 Petri Lehtinen + * + * Jansson is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + +#ifndef UTIL_H +#define UTIL_H + +#ifndef max +#define max(a, b) ((a) > (b) ? (a) : (b)) +#endif + +#endif diff --git a/compat/jansson/value.c b/compat/jansson/value.c new file mode 100644 index 0000000..591b89e --- /dev/null +++ b/compat/jansson/value.c @@ -0,0 +1,976 @@ +/* + * Copyright (c) 2009, 2010 Petri Lehtinen + * + * Jansson is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. 
+ */ + +#define _GNU_SOURCE + +#include + +#include +#include + +#include +#include "hashtable.h" +#include "jansson_private.h" +#include "utf.h" +#include "util.h" + + +static inline void json_init(json_t *json, json_type type) +{ + json->type = type; + json->refcount = 1; +} + + +/*** object ***/ + +/* This macro just returns a pointer that's a few bytes backwards from + string. This makes it possible to pass a pointer to object_key_t + when only the string inside it is used, without actually creating + an object_key_t instance. */ +#define string_to_key(string) container_of(string, object_key_t, key) + +static unsigned int hash_key(const void *ptr) +{ + const char *str = ((const object_key_t *)ptr)->key; + + unsigned int hash = 5381; + unsigned int c; + + while((c = (unsigned int)*str)) + { + hash = ((hash << 5) + hash) + c; + str++; + } + + return hash; +} + +static int key_equal(const void *ptr1, const void *ptr2) +{ + return strcmp(((const object_key_t *)ptr1)->key, + ((const object_key_t *)ptr2)->key) == 0; +} + +static void value_decref(void *value) +{ + json_decref((json_t *)value); +} + +json_t *json_object(void) +{ + json_object_t *object = malloc(sizeof(json_object_t)); + if(!object) + return NULL; + json_init(&object->json, JSON_OBJECT); + + if(hashtable_init(&object->hashtable, hash_key, key_equal, + free, value_decref)) + { + free(object); + return NULL; + } + + object->serial = 0; + object->visited = 0; + + return &object->json; +} + +static void json_delete_object(json_object_t *object) +{ + hashtable_close(&object->hashtable); + free(object); +} + +unsigned int json_object_size(const json_t *json) +{ + json_object_t *object; + + if(!json_is_object(json)) + return -1; + + object = json_to_object(json); + return object->hashtable.size; +} + +json_t *json_object_get(const json_t *json, const char *key) +{ + json_object_t *object; + + if(!json_is_object(json)) + return NULL; + + object = json_to_object(json); + return hashtable_get(&object->hashtable, 
string_to_key(key)); +} + +int json_object_set_new_nocheck(json_t *json, const char *key, json_t *value) +{ + json_object_t *object; + object_key_t *k; + + if(!key || !value) + return -1; + + if(!json_is_object(json) || json == value) + { + json_decref(value); + return -1; + } + object = json_to_object(json); + + k = malloc(sizeof(object_key_t) + strlen(key) + 1); + if(!k) + return -1; + + k->serial = object->serial++; + strcpy(k->key, key); + + if(hashtable_set(&object->hashtable, k, value)) + { + json_decref(value); + return -1; + } + + return 0; +} + +int json_object_set_new(json_t *json, const char *key, json_t *value) +{ + if(!key || !utf8_check_string(key, -1)) + { + json_decref(value); + return -1; + } + + return json_object_set_new_nocheck(json, key, value); +} + +int json_object_del(json_t *json, const char *key) +{ + json_object_t *object; + + if(!json_is_object(json)) + return -1; + + object = json_to_object(json); + return hashtable_del(&object->hashtable, string_to_key(key)); +} + +int json_object_clear(json_t *json) +{ + json_object_t *object; + + if(!json_is_object(json)) + return -1; + + object = json_to_object(json); + hashtable_clear(&object->hashtable); + + return 0; +} + +int json_object_update(json_t *object, json_t *other) +{ + void *iter; + + if(!json_is_object(object) || !json_is_object(other)) + return -1; + + iter = json_object_iter(other); + while(iter) { + const char *key; + json_t *value; + + key = json_object_iter_key(iter); + value = json_object_iter_value(iter); + + if(json_object_set_nocheck(object, key, value)) + return -1; + + iter = json_object_iter_next(other, iter); + } + + return 0; +} + +void *json_object_iter(json_t *json) +{ + json_object_t *object; + + if(!json_is_object(json)) + return NULL; + + object = json_to_object(json); + return hashtable_iter(&object->hashtable); +} + +void *json_object_iter_at(json_t *json, const char *key) +{ + json_object_t *object; + + if(!key || !json_is_object(json)) + return NULL; + + object 
= json_to_object(json); + return hashtable_iter_at(&object->hashtable, string_to_key(key)); +} + +void *json_object_iter_next(json_t *json, void *iter) +{ + json_object_t *object; + + if(!json_is_object(json) || iter == NULL) + return NULL; + + object = json_to_object(json); + return hashtable_iter_next(&object->hashtable, iter); +} + +const object_key_t *jsonp_object_iter_fullkey(void *iter) +{ + if(!iter) + return NULL; + + return hashtable_iter_key(iter); +} + +const char *json_object_iter_key(void *iter) +{ + if(!iter) + return NULL; + + return jsonp_object_iter_fullkey(iter)->key; +} + +json_t *json_object_iter_value(void *iter) +{ + if(!iter) + return NULL; + + return (json_t *)hashtable_iter_value(iter); +} + +int json_object_iter_set_new(json_t *json, void *iter, json_t *value) +{ + json_object_t *object; + + if(!json_is_object(json) || !iter || !value) + return -1; + + object = json_to_object(json); + hashtable_iter_set(&object->hashtable, iter, value); + + return 0; +} + +static int json_object_equal(json_t *object1, json_t *object2) +{ + void *iter; + + if(json_object_size(object1) != json_object_size(object2)) + return 0; + + iter = json_object_iter(object1); + while(iter) + { + const char *key; + json_t *value1, *value2; + + key = json_object_iter_key(iter); + value1 = json_object_iter_value(iter); + value2 = json_object_get(object2, key); + + if(!json_equal(value1, value2)) + return 0; + + iter = json_object_iter_next(object1, iter); + } + + return 1; +} + +static json_t *json_object_copy(json_t *object) +{ + json_t *result; + void *iter; + + result = json_object(); + if(!result) + return NULL; + + iter = json_object_iter(object); + while(iter) + { + const char *key; + json_t *value; + + key = json_object_iter_key(iter); + value = json_object_iter_value(iter); + json_object_set_nocheck(result, key, value); + + iter = json_object_iter_next(object, iter); + } + + return result; +} + +static json_t *json_object_deep_copy(json_t *object) +{ + json_t 
*result; + void *iter; + + result = json_object(); + if(!result) + return NULL; + + iter = json_object_iter(object); + while(iter) + { + const char *key; + json_t *value; + + key = json_object_iter_key(iter); + value = json_object_iter_value(iter); + json_object_set_new_nocheck(result, key, json_deep_copy(value)); + + iter = json_object_iter_next(object, iter); + } + + return result; +} + + +/*** array ***/ + +json_t *json_array(void) +{ + json_array_t *array = malloc(sizeof(json_array_t)); + if(!array) + return NULL; + json_init(&array->json, JSON_ARRAY); + + array->entries = 0; + array->size = 8; + + array->table = malloc(array->size * sizeof(json_t *)); + if(!array->table) { + free(array); + return NULL; + } + + array->visited = 0; + + return &array->json; +} + +static void json_delete_array(json_array_t *array) +{ + unsigned int i; + + for(i = 0; i < array->entries; i++) + json_decref(array->table[i]); + + free(array->table); + free(array); +} + +unsigned int json_array_size(const json_t *json) +{ + if(!json_is_array(json)) + return 0; + + return json_to_array(json)->entries; +} + +json_t *json_array_get(const json_t *json, unsigned int index) +{ + json_array_t *array; + if(!json_is_array(json)) + return NULL; + array = json_to_array(json); + + if(index >= array->entries) + return NULL; + + return array->table[index]; +} + +int json_array_set_new(json_t *json, unsigned int index, json_t *value) +{ + json_array_t *array; + + if(!value) + return -1; + + if(!json_is_array(json) || json == value) + { + json_decref(value); + return -1; + } + array = json_to_array(json); + + if(index >= array->entries) + { + json_decref(value); + return -1; + } + + json_decref(array->table[index]); + array->table[index] = value; + + return 0; +} + +static void array_move(json_array_t *array, unsigned int dest, + unsigned int src, unsigned int count) +{ + memmove(&array->table[dest], &array->table[src], count * sizeof(json_t *)); +} + +static void array_copy(json_t **dest, unsigned 
int dpos, + json_t **src, unsigned int spos, + unsigned int count) +{ + memcpy(&dest[dpos], &src[spos], count * sizeof(json_t *)); +} + +static json_t **json_array_grow(json_array_t *array, + unsigned int amount, + int copy) +{ + unsigned int new_size; + json_t **old_table, **new_table; + + if(array->entries + amount <= array->size) + return array->table; + + old_table = array->table; + + new_size = max(array->size + amount, array->size * 2); + new_table = malloc(new_size * sizeof(json_t *)); + if(!new_table) + return NULL; + + array->size = new_size; + array->table = new_table; + + if(copy) { + array_copy(array->table, 0, old_table, 0, array->entries); + free(old_table); + return array->table; + } + + return old_table; +} + +int json_array_append_new(json_t *json, json_t *value) +{ + json_array_t *array; + + if(!value) + return -1; + + if(!json_is_array(json) || json == value) + { + json_decref(value); + return -1; + } + array = json_to_array(json); + + if(!json_array_grow(array, 1, 1)) { + json_decref(value); + return -1; + } + + array->table[array->entries] = value; + array->entries++; + + return 0; +} + +int json_array_insert_new(json_t *json, unsigned int index, json_t *value) +{ + json_array_t *array; + json_t **old_table; + + if(!value) + return -1; + + if(!json_is_array(json) || json == value) { + json_decref(value); + return -1; + } + array = json_to_array(json); + + if(index > array->entries) { + json_decref(value); + return -1; + } + + old_table = json_array_grow(array, 1, 0); + if(!old_table) { + json_decref(value); + return -1; + } + + if(old_table != array->table) { + array_copy(array->table, 0, old_table, 0, index); + array_copy(array->table, index + 1, old_table, index, + array->entries - index); + free(old_table); + } + else + array_move(array, index + 1, index, array->entries - index); + + array->table[index] = value; + array->entries++; + + return 0; +} + +int json_array_remove(json_t *json, unsigned int index) +{ + json_array_t *array; + + 
if(!json_is_array(json)) + return -1; + array = json_to_array(json); + + if(index >= array->entries) + return -1; + + json_decref(array->table[index]); + + array_move(array, index, index + 1, array->entries - index); + array->entries--; + + return 0; +} + +int json_array_clear(json_t *json) +{ + json_array_t *array; + unsigned int i; + + if(!json_is_array(json)) + return -1; + array = json_to_array(json); + + for(i = 0; i < array->entries; i++) + json_decref(array->table[i]); + + array->entries = 0; + return 0; +} + +int json_array_extend(json_t *json, json_t *other_json) +{ + json_array_t *array, *other; + unsigned int i; + + if(!json_is_array(json) || !json_is_array(other_json)) + return -1; + array = json_to_array(json); + other = json_to_array(other_json); + + if(!json_array_grow(array, other->entries, 1)) + return -1; + + for(i = 0; i < other->entries; i++) + json_incref(other->table[i]); + + array_copy(array->table, array->entries, other->table, 0, other->entries); + + array->entries += other->entries; + return 0; +} + +static int json_array_equal(json_t *array1, json_t *array2) +{ + unsigned int i, size; + + size = json_array_size(array1); + if(size != json_array_size(array2)) + return 0; + + for(i = 0; i < size; i++) + { + json_t *value1, *value2; + + value1 = json_array_get(array1, i); + value2 = json_array_get(array2, i); + + if(!json_equal(value1, value2)) + return 0; + } + + return 1; +} + +static json_t *json_array_copy(json_t *array) +{ + json_t *result; + unsigned int i; + + result = json_array(); + if(!result) + return NULL; + + for(i = 0; i < json_array_size(array); i++) + json_array_append(result, json_array_get(array, i)); + + return result; +} + +static json_t *json_array_deep_copy(json_t *array) +{ + json_t *result; + unsigned int i; + + result = json_array(); + if(!result) + return NULL; + + for(i = 0; i < json_array_size(array); i++) + json_array_append_new(result, json_deep_copy(json_array_get(array, i))); + + return result; +} + +/*** 
string ***/ + +json_t *json_string_nocheck(const char *value) +{ + json_string_t *string; + + if(!value) + return NULL; + + string = malloc(sizeof(json_string_t)); + if(!string) + return NULL; + json_init(&string->json, JSON_STRING); + + string->value = strdup(value); + if(!string->value) { + free(string); + return NULL; + } + + return &string->json; +} + +json_t *json_string(const char *value) +{ + if(!value || !utf8_check_string(value, -1)) + return NULL; + + return json_string_nocheck(value); +} + +const char *json_string_value(const json_t *json) +{ + if(!json_is_string(json)) + return NULL; + + return json_to_string(json)->value; +} + +int json_string_set_nocheck(json_t *json, const char *value) +{ + char *dup; + json_string_t *string; + + dup = strdup(value); + if(!dup) + return -1; + + string = json_to_string(json); + free(string->value); + string->value = dup; + + return 0; +} + +int json_string_set(json_t *json, const char *value) +{ + if(!value || !utf8_check_string(value, -1)) + return -1; + + return json_string_set_nocheck(json, value); +} + +static void json_delete_string(json_string_t *string) +{ + free(string->value); + free(string); +} + +static int json_string_equal(json_t *string1, json_t *string2) +{ + return strcmp(json_string_value(string1), json_string_value(string2)) == 0; +} + +static json_t *json_string_copy(json_t *string) +{ + return json_string_nocheck(json_string_value(string)); +} + + +/*** integer ***/ + +json_t *json_integer(int value) +{ + json_integer_t *integer = malloc(sizeof(json_integer_t)); + if(!integer) + return NULL; + json_init(&integer->json, JSON_INTEGER); + + integer->value = value; + return &integer->json; +} + +int json_integer_value(const json_t *json) +{ + if(!json_is_integer(json)) + return 0; + + return json_to_integer(json)->value; +} + +int json_integer_set(json_t *json, int value) +{ + if(!json_is_integer(json)) + return -1; + + json_to_integer(json)->value = value; + + return 0; +} + +static void 
json_delete_integer(json_integer_t *integer) +{ + free(integer); +} + +static int json_integer_equal(json_t *integer1, json_t *integer2) +{ + return json_integer_value(integer1) == json_integer_value(integer2); +} + +static json_t *json_integer_copy(json_t *integer) +{ + return json_integer(json_integer_value(integer)); +} + + +/*** real ***/ + +json_t *json_real(double value) +{ + json_real_t *real = malloc(sizeof(json_real_t)); + if(!real) + return NULL; + json_init(&real->json, JSON_REAL); + + real->value = value; + return &real->json; +} + +double json_real_value(const json_t *json) +{ + if(!json_is_real(json)) + return 0; + + return json_to_real(json)->value; +} + +int json_real_set(json_t *json, double value) +{ + if(!json_is_real(json)) + return 0; + + json_to_real(json)->value = value; + + return 0; +} + +static void json_delete_real(json_real_t *real) +{ + free(real); +} + +static int json_real_equal(json_t *real1, json_t *real2) +{ + return json_real_value(real1) == json_real_value(real2); +} + +static json_t *json_real_copy(json_t *real) +{ + return json_real(json_real_value(real)); +} + + +/*** number ***/ + +double json_number_value(const json_t *json) +{ + if(json_is_integer(json)) + return json_integer_value(json); + else if(json_is_real(json)) + return json_real_value(json); + else + return 0.0; +} + + +/*** simple values ***/ + +json_t *json_true(void) +{ + static json_t the_true = { + JSON_TRUE, + (unsigned int)-1 + }; + return &the_true; +} + + +json_t *json_false(void) +{ + static json_t the_false = { + JSON_FALSE, + (unsigned int)-1 + }; + return &the_false; +} + + +json_t *json_null(void) +{ + static json_t the_null = { + JSON_NULL, + (unsigned int)-1 + }; + return &the_null; +} + + +/*** deletion ***/ + +void json_delete(json_t *json) +{ + if(json_is_object(json)) + json_delete_object(json_to_object(json)); + + else if(json_is_array(json)) + json_delete_array(json_to_array(json)); + + else if(json_is_string(json)) + 
json_delete_string(json_to_string(json)); + + else if(json_is_integer(json)) + json_delete_integer(json_to_integer(json)); + + else if(json_is_real(json)) + json_delete_real(json_to_real(json)); + + /* json_delete is not called for true, false or null */ +} + + +/*** equality ***/ + +int json_equal(json_t *json1, json_t *json2) +{ + if(!json1 || !json2) + return 0; + + if(json_typeof(json1) != json_typeof(json2)) + return 0; + + /* this covers true, false and null as they are singletons */ + if(json1 == json2) + return 1; + + if(json_is_object(json1)) + return json_object_equal(json1, json2); + + if(json_is_array(json1)) + return json_array_equal(json1, json2); + + if(json_is_string(json1)) + return json_string_equal(json1, json2); + + if(json_is_integer(json1)) + return json_integer_equal(json1, json2); + + if(json_is_real(json1)) + return json_real_equal(json1, json2); + + return 0; +} + + +/*** copying ***/ + +json_t *json_copy(json_t *json) +{ + if(!json) + return NULL; + + if(json_is_object(json)) + return json_object_copy(json); + + if(json_is_array(json)) + return json_array_copy(json); + + if(json_is_string(json)) + return json_string_copy(json); + + if(json_is_integer(json)) + return json_integer_copy(json); + + if(json_is_real(json)) + return json_real_copy(json); + + if(json_is_true(json) || json_is_false(json) || json_is_null(json)) + return json; + + return NULL; +} + +json_t *json_deep_copy(json_t *json) +{ + if(!json) + return NULL; + + if(json_is_object(json)) + return json_object_deep_copy(json); + + if(json_is_array(json)) + return json_array_deep_copy(json); + + /* for the rest of the types, deep copying doesn't differ from + shallow copying */ + + if(json_is_string(json)) + return json_string_copy(json); + + if(json_is_integer(json)) + return json_integer_copy(json); + + if(json_is_real(json)) + return json_real_copy(json); + + if(json_is_true(json) || json_is_false(json) || json_is_null(json)) + return json; + + return NULL; +} diff --git 
a/compat/stdbool.h b/compat/stdbool.h new file mode 100644 index 0000000..31d0456 --- /dev/null +++ b/compat/stdbool.h @@ -0,0 +1,6 @@ +#pragma once + +#define false 0 +#define true 1 + +#define bool int diff --git a/compat/sys/time.h b/compat/sys/time.h new file mode 100644 index 0000000..0326e1d --- /dev/null +++ b/compat/sys/time.h @@ -0,0 +1,11 @@ +#pragma once +#ifdef __cplusplus +extern "C" +{ +#endif +int gettimeofday(struct timeval *tv, struct timezone *tz); +void usleep(__int64 usec); +#ifdef __cplusplus +} +#endif +typedef __int64 useconds_t; diff --git a/compat/thrust/CHANGELOG b/compat/thrust/CHANGELOG new file mode 100644 index 0000000..110c668 --- /dev/null +++ b/compat/thrust/CHANGELOG @@ -0,0 +1,662 @@ +####################################### +# Thrust v1.7.0 # +####################################### + +Summary + Thrust 1.7.0 introduces a new interface for controlling algorithm execution as + well as several new algorithms and performance improvements. With this new + interface, users may directly control how algorithms execute as well as details + such as the allocation of temporary storage. Key/value versions of thrust::merge + and the set operation algorithms have been added, as well as stencil versions of + partitioning algorithms. thrust::tabulate has been introduced to tabulate the + values of functions taking integers. For 32b types, new CUDA merge and set + operations provide 2-15x faster performance while a new CUDA comparison sort + provides 1.3-4x faster performance. Finally, a new TBB reduce_by_key implementation + provides 80% faster performance. + +Breaking API Changes + Dispatch + Custom user backend systems' tag types must now inherit from the corresponding system's execution_policy template (e.g. thrust::cuda::execution_policy) instead + of the tag struct (e.g. thrust::cuda::tag). Otherwise, algorithm specializations will silently go unfound during dispatch. 
+ See examples/minimal_custom_backend.cu and examples/cuda/fallback_allocator.cu for usage examples. + + thrust::advance and thrust::distance are no longer dispatched based on iterator system type and thus may no longer be customized. + + Iterators + iterator_facade and iterator_adaptor's Pointer template parameters have been eliminated. + iterator_adaptor has been moved into the thrust namespace (previously thrust::experimental::iterator_adaptor). + iterator_facade has been moved into the thrust namespace (previously thrust::experimental::iterator_facade). + iterator_core_access has been moved into the thrust namespace (previously thrust::experimental::iterator_core_access). + All iterators' nested pointer typedef (the type of the result of operator->) is now void instead of a pointer type to indicate that such expressions are currently impossible. + Floating point counting_iterators' nested difference_type typedef is now a signed integral type instead of a floating point type. + + Other + normal_distribution has been moved into the thrust::random namespace (previously thrust::random::experimental::normal_distribution). + Placeholder expressions may no longer include the comma operator. + +New Features + Execution Policies + Users may directly control the dispatch of algorithm invocations with optional execution policy arguments. + For example, instead of wrapping raw pointers allocated by cudaMalloc with thrust::device_ptr, the thrust::device execution_policy may be passed as an argument to an algorithm invocation to enable CUDA execution. 
+ The following execution policies are supported in this version: + + thrust::host + thrust::device + thrust::cpp::par + thrust::cuda::par + thrust::omp::par + thrust::tbb::par + + Algorithms + free + get_temporary_buffer + malloc + merge_by_key + partition with stencil + partition_copy with stencil + return_temporary_buffer + set_difference_by_key + set_intersection_by_key + set_symmetric_difference_by_key + set_union_by_key + stable_partition with stencil + stable_partition_copy with stencil + tabulate + +New Examples + uninitialized_vector demonstrates how to use a custom allocator to avoid the automatic initialization of elements in thrust::device_vector. + +Other Enhancements + Authors of custom backend systems may manipulate arbitrary state during algorithm dispatch by incorporating it into their execution_policy parameter. + Users may control the allocation of temporary storage during algorithm execution by passing standard allocators as parameters via execution policies such as thrust::device. + THRUST_DEVICE_SYSTEM_CPP has been added as a compile-time target for the device backend. + CUDA merge performance is 2-15x faster. + CUDA comparison sort performance is 1.3-4x faster. + CUDA set operation performance is 1.5-15x faster. + TBB reduce_by_key performance is 80% faster. + Several algorithms have been parallelized with TBB. + Support for user allocators in vectors has been improved. + The sparse_vector example is now implemented with merge_by_key instead of sort_by_key. + Warnings have been eliminated in various contexts. + Warnings about __host__ or __device__-only functions called from __host__ __device__ functions have been eliminated in various contexts. + Documentation about algorithm requirements has been improved. + Simplified the minimal_custom_backend example. + Simplified the cuda/custom_temporary_allocation example. + Simplified the cuda/fallback_allocator example. 
+ +Bug Fixes + #248 fix broken counting_iterator behavior with OpenMP + #231, #209 fix set operation failures with CUDA + #187 fix incorrect occupancy calculation with CUDA + #153 fix broken multigpu behavior with CUDA + #142 eliminate warning produced by thrust::random::taus88 and MSVC 2010 + #208 correctly initialize elements in temporary storage when necessary + #16 fix compilation error when sorting bool with CUDA + #10 fix ambiguous overloads of reinterpret_tag + +Known Issues + g++ versions 4.3 and lower may fail to dispatch thrust::get_temporary_buffer correctly causing infinite recursion in examples such as cuda/custom_temporary_allocation. + +Acknowledgments + Thanks to Sean Baxter, Bryan Catanzaro, and Manjunath Kudlur for contributing a faster merge implementation for CUDA. + Thanks to Sean Baxter for contributing a faster set operation implementation for CUDA. + Thanks to Cliff Woolley for contributing a correct occupancy calculation algorithm. + +####################################### +# Thrust v1.6.0 # +####################################### + +Summary + Thrust v1.6.0 provides an interface for customization and extension and a new + backend system based on the Threading Building Blocks library. With this + new interface, programmers may customize the behavior of specific algorithms + as well as control the allocation of temporary storage or invent entirely new + backends. These enhancements also allow multiple different backend systems + such as CUDA and OpenMP to coexist within a single program. Support for TBB + allows Thrust programs to integrate more naturally into applications which + may already employ the TBB task scheduler. 
+ +Breaking API Changes + The header <thrust/experimental/cuda/pinned_allocator.h> has been moved to <thrust/system/cuda/experimental/pinned_allocator.h> + thrust::experimental::cuda::pinned_allocator has been moved to thrust::cuda::experimental::pinned_allocator + The macro THRUST_DEVICE_BACKEND has been renamed THRUST_DEVICE_SYSTEM + The macro THRUST_DEVICE_BACKEND_CUDA has been renamed THRUST_DEVICE_SYSTEM_CUDA + The macro THRUST_DEVICE_BACKEND_OMP has been renamed THRUST_DEVICE_SYSTEM_OMP + thrust::host_space_tag has been renamed thrust::host_system_tag + thrust::device_space_tag has been renamed thrust::device_system_tag + thrust::any_space_tag has been renamed thrust::any_system_tag + thrust::iterator_space has been renamed thrust::iterator_system + + +New Features + Backend Systems + Threading Building Blocks (TBB) is now supported + Functions + for_each_n + raw_reference_cast + Types + pointer + reference + +New Examples + cuda/custom_temporary_allocation + cuda/fallback_allocator + device_ptr + expand + minimal_custom_backend + raw_reference_cast + set_operations + +Other Enhancements + thrust::for_each now returns the end of the input range similar to most other algorithms + thrust::pair and thrust::tuple have swap functionality + all CUDA algorithms now support large data types + iterators may be dereferenced in user __device__ or __global__ functions + the safe use of different backend systems is now possible within a single binary + +Bug Fixes + #469 min_element and max_element algorithms no longer require a const comparison operator + +Known Issues + cudafe++.exe may crash when parsing TBB headers on Windows. 
+ +####################################### +# Thrust v1.5.3 # +####################################### + +Summary + Small bug fixes + +Bug Fixes + Avoid warnings about potential race due to __shared__ non-POD variable + +####################################### +# Thrust v1.5.2 # +####################################### + +Summary + Small bug fixes + +Bug Fixes + Fixed warning about C-style initialization of structures + +####################################### +# Thrust v1.5.1 # +####################################### + +Summary + Small bug fixes + +Bug Fixes + Sorting data referenced by permutation_iterators on CUDA produces invalid results + +####################################### +# Thrust v1.5.0 # +####################################### + +Summary + Thrust v1.5.0 introduces new programmer productivity and performance + enhancements. New functionality for creating anonymous "lambda" functions has + been added. A faster host sort provides 2-10x faster performance for sorting + arithmetic types on (single-threaded) CPUs. A new OpenMP sort provides + 2.5x-3.0x speedup over the host sort using a quad-core CPU. When sorting + arithmetic types with the OpenMP backend the combined performance improvement + is 5.9x for 32-bit integers and ranges from 3.0x (64-bit types) to 14.2x + (8-bit types). A new CUDA reduce_by_key implementation provides 2-3x faster + performance. + +Breaking API Changes + device_ptr<void> no longer unsafely converts to device_ptr<T> without an + explicit cast. Use the expression + device_pointer_cast(static_cast<int*>(void_ptr.get())) + to convert, for example, device_ptr<void> to device_ptr<int>. 
+ +New Features + Functions + stencil-less transform_if + + Types + lambda placeholders + +New Examples + lambda + +Other Enhancements + host sort is 2-10x faster for arithmetic types + OMP sort provides speedup over host sort + reduce_by_key is 2-3x faster + reduce_by_key no longer requires O(N) temporary storage + CUDA scan algorithms are 10-40% faster + host_vector and device_vector are now documented + out-of-memory exceptions now provide detailed information from CUDART + improved histogram example + device_reference now has a specialized swap + reduce_by_key and scan algorithms are compatible with discard_iterator + +Removed Functionality + +Bug Fixes + #44 allow host_vector to compile when value_type uses __align__ + #198 allow adjacent_difference to permit safe in-situ operation + #303 make thrust thread-safe + #313 avoid race conditions in device_vector::insert + #314 avoid unintended adl invocation when dispatching copy + #365 fix merge and set operation failures + +Known Issues + None + +Acknowledgments + Thanks to Manjunath Kudlur for contributing his Carbon library, from which the lambda functionality is derived. + Thanks to Jean-Francois Bastien for suggesting a fix for issue 303. + +####################################### +# Thrust v1.4.0 # +####################################### + +Summary + Thrust v1.4.0 provides support for CUDA 4.0 in addition to many feature + and performance improvements. New set theoretic algorithms operating on + sorted sequences have been added. Additionally, a new fancy iterator + allows discarding redundant or otherwise unnecessary output from + algorithms, conserving memory storage and bandwidth. 
+ +Breaking API Changes + Eliminations + thrust/is_sorted.h + thrust/utility.h + thrust/set_intersection.h + thrust/experimental/cuda/ogl_interop_allocator.h and the functionality therein + thrust::deprecated::copy_when + thrust::deprecated::absolute_value + +New Features + Functions + copy_n + merge + set_difference + set_symmetric_difference + set_union + + Types + discard_iterator + + Device support + Compute Capability 2.1 GPUs + +New Examples + run_length_decoding + +Other Enhancements + Compilation warnings are substantially reduced in various contexts. + The compilation time of thrust::sort, thrust::stable_sort, thrust::sort_by_key, + and thrust::stable_sort_by_key are substantially reduced. + A fast sort implementation is used when sorting primitive types with thrust::greater. + The performance of thrust::set_intersection is improved. + The performance of thrust::fill is improved on SM 1.x devices. + A code example is now provided in each algorithm's documentation. + thrust::reverse now operates in-place + +Removed Functionality + thrust::deprecated::copy_when + thrust::deprecated::absolute_value + thrust::experimental::cuda::ogl_interop_allocator + thrust::gather and thrust::scatter from host to device and vice versa are no longer supported. + Operations which modify the elements of a thrust::device_vector are no longer + available from source code compiled without nvcc when the device backend is CUDA. + Instead, use the idiom from the cpp_interop example. + +Bug Fixes + #212 set_intersection works correctly for large input sizes. + #275 counting_iterator and constant_iterator work correctly with OpenMP as the + backend when compiling with optimization + #256 min and max correctly return their first argument as a tie-breaker + #248 NDEBUG is interpreted correctly + +Known Issues + nvcc may generate code containing warnings when compiling some Thrust algorithms. 
+ When compiling with -arch=sm_1x, some Thrust algorithms may cause nvcc to issue + benign pointer advisories. + When compiling with -arch=sm_1x and -G, some Thrust algorithms may fail to execute correctly. + thrust::inclusive_scan, thrust::exclusive_scan, thrust::inclusive_scan_by_key, + and thrust::exclusive_scan_by_key are currently incompatible with thrust::discard_iterator. + +Acknowledgments + Thanks to David Tarjan for improving the performance of set_intersection. + Thanks to Duane Merrill for continued help with sort. + Thanks to Nathan Whitehead for help with CUDA Toolkit integration. + +####################################### +# Thrust v1.3.0 # +####################################### + +Summary + Thrust v1.3.0 provides support for CUDA 3.2 in addition to many feature + and performance enhancements. + + Performance of the sort and sort_by_key algorithms is improved by as much + as 3x in certain situations. The performance of stream compaction algorithms, + such as copy_if, is improved by as much as 2x. Reduction performance is + also improved, particularly for small input sizes. + + CUDA errors are now converted to runtime exceptions using the system_error + interface. Combined with a debug mode, also new in v1.3, runtime errors + can be located with greater precision. + + Lastly, a few header files have been consolidated or renamed for clarity. + See the deprecations section below for additional details. 
+ + +Breaking API Changes + Promotions + thrust::experimental::inclusive_segmented_scan has been renamed thrust::inclusive_scan_by_key and exposes a different interface + thrust::experimental::exclusive_segmented_scan has been renamed thrust::exclusive_scan_by_key and exposes a different interface + thrust::experimental::partition_copy has been renamed thrust::partition_copy and exposes a different interface + thrust::next::gather has been renamed thrust::gather + thrust::next::gather_if has been renamed thrust::gather_if + thrust::unique_copy_by_key has been renamed thrust::unique_by_key_copy + Deprecations + thrust::copy_when has been renamed thrust::deprecated::copy_when + thrust::absolute_value has been renamed thrust::deprecated::absolute_value + The header thrust/set_intersection.h is now deprecated; use thrust/set_operations.h instead + The header thrust/utility.h is now deprecated; use thrust/swap.h instead + The header thrust/swap_ranges.h is now deprecated; use thrust/swap.h instead + Eliminations + thrust::deprecated::gather + thrust::deprecated::gather_if + thrust/experimental/arch.h and the functions therein + thrust/sorting/merge_sort.h + thrust/sorting/radix_sort.h + +New Features + Functions + exclusive_scan_by_key + find + find_if + find_if_not + inclusive_scan_by_key + is_partitioned + is_sorted_until + mismatch + partition_point + reverse + reverse_copy + stable_partition_copy + + Types + system_error and related types + experimental::cuda::ogl_interop_allocator + bit_and, bit_or, and bit_xor + + Device support + gf104-based GPUs + +New Examples + opengl_interop.cu + repeated_range.cu + simple_moving_average.cu + sparse_vector.cu + strided_range.cu + +Other Enhancements + Performance of thrust::sort and thrust::sort_by_key is substantially improved for primitive key types + Performance of thrust::copy_if is substantially improved + Performance of thrust::reduce and related reductions is improved + THRUST_DEBUG mode added + Callers of Thrust 
functions may detect error conditions by catching thrust::system_error, which derives from std::runtime_error + The number of compiler warnings generated by Thrust has been substantially reduced + Comparison sort now works correctly for input sizes > 32M + min & max usage no longer collides with <windows.h> definitions + Compiling against the OpenMP backend no longer requires nvcc + Performance of device_vector initialized in .cpp files is substantially improved in common cases + Performance of thrust::sort_by_key on the host is substantially improved + +Removed Functionality + nvcc 2.3 is no longer supported + +Bug Fixes + Debug device code now compiles correctly + thrust::uninitialized_copy and thrust::uninitialized_fill now dispatch constructors on the device rather than the host + +Known Issues + #212 set_intersection is known to fail for large input sizes + partition_point is known to fail for 64b types with nvcc 3.2 + +Acknowledgments + Thanks to Duane Merrill for contributing a fast CUDA radix sort implementation + Thanks to Erich Elsen for contributing an implementation of find_if + Thanks to Andrew Corrigan for contributing changes which allow the OpenMP backend to compile in the absence of nvcc + Thanks to Andrew Corrigan, Cliff Woolley, David Coeurjolly, Janick Martinez Esturo, John Bowers, Maxim Naumov, Michael Garland, and Ryuta Suzuki for bug reports + Thanks to Cliff Woolley for help with testing + +####################################### +# Thrust v1.2.1 # +####################################### + +Summary + Small fixes for compatibility with CUDA 3.1 + +Known Issues + inclusive_scan & exclusive_scan may fail with very large types + the Microsoft compiler may fail to compile code using both sort and binary search algorithms + uninitialized_fill & uninitialized_copy dispatch constructors on the host rather than the device + # 109 some algorithms may exhibit poor performance with the OpenMP backend with large numbers (>= 6) of CPU threads + 
default_random_engine::discard is not accelerated with nvcc 2.3 + nvcc 3.1 may fail to compile code using types derived from thrust::subtract_with_carry_engine, such as thrust::ranlux24 & thrust::ranlux48. + +####################################### +# Thrust v1.2.0 # +####################################### + +Summary + Thrust v1.2 introduces support for compilation to multicore CPUs + and the Ocelot virtual machine, and several new facilities for + pseudo-random number generation. New algorithms such as set + intersection and segmented reduction have also been added. Lastly, + improvements to the robustness of the CUDA backend ensure + correctness across a broad set of (uncommon) use cases. + +Breaking API Changes + thrust::gather's interface was incorrect and has been removed. + The old interface is deprecated but will be preserved for Thrust + version 1.2 at thrust::deprecated::gather & + thrust::deprecated::gather_if. The new interface is provided at + thrust::next::gather & thrust::next::gather_if. The new interface + will be promoted to thrust:: in Thrust version 1.3. For more details, + please refer to this thread: + http://groups.google.com/group/thrust-users/browse_thread/thread/f5f0583cb97b51fd + + The thrust::sorting namespace has been deprecated in favor of the + top-level sorting functions, such as thrust::sort() and + thrust::sort_by_key(). 
+ +New Features + Functions + reduce_by_key + set_intersection + tie + unique_copy + unique_by_key + unique_copy_by_key + + Types + Random Number Generation + discard_block_engine + default_random_engine + linear_congruential_engine + linear_feedback_shift_engine + minstd_rand + minstd_rand0 + normal_distribution (experimental) + ranlux24 + ranlux48 + ranlux24_base + ranlux48_base + subtract_with_carry_engine + taus88 + uniform_int_distribution + uniform_real_distribution + xor_combine_engine + Functionals + project1st + project2nd + + Fancy Iterators + permutation_iterator + reverse_iterator + + Device support + Add support for multicore CPUs via OpenMP + Add support for Fermi-class GPUs + Add support for Ocelot virtual machine + +New Examples + cpp_integration + histogram + mode + monte_carlo + monte_carlo_disjoint_sequences + padded_grid_reduction + permutation_iterator + row_sum + run_length_encoding + segmented_scan + stream_compaction + summary_statistics + transform_iterator + word_count + +Other Enhancements + vector functions operator!=, rbegin, crbegin, rend, crend, data, & shrink_to_fit + integer sorting performance is improved when max is large but (max - min) is small and when min is negative + performance of inclusive_scan() and exclusive_scan() is improved by 20-25% for primitive types + support for nvcc 3.0 + +Removed Functionality + removed support for equal between host & device sequences + removed support for gather() and scatter() between host & device sequences + +Bug Fixes + # 8 cause a compiler error if the required compiler is not found rather than a mysterious error at link time + # 42 device_ptr & device_reference are classes rather than structs, eliminating warnings on certain platforms + # 46 gather & scatter handle any space iterators correctly + # 51 thrust::experimental::arch functions gracefully handle unrecognized GPUs + # 52 avoid collisions with common user macros such as BLOCK_SIZE + # 62 provide better documentation for 
device_reference + # 68 allow built-in CUDA vector types to work with device_vector in pure C++ mode + # 102 eliminated a race condition in device_vector::erase + various compilation warnings eliminated + +Known Issues + inclusive_scan & exclusive_scan may fail with very large types + the Microsoft compiler may fail to compile code using both sort and binary search algorithms + uninitialized_fill & uninitialized_copy dispatch constructors on the host rather than the device + # 109 some algorithms may exhibit poor performance with the OpenMP backend with large numbers (>= 6) of CPU threads + default_random_engine::discard is not accelerated with nvcc 2.3 + +Acknowledgments + Thanks to Gregory Diamos for contributing a CUDA implementation of set_intersection + Thanks to Ryuta Suzuki & Gregory Diamos for rigorously testing Thrust's unit tests and examples against Ocelot + Thanks to Tom Bradley for contributing an implementation of normal_distribution + Thanks to Joseph Rhoads for contributing the example summary_statistics + +####################################### +# Thrust v1.1.1 # +####################################### + +Summary + Small fixes for compatibility with CUDA 2.3a and Mac OSX Snow Leopard. + +####################################### +# Thrust v1.1.0 # +####################################### + +Summary + Thrust v1.1 introduces fancy iterators, binary search functions, and + several specialized reduction functions. Experimental support for + segmented scan has also been added. 
+ +Breaking API Changes + counting_iterator has been moved into the thrust namespace (previously thrust::experimental) + +New Features + Functions + copy_if + lower_bound + upper_bound + vectorized lower_bound + vectorized upper_bound + equal_range + binary_search + vectorized binary_search + all_of + any_of + none_of + minmax_element + advance + inclusive_segmented_scan (experimental) + exclusive_segmented_scan (experimental) + + Types + pair + tuple + device_malloc_allocator + + Fancy Iterators + constant_iterator + counting_iterator + transform_iterator + zip_iterator + +New Examples + computing the maximum absolute difference between vectors + computing the bounding box of a two-dimensional point set + sorting multiple arrays together (lexicographical sorting) + constructing a summed area table + using zip_iterator to mimic an array of structs + using constant_iterator to increment array values + +Other Enhancements + added pinned memory allocator (experimental) + added more methods to host_vector & device_vector (issue #4) + added variant of remove_if with a stencil argument (issue #29) + scan and reduce use cudaFuncGetAttributes to determine grid size + exceptions are reported when temporary device arrays cannot be allocated + +Bug Fixes + #5 make vector work for larger data types + #9 stable_partition_copy doesn't respect OutputIterator concept semantics + #10 scans should return OutputIterator + #16 make algorithms work for larger data types + #27 dispatch radix_sort even when comp=less is explicitly provided + +Known Issues + Using functors with Thrust entry points may not compile on Mac OSX with gcc-4.0.1 + uninitialized_copy & uninitialized_fill dispatch constructors on the host rather than the device. 
+ inclusive_scan, inclusive_scan_by_key, exclusive_scan, and exclusive_scan_by_key may fail when used with large types with the CUDA 3.1 driver + + +####################################### +# Thrust v1.0.0 # +####################################### + +Breaking API changes + Rename top level namespace komrade to thrust. + Move partition_copy() & stable_partition_copy() into thrust::experimental namespace until we can easily provide the standard interface. + Rename range() to sequence() to avoid collision with Boost.Range. + Rename copy_if() to copy_when() due to semantic differences with C++0x copy_if(). + +New Features + Add C++0x style cbegin() & cend() methods to host_vector & device_vector. + Add transform_if function. + Add stencil versions of replace_if() & replace_copy_if(). + Allow counting_iterator to work with for_each(). + Allow types with constructors in comparison sort & reduce. + +Other Enhancements + merge_sort and stable_merge_sort are now 2 to 5x faster when executed on the parallel device. + +Bug fixes + Workaround an issue where an incremented iterator causes nvcc to crash. (Komrade issue #6) + Fix an issue where const_iterators could not be passed to transform. (Komrade issue #7) + diff --git a/compat/thrust/adjacent_difference.h b/compat/thrust/adjacent_difference.h new file mode 100644 index 0000000..772b5f9 --- /dev/null +++ b/compat/thrust/adjacent_difference.h @@ -0,0 +1,244 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file adjacent_difference.h + * \brief Compute difference between consecutive elements of a range + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \addtogroup transformations Transformations + * \{ + */ + + +/*! \p adjacent_difference calculates the differences of adjacent elements in the + * range [first, last). That is, \*first is assigned to + * \*result, and, for each iterator \p i in the range + * [first + 1, last), the difference of \*i and *(i - 1) + * is assigned to \*(result + (i - first)). + * + * This version of \p adjacent_difference uses operator- to calculate + * differences. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the input range. + * \param last The end of the input range. + * \param result The beginning of the output range. + * \return The iterator result + (last - first) + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator, + * and \c x and \c y are objects of \p InputIterator's \c value_type, then \c x - \c y is defined, + * and \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types, + * and the return type of x - y is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * + * \remark Note that \p result is permitted to be the same iterator as \p first. This is + * useful for computing differences "in place". + * + * The following code snippet demonstrates how to use \p adjacent_difference to compute + * the difference between adjacent elements of a range using the \p thrust::device execution policy: + * + * \code + * #include + * #include + * #include + * ... 
+ * int h_data[8] = {1, 2, 1, 2, 1, 2, 1, 2}; + * thrust::device_vector d_data(h_data, h_data + 8); + * thrust::device_vector d_result(8); + * + * thrust::adjacent_difference(thrust::device, d_data.begin(), d_data.end(), d_result.begin()); + * + * // d_result is now [1, 1, -1, 1, -1, 1, -1, 1] + * \endcode + * + * \see http://www.sgi.com/tech/stl/adjacent_difference.html + * \see inclusive_scan + */ +template +OutputIterator adjacent_difference(const thrust::detail::execution_policy_base &exec, + InputIterator first, InputIterator last, + OutputIterator result); + +/*! \p adjacent_difference calculates the differences of adjacent elements in the + * range [first, last). That is, *first is assigned to + * \*result, and, for each iterator \p i in the range + * [first + 1, last), binary_op(\*i, \*(i - 1)) is assigned to + * \*(result + (i - first)). + * + * This version of \p adjacent_difference uses the binary function \p binary_op to + * calculate differences. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the input range. + * \param last The end of the input range. + * \param result The beginning of the output range. + * \param binary_op The binary function used to compute differences. + * \return The iterator result + (last - first) + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type, + * and \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam BinaryFunction's \c result_type is convertible to a type in \p OutputIterator's set of \c value_types. 
+ * + * \remark Note that \p result is permitted to be the same iterator as \p first. This is + * useful for computing differences "in place". + * + * The following code snippet demonstrates how to use \p adjacent_difference to compute + * the sum between adjacent elements of a range using the \p thrust::device execution policy: + * + * \code + * #include + * #include + * #include + * #include + * ... + * int h_data[8] = {1, 2, 1, 2, 1, 2, 1, 2}; + * thrust::device_vector d_data(h_data, h_data + 8); + * thrust::device_vector d_result(8); + * + * thrust::adjacent_difference(thrust::device, d_data.begin(), d_data.end(), d_result.begin(), thrust::plus()); + * + * // d_result is now [1, 3, 3, 3, 3, 3, 3, 3] + * \endcode + * + * \see http://www.sgi.com/tech/stl/adjacent_difference.html + * \see inclusive_scan + */ +template +OutputIterator adjacent_difference(const thrust::detail::execution_policy_base &exec, + InputIterator first, InputIterator last, + OutputIterator result, + BinaryFunction binary_op); + +/*! \p adjacent_difference calculates the differences of adjacent elements in the + * range [first, last). That is, \*first is assigned to + * \*result, and, for each iterator \p i in the range + * [first + 1, last), the difference of \*i and *(i - 1) + * is assigned to \*(result + (i - first)). + * + * This version of \p adjacent_difference uses operator- to calculate + * differences. + * + * \param first The beginning of the input range. + * \param last The end of the input range. + * \param result The beginning of the output range. + * \return The iterator result + (last - first) + * + * \tparam InputIterator is a model of Input Iterator, + * and \c x and \c y are objects of \p InputIterator's \c value_type, then \c x - \c y is defined, + * and \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types, + * and the return type of x - y is convertible to a type in \p OutputIterator's set of \c value_types. 
+ * \tparam OutputIterator is a model of Output Iterator. + * + * \remark Note that \p result is permitted to be the same iterator as \p first. This is + * useful for computing differences "in place". + * + * The following code snippet demonstrates how to use \p adjacent_difference to compute + * the difference between adjacent elements of a range. + * + * \code + * #include + * #include + * ... + * int h_data[8] = {1, 2, 1, 2, 1, 2, 1, 2}; + * thrust::device_vector d_data(h_data, h_data + 8); + * thrust::device_vector d_result(8); + * + * thrust::adjacent_difference(d_data.begin(), d_data.end(), d_result.begin()); + * + * // d_result is now [1, 1, -1, 1, -1, 1, -1, 1] + * \endcode + * + * \see http://www.sgi.com/tech/stl/adjacent_difference.html + * \see inclusive_scan + */ +template +OutputIterator adjacent_difference(InputIterator first, InputIterator last, + OutputIterator result); + +/*! \p adjacent_difference calculates the differences of adjacent elements in the + * range [first, last). That is, *first is assigned to + * \*result, and, for each iterator \p i in the range + * [first + 1, last), binary_op(\*i, \*(i - 1)) is assigned to + * \*(result + (i - first)). + * + * This version of \p adjacent_difference uses the binary function \p binary_op to + * calculate differences. + * + * \param first The beginning of the input range. + * \param last The end of the input range. + * \param result The beginning of the output range. + * \param binary_op The binary function used to compute differences. + * \return The iterator result + (last - first) + * + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type, + * and \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. 
+ * \tparam BinaryFunction's \c result_type is convertible to a type in \p OutputIterator's set of \c value_types. + * + * \remark Note that \p result is permitted to be the same iterator as \p first. This is + * useful for computing differences "in place". + * + * The following code snippet demonstrates how to use \p adjacent_difference to compute + * the sum between adjacent elements of a range. + * + * \code + * #include + * #include + * #include + * ... + * int h_data[8] = {1, 2, 1, 2, 1, 2, 1, 2}; + * thrust::device_vector d_data(h_data, h_data + 8); + * thrust::device_vector d_result(8); + * + * thrust::adjacent_difference(d_data.begin(), d_data.end(), d_result.begin(), thrust::plus()); + * + * // d_result is now [1, 3, 3, 3, 3, 3, 3, 3] + * \endcode + * + * \see http://www.sgi.com/tech/stl/adjacent_difference.html + * \see inclusive_scan + */ +template +OutputIterator adjacent_difference(InputIterator first, InputIterator last, + OutputIterator result, + BinaryFunction binary_op); + +/*! \} + */ + +} // end namespace thrust + +#include + diff --git a/compat/thrust/advance.h b/compat/thrust/advance.h new file mode 100644 index 0000000..e7f60b0 --- /dev/null +++ b/compat/thrust/advance.h @@ -0,0 +1,73 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file advance.h + * \brief Advance an iterator by a given distance. + */ + +#pragma once + +#include + +namespace thrust +{ + + +/*! 
\addtogroup iterators + * \{ + */ + +/*! \p advance(i, n) increments the iterator \p i by the distance \p n. + * If n > 0 it is equivalent to executing ++i \p n + * times, and if n < 0 it is equivalent to executing --i + * \p n times. If n == 0, the call has no effect. + * + * \param i The iterator to be advanced. + * \param n The distance by which to advance the iterator. + * + * \tparam InputIterator is a model of Input Iterator. + * \tparam Distance is an integral type that is convertible to \p InputIterator's distance type. + * + * \pre \p n shall be negative only for bidirectional and random access iterators. + * + * The following code snippet demonstrates how to use \p advance to increment + * an iterator a given number of times. + * + * \code + * #include + * #include + * ... + * thrust::device_vector vec(13); + * thrust::device_vector::iterator iter = vec.begin(); + * + * thrust::advance(iter, 7); + * + * // iter - vec.begin() == 7 + * \endcode + * + * \see http://www.sgi.com/tech/stl/advance.html + */ +template +void advance(InputIterator& i, Distance n); + +/*! \} // end iterators + */ + +} // end thrust + +#include + diff --git a/compat/thrust/binary_search.h b/compat/thrust/binary_search.h new file mode 100644 index 0000000..d2ac5a6 --- /dev/null +++ b/compat/thrust/binary_search.h @@ -0,0 +1,1888 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! 
\file binary_search.h + * \brief Search for values in sorted ranges. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ + + +/*! \addtogroup algorithms + */ + + +/*! \addtogroup searching + * \ingroup algorithms + * \{ + */ + + +/*! \addtogroup binary_search Binary Search + * \ingroup searching + * \{ + */ + + +////////////////////// +// Scalar Functions // +////////////////////// + + +/*! \p lower_bound is a version of binary search: it attempts to find + * the element value in an ordered range [first, last). + * Specifically, it returns the first position where value could be + * inserted without violating the ordering. This version of + * \p lower_bound uses operator< for comparison and returns + * the furthermost iterator \c i in [first, last) such that, + * for every iterator \c j in [first, i), *j < value. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param value The value to be searched. + * \return The furthermost iterator \c i, such that *i < value. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam LessThanComparable is a model of LessThanComparable. + * + * The following code snippet demonstrates how to use \p lower_bound + * to search for values in a ordered range using the \p thrust::device execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... 
+ * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::lower_bound(thrust::device, input.begin(), input.end(), 0); // returns input.begin() + * thrust::lower_bound(thrust::device, input.begin(), input.end(), 1); // returns input.begin() + 1 + * thrust::lower_bound(thrust::device, input.begin(), input.end(), 2); // returns input.begin() + 1 + * thrust::lower_bound(thrust::device, input.begin(), input.end(), 3); // returns input.begin() + 2 + * thrust::lower_bound(thrust::device, input.begin(), input.end(), 8); // returns input.begin() + 4 + * thrust::lower_bound(thrust::device, input.begin(), input.end(), 9); // returns input.end() + * \endcode + * + * \see http://www.sgi.com/tech/stl/lower_bound.html + * \see \p upper_bound + * \see \p equal_range + * \see \p binary_search + */ +template +ForwardIterator lower_bound(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const LessThanComparable &value); + + +/*! \p lower_bound is a version of binary search: it attempts to find + * the element value in an ordered range [first, last). + * Specifically, it returns the first position where value could be + * inserted without violating the ordering. This version of + * \p lower_bound uses operator< for comparison and returns + * the furthermost iterator \c i in [first, last) such that, + * for every iterator \c j in [first, i), *j < value. + * + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param value The value to be searched. + * \return The furthermost iterator \c i, such that *i < value. + * + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam LessThanComparable is a model of LessThanComparable. + * + * The following code snippet demonstrates how to use \p lower_bound + * to search for values in a ordered range. 
+ * + * \code + * #include + * #include + * ... + * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::lower_bound(input.begin(), input.end(), 0); // returns input.begin() + * thrust::lower_bound(input.begin(), input.end(), 1); // returns input.begin() + 1 + * thrust::lower_bound(input.begin(), input.end(), 2); // returns input.begin() + 1 + * thrust::lower_bound(input.begin(), input.end(), 3); // returns input.begin() + 2 + * thrust::lower_bound(input.begin(), input.end(), 8); // returns input.begin() + 4 + * thrust::lower_bound(input.begin(), input.end(), 9); // returns input.end() + * \endcode + * + * \see http://www.sgi.com/tech/stl/lower_bound.html + * \see \p upper_bound + * \see \p equal_range + * \see \p binary_search + */ +template +ForwardIterator lower_bound(ForwardIterator first, + ForwardIterator last, + const LessThanComparable& value); + + +/*! \p lower_bound is a version of binary search: it attempts to find + * the element value in an ordered range [first, last). + * Specifically, it returns the first position where value could be + * inserted without violating the ordering. This version of + * \p lower_bound uses function object \c comp for comparison + * and returns the furthermost iterator \c i in [first, last) + * such that, for every iterator \c j in [first, i), + * comp(*j, value) is \c true. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param value The value to be searched. + * \param comp The comparison operator. + * \return The furthermost iterator \c i, such that comp(*i, value) is \c true. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator. 
+ * \tparam T is comparable to \p ForwardIterator's \c value_type. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. + * + * The following code snippet demonstrates how to use \p lower_bound + * to search for values in an ordered range using the \p thrust::device execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * #include + * ... + * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::lower_bound(thrust::device, input.begin(), input.end(), 0, thrust::less()); // returns input.begin() + * thrust::lower_bound(thrust::device, input.begin(), input.end(), 1, thrust::less()); // returns input.begin() + 1 + * thrust::lower_bound(thrust::device, input.begin(), input.end(), 2, thrust::less()); // returns input.begin() + 1 + * thrust::lower_bound(thrust::device, input.begin(), input.end(), 3, thrust::less()); // returns input.begin() + 2 + * thrust::lower_bound(thrust::device, input.begin(), input.end(), 8, thrust::less()); // returns input.begin() + 4 + * thrust::lower_bound(thrust::device, input.begin(), input.end(), 9, thrust::less()); // returns input.end() + * \endcode + * + * \see http://www.sgi.com/tech/stl/lower_bound.html + * \see \p upper_bound + * \see \p equal_range + * \see \p binary_search + */ +template +ForwardIterator lower_bound(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const T &value, + StrictWeakOrdering comp); + + +/*! \p lower_bound is a version of binary search: it attempts to find + * the element value in an ordered range [first, last). + * Specifically, it returns the first position where value could be + * inserted without violating the ordering. This version of + * \p lower_bound uses function object \c comp for comparison + * and returns the furthermost iterator \c i in [first, last) + * such that, for every iterator \c j in [first, i), + * comp(*j, value) is \c true. + * + * \param first The beginning of the ordered sequence. 
+ * \param last The end of the ordered sequence. + * \param value The value to be searched. + * \param comp The comparison operator. + * \return The furthermost iterator \c i, such that comp(*i, value) is \c true. + * + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam T is comparable to \p ForwardIterator's \c value_type. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. + * + * The following code snippet demonstrates how to use \p lower_bound + * to search for values in a ordered range. + * + * \code + * #include + * #include + * #include + * ... + * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::lower_bound(input.begin(), input.end(), 0, thrust::less()); // returns input.begin() + * thrust::lower_bound(input.begin(), input.end(), 1, thrust::less()); // returns input.begin() + 1 + * thrust::lower_bound(input.begin(), input.end(), 2, thrust::less()); // returns input.begin() + 1 + * thrust::lower_bound(input.begin(), input.end(), 3, thrust::less()); // returns input.begin() + 2 + * thrust::lower_bound(input.begin(), input.end(), 8, thrust::less()); // returns input.begin() + 4 + * thrust::lower_bound(input.begin(), input.end(), 9, thrust::less()); // returns input.end() + * \endcode + * + * \see http://www.sgi.com/tech/stl/lower_bound.html + * \see \p upper_bound + * \see \p equal_range + * \see \p binary_search + */ +template +ForwardIterator lower_bound(ForwardIterator first, + ForwardIterator last, + const T& value, + StrictWeakOrdering comp); + + +/*! \p upper_bound is a version of binary search: it attempts to find + * the element value in an ordered range [first, last). + * Specifically, it returns the last position where value could be + * inserted without violating the ordering. 
This version of + * \p upper_bound uses operator< for comparison and returns + * the furthermost iterator \c i in [first, last) such that, + * for every iterator \c j in [first, i), value < *j + * is \c false. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param value The value to be searched. + * \return The furthermost iterator \c i, such that value < *i is \c false. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam LessThanComparable is a model of LessThanComparable. + * + * The following code snippet demonstrates how to use \p upper_bound + * to search for values in a ordered range using the \p thrust::device execution policy for parallelism: + * + * \code + * #include + * #include + * #include + * ... 
+ * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::upper_bound(thrust::device, input.begin(), input.end(), 0); // returns input.begin() + 1 + * thrust::upper_bound(thrust::device, input.begin(), input.end(), 1); // returns input.begin() + 1 + * thrust::upper_bound(thrust::device, input.begin(), input.end(), 2); // returns input.begin() + 2 + * thrust::upper_bound(thrust::device, input.begin(), input.end(), 3); // returns input.begin() + 2 + * thrust::upper_bound(thrust::device, input.begin(), input.end(), 8); // returns input.end() + * thrust::upper_bound(thrust::device, input.begin(), input.end(), 9); // returns input.end() + * \endcode + * + * \see http://www.sgi.com/tech/stl/upper_bound.html + * \see \p lower_bound + * \see \p equal_range + * \see \p binary_search + */ +template +ForwardIterator upper_bound(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const LessThanComparable &value); + + +/*! \p upper_bound is a version of binary search: it attempts to find + * the element value in an ordered range [first, last). + * Specifically, it returns the last position where value could be + * inserted without violating the ordering. This version of + * \p upper_bound uses operator< for comparison and returns + * the furthermost iterator \c i in [first, last) such that, + * for every iterator \c j in [first, i), value < *j + * is \c false. + * + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param value The value to be searched. + * \return The furthermost iterator \c i, such that value < *i is \c false. + * + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam LessThanComparable is a model of LessThanComparable. + * + * The following code snippet demonstrates how to use \p upper_bound + * to search for values in a ordered range. 
+ * + * \code + * #include + * #include + * ... + * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::upper_bound(input.begin(), input.end(), 0); // returns input.begin() + 1 + * thrust::upper_bound(input.begin(), input.end(), 1); // returns input.begin() + 1 + * thrust::upper_bound(input.begin(), input.end(), 2); // returns input.begin() + 2 + * thrust::upper_bound(input.begin(), input.end(), 3); // returns input.begin() + 2 + * thrust::upper_bound(input.begin(), input.end(), 8); // returns input.end() + * thrust::upper_bound(input.begin(), input.end(), 9); // returns input.end() + * \endcode + * + * \see http://www.sgi.com/tech/stl/upper_bound.html + * \see \p lower_bound + * \see \p equal_range + * \see \p binary_search + */ +template +ForwardIterator upper_bound(ForwardIterator first, + ForwardIterator last, + const LessThanComparable& value); + + +/*! \p upper_bound is a version of binary search: it attempts to find + * the element value in an ordered range [first, last). + * Specifically, it returns the last position where value could be + * inserted without violating the ordering. This version of + * \p upper_bound uses function object \c comp for comparison and returns + * the furthermost iterator \c i in [first, last) such that, + * for every iterator \c j in [first, i), comp(value, *j) + * is \c false. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param value The value to be searched. + * \param comp The comparison operator. + * \return The furthermost iterator \c i, such that comp(value, *i) is \c false. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator. 
+ * \tparam T is comparable to \p ForwardIterator's \c value_type. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. + * + * The following code snippet demonstrates how to use \p upper_bound + * to search for values in a ordered range using the \p thrust::device execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * #include + * ... + * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::upper_bound(thrust::device, input.begin(), input.end(), 0, thrust::less()); // returns input.begin() + 1 + * thrust::upper_bound(thrust::device, input.begin(), input.end(), 1, thrust::less()); // returns input.begin() + 1 + * thrust::upper_bound(thrust::device, input.begin(), input.end(), 2, thrust::less()); // returns input.begin() + 2 + * thrust::upper_bound(thrust::device, input.begin(), input.end(), 3, thrust::less()); // returns input.begin() + 2 + * thrust::upper_bound(thrust::device, input.begin(), input.end(), 8, thrust::less()); // returns input.end() + * thrust::upper_bound(thrust::device, input.begin(), input.end(), 9, thrust::less()); // returns input.end() + * \endcode + * + * \see http://www.sgi.com/tech/stl/upper_bound.html + * \see \p lower_bound + * \see \p equal_range + * \see \p binary_search + */ +template +ForwardIterator upper_bound(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const T &value, + StrictWeakOrdering comp); + +/*! \p upper_bound is a version of binary search: it attempts to find + * the element value in an ordered range [first, last). + * Specifically, it returns the last position where value could be + * inserted without violating the ordering. 
This version of + * \p upper_bound uses function object \c comp for comparison and returns + * the furthermost iterator \c i in [first, last) such that, + * for every iterator \c j in [first, i), comp(value, *j) + * is \c false. + * + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param value The value to be searched. + * \param comp The comparison operator. + * \return The furthermost iterator \c i, such that comp(value, *i) is \c false. + * + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam T is comparable to \p ForwardIterator's \c value_type. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. + * + * The following code snippet demonstrates how to use \p upper_bound + * to search for values in a ordered range. + * + * \code + * #include + * #include + * #include + * ... + * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::upper_bound(input.begin(), input.end(), 0, thrust::less()); // returns input.begin() + 1 + * thrust::upper_bound(input.begin(), input.end(), 1, thrust::less()); // returns input.begin() + 1 + * thrust::upper_bound(input.begin(), input.end(), 2, thrust::less()); // returns input.begin() + 2 + * thrust::upper_bound(input.begin(), input.end(), 3, thrust::less()); // returns input.begin() + 2 + * thrust::upper_bound(input.begin(), input.end(), 8, thrust::less()); // returns input.end() + * thrust::upper_bound(input.begin(), input.end(), 9, thrust::less()); // returns input.end() + * \endcode + * + * \see http://www.sgi.com/tech/stl/upper_bound.html + * \see \p lower_bound + * \see \p equal_range + * \see \p binary_search + */ +template +ForwardIterator upper_bound(ForwardIterator first, + ForwardIterator last, + const T& value, + StrictWeakOrdering comp); + + +/*! 
\p binary_search is a version of binary search: it attempts to find + * the element value in an ordered range [first, last). + * It returns \c true if an element that is equivalent to \c value + * is present in [first, last) and \c false if no such element + * exists. Specifically, this version returns \c true if and only if + * there exists an iterator \c i in [first, last) such that + * *i < value and value < *i are both \c false. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param value The value to be searched. + * \return \c true if an equivalent element exists in [first, last), otherwise \c false. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam LessThanComparable is a model of LessThanComparable. + * + * The following code snippet demonstrates how to use \p binary_search + * to search for values in a ordered range using the \p thrust::device execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... 
+ * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::binary_search(thrust::device, input.begin(), input.end(), 0); // returns true + * thrust::binary_search(thrust::device, input.begin(), input.end(), 1); // returns false + * thrust::binary_search(thrust::device, input.begin(), input.end(), 2); // returns true + * thrust::binary_search(thrust::device, input.begin(), input.end(), 3); // returns false + * thrust::binary_search(thrust::device, input.begin(), input.end(), 8); // returns true + * thrust::binary_search(thrust::device, input.begin(), input.end(), 9); // returns false + * \endcode + * + * \see http://www.sgi.com/tech/stl/binary_search.html + * \see \p lower_bound + * \see \p upper_bound + * \see \p equal_range + */ +template +bool binary_search(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const LessThanComparable& value); + + +/*! \p binary_search is a version of binary search: it attempts to find + * the element value in an ordered range [first, last). + * It returns \c true if an element that is equivalent to \c value + * is present in [first, last) and \c false if no such element + * exists. Specifically, this version returns \c true if and only if + * there exists an iterator \c i in [first, last) such that + * *i < value and value < *i are both \c false. + * + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param value The value to be searched. + * \return \c true if an equivalent element exists in [first, last), otherwise \c false. + * + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam LessThanComparable is a model of LessThanComparable. + * + * The following code snippet demonstrates how to use \p binary_search + * to search for values in a ordered range. + * + * \code + * #include + * #include + * ... 
+ * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::binary_search(input.begin(), input.end(), 0); // returns true + * thrust::binary_search(input.begin(), input.end(), 1); // returns false + * thrust::binary_search(input.begin(), input.end(), 2); // returns true + * thrust::binary_search(input.begin(), input.end(), 3); // returns false + * thrust::binary_search(input.begin(), input.end(), 8); // returns true + * thrust::binary_search(input.begin(), input.end(), 9); // returns false + * \endcode + * + * \see http://www.sgi.com/tech/stl/binary_search.html + * \see \p lower_bound + * \see \p upper_bound + * \see \p equal_range + */ +template +bool binary_search(ForwardIterator first, + ForwardIterator last, + const LessThanComparable& value); + + +/*! \p binary_search is a version of binary search: it attempts to find + * the element value in an ordered range [first, last). + * It returns \c true if an element that is equivalent to \c value + * is present in [first, last) and \c false if no such element + * exists. Specifically, this version returns \c true if and only if + * there exists an iterator \c i in [first, last) such that + * comp(*i, value) and comp(value, *i) are both \c false. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param value The value to be searched. + * \param comp The comparison operator. + * \return \c true if an equivalent element exists in [first, last), otherwise \c false. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam T is comparable to \p ForwardIterator's \c value_type. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. 
+ * + * The following code snippet demonstrates how to use \p binary_search + * to search for values in a ordered range using the \p thrust::device execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * #include + * ... + * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::binary_search(thrust::device, input.begin(), input.end(), 0, thrust::less()); // returns true + * thrust::binary_search(thrust::device, input.begin(), input.end(), 1, thrust::less()); // returns false + * thrust::binary_search(thrust::device, input.begin(), input.end(), 2, thrust::less()); // returns true + * thrust::binary_search(thrust::device, input.begin(), input.end(), 3, thrust::less()); // returns false + * thrust::binary_search(thrust::device, input.begin(), input.end(), 8, thrust::less()); // returns true + * thrust::binary_search(thrust::device, input.begin(), input.end(), 9, thrust::less()); // returns false + * \endcode + * + * \see http://www.sgi.com/tech/stl/binary_search.html + * \see \p lower_bound + * \see \p upper_bound + * \see \p equal_range + */ +template +bool binary_search(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const T& value, + StrictWeakOrdering comp); + + +/*! \p binary_search is a version of binary search: it attempts to find + * the element value in an ordered range [first, last). + * It returns \c true if an element that is equivalent to \c value + * is present in [first, last) and \c false if no such element + * exists. Specifically, this version returns \c true if and only if + * there exists an iterator \c i in [first, last) such that + * comp(*i, value) and comp(value, *i) are both \c false. + * + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param value The value to be searched. 
+ * \param comp The comparison operator. + * \return \c true if an equivalent element exists in [first, last), otherwise \c false. + * + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam T is comparable to \p ForwardIterator's \c value_type. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. + * + * The following code snippet demonstrates how to use \p binary_search + * to search for values in a ordered range. + * + * \code + * #include + * #include + * #include + * ... + * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::binary_search(input.begin(), input.end(), 0, thrust::less()); // returns true + * thrust::binary_search(input.begin(), input.end(), 1, thrust::less()); // returns false + * thrust::binary_search(input.begin(), input.end(), 2, thrust::less()); // returns true + * thrust::binary_search(input.begin(), input.end(), 3, thrust::less()); // returns false + * thrust::binary_search(input.begin(), input.end(), 8, thrust::less()); // returns true + * thrust::binary_search(input.begin(), input.end(), 9, thrust::less()); // returns false + * \endcode + * + * \see http://www.sgi.com/tech/stl/binary_search.html + * \see \p lower_bound + * \see \p upper_bound + * \see \p equal_range + */ +template +bool binary_search(ForwardIterator first, + ForwardIterator last, + const T& value, + StrictWeakOrdering comp); + + +/*! \p equal_range is a version of binary search: it attempts to find + * the element value in an ordered range [first, last). The + * value returned by \p equal_range is essentially a combination of + * the values returned by \p lower_bound and \p upper_bound: it returns + * a \p pair of iterators \c i and \c j such that \c i is the first + * position where value could be inserted without violating the + * ordering and \c j is the last position where value could be inserted + * without violating the ordering. 
It follows that every element in the + * range [i, j) is equivalent to value, and that + * [i, j) is the largest subrange of [first, last) that + * has this property. + * + * This version of \p equal_range returns a \p pair of iterators + * [i, j), where \c i is the furthermost iterator in + * [first, last) such that, for every iterator \c k in + * [first, i), *k < value. \c j is the furthermost + * iterator in [first, last) such that, for every iterator + * \c k in [first, j), value < *k is \c false. + * For every iterator \c k in [i, j), neither + * value < *k nor *k < value is \c true. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param value The value to be searched. + * \return A \p pair of iterators [i, j) that define the range of equivalent elements. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam LessThanComparable is a model of LessThanComparable. + * + * The following code snippet demonstrates how to use \p equal_range + * to search for values in a ordered range using the \p thrust::device execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... 
+ * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::equal_range(thrust::device, input.begin(), input.end(), 0); // returns [input.begin(), input.begin() + 1) + * thrust::equal_range(thrust::device, input.begin(), input.end(), 1); // returns [input.begin() + 1, input.begin() + 1) + * thrust::equal_range(thrust::device, input.begin(), input.end(), 2); // returns [input.begin() + 1, input.begin() + 2) + * thrust::equal_range(thrust::device, input.begin(), input.end(), 3); // returns [input.begin() + 2, input.begin() + 2) + * thrust::equal_range(thrust::device, input.begin(), input.end(), 8); // returns [input.begin() + 4, input.end()) + * thrust::equal_range(thrust::device, input.begin(), input.end(), 9); // returns [input.end(), input.end()) + * \endcode + * + * \see http://www.sgi.com/tech/stl/equal_range.html + * \see \p lower_bound + * \see \p upper_bound + * \see \p binary_search + */ +template +thrust::pair +equal_range(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const LessThanComparable& value); + + +/*! \p equal_range is a version of binary search: it attempts to find + * the element value in an ordered range [first, last). The + * value returned by \p equal_range is essentially a combination of + * the values returned by \p lower_bound and \p upper_bound: it returns + * a \p pair of iterators \c i and \c j such that \c i is the first + * position where value could be inserted without violating the + * ordering and \c j is the last position where value could be inserted + * without violating the ordering. It follows that every element in the + * range [i, j) is equivalent to value, and that + * [i, j) is the largest subrange of [first, last) that + * has this property.
+ * + * This version of \p equal_range returns a \p pair of iterators + * [i, j), where \c i is the furthermost iterator in + * [first, last) such that, for every iterator \c k in + * [first, i), *k < value. \c j is the furthermost + * iterator in [first, last) such that, for every iterator + * \c k in [first, j), value < *k is \c false. + * For every iterator \c k in [i, j), neither + * value < *k nor *k < value is \c true. + * + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param value The value to be searched. + * \return A \p pair of iterators [i, j) that define the range of equivalent elements. + * + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam LessThanComparable is a model of LessThanComparable. + * + * The following code snippet demonstrates how to use \p equal_range + * to search for values in an ordered range. + * + * \code + * #include + * #include + * ... + * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::equal_range(input.begin(), input.end(), 0); // returns [input.begin(), input.begin() + 1) + * thrust::equal_range(input.begin(), input.end(), 1); // returns [input.begin() + 1, input.begin() + 1) + * thrust::equal_range(input.begin(), input.end(), 2); // returns [input.begin() + 1, input.begin() + 2) + * thrust::equal_range(input.begin(), input.end(), 3); // returns [input.begin() + 2, input.begin() + 2) + * thrust::equal_range(input.begin(), input.end(), 8); // returns [input.begin() + 4, input.end()) + * thrust::equal_range(input.begin(), input.end(), 9); // returns [input.end(), input.end()) + * \endcode + * + * \see http://www.sgi.com/tech/stl/equal_range.html + * \see \p lower_bound + * \see \p upper_bound + * \see \p binary_search + */ +template +thrust::pair +equal_range(ForwardIterator first, + ForwardIterator last, + const LessThanComparable& value); + + +/*! 
\p equal_range is a version of binary search: it attempts to find + * the element value in an ordered range [first, last). The + * value returned by \p equal_range is essentially a combination of + * the values returned by \p lower_bound and \p upper_bound: it returns + * a \p pair of iterators \c i and \c j such that \c i is the first + * position where value could be inserted without violating the + * ordering and \c j is the last position where value could be inserted + * without violating the ordering. It follows that every element in the + * range [i, j) is equivalent to value, and that + * [i, j) is the largest subrange of [first, last) that + * has this property. + * + * This version of \p equal_range returns a \p pair of iterators + * [i, j). \c i is the furthermost iterator in + * [first, last) such that, for every iterator \c k in + * [first, i), comp(*k, value) is \c true. + * \c j is the furthermost iterator in [first, last) such + * that, for every iterator \c k in [first, j), + * comp(value, *k) is \c false. For every iterator \c k + * in [i, j), neither comp(value, *k) nor + * comp(*k, value) is \c true. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param value The value to be searched. + * \param comp The comparison operator. + * \return A \p pair of iterators [i, j) that define the range of equivalent elements. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam T is comparable to \p ForwardIterator's \c value_type. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. 
+ * + * The following code snippet demonstrates how to use \p equal_range + * to search for values in an ordered range using the \p thrust::device execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * #include + * ... + * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::equal_range(thrust::device, input.begin(), input.end(), 0, thrust::less()); // returns [input.begin(), input.begin() + 1) + * thrust::equal_range(thrust::device, input.begin(), input.end(), 1, thrust::less()); // returns [input.begin() + 1, input.begin() + 1) + * thrust::equal_range(thrust::device, input.begin(), input.end(), 2, thrust::less()); // returns [input.begin() + 1, input.begin() + 2) + * thrust::equal_range(thrust::device, input.begin(), input.end(), 3, thrust::less()); // returns [input.begin() + 2, input.begin() + 2) + * thrust::equal_range(thrust::device, input.begin(), input.end(), 8, thrust::less()); // returns [input.begin() + 4, input.end()) + * thrust::equal_range(thrust::device, input.begin(), input.end(), 9, thrust::less()); // returns [input.end(), input.end()) + * \endcode + * + * \see http://www.sgi.com/tech/stl/equal_range.html + * \see \p lower_bound + * \see \p upper_bound + * \see \p binary_search + */ +template +thrust::pair +equal_range(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const T& value, + StrictWeakOrdering comp); + + +/*! \p equal_range is a version of binary search: it attempts to find + * the element value in an ordered range [first, last). 
The + * value returned by \p equal_range is essentially a combination of + * the values returned by \p lower_bound and \p upper_bound: it returns + * a \p pair of iterators \c i and \c j such that \c i is the first + * position where value could be inserted without violating the + * ordering and \c j is the last position where value could be inserted + * without violating the ordering. It follows that every element in the + * range [i, j) is equivalent to value, and that + * [i, j) is the largest subrange of [first, last) that + * has this property. + * + * This version of \p equal_range returns a \p pair of iterators + * [i, j). \c i is the furthermost iterator in + * [first, last) such that, for every iterator \c k in + * [first, i), comp(*k, value) is \c true. + * \c j is the furthermost iterator in [first, last) such + * that, for every iterator \c k in [first, j), + * comp(value, *k) is \c false. For every iterator \c k + * in [i, j), neither comp(value, *k) nor + * comp(*k, value) is \c true. + * + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param value The value to be searched. + * \param comp The comparison operator. + * \return A \p pair of iterators [i, j) that define the range of equivalent elements. + * + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam T is comparable to \p ForwardIterator's \c value_type. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. + * + * The following code snippet demonstrates how to use \p equal_range + * to search for values in an ordered range. + * + * \code + * #include + * #include + * #include + * ... 
+ * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::equal_range(input.begin(), input.end(), 0, thrust::less()); // returns [input.begin(), input.begin() + 1) + * thrust::equal_range(input.begin(), input.end(), 1, thrust::less()); // returns [input.begin() + 1, input.begin() + 1) + * thrust::equal_range(input.begin(), input.end(), 2, thrust::less()); // returns [input.begin() + 1, input.begin() + 2) + * thrust::equal_range(input.begin(), input.end(), 3, thrust::less()); // returns [input.begin() + 2, input.begin() + 2) + * thrust::equal_range(input.begin(), input.end(), 8, thrust::less()); // returns [input.begin() + 4, input.end()) + * thrust::equal_range(input.begin(), input.end(), 9, thrust::less()); // returns [input.end(), input.end()) + * \endcode + * + * \see http://www.sgi.com/tech/stl/equal_range.html + * \see \p lower_bound + * \see \p upper_bound + * \see \p binary_search + */ +template +thrust::pair +equal_range(ForwardIterator first, + ForwardIterator last, + const T& value, + StrictWeakOrdering comp); + + +/*! \addtogroup vectorized_binary_search Vectorized Searches + * \ingroup binary_search + * \{ + */ + + +////////////////////// +// Vector Functions // +////////////////////// + + +/*! \p lower_bound is a vectorized version of binary search: for each + * iterator \c v in [values_first, values_last) it attempts to + * find the value *v in an ordered range [first, last). + * Specifically, it returns the index of first position where value could + * be inserted without violating the ordering. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param values_first The beginning of the search values sequence. 
+ * \param values_last The end of the search values sequence. + * \param result The beginning of the output sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam InputIterator is a model of Input Iterator. + * and \c InputIterator's \c value_type is LessThanComparable. + * \tparam OutputIterator is a model of Output Iterator. + * and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type. + * + * \pre The ranges [first,last) and [result, result + (values_last - values_first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p lower_bound + * to search for multiple values in an ordered range using the \p thrust::device execution policy for + * parallelization: + * + * \code + * #include + * #include + * #include + * ... + * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::device_vector values(6); + * values[0] = 0; + * values[1] = 1; + * values[2] = 2; + * values[3] = 3; + * values[4] = 8; + * values[5] = 9; + * + * thrust::device_vector output(6); + * + * thrust::lower_bound(thrust::device, + * input.begin(), input.end(), + * values.begin(), values.end(), + * output.begin()); + * + * // output is now [0, 1, 1, 2, 4, 5] + * \endcode + * + * \see http://www.sgi.com/tech/stl/lower_bound.html + * \see \p upper_bound + * \see \p equal_range + * \see \p binary_search + */ +template +OutputIterator lower_bound(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator result); + + +/*! \p lower_bound is a vectorized version of binary search: for each + * iterator \c v in [values_first, values_last) it attempts to + * find the value *v in an ordered range [first, last). 
+ * Specifically, it returns the index of first position where value could + * be inserted without violating the ordering. + * + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param values_first The beginning of the search values sequence. + * \param values_last The end of the search values sequence. + * \param result The beginning of the output sequence. + * + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam InputIterator is a model of Input Iterator. + * and \c InputIterator's \c value_type is LessThanComparable. + * \tparam OutputIterator is a model of Output Iterator. + * and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type. + * + * \pre The ranges [first,last) and [result, result + (values_last - values_first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p lower_bound + * to search for multiple values in an ordered range. + * + * \code + * #include + * #include + * ... + * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::device_vector values(6); + * values[0] = 0; + * values[1] = 1; + * values[2] = 2; + * values[3] = 3; + * values[4] = 8; + * values[5] = 9; + * + * thrust::device_vector output(6); + * + * thrust::lower_bound(input.begin(), input.end(), + * values.begin(), values.end(), + * output.begin()); + * + * // output is now [0, 1, 1, 2, 4, 5] + * \endcode + * + * \see http://www.sgi.com/tech/stl/lower_bound.html + * \see \p upper_bound + * \see \p equal_range + * \see \p binary_search + */ +template +OutputIterator lower_bound(ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator result); + + +/*! 
\p lower_bound is a vectorized version of binary search: for each + * iterator \c v in [values_first, values_last) it attempts to + * find the value *v in an ordered range [first, last). + * Specifically, it returns the index of first position where value could + * be inserted without violating the ordering. This version of + * \p lower_bound uses function object \c comp for comparison. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param values_first The beginning of the search values sequence. + * \param values_last The end of the search values sequence. + * \param result The beginning of the output sequence. + * \param comp The comparison operator. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam InputIterator is a model of Input Iterator. + * and \c InputIterator's \c value_type is comparable to \p ForwardIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator. + * and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. + * + * \pre The ranges [first,last) and [result, result + (values_last - values_first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p lower_bound + * to search for multiple values in an ordered range using the \p thrust::device execution policy for + * parallelization: + * + * \code + * #include + * #include + * #include + * #include + * ... 
+ * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::device_vector values(6); + * values[0] = 0; + * values[1] = 1; + * values[2] = 2; + * values[3] = 3; + * values[4] = 8; + * values[5] = 9; + * + * thrust::device_vector output(6); + * + * thrust::lower_bound(thrust::device, + * input.begin(), input.end(), + * values.begin(), values.end(), + * output.begin(), + * thrust::less()); + * + * // output is now [0, 1, 1, 2, 4, 5] + * \endcode + * + * \see http://www.sgi.com/tech/stl/lower_bound.html + * \see \p upper_bound + * \see \p equal_range + * \see \p binary_search + */ +template +OutputIterator lower_bound(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator result, + StrictWeakOrdering comp); + + +/*! \p lower_bound is a vectorized version of binary search: for each + * iterator \c v in [values_first, values_last) it attempts to + * find the value *v in an ordered range [first, last). + * Specifically, it returns the index of first position where value could + * be inserted without violating the ordering. This version of + * \p lower_bound uses function object \c comp for comparison. + * + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param values_first The beginning of the search values sequence. + * \param values_last The end of the search values sequence. + * \param result The beginning of the output sequence. + * \param comp The comparison operator. + * + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam InputIterator is a model of Input Iterator. + * and \c InputIterator's \c value_type is comparable to \p ForwardIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator. 
+ * and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. + * + * \pre The ranges [first,last) and [result, result + (values_last - values_first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p lower_bound + * to search for multiple values in an ordered range. + * + * \code + * #include + * #include + * #include + * ... + * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::device_vector values(6); + * values[0] = 0; + * values[1] = 1; + * values[2] = 2; + * values[3] = 3; + * values[4] = 8; + * values[5] = 9; + * + * thrust::device_vector output(6); + * + * thrust::lower_bound(input.begin(), input.end(), + * values.begin(), values.end(), + * output.begin(), + * thrust::less()); + * + * // output is now [0, 1, 1, 2, 4, 5] + * \endcode + * + * \see http://www.sgi.com/tech/stl/lower_bound.html + * \see \p upper_bound + * \see \p equal_range + * \see \p binary_search + */ +template +OutputIterator lower_bound(ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator result, + StrictWeakOrdering comp); + + +/*! \p upper_bound is a vectorized version of binary search: for each + * iterator \c v in [values_first, values_last) it attempts to + * find the value *v in an ordered range [first, last). + * Specifically, it returns the index of last position where value could + * be inserted without violating the ordering. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param values_first The beginning of the search values sequence. + * \param values_last The end of the search values sequence. 
+ * \param result The beginning of the output sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam InputIterator is a model of Input Iterator. + * and \c InputIterator's \c value_type is LessThanComparable. + * \tparam OutputIterator is a model of Output Iterator. + * and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type. + * + * \pre The ranges [first,last) and [result, result + (values_last - values_first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p upper_bound + * to search for multiple values in an ordered range using the \p thrust::device execution policy for + * parallelization: + * + * \code + * #include + * #include + * #include + * ... + * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::device_vector values(6); + * values[0] = 0; + * values[1] = 1; + * values[2] = 2; + * values[3] = 3; + * values[4] = 8; + * values[5] = 9; + * + * thrust::device_vector output(6); + * + * thrust::upper_bound(thrust::device, + * input.begin(), input.end(), + * values.begin(), values.end(), + * output.begin()); + * + * // output is now [1, 1, 2, 2, 5, 5] + * \endcode + * + * \see http://www.sgi.com/tech/stl/upper_bound.html + * \see \p lower_bound + * \see \p equal_range + * \see \p binary_search + */ +template +OutputIterator upper_bound(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator result); + + +/*! \p upper_bound is a vectorized version of binary search: for each + * iterator \c v in [values_first, values_last) it attempts to + * find the value *v in an ordered range [first, last). 
+ * Specifically, it returns the index of last position where value could + * be inserted without violating the ordering. + * + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param values_first The beginning of the search values sequence. + * \param values_last The end of the search values sequence. + * \param result The beginning of the output sequence. + * + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam InputIterator is a model of Input Iterator. + * and \c InputIterator's \c value_type is LessThanComparable. + * \tparam OutputIterator is a model of Output Iterator. + * and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type. + * + * \pre The ranges [first,last) and [result, result + (values_last - values_first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p upper_bound + * to search for multiple values in an ordered range. + * + * \code + * #include + * #include + * ... + * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::device_vector values(6); + * values[0] = 0; + * values[1] = 1; + * values[2] = 2; + * values[3] = 3; + * values[4] = 8; + * values[5] = 9; + * + * thrust::device_vector output(6); + * + * thrust::upper_bound(input.begin(), input.end(), + * values.begin(), values.end(), + * output.begin()); + * + * // output is now [1, 1, 2, 2, 5, 5] + * \endcode + * + * \see http://www.sgi.com/tech/stl/upper_bound.html + * \see \p lower_bound + * \see \p equal_range + * \see \p binary_search + */ +template +OutputIterator upper_bound(ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator result); + + +/*! 
\p upper_bound is a vectorized version of binary search: for each + * iterator \c v in [values_first, values_last) it attempts to + * find the value *v in an ordered range [first, last). + * Specifically, it returns the index of last position where value could + * be inserted without violating the ordering. This version of + * \p upper_bound uses function object \c comp for comparison. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param values_first The beginning of the search values sequence. + * \param values_last The end of the search values sequence. + * \param result The beginning of the output sequence. + * \param comp The comparison operator. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam InputIterator is a model of Input Iterator. + * and \c InputIterator's \c value_type is comparable to \p ForwardIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator. + * and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. + * + * \pre The ranges [first,last) and [result, result + (values_last - values_first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p upper_bound + * to search for multiple values in an ordered range using the \p thrust::device execution policy for + * parallelization: + * + * \code + * #include + * #include + * #include + * #include + * ... 
+ * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::device_vector values(6); + * values[0] = 0; + * values[1] = 1; + * values[2] = 2; + * values[3] = 3; + * values[4] = 8; + * values[5] = 9; + * + * thrust::device_vector output(6); + * + * thrust::upper_bound(thrust::device, + * input.begin(), input.end(), + * values.begin(), values.end(), + * output.begin(), + * thrust::less()); + * + * // output is now [1, 1, 2, 2, 5, 5] + * \endcode + * + * \see http://www.sgi.com/tech/stl/upper_bound.html + * \see \p lower_bound + * \see \p equal_range + * \see \p binary_search + */ +template +OutputIterator upper_bound(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator result, + StrictWeakOrdering comp); + + +/*! \p upper_bound is a vectorized version of binary search: for each + * iterator \c v in [values_first, values_last) it attempts to + * find the value *v in an ordered range [first, last). + * Specifically, it returns the index of last position where value could + * be inserted without violating the ordering. This version of + * \p upper_bound uses function object \c comp for comparison. + * + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param values_first The beginning of the search values sequence. + * \param values_last The end of the search values sequence. + * \param result The beginning of the output sequence. + * \param comp The comparison operator. + * + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam InputIterator is a model of Input Iterator. + * and \c InputIterator's \c value_type is comparable to \p ForwardIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator. 
+ * and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. + * + * \pre The ranges [first,last) and [result, result + (values_last - values_first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p upper_bound + * to search for multiple values in an ordered range. + * + * \code + * #include + * #include + * #include + * ... + * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::device_vector values(6); + * values[0] = 0; + * values[1] = 1; + * values[2] = 2; + * values[3] = 3; + * values[4] = 8; + * values[5] = 9; + * + * thrust::device_vector output(6); + * + * thrust::upper_bound(input.begin(), input.end(), + * values.begin(), values.end(), + * output.begin(), + * thrust::less()); + * + * // output is now [1, 1, 2, 2, 5, 5] + * \endcode + * + * \see http://www.sgi.com/tech/stl/upper_bound.html + * \see \p lower_bound + * \see \p equal_range + * \see \p binary_search + */ +template +OutputIterator upper_bound(ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator result, + StrictWeakOrdering comp); + + +/*! \p binary_search is a vectorized version of binary search: for each + * iterator \c v in [values_first, values_last) it attempts to + * find the value *v in an ordered range [first, last). + * It returns \c true if an element that is equivalent to \c value + * is present in [first, last) and \c false if no such element + * exists. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param values_first The beginning of the search values sequence. 
+ * \param values_last The end of the search values sequence. + * \param result The beginning of the output sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam InputIterator is a model of Input Iterator. + * and \c InputIterator's \c value_type is LessThanComparable. + * \tparam OutputIterator is a model of Output Iterator. + * and bool is convertible to \c OutputIterator's \c value_type. + * + * \pre The ranges [first,last) and [result, result + (values_last - values_first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p binary_search + * to search for multiple values in an ordered range using the \p thrust::device execution policy for + * parallelization: + * + * \code + * #include + * #include + * #include + * ... + * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::device_vector values(6); + * values[0] = 0; + * values[1] = 1; + * values[2] = 2; + * values[3] = 3; + * values[4] = 8; + * values[5] = 9; + * + * thrust::device_vector output(6); + * + * thrust::binary_search(thrust::device, + * input.begin(), input.end(), + * values.begin(), values.end(), + * output.begin()); + * + * // output is now [true, false, true, false, true, false] + * \endcode + * + * \see http://www.sgi.com/tech/stl/binary_search.html + * \see \p lower_bound + * \see \p upper_bound + * \see \p equal_range + */ +template +OutputIterator binary_search(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator result); + + +/*! \p binary_search is a vectorized version of binary search: for each + * iterator \c v in [values_first, values_last) it attempts to + * find the value *v in an ordered range [first, last). 
+ * It returns \c true if an element that is equivalent to \c value + * is present in [first, last) and \c false if no such element + * exists. + * + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param values_first The beginning of the search values sequence. + * \param values_last The end of the search values sequence. + * \param result The beginning of the output sequence. + * + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam InputIterator is a model of Input Iterator. + * and \c InputIterator's \c value_type is LessThanComparable. + * \tparam OutputIterator is a model of Output Iterator. + * and bool is convertible to \c OutputIterator's \c value_type. + * + * \pre The ranges [first,last) and [result, result + (values_last - values_first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p binary_search + * to search for multiple values in an ordered range. + * + * \code + * #include + * #include + * ... + * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::device_vector values(6); + * values[0] = 0; + * values[1] = 1; + * values[2] = 2; + * values[3] = 3; + * values[4] = 8; + * values[5] = 9; + * + * thrust::device_vector output(6); + * + * thrust::binary_search(input.begin(), input.end(), + * values.begin(), values.end(), + * output.begin()); + * + * // output is now [true, false, true, false, true, false] + * \endcode + * + * \see http://www.sgi.com/tech/stl/binary_search.html + * \see \p lower_bound + * \see \p upper_bound + * \see \p equal_range + */ +template +OutputIterator binary_search(ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator result); + + +/*! 
\p binary_search is a vectorized version of binary search: for each + * iterator \c v in [values_first, values_last) it attempts to + * find the value *v in an ordered range [first, last). + * It returns \c true if an element that is equivalent to \c value + * is present in [first, last) and \c false if no such element + * exists. This version of \p binary_search uses function object + * \c comp for comparison. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param values_first The beginning of the search values sequence. + * \param values_last The end of the search values sequence. + * \param result The beginning of the output sequence. + * \param comp The comparison operator. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam InputIterator is a model of Input Iterator. + * and \c InputIterator's \c value_type is LessThanComparable. + * \tparam OutputIterator is a model of Output Iterator. + * and bool is convertible to \c OutputIterator's \c value_type. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. + * + * \pre The ranges [first,last) and [result, result + (values_last - values_first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p binary_search + * to search for multiple values in an ordered range using the \p thrust::device execution policy for + * parallelization: + * + * \code + * #include + * #include + * #include + * #include + * ...
+ * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::device_vector values(6); + * values[0] = 0; + * values[1] = 1; + * values[2] = 2; + * values[3] = 3; + * values[4] = 8; + * values[5] = 9; + * + * thrust::device_vector output(6); + * + * thrust::binary_search(thrust::device, + * input.begin(), input.end(), + * values.begin(), values.end(), + * output.begin(), + * thrust::less()); + * + * // output is now [true, false, true, false, true, false] + * \endcode + * + * \see http://www.sgi.com/tech/stl/binary_search.html + * \see \p lower_bound + * \see \p upper_bound + * \see \p equal_range + */ +template +OutputIterator binary_search(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator result, + StrictWeakOrdering comp); + + +/*! \p binary_search is a vectorized version of binary search: for each + * iterator \c v in [values_first, values_last) it attempts to + * find the value *v in an ordered range [first, last). + * It returns \c true if an element that is equivalent to \c value + * is present in [first, last) and \c false if no such element + * exists. This version of \p binary_search uses function object + * \c comp for comparison. + * + * \param first The beginning of the ordered sequence. + * \param last The end of the ordered sequence. + * \param values_first The beginning of the search values sequence. + * \param values_last The end of the search values sequence. + * \param result The beginning of the output sequence. + * \param comp The comparison operator. + * + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam InputIterator is a model of Input Iterator. + * and \c InputIterator's \c value_type is LessThanComparable. + * \tparam OutputIterator is a model of Output Iterator. 
+ * and bool is convertible to \c OutputIterator's \c value_type. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. + * + * \pre The ranges [first,last) and [result, result + (values_last - values_first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p binary_search + * to search for multiple values in an ordered range. + * + * \code + * #include + * #include + * #include + * ... + * thrust::device_vector input(5); + * + * input[0] = 0; + * input[1] = 2; + * input[2] = 5; + * input[3] = 7; + * input[4] = 8; + * + * thrust::device_vector values(6); + * values[0] = 0; + * values[1] = 1; + * values[2] = 2; + * values[3] = 3; + * values[4] = 8; + * values[5] = 9; + * + * thrust::device_vector output(6); + * + * thrust::binary_search(input.begin(), input.end(), + * values.begin(), values.end(), + * output.begin(), + * thrust::less()); + * + * // output is now [true, false, true, false, true, false] + * \endcode + * + * \see http://www.sgi.com/tech/stl/binary_search.html + * \see \p lower_bound + * \see \p upper_bound + * \see \p equal_range + */ +template +OutputIterator binary_search(ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator result, + StrictWeakOrdering comp); + + +/*! \} // end vectorized_binary_search + */ + + +/*! \} // end binary_search + */ + + +/*! \} // end searching + */ + + +} // end namespace thrust + +#include + diff --git a/compat/thrust/copy.h b/compat/thrust/copy.h new file mode 100644 index 0000000..eaa9719 --- /dev/null +++ b/compat/thrust/copy.h @@ -0,0 +1,505 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file copy.h + * \brief Copies elements from one range to another + */ + +#pragma once + +#include +#include + +namespace thrust +{ + +/*! \addtogroup algorithms + */ + +/*! \addtogroup copying + * \ingroup algorithms + * \{ + */ + + +/*! \p copy copies elements from the range [\p first, \p last) to the range + * [\p result, \p result + (\p last - \p first)). That is, it performs + * the assignments *\p result = *\p first, *(\p result + \c 1) = *(\p first + \c 1), + * and so on. Generally, for every integer \c n from \c 0 to \p last - \p first, \p copy + * performs the assignment *(\p result + \c n) = *(\p first + \c n). Unlike + * \c std::copy, \p copy offers no guarantee on order of operation. As a result, + * calling \p copy with overlapping source and destination ranges has undefined + * behavior. + * + * The return value is \p result + (\p last - \p first). + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence to copy. + * \param last The end of the sequence to copy. + * \param result The destination sequence. + * \return The end of the destination sequence. + * \see http://www.sgi.com/tech/stl/copy.html + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. + * \tparam OutputIterator must be a model of Output Iterator. 
+ * + * \pre \p result may be equal to \p first, but \p result shall not be in the range [first, last) otherwise. + * + * The following code snippet demonstrates how to use \p copy + * to copy from one range to another using the \p thrust::device parallelization policy: + * + * \code + * #include + * #include + * #include + * ... + * + * thrust::device_vector vec0(100); + * thrust::device_vector vec1(100); + * ... + * + * thrust::copy(thrust::device, vec0.begin(), vec0.end(), vec1.begin()); + * + * // vec1 is now a copy of vec0 + * \endcode + */ +template + OutputIterator copy(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result); + + +/*! \p copy_n copies elements from the range [first, first + n) to the range + * [result, result + n). That is, it performs the assignments *result = *first, *(result + 1) = *(first + 1), + * and so on. Generally, for every integer \c i from \c 0 to \c n, \p copy_n + * performs the assignment *(\p result + \c i) = *(\p first + \c i). Unlike + * \c std::copy_n, \p copy_n offers no guarantee on order of operation. As a result, + * calling \p copy_n with overlapping source and destination ranges has undefined + * behavior. + * + * The return value is \p result + \p n. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the range to copy. + * \param n The number of elements to copy. + * \param result The beginning of the destination range. + * \return The end of the destination range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. + * \tparam Size is an integral type. + * \tparam OutputIterator must be a model of Output Iterator.
+ * + * \pre \p result may be equal to \p first, but \p result shall not be in the range [first, first + n) otherwise. + * + * The following code snippet demonstrates how to use \p copy_n + * to copy from one range to another using the \p thrust::device parallelization policy: + * + * \code + * #include + * #include + * #include + * ... + * size_t n = 100; + * thrust::device_vector vec0(n); + * thrust::device_vector vec1(n); + * ... + * thrust::copy_n(thrust::device, vec0.begin(), n, vec1.begin()); + * + * // vec1 is now a copy of vec0 + * \endcode + * + * \see http://www.sgi.com/tech/stl/copy_n.html + * \see thrust::copy + */ +template + OutputIterator copy_n(const thrust::detail::execution_policy_base &exec, + InputIterator first, + Size n, + OutputIterator result); + + + +/*! \p copy copies elements from the range [\p first, \p last) to the range + * [\p result, \p result + (\p last - \p first)). That is, it performs + * the assignments *\p result = *\p first, *(\p result + \c 1) = *(\p first + \c 1), + * and so on. Generally, for every integer \c n from \c 0 to \p last - \p first, \p copy + * performs the assignment *(\p result + \c n) = *(\p first + \c n). Unlike + * \c std::copy, \p copy offers no guarantee on order of operation. As a result, + * calling \p copy with overlapping source and destination ranges has undefined + * behavior. + * + * The return value is \p result + (\p last - \p first). + * + * \param first The beginning of the sequence to copy. + * \param last The end of the sequence to copy. + * \param result The destination sequence. + * \return The end of the destination sequence. + * \see http://www.sgi.com/tech/stl/copy.html + * + * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. + * \tparam OutputIterator must be a model of Output Iterator.
+ * + * \pre \p result may be equal to \p first, but \p result shall not be in the range [first, last) otherwise. + * + * The following code snippet demonstrates how to use \p copy + * to copy from one range to another. + * + * \code + * #include + * #include + * ... + * + * thrust::device_vector vec0(100); + * thrust::device_vector vec1(100); + * ... + * + * thrust::copy(vec0.begin(), vec0.end(), + * vec1.begin()); + * + * // vec1 is now a copy of vec0 + * \endcode + */ +template + OutputIterator copy(InputIterator first, + InputIterator last, + OutputIterator result); + +/*! \p copy_n copies elements from the range [first, first + n) to the range + * [result, result + n). That is, it performs the assignments *result = *first, *(result + 1) = *(first + 1), + * and so on. Generally, for every integer \c i from \c 0 to \c n, \p copy_n + * performs the assignment *(\p result + \c i) = *(\p first + \c i). Unlike + * \c std::copy_n, \p copy_n offers no guarantee on order of operation. As a result, + * calling \p copy_n with overlapping source and destination ranges has undefined + * behavior. + * + * The return value is \p result + \p n. + * + * \param first The beginning of the range to copy. + * \param n The number of elements to copy. + * \param result The beginning of the destination range. + * \return The end of the destination range. + * + * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. + * \tparam Size is an integral type. + * \tparam OutputIterator must be a model of Output Iterator. + * + * \pre \p result may be equal to \p first, but \p result shall not be in the range [first, first + n) otherwise. + * + * The following code snippet demonstrates how to use \p copy_n + * to copy from one range to another. + * + * \code + * #include + * #include + * ... + * size_t n = 100; + * thrust::device_vector vec0(n); + * thrust::device_vector vec1(n); + * ...
+ * thrust::copy_n(vec0.begin(), n, vec1.begin()); + * + * // vec1 is now a copy of vec0 + * \endcode + * + * \see http://www.sgi.com/tech/stl/copy_n.html + * \see thrust::copy + */ +template + OutputIterator copy_n(InputIterator first, + Size n, + OutputIterator result); + +/*! \} // end copying + */ + +/*! \addtogroup stream_compaction + * \{ + */ + + +/*! This version of \p copy_if copies elements from the range [first,last) + * to a range beginning at \p result, except that any element which causes \p pred + * to be \c false is not copied. + * + * More precisely, for every integer \c n such that 0 <= n < last-first, + * \p copy_if performs the assignment *result = *(first+n) and \p result + * is advanced one position if pred(*(first+n)). Otherwise, no assignment + * occurs and \p result is not advanced. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence from which to copy. + * \param last The end of the sequence from which to copy. + * \param result The beginning of the sequence into which to copy. + * \param pred The predicate to test on every value of the range [first, last). + * \return result + n, where \c n is equal to the number of times \p pred + * evaluated to \c true in the range [first, last). + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam Predicate is a model of Predicate. + * + * \pre The ranges [first, last) and [result, result + (last - first)) shall not overlap.
+ * + * The following code snippet demonstrates how to use \p copy_if to perform stream compaction + * to copy even numbers to an output range using the \p thrust::host parallelization policy: + * + * \code + * #include + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int x) + * { + * return (x % 2) == 0; + * } + * }; + * ... + * const int N = 6; + * int V[N] = {-2, 0, -1, 0, 1, 2}; + * int result[4]; + * + * thrust::copy_if(thrust::host, V, V + N, result, is_even()); + * + * // V remains {-2, 0, -1, 0, 1, 2} + * // result is now {-2, 0, 0, 2} + * \endcode + * + * \see \c remove_copy_if + */ +template + OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred); + + + +/*! This version of \p copy_if copies elements from the range [first,last) + * to a range beginning at \p result, except that any element which causes \p pred + * to be \c false is not copied. + * + * More precisely, for every integer \c n such that 0 <= n < last-first, + * \p copy_if performs the assignment *result = *(first+n) and \p result + * is advanced one position if pred(*(first+n)). Otherwise, no assignment + * occurs and \p result is not advanced. + * + * \param first The beginning of the sequence from which to copy. + * \param last The end of the sequence from which to copy. + * \param result The beginning of the sequence into which to copy. + * \param pred The predicate to test on every value of the range [first, last). + * \return result + n, where \c n is equal to the number of times \p pred + * evaluated to \c true in the range [first, last). + * + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam Predicate is a model of Predicate.
+ * + * \pre The ranges [first, last) and [result, result + (last - first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p copy_if to perform stream compaction + * to copy even numbers to an output range. + * + * \code + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int x) + * { + * return (x % 2) == 0; + * } + * }; + * ... + * const int N = 6; + * int V[N] = {-2, 0, -1, 0, 1, 2}; + * int result[4]; + * + * thrust::copy_if(V, V + N, result, is_even()); + * + * // V remains {-2, 0, -1, 0, 1, 2} + * // result is now {-2, 0, 0, 2} + * \endcode + * + * \see \c remove_copy_if + */ +template + OutputIterator copy_if(InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred); + + +/*! This version of \p copy_if copies elements from the range [first,last) + * to a range beginning at \p result, except that any element whose corresponding stencil + * element causes \p pred to be \c false is not copied. + * + * More precisely, for every integer \c n such that 0 <= n < last-first, + * \p copy_if performs the assignment *result = *(first+n) and \p result + * is advanced one position if pred(*(stencil+n)). Otherwise, no assignment + * occurs and \p result is not advanced. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence from which to copy. + * \param last The end of the sequence from which to copy. + * \param stencil The beginning of the stencil sequence. + * \param result The beginning of the sequence into which to copy. + * \param pred The predicate to test on every value of the range [stencil, stencil + (last-first)). + * \return result + n, where \c n is equal to the number of times \p pred + * evaluated to \c true in the range [stencil, stencil + (last-first)). 
+ * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator. + * \tparam InputIterator2 is a model of Input Iterator, + * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam Predicate is a model of Predicate. + * + * \pre The ranges [first, last) and [result, result + (last - first)) shall not overlap. + * \pre The ranges [stencil, stencil + (last - first)) and [result, result + (last - first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p copy_if to perform stream compaction + * to copy numbers to an output range when corresponding stencil elements are even using the \p thrust::host execution policy: + * + * \code + * #include + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int x) + * { + * return (x % 2) == 0; + * } + * }; + * ... + * int N = 6; + * int data[N] = { 0, 1, 2, 3, 4, 5}; + * int stencil[N] = {-2, 0, -1, 0, 1, 2}; + * int result[4]; + * + * thrust::copy_if(thrust::host, data, data + N, stencil, result, is_even()); + * + * // data remains = { 0, 1, 2, 3, 4, 5}; + * // stencil remains = {-2, 0, -1, 0, 1, 2}; + * // result is now { 0, 1, 3, 5} + * \endcode + * + * \see \c remove_copy_if + */ +template + OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred); + + +/*! This version of \p copy_if copies elements from the range [first,last) + * to a range beginning at \p result, except that any element whose corresponding stencil + * element causes \p pred to be \c false is not copied. 
+ * + * More precisely, for every integer \c n such that 0 <= n < last-first, + * \p copy_if performs the assignment *result = *(first+n) and \p result + * is advanced one position if pred(*(stencil+n)). Otherwise, no assignment + * occurs and \p result is not advanced. + * + * \param first The beginning of the sequence from which to copy. + * \param last The end of the sequence from which to copy. + * \param stencil The beginning of the stencil sequence. + * \param result The beginning of the sequence into which to copy. + * \param pred The predicate to test on every value of the range [stencil, stencil + (last-first)). + * \return result + n, where \c n is equal to the number of times \p pred + * evaluated to \c true in the range [stencil, stencil + (last-first)). + * + * \tparam InputIterator1 is a model of Input Iterator. + * \tparam InputIterator2 is a model of Input Iterator, + * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam Predicate is a model of Predicate. + * + * \pre The ranges [first, last) and [result, result + (last - first)) shall not overlap. + * \pre The ranges [stencil, stencil + (last - first)) and [result, result + (last - first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p copy_if to perform stream compaction + * to copy numbers to an output range when corresponding stencil elements are even: + * + * \code + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int x) + * { + * return (x % 2) == 0; + * } + * }; + * ... 
+ * int N = 6; + * int data[N] = { 0, 1, 2, 3, 4, 5}; + * int stencil[N] = {-2, 0, -1, 0, 1, 2}; + * int result[4]; + * + * thrust::copy_if(data, data + N, stencil, result, is_even()); + * + * // data remains = { 0, 1, 2, 3, 4, 5}; + * // stencil remains = {-2, 0, -1, 0, 1, 2}; + * // result is now { 0, 1, 3, 5} + * \endcode + * + * \see \c remove_copy_if + */ +template + OutputIterator copy_if(InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred); + +/*! \} // end stream_compaction + */ + +} // end namespace thrust + +#include +#include + diff --git a/compat/thrust/count.h b/compat/thrust/count.h new file mode 100644 index 0000000..cddd1dd --- /dev/null +++ b/compat/thrust/count.h @@ -0,0 +1,231 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file count.h + * \brief Counting elements in a range + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ + + +/*! \addtogroup algorithms + */ + +/*! \addtogroup reductions + * \ingroup algorithms + * \{ + */ + +/*! \addtogroup counting + * \ingroup reductions + * \{ + */ + + +/*! \p count finds the number of elements in [first,last) that are equal + * to \p value. More precisely, \p count returns the number of iterators \c i in + * [first, last) such that *i == value. + * + * The algorithm's execution is parallelized as determined by \p exec. 
+ * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param value The value to be counted. + * \return The number of elements equal to \p value. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be a model of Equality Comparable. + * \tparam EqualityComparable must be a model of Equality Comparable and can be compared for equality with \c InputIterator's \c value_type. + * + * The following code snippet demonstrates how to use \p count to + * count the number of instances in a range of a value of interest using the \p thrust::device execution policy: + * + * \code + * #include + * #include + * #include + * ... + * // put 3 1s in a device_vector + * thrust::device_vector vec(5,0); + * vec[1] = 1; + * vec[3] = 1; + * vec[4] = 1; + * + * // count the 1s + * int result = thrust::count(thrust::device, vec.begin(), vec.end(), 1); + * // result == 3 + * \endcode + * + * \see http://www.sgi.com/tech/stl/count.html + */ +template + typename thrust::iterator_traits::difference_type + count(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, const EqualityComparable& value); + + + +/*! \p count finds the number of elements in [first,last) that are equal + * to \p value. More precisely, \p count returns the number of iterators \c i in + * [first, last) such that *i == value. + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param value The value to be counted. + * \return The number of elements equal to \p value. + * + * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be a model of Equality Comparable.
+ * \tparam EqualityComparable must be a model of Equality Comparable and can be compared for equality with \c InputIterator's \c value_type. + * + * The following code snippet demonstrates how to use \p count to + * count the number of instances in a range of a value of interest. + * \code + * #include + * #include + * ... + * // put 3 1s in a device_vector + * thrust::device_vector vec(5,0); + * vec[1] = 1; + * vec[3] = 1; + * vec[4] = 1; + * + * // count the 1s + * int result = thrust::count(vec.begin(), vec.end(), 1); + * // result == 3 + * \endcode + * + * \see http://www.sgi.com/tech/stl/count.html + */ +template + typename thrust::iterator_traits::difference_type + count(InputIterator first, InputIterator last, const EqualityComparable& value); + + +/*! \p count_if finds the number of elements in [first,last) for which + * a predicate is \c true. More precisely, \p count_if returns the number of iterators + * \c i in [first, last) such that pred(*i) == true. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param pred The predicate. + * \return The number of elements where \p pred is \c true. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be convertible to \c Predicate's \c argument_type. + * \tparam Predicate must be a model of Predicate. + * + * The following code snippet demonstrates how to use \p count_if to + * count the number of odd numbers in a range using the \p thrust::device execution policy: + * + * \code + * #include + * #include + * #include + * ... + * struct is_odd + * { + * __host__ __device__ + * bool operator()(int &x) + * { + * return x & 1; + * } + * }; + * ...
+ * // fill a device_vector with even & odd numbers + * thrust::device_vector vec(5); + * vec[0] = 0; + * vec[1] = 1; + * vec[2] = 2; + * vec[3] = 3; + * vec[4] = 4; + * + * // count the odd elements in vec + * int result = thrust::count_if(thrust::device, vec.begin(), vec.end(), is_odd()); + * // result == 2 + * \endcode + * + * \see http://www.sgi.com/tech/stl/count.html + */ +template + typename thrust::iterator_traits::difference_type + count_if(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred); + + +/*! \p count_if finds the number of elements in [first,last) for which + * a predicate is \c true. More precisely, \p count_if returns the number of iterators + * \c i in [first, last) such that pred(*i) == true. + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param pred The predicate. + * \return The number of elements where \p pred is \c true. + * + * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be convertible to \c Predicate's \c argument_type. + * \tparam Predicate must be a model of Predicate. + * + * The following code snippet demonstrates how to use \p count_if to + * count the number of odd numbers in a range. + * \code + * #include + * #include + * ... + * struct is_odd + * { + * __host__ __device__ + * bool operator()(int &x) + * { + * return x & 1; + * } + * }; + * ... + * // fill a device_vector with even & odd numbers + * thrust::device_vector vec(5); + * vec[0] = 0; + * vec[1] = 1; + * vec[2] = 2; + * vec[3] = 3; + * vec[4] = 4; + * + * // count the odd elements in vec + * int result = thrust::count_if(vec.begin(), vec.end(), is_odd()); + * // result == 2 + * \endcode + * + * \see http://www.sgi.com/tech/stl/count.html + */ +template + typename thrust::iterator_traits::difference_type + count_if(InputIterator first, InputIterator last, Predicate pred); + +/*!
\} // end counting + * \} // end reductions + */ + +} // end thrust + +#include + diff --git a/compat/thrust/detail/adjacent_difference.inl b/compat/thrust/detail/adjacent_difference.inl new file mode 100644 index 0000000..6590f9d --- /dev/null +++ b/compat/thrust/detail/adjacent_difference.inl @@ -0,0 +1,88 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file adjacent_difference.inl + * \brief Inline file for adjacent_difference.h + */ + +#include +#include +#include +#include + +namespace thrust +{ + + +template +OutputIterator adjacent_difference(const thrust::detail::execution_policy_base &exec, + InputIterator first, InputIterator last, + OutputIterator result) +{ + using thrust::system::detail::generic::adjacent_difference; + + return adjacent_difference(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result); +} // end adjacent_difference() + + +template +OutputIterator adjacent_difference(const thrust::detail::execution_policy_base &exec, + InputIterator first, InputIterator last, + OutputIterator result, + BinaryFunction binary_op) +{ + using thrust::system::detail::generic::adjacent_difference; + + return adjacent_difference(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, binary_op); +} // end adjacent_difference() + + +template +OutputIterator adjacent_difference(InputIterator first, InputIterator last, + 
OutputIterator result) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::adjacent_difference(select_system(system1, system2), first, last, result); +} // end adjacent_difference() + + +template +OutputIterator adjacent_difference(InputIterator first, InputIterator last, + OutputIterator result, + BinaryFunction binary_op) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::adjacent_difference(select_system(system1, system2), first, last, result, binary_op); +} // end adjacent_difference() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/advance.inl b/compat/thrust/detail/advance.inl new file mode 100644 index 0000000..2907be7 --- /dev/null +++ b/compat/thrust/detail/advance.inl @@ -0,0 +1,38 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! 
\file advance.inl + * \brief Inline file for advance.h + */ + +#include +#include +#include + +namespace thrust +{ + + +template +void advance(InputIterator& i, Distance n) +{ + thrust::system::detail::generic::advance(i, n); +} // end advance() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/allocator/allocator_traits.h b/compat/thrust/detail/allocator/allocator_traits.h new file mode 100644 index 0000000..6ee99b4 --- /dev/null +++ b/compat/thrust/detail/allocator/allocator_traits.h @@ -0,0 +1,240 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace allocator_traits_detail +{ + +__THRUST_DEFINE_HAS_NESTED_TYPE(has_pointer, pointer) +__THRUST_DEFINE_HAS_NESTED_TYPE(has_const_pointer, const_pointer) +__THRUST_DEFINE_HAS_NESTED_TYPE(has_reference, reference) +__THRUST_DEFINE_HAS_NESTED_TYPE(has_const_reference, const_reference) +__THRUST_DEFINE_HAS_NESTED_TYPE(has_void_pointer, void_pointer) +__THRUST_DEFINE_HAS_NESTED_TYPE(has_const_void_pointer, const_void_pointer) +__THRUST_DEFINE_HAS_NESTED_TYPE(has_difference_type, difference_type) +__THRUST_DEFINE_HAS_NESTED_TYPE(has_size_type, size_type) +__THRUST_DEFINE_HAS_NESTED_TYPE(has_propagate_on_container_copy_assignment, propagate_on_container_copy_assignment) +__THRUST_DEFINE_HAS_NESTED_TYPE(has_propagate_on_container_move_assignment, propagate_on_container_move_assignment) +__THRUST_DEFINE_HAS_NESTED_TYPE(has_propagate_on_container_swap, propagate_on_container_swap) +__THRUST_DEFINE_HAS_NESTED_TYPE(has_system_type, system_type) + +template + struct nested_pointer +{ + typedef typename T::pointer type; +}; + +template + struct nested_const_pointer +{ + typedef typename T::const_pointer type; +}; + +template + struct nested_reference +{ + typedef typename T::reference type; +}; + +template + struct nested_const_reference +{ + typedef typename T::const_reference type; +}; + +template + struct nested_void_pointer +{ + typedef typename T::void_pointer type; +}; + +template + struct nested_const_void_pointer +{ + typedef typename T::const_void_pointer type; +}; + +template + struct nested_difference_type +{ + typedef typename T::difference_type type; +}; + +template + struct nested_size_type +{ + typedef typename T::size_type type; +}; + +template + struct nested_propagate_on_container_copy_assignment +{ + typedef typename T::propagate_on_container_copy_assignment type; +}; + +template + struct nested_propagate_on_container_move_assignment +{ + typedef 
typename T::propagate_on_container_move_assignment type; +}; + +template + struct nested_propagate_on_container_swap +{ + typedef typename T::propagate_on_container_swap type; +}; + +template + struct nested_system_type +{ + typedef typename T::system_type type; +}; + +} // end allocator_traits_detail + + +template + struct allocator_traits +{ + typedef Alloc allocator_type; + + typedef typename allocator_type::value_type value_type; + + typedef typename eval_if< + allocator_traits_detail::has_pointer::value, + allocator_traits_detail::nested_pointer, + identity_ + >::type pointer; + + private: + template + struct rebind_pointer + { + typedef typename pointer_traits::template rebind::other type; + }; + + public: + + typedef typename eval_if< + allocator_traits_detail::has_const_pointer::value, + allocator_traits_detail::nested_const_pointer, + rebind_pointer + >::type const_pointer; + + typedef typename eval_if< + allocator_traits_detail::has_void_pointer::value, + allocator_traits_detail::nested_void_pointer, + rebind_pointer + >::type void_pointer; + + typedef typename eval_if< + allocator_traits_detail::has_const_void_pointer::value, + allocator_traits_detail::nested_const_void_pointer, + rebind_pointer + >::type const_void_pointer; + + typedef typename eval_if< + allocator_traits_detail::has_difference_type::value, + allocator_traits_detail::nested_difference_type, + pointer_difference + >::type difference_type; + + typedef typename eval_if< + allocator_traits_detail::has_size_type::value, + allocator_traits_detail::nested_size_type, + make_unsigned + >::type size_type; + + typedef typename eval_if< + allocator_traits_detail::has_propagate_on_container_copy_assignment::value, + allocator_traits_detail::nested_propagate_on_container_copy_assignment, + identity_ + >::type propagate_on_container_copy_assignment; + + typedef typename eval_if< + allocator_traits_detail::has_propagate_on_container_move_assignment::value, + 
allocator_traits_detail::nested_propagate_on_container_move_assignment, + identity_ + >::type propagate_on_container_move_assignment; + + typedef typename eval_if< + allocator_traits_detail::has_propagate_on_container_swap::value, + allocator_traits_detail::nested_propagate_on_container_swap, + identity_ + >::type propagate_on_container_swap; + + typedef typename eval_if< + allocator_traits_detail::has_system_type::value, + allocator_traits_detail::nested_system_type, + thrust::iterator_system + >::type system_type; + + // XXX rebind and rebind_traits are alias templates + // and so are omitted while c++11 is unavailable + + inline static pointer allocate(allocator_type &a, size_type n); + + inline static pointer allocate(allocator_type &a, size_type n, const_void_pointer hint); + + inline static void deallocate(allocator_type &a, pointer p, size_type n); + + // XXX should probably change T* to pointer below and then relax later + + template + inline __host__ __device__ static void construct(allocator_type &a, T *p); + + template + inline __host__ __device__ static void construct(allocator_type &a, T *p, const Arg1 &arg1); + + template + inline __host__ __device__ static void destroy(allocator_type &a, T *p); + + inline static size_type max_size(const allocator_type &a); +}; // end allocator_traits + + +// XXX consider moving this non-standard functionality inside allocator_traits +template + struct allocator_system +{ + // the type of the allocator's system + typedef typename eval_if< + allocator_traits_detail::has_system_type::value, + allocator_traits_detail::nested_system_type, + thrust::iterator_system< + typename allocator_traits::pointer + > + >::type type; + + inline static type &get(Alloc &a); +}; + + +} // end detail +} // end thrust + +#include + diff --git a/compat/thrust/detail/allocator/allocator_traits.inl b/compat/thrust/detail/allocator/allocator_traits.inl new file mode 100644 index 0000000..8319335 --- /dev/null +++ 
b/compat/thrust/detail/allocator/allocator_traits.inl @@ -0,0 +1,287 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace allocator_traits_detail +{ + +__THRUST_DEFINE_IS_CALL_POSSIBLE(has_member_allocate_with_hint_impl, allocate) + +template + class has_member_allocate_with_hint +{ + typedef typename allocator_traits::pointer pointer; + typedef typename allocator_traits::size_type size_type; + typedef typename allocator_traits::const_void_pointer const_void_pointer; + + public: + typedef typename has_member_allocate_with_hint_impl::type type; + static const bool value = type::value; +}; + +template + typename enable_if< + has_member_allocate_with_hint::value, + typename allocator_traits::pointer + >::type + allocate(Alloc &a, typename allocator_traits::size_type n, typename allocator_traits::const_void_pointer hint) +{ + return a.allocate(n,hint); +} + +template + typename disable_if< + has_member_allocate_with_hint::value, + typename allocator_traits::pointer + >::type + allocate(Alloc &a, typename allocator_traits::size_type n, typename allocator_traits::const_void_pointer) +{ + return a.allocate(n); +} + + +__THRUST_DEFINE_IS_CALL_POSSIBLE(has_member_construct1_impl, construct) + +template + struct has_member_construct1 + : has_member_construct1_impl +{}; + +template + inline __host__ 
__device__ + typename enable_if< + has_member_construct1::value + >::type + construct(Alloc &a, T *p) +{ + a.construct(p); +} + +template + inline __host__ __device__ + typename disable_if< + has_member_construct1::value + >::type + construct(Alloc &a, T *p) +{ + ::new(static_cast(p)) T(); +} + + +__THRUST_DEFINE_IS_CALL_POSSIBLE(has_member_construct2_impl, construct) + +template + struct has_member_construct2 + : has_member_construct2_impl +{}; + +template + inline __host__ __device__ + typename enable_if< + has_member_construct2::value + >::type + construct(Alloc &a, T *p, const Arg1 &arg1) +{ + a.construct(p,arg1); +} + +template + inline __host__ __device__ + typename disable_if< + has_member_construct2::value + >::type + construct(Alloc &, T *p, const Arg1 &arg1) +{ + ::new(static_cast(p)) T(arg1); +} + + +__THRUST_DEFINE_IS_CALL_POSSIBLE(has_member_destroy_impl, destroy) + +template + struct has_member_destroy + : has_member_destroy_impl +{}; + +template + inline __host__ __device__ + typename enable_if< + has_member_destroy::value + >::type + destroy(Alloc &a, T *p) +{ + a.destroy(p); +} + +template + inline __host__ __device__ + typename disable_if< + has_member_destroy::value + >::type + destroy(Alloc &, T *p) +{ + p->~T(); +} + + +__THRUST_DEFINE_IS_CALL_POSSIBLE(has_member_max_size_impl, max_size) + +template + class has_member_max_size +{ + typedef typename allocator_traits::size_type size_type; + + public: + typedef typename has_member_max_size_impl::type type; + static const bool value = type::value; +}; + +template + typename enable_if< + has_member_max_size::value, + typename allocator_traits::size_type + >::type + max_size(const Alloc &a) +{ + return a.max_size(); +} + +template + typename disable_if< + has_member_max_size::value, + typename allocator_traits::size_type + >::type + max_size(const Alloc &a) +{ + typedef typename allocator_traits::size_type size_type; + return std::numeric_limits::max(); +} + 
+__THRUST_DEFINE_HAS_MEMBER_FUNCTION(has_member_system_impl, system) + +template + class has_member_system +{ + typedef typename allocator_system::type system_type; + + public: + typedef typename has_member_system_impl::type type; + static const bool value = type::value; +}; + +template + typename enable_if< + has_member_system::value, + typename allocator_system::type & + >::type + system(Alloc &a) +{ + return a.system(); +} + +template + typename disable_if< + has_member_system::value, + typename allocator_system::type & + >::type + system(Alloc &a) +{ + // assumes the system is default-constructible + static typename allocator_system::type state; + return state; +} + + +} // end allocator_traits_detail + + +template + typename allocator_traits::pointer + allocator_traits + ::allocate(Alloc &a, typename allocator_traits::size_type n) +{ + return a.allocate(n); +} + +template + typename allocator_traits::pointer + allocator_traits + ::allocate(Alloc &a, typename allocator_traits::size_type n, typename allocator_traits::const_void_pointer hint) +{ + return allocator_traits_detail::allocate(a, n, hint); +} + +template + void allocator_traits + ::deallocate(Alloc &a, typename allocator_traits::pointer p, typename allocator_traits::size_type n) +{ + return a.deallocate(p,n); +} + +template + template + void allocator_traits + ::construct(allocator_type &a, T *p) +{ + return allocator_traits_detail::construct(a,p); +} + +template + template + void allocator_traits + ::construct(allocator_type &a, T *p, const Arg1 &arg1) +{ + return allocator_traits_detail::construct(a,p,arg1); +} + +template + template + void allocator_traits + ::destroy(allocator_type &a, T *p) +{ + return allocator_traits_detail::destroy(a,p); +} + +template + typename allocator_traits::size_type + allocator_traits + ::max_size(const allocator_type &a) +{ + return allocator_traits_detail::max_size(a); +} + +template + typename allocator_system::type & + allocator_system + ::get(Alloc &a) +{ + return 
allocator_traits_detail::system(a); +} + + +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/allocator/copy_construct_range.h b/compat/thrust/detail/allocator/copy_construct_range.h new file mode 100644 index 0000000..5d99e1f --- /dev/null +++ b/compat/thrust/detail/allocator/copy_construct_range.h @@ -0,0 +1,45 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace detail +{ + +template + Pointer copy_construct_range(thrust::execution_policy &from_system, + Allocator &a, + InputIterator first, + InputIterator last, + Pointer result); + +template + Pointer copy_construct_range_n(thrust::execution_policy &from_system, + Allocator &a, + InputIterator first, + Size n, + Pointer result); + +} // end detail +} // end thrust + +#include + diff --git a/compat/thrust/detail/allocator/copy_construct_range.inl b/compat/thrust/detail/allocator/copy_construct_range.inl new file mode 100644 index 0000000..7c5478b --- /dev/null +++ b/compat/thrust/detail/allocator/copy_construct_range.inl @@ -0,0 +1,298 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace allocator_traits_detail +{ + + +template + struct copy_construct_with_allocator +{ + Allocator &a; + + copy_construct_with_allocator(Allocator &a) + : a(a) + {} + + template + inline __host__ __device__ + void operator()(Tuple t) + { + const InputType &in = thrust::get<0>(t); + OutputType &out = thrust::get<1>(t); + + allocator_traits::construct(a, &out, in); + } +}; + + +template + struct needs_copy_construct_via_allocator + : has_member_construct2< + Allocator, + T, + T + > +{}; + + +// we know that std::allocator::construct's only effect is to call T's +// copy constructor, so we needn't use it for copy construction +template + struct needs_copy_construct_via_allocator, T> + : thrust::detail::false_type +{}; + + +// XXX it's regrettable that this implementation is copied almost +// exactly from system::detail::generic::uninitialized_copy +// perhaps generic::uninitialized_copy could call this routine +// with a default allocator +template + typename enable_if_convertible< + FromSystem, + ToSystem, + Pointer + >::type + uninitialized_copy_with_allocator(Allocator &a, + thrust::execution_policy &from_system, + thrust::execution_policy &to_system, + InputIterator first, + InputIterator last, + Pointer result) +{ + // zip up the iterators + typedef thrust::tuple IteratorTuple; + typedef thrust::zip_iterator ZipIterator; + + ZipIterator begin = 
thrust::make_zip_iterator(thrust::make_tuple(first,result)); + ZipIterator end = begin; + + // get a zip_iterator pointing to the end + const typename thrust::iterator_difference::type n = thrust::distance(first,last); + thrust::advance(end,n); + + // create a functor + typedef typename iterator_traits::value_type InputType; + typedef typename iterator_traits::value_type OutputType; + + // do the for_each + // note we use to_system to dispatch the for_each + thrust::for_each(to_system, begin, end, copy_construct_with_allocator(a)); + + // return the end of the output range + return thrust::get<1>(end.get_iterator_tuple()); +} + + +// XXX it's regrettable that this implementation is copied almost +// exactly from system::detail::generic::uninitialized_copy_n +// perhaps generic::uninitialized_copy_n could call this routine +// with a default allocator +template + typename enable_if_convertible< + FromSystem, + ToSystem, + Pointer + >::type + uninitialized_copy_with_allocator_n(Allocator &a, + thrust::execution_policy &from_system, + thrust::execution_policy &to_system, + InputIterator first, + Size n, + Pointer result) +{ + // zip up the iterators + typedef thrust::tuple IteratorTuple; + typedef thrust::zip_iterator ZipIterator; + + ZipIterator begin = thrust::make_zip_iterator(thrust::make_tuple(first,result)); + + // create a functor + typedef typename iterator_traits::value_type InputType; + typedef typename iterator_traits::value_type OutputType; + + // do the for_each_n + // note we use to_system to dispatch the for_each_n + ZipIterator end = thrust::for_each_n(to_system, begin, n, copy_construct_with_allocator(a)); + + // return the end of the output range + return thrust::get<1>(end.get_iterator_tuple()); +} + + +template + typename disable_if_convertible< + FromSystem, + ToSystem, + Pointer + >::type + uninitialized_copy_with_allocator(Allocator &, + thrust::execution_policy &from_system, + thrust::execution_policy &to_system, + InputIterator first, + 
InputIterator last, + Pointer result) +{ + // the systems aren't trivially interoperable + // just call two_system_copy and hope for the best + return thrust::detail::two_system_copy(from_system, to_system, first, last, result); +} // end uninitialized_copy_with_allocator() + + +template + typename disable_if_convertible< + FromSystem, + ToSystem, + Pointer + >::type + uninitialized_copy_with_allocator_n(Allocator &, + thrust::execution_policy &from_system, + thrust::execution_policy &to_system, + InputIterator first, + Size n, + Pointer result) +{ + // the systems aren't trivially interoperable + // just call two_system_copy_n and hope for the best + return thrust::detail::two_system_copy_n(from_system, to_system, first, n, result); +} // end uninitialized_copy_with_allocator_n() + + +template + typename disable_if< + needs_copy_construct_via_allocator< + Allocator, + typename pointer_element::type + >::value, + Pointer + >::type + copy_construct_range(thrust::execution_policy &from_system, + Allocator &a, + InputIterator first, + InputIterator last, + Pointer result) +{ + typename allocator_system::type &to_system = allocator_system::get(a); + + // just call two_system_copy + return thrust::detail::two_system_copy(from_system, to_system, first, last, result); +} + + +template + typename disable_if< + needs_copy_construct_via_allocator< + Allocator, + typename pointer_element::type + >::value, + Pointer + >::type + copy_construct_range_n(thrust::execution_policy &from_system, + Allocator &a, + InputIterator first, + Size n, + Pointer result) +{ + typename allocator_system::type &to_system = allocator_system::get(a); + + // just call two_system_copy_n + return thrust::detail::two_system_copy_n(from_system, to_system, first, n, result); +} + + +template + typename enable_if< + needs_copy_construct_via_allocator< + Allocator, + typename pointer_element::type + >::value, + Pointer + >::type + copy_construct_range(thrust::execution_policy &from_system, + Allocator &a, 
+ InputIterator first, + InputIterator last, + Pointer result) +{ + typename allocator_system::type &to_system = allocator_system::get(a); + return uninitialized_copy_with_allocator(a, from_system, to_system, first, last, result); +} + + +template + typename enable_if< + needs_copy_construct_via_allocator< + Allocator, + typename pointer_element::type + >::value, + Pointer + >::type + copy_construct_range_n(thrust::execution_policy &from_system, + Allocator &a, + InputIterator first, + Size n, + Pointer result) +{ + typename allocator_system::type &to_system = allocator_system::get(a); + return uninitialized_copy_with_allocator_n(a, from_system, to_system, first, n, result); +} + + +} // end allocator_traits_detail + + +template + Pointer copy_construct_range(thrust::execution_policy &from_system, + Allocator &a, + InputIterator first, + InputIterator last, + Pointer result) +{ + return allocator_traits_detail::copy_construct_range(from_system, a, first, last, result); +} + + +template + Pointer copy_construct_range_n(thrust::execution_policy &from_system, + Allocator &a, + InputIterator first, + Size n, + Pointer result) +{ + return allocator_traits_detail::copy_construct_range_n(from_system, a, first, n, result); +} + + +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/allocator/default_construct_range.h b/compat/thrust/detail/allocator/default_construct_range.h new file mode 100644 index 0000000..d83cb31 --- /dev/null +++ b/compat/thrust/detail/allocator/default_construct_range.h @@ -0,0 +1,36 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace thrust +{ +namespace detail +{ + + +template +inline void default_construct_range(Allocator &a, Pointer p, Size n); + + +} // end detail +} // end thrust + +#include + + diff --git a/compat/thrust/detail/allocator/default_construct_range.inl b/compat/thrust/detail/allocator/default_construct_range.inl new file mode 100644 index 0000000..45fe9c6 --- /dev/null +++ b/compat/thrust/detail/allocator/default_construct_range.inl @@ -0,0 +1,105 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace allocator_traits_detail +{ + + +template + struct construct1_via_allocator +{ + Allocator &a; + + construct1_via_allocator(Allocator &a) + : a(a) + {} + + template + inline __host__ __device__ + void operator()(T &x) + { + allocator_traits::construct(a, &x); + } +}; + + +template + struct needs_default_construct_via_allocator + : has_member_construct1< + Allocator, + T + > +{}; + + +// we know that std::allocator::construct's only effect is to call T's +// default constructor, so we needn't use it for default construction +template + struct needs_default_construct_via_allocator, T> + : thrust::detail::false_type +{}; + + +template + typename enable_if< + needs_default_construct_via_allocator< + Allocator, + typename pointer_element::type + >::value + >::type + default_construct_range(Allocator &a, Pointer p, Size n) +{ + thrust::for_each_n(allocator_system::get(a), p, n, construct1_via_allocator(a)); +} + + +template + typename disable_if< + needs_default_construct_via_allocator< + Allocator, + typename pointer_element::type + >::value + >::type + default_construct_range(Allocator &a, Pointer p, Size n) +{ + thrust::uninitialized_fill_n(allocator_system::get(a), p, n, typename pointer_element::type()); +} + + +} // end allocator_traits_detail + + +template + void default_construct_range(Allocator &a, Pointer p, Size n) +{ + return allocator_traits_detail::default_construct_range(a,p,n); +} + + +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/allocator/destroy_range.h b/compat/thrust/detail/allocator/destroy_range.h new file mode 100644 index 0000000..d690a60 --- /dev/null +++ b/compat/thrust/detail/allocator/destroy_range.h @@ -0,0 +1,33 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace thrust +{ +namespace detail +{ + +template + inline void destroy_range(Allocator &a, Pointer p, Size n); + +} // end detail +} // end thrust + +#include + diff --git a/compat/thrust/detail/allocator/destroy_range.inl b/compat/thrust/detail/allocator/destroy_range.inl new file mode 100644 index 0000000..ace2223 --- /dev/null +++ b/compat/thrust/detail/allocator/destroy_range.inl @@ -0,0 +1,158 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace allocator_traits_detail +{ + + +// destroy_range has three cases: +// if Allocator has an effectful member function destroy: +// 1. destroy via the allocator +// else +// 2. if T has a non-trivial destructor, destroy the range without using the allocator +// 3. 
if T has a trivial destructor, do a no-op + +template + struct has_effectful_member_destroy + : has_member_destroy +{}; + +// std::allocator::destroy's only effect is to invoke its argument's destructor +template + struct has_effectful_member_destroy, T> + : thrust::detail::false_type +{}; + +// case 1: Allocator has an effectful 1-argument member function "destroy" +template + struct enable_if_destroy_range_case1 + : thrust::detail::enable_if< + has_effectful_member_destroy< + Allocator, + typename pointer_element::type + >::value + > +{}; + +// case 2: Allocator has no member function "destroy", but T has a non-trivial destructor +template + struct enable_if_destroy_range_case2 + : thrust::detail::enable_if< + !has_effectful_member_destroy< + Allocator, + typename pointer_element::type + >::value && + !has_trivial_destructor< + typename pointer_element::type + >::value + > +{}; + +// case 3: Allocator has no member function "destroy", and T has a trivial destructor +template + struct enable_if_destroy_range_case3 + : thrust::detail::enable_if< + !has_effectful_member_destroy< + Allocator, + typename pointer_element::type + >::value && + has_trivial_destructor< + typename pointer_element::type + >::value + > +{}; + + + +template + struct destroy_via_allocator +{ + Allocator &a; + + destroy_via_allocator(Allocator &a) + : a(a) + {} + + template + inline __host__ __device__ + void operator()(T &x) + { + allocator_traits::destroy(a, &x); + } +}; + + +// destroy_range case 1: destroy via allocator +template + typename enable_if_destroy_range_case1::type + destroy_range(Allocator &a, Pointer p, Size n) +{ + thrust::for_each_n(allocator_system::get(a), p, n, destroy_via_allocator(a)); +} + + +// we must prepare for His coming +struct gozer +{ + template + inline __host__ __device__ + void operator()(T &x) + { + x.~T(); + } +}; + +// destroy_range case 2: destroy without the allocator +template + typename enable_if_destroy_range_case2::type + destroy_range(Allocator &a, 
Pointer p, Size n) +{ + thrust::for_each_n(allocator_system::get(a), p, n, gozer()); +} + + +// destroy_range case 3: no-op +template + typename enable_if_destroy_range_case3::type + destroy_range(Allocator &, Pointer, Size) +{ + // no op +} + + +} // end allocator_traits_detail + + +template + void destroy_range(Allocator &a, Pointer p, Size n) +{ + return allocator_traits_detail::destroy_range(a,p,n); +} + + +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/allocator/fill_construct_range.h b/compat/thrust/detail/allocator/fill_construct_range.h new file mode 100644 index 0000000..66fec41 --- /dev/null +++ b/compat/thrust/detail/allocator/fill_construct_range.h @@ -0,0 +1,35 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +namespace thrust +{ +namespace detail +{ + + +template +inline void fill_construct_range(Allocator &a, Pointer p, Size n, const T &value); + + +} // end detail +} // end thrust + +#include + diff --git a/compat/thrust/detail/allocator/fill_construct_range.inl b/compat/thrust/detail/allocator/fill_construct_range.inl new file mode 100644 index 0000000..e2c9c09 --- /dev/null +++ b/compat/thrust/detail/allocator/fill_construct_range.inl @@ -0,0 +1,109 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace allocator_traits_detail +{ + +// fill_construct_range has 2 cases: +// if Allocator has an effectful member function construct: +// 1. construct via the allocator +// else +// 2. 
construct via uninitialized_fill + +template + struct has_effectful_member_construct2 + : has_member_construct2 +{}; + +// std::allocator::construct's only effect is to invoke placement new +template + struct has_effectful_member_construct2,T,Arg1> + : thrust::detail::false_type +{}; + + +template + struct construct2_via_allocator +{ + Allocator &a; + Arg1 arg; + + construct2_via_allocator(Allocator &a, const Arg1 &arg) + : a(a), arg(arg) + {} + + template + inline __host__ __device__ + void operator()(T &x) + { + allocator_traits::construct(a, &x, arg); + } +}; + + +template + typename enable_if< + has_effectful_member_construct2< + Allocator, + typename pointer_element::type, + T + >::value + >::type + fill_construct_range(Allocator &a, Pointer p, Size n, const T &value) +{ + thrust::for_each_n(allocator_system::get(a), p, n, construct2_via_allocator(a, value)); +} + + +template + typename disable_if< + has_effectful_member_construct2< + Allocator, + typename pointer_element::type, + T + >::value + >::type + fill_construct_range(Allocator &a, Pointer p, Size n, const T &value) +{ + thrust::uninitialized_fill_n(allocator_system::get(a), p, n, value); +} + + +} // end allocator_traits_detail + + +template + void fill_construct_range(Alloc &a, Pointer p, Size n, const T &value) +{ + return allocator_traits_detail::fill_construct_range(a,p,n,value); +} + + +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/allocator/malloc_allocator.h b/compat/thrust/detail/allocator/malloc_allocator.h new file mode 100644 index 0000000..cf4567e --- /dev/null +++ b/compat/thrust/detail/allocator/malloc_allocator.h @@ -0,0 +1,52 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + +template + class malloc_allocator + : public thrust::detail::tagged_allocator< + T, System, Pointer + > +{ + private: + typedef thrust::detail::tagged_allocator< + T, System, Pointer + > super_t; + + public: + typedef typename super_t::pointer pointer; + typedef typename super_t::size_type size_type; + + pointer allocate(size_type cnt); + + void deallocate(pointer p, size_type n); +}; + +} // end detail +} // end thrust + +#include + diff --git a/compat/thrust/detail/allocator/malloc_allocator.inl b/compat/thrust/detail/allocator/malloc_allocator.inl new file mode 100644 index 0000000..dd70202 --- /dev/null +++ b/compat/thrust/detail/allocator/malloc_allocator.inl @@ -0,0 +1,64 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + + +template + typename malloc_allocator::pointer + malloc_allocator + ::allocate(typename malloc_allocator::size_type cnt) +{ + using thrust::system::detail::generic::select_system; + + // XXX should use a hypothetical thrust::static_pointer_cast here + System system; + + pointer result = thrust::malloc(select_system(system), cnt); + + if(result.get() == 0) + { + throw thrust::system::detail::bad_alloc("malloc_allocator::allocate: malloc failed"); + } // end if + + return result; +} // end malloc_allocator::allocate() + + +template + void malloc_allocator + ::deallocate(typename malloc_allocator::pointer p, typename malloc_allocator::size_type n) +{ + using thrust::system::detail::generic::select_system; + + System system; + thrust::free(select_system(system), p); +} // end malloc_allocator + + +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/allocator/no_throw_allocator.h b/compat/thrust/detail/allocator/no_throw_allocator.h new file mode 100644 index 0000000..ce397db --- /dev/null +++ b/compat/thrust/detail/allocator/no_throw_allocator.h @@ -0,0 +1,62 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +namespace thrust +{ +namespace detail +{ + +template + struct no_throw_allocator : BaseAllocator +{ + private: + typedef BaseAllocator super_t; + + public: + inline no_throw_allocator(const BaseAllocator &other = BaseAllocator()) + : super_t(other) + {} + + template + struct rebind + { + typedef no_throw_allocator::other> other; + }; // end rebind + + void deallocate(typename super_t::pointer p, typename super_t::size_type n) + { + try + { + super_t::deallocate(p, n); + } // end try + catch(...) + { + // catch anything + } // end catch + } // end deallocate() + + inline bool operator==(no_throw_allocator const &other) { return super_t::operator==(other); } + inline bool operator!=(no_throw_allocator const &other) { return super_t::operator!=(other); } +}; // end no_throw_allocator + +} // end detail +} // end thrust + + diff --git a/compat/thrust/detail/allocator/tagged_allocator.h b/compat/thrust/detail/allocator/tagged_allocator.h new file mode 100644 index 0000000..3cb87a3 --- /dev/null +++ b/compat/thrust/detail/allocator/tagged_allocator.h @@ -0,0 +1,101 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + +template class tagged_allocator; + +template + class tagged_allocator +{ + public: + typedef void value_type; + typedef typename thrust::detail::pointer_traits::template rebind::other pointer; + typedef typename thrust::detail::pointer_traits::template rebind::other const_pointer; + typedef std::size_t size_type; + typedef typename thrust::detail::pointer_traits::difference_type difference_type; + typedef Tag system_type; + + template + struct rebind + { + typedef tagged_allocator other; + }; // end rebind +}; + +template + class tagged_allocator +{ + public: + typedef T value_type; + typedef typename thrust::detail::pointer_traits::template rebind::other pointer; + typedef typename thrust::detail::pointer_traits::template rebind::other const_pointer; + typedef typename thrust::iterator_reference::type reference; + typedef typename thrust::iterator_reference::type const_reference; + typedef std::size_t size_type; + typedef typename thrust::detail::pointer_traits::difference_type difference_type; + typedef Tag system_type; + + template + struct rebind + { + typedef tagged_allocator other; + }; // end rebind + + __host__ __device__ + inline tagged_allocator(); + + __host__ __device__ + inline tagged_allocator(const tagged_allocator &); + + template + __host__ __device__ + inline tagged_allocator(const tagged_allocator &); + + __host__ __device__ + inline ~tagged_allocator(); + + __host__ __device__ + pointer address(reference x) const; + + __host__ __device__ + const_pointer address(const_reference x) const; + + size_type max_size() const; +}; + +template +__host__ __device__ +bool operator==(const tagged_allocator &, const tagged_allocator &); + +template +__host__ __device__ +bool operator!=(const tagged_allocator &, const tagged_allocator &); + +} // end detail +} // end thrust + +#include + diff --git a/compat/thrust/detail/allocator/tagged_allocator.inl 
b/compat/thrust/detail/allocator/tagged_allocator.inl new file mode 100644 index 0000000..cb362a8 --- /dev/null +++ b/compat/thrust/detail/allocator/tagged_allocator.inl @@ -0,0 +1,97 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + + +template + tagged_allocator + ::tagged_allocator() +{} + + +template + tagged_allocator + ::tagged_allocator(const tagged_allocator &) +{} + + +template + template + tagged_allocator + ::tagged_allocator(const tagged_allocator &) +{} + + +template + tagged_allocator + ::~tagged_allocator() +{} + + +template + typename tagged_allocator::pointer + tagged_allocator + ::address(reference x) const +{ + return &x; +} + + +template + typename tagged_allocator::const_pointer + tagged_allocator + ::address(const_reference x) const +{ + return &x; +} + + +template + typename tagged_allocator::size_type + tagged_allocator + ::max_size() const +{ + return (std::numeric_limits::max)() / sizeof(T); +} + + +template +__host__ __device__ +bool operator==(const tagged_allocator &, const tagged_allocator &) +{ + return true; +} + + +template +__host__ __device__ +bool operator!=(const tagged_allocator &, const tagged_allocator &) +{ + return false; +} + + +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/allocator/temporary_allocator.h b/compat/thrust/detail/allocator/temporary_allocator.h new 
file mode 100644 index 0000000..f0496f9 --- /dev/null +++ b/compat/thrust/detail/allocator/temporary_allocator.h @@ -0,0 +1,75 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + + +// XXX the pointer parameter given to tagged_allocator should be related to +// the type of the expression get_temporary_buffer(system, n).first +// without decltype, compromise on pointer +template + class temporary_allocator + : public thrust::detail::tagged_allocator< + T, System, thrust::pointer + > +{ + private: + typedef thrust::detail::tagged_allocator< + T, System, thrust::pointer + > super_t; + + System &m_system; + + public: + typedef typename super_t::pointer pointer; + typedef typename super_t::size_type size_type; + + inline explicit temporary_allocator(thrust::execution_policy &system) : + super_t(), + m_system(thrust::detail::derived_cast(system)) + {} + + pointer allocate(size_type cnt); + + void deallocate(pointer p, size_type n); + + inline System &system() + { + return m_system; + } // end system() + + private: + typedef thrust::pair pointer_and_size; +}; // end temporary_allocator + + +} // end detail +} // end thrust + +#include + diff --git a/compat/thrust/detail/allocator/temporary_allocator.inl b/compat/thrust/detail/allocator/temporary_allocator.inl new file mode 100644 index 
0000000..63221d5 --- /dev/null +++ b/compat/thrust/detail/allocator/temporary_allocator.inl @@ -0,0 +1,59 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + + +template + typename temporary_allocator::pointer + temporary_allocator + ::allocate(typename temporary_allocator::size_type cnt) +{ + pointer_and_size result = thrust::get_temporary_buffer(system(), cnt); + + // handle failure + if(result.second < cnt) + { + // deallocate and throw + // note that we pass cnt to deallocate, not a value derived from result.second + deallocate(result.first, cnt); + + throw thrust::system::detail::bad_alloc("temporary_buffer::allocate: get_temporary_buffer failed"); + } // end if + + return result.first; +} // end temporary_allocator::allocate() + + +template + void temporary_allocator + ::deallocate(typename temporary_allocator::pointer p, typename temporary_allocator::size_type n) +{ + return thrust::return_temporary_buffer(system(), p); +} // end temporary_allocator + + +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/binary_search.inl b/compat/thrust/detail/binary_search.inl new file mode 100644 index 0000000..0fd799a --- /dev/null +++ b/compat/thrust/detail/binary_search.inl @@ -0,0 +1,458 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file binary_search.inl + * \brief Inline file for binary_search.h. + */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template +ForwardIterator lower_bound(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const LessThanComparable &value) +{ + using thrust::system::detail::generic::lower_bound; + return lower_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value); +} + + +template +ForwardIterator lower_bound(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const T &value, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::lower_bound; + return lower_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value, comp); +} + + +template +ForwardIterator upper_bound(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const LessThanComparable &value) +{ + using thrust::system::detail::generic::upper_bound; + return upper_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value); +} + + +template +ForwardIterator upper_bound(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const T &value, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::upper_bound; + return 
upper_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value, comp); +} + + +template +bool binary_search(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const LessThanComparable& value) +{ + using thrust::system::detail::generic::binary_search; + return binary_search(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value); +} + + +template +bool binary_search(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const T& value, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::binary_search; + return binary_search(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value, comp); +} + + +template +thrust::pair +equal_range(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const T& value, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::equal_range; + return equal_range(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value, comp); +} + + +template +thrust::pair +equal_range(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const LessThanComparable& value) +{ + using thrust::system::detail::generic::equal_range; + return equal_range(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value); +} + + +template +OutputIterator lower_bound(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator output) +{ + using thrust::system::detail::generic::lower_bound; + return lower_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, values_first, values_last, output); +} + + +template +OutputIterator lower_bound(const 
thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator output, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::lower_bound; + return lower_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, values_first, values_last, output, comp); +} + + +template +OutputIterator upper_bound(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator output) +{ + using thrust::system::detail::generic::upper_bound; + return upper_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, values_first, values_last, output); +} + + +template +OutputIterator upper_bound(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator output, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::upper_bound; + return upper_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, values_first, values_last, output, comp); +} + + +template +OutputIterator binary_search(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator output) +{ + using thrust::system::detail::generic::binary_search; + return binary_search(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, values_first, values_last, output); +} + + +template +OutputIterator binary_search(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator output, + StrictWeakOrdering comp) +{ + using 
thrust::system::detail::generic::binary_search; + return binary_search(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, values_first, values_last, output, comp); +} + + +////////////////////// +// Scalar Functions // +////////////////////// + +template +ForwardIterator lower_bound(ForwardIterator first, + ForwardIterator last, + const LessThanComparable& value) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::lower_bound(select_system(system), first, last, value); +} + +template +ForwardIterator lower_bound(ForwardIterator first, + ForwardIterator last, + const T& value, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::lower_bound(select_system(system), first, last, value, comp); +} + +template +ForwardIterator upper_bound(ForwardIterator first, + ForwardIterator last, + const LessThanComparable& value) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::upper_bound(select_system(system), first, last, value); +} + +template +ForwardIterator upper_bound(ForwardIterator first, + ForwardIterator last, + const T& value, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::upper_bound(select_system(system), first, last, value, comp); +} + +template +bool binary_search(ForwardIterator first, + ForwardIterator last, + const LessThanComparable& value) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::binary_search(select_system(system), first, last, value); +} + +template 
+bool binary_search(ForwardIterator first, + ForwardIterator last, + const T& value, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::binary_search(select_system(system), first, last, value, comp); +} + +template +thrust::pair +equal_range(ForwardIterator first, + ForwardIterator last, + const LessThanComparable& value) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::equal_range(select_system(system), first, last, value); +} + +template +thrust::pair +equal_range(ForwardIterator first, + ForwardIterator last, + const T& value, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::equal_range(select_system(system), first, last, value, comp); +} + +////////////////////// +// Vector Functions // +////////////////////// + +template +OutputIterator lower_bound(ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator output) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::lower_bound(select_system(system1,system2,system3), first, last, values_first, values_last, output); +} + +template +OutputIterator lower_bound(ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator output, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type 
System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::lower_bound(select_system(system1,system2,system3), first, last, values_first, values_last, output, comp); +} + +template +OutputIterator upper_bound(ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator output) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::upper_bound(select_system(system1,system2,system3), first, last, values_first, values_last, output); +} + +template +OutputIterator upper_bound(ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator output, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::upper_bound(select_system(system1,system2,system3), first, last, values_first, values_last, output, comp); +} + +template +OutputIterator binary_search(ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator output) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return 
thrust::binary_search(select_system(system1,system2,system3), first, last, values_first, values_last, output); +} + +template +OutputIterator binary_search(ForwardIterator first, + ForwardIterator last, + InputIterator values_first, + InputIterator values_last, + OutputIterator output, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::binary_search(select_system(system1,system2,system3), first, last, values_first, values_last, output, comp); +} + +} // end namespace thrust + diff --git a/compat/thrust/detail/config.h b/compat/thrust/detail/config.h new file mode 100644 index 0000000..d6b6691 --- /dev/null +++ b/compat/thrust/detail/config.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/*! \file config.h + * \brief Defines platform configuration. 
+ */ + +#pragma once + +#include + diff --git a/compat/thrust/detail/config/compiler.h b/compat/thrust/detail/config/compiler.h new file mode 100644 index 0000000..90ce911 --- /dev/null +++ b/compat/thrust/detail/config/compiler.h @@ -0,0 +1,103 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file compiler.h + * \brief Compiler-specific configuration + */ + +#pragma once + +#ifdef __CUDACC__ + +#include + +// Thrust supports CUDA >= 3.0 +#if CUDA_VERSION < 3000 +#error "CUDA v3.0 or newer is required" +#endif // CUDA_VERSION + +#endif // __CUDACC__ + +// enumerate host compilers we know about +#define THRUST_HOST_COMPILER_UNKNOWN 0 +#define THRUST_HOST_COMPILER_MSVC 1 +#define THRUST_HOST_COMPILER_GCC 2 + +// enumerate host compilers we know about +#define THRUST_DEVICE_COMPILER_UNKNOWN 0 +#define THRUST_DEVICE_COMPILER_MSVC 1 +#define THRUST_DEVICE_COMPILER_GCC 2 +#define THRUST_DEVICE_COMPILER_NVCC 3 + +// figure out which host compiler we're using +// XXX we should move the definition of THRUST_DEPRECATED out of this logic +#if defined(_MSC_VER) +#define THRUST_HOST_COMPILER THRUST_HOST_COMPILER_MSVC +#define THRUST_DEPRECATED __declspec(deprecated) +#elif defined(__GNUC__) +#define THRUST_HOST_COMPILER THRUST_HOST_COMPILER_GCC +#define THRUST_DEPRECATED __attribute__ ((deprecated)) +#define THRUST_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#else +#define 
THRUST_HOST_COMPILER THRUST_HOST_COMPILER_UNKNOWN +#define THRUST_DEPRECATED +#endif // THRUST_HOST_COMPILER + +// figure out which device compiler we're using +#if defined(__CUDACC__) +#define THRUST_DEVICE_COMPILER THRUST_DEVICE_COMPILER_NVCC +#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC +#define THRUST_DEVICE_COMPILER THRUST_DEVICE_COMPILER_MSVC +#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC +#define THRUST_DEVICE_COMPILER THRUST_DEVICE_COMPILER_GCC +#else +#define THRUST_DEVICE_COMPILER THRUST_DEVICE_COMPILER_UNKNOWN +#endif + +// is the device compiler capable of compiling omp? +#ifdef _OPENMP +#define THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE THRUST_TRUE +#else +#define THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE THRUST_FALSE +#endif // _OPENMP + +// disable specific MSVC warnings +#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && !defined(__CUDA_ARCH__) +#define __THRUST_DISABLE_MSVC_WARNING_BEGIN(x) \ +__pragma(warning(push)) \ +__pragma(warning(disable : x)) +#define __THRUST_DISABLE_MSVC_WARNING_END(x) \ +__pragma(warning(pop)) +#else +#define __THRUST_DISABLE_MSVC_WARNING_BEGIN(x) +#define __THRUST_DISABLE_MSVC_WARNING_END(x) +#endif +#define __THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING(x) \ +__THRUST_DISABLE_MSVC_WARNING_BEGIN(4244 4267) \ +x;\ +__THRUST_DISABLE_MSVC_WARNING_END(4244 4267) +#define __THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN \ +__THRUST_DISABLE_MSVC_WARNING_BEGIN(4244 4267) +#define __THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END \ +__THRUST_DISABLE_MSVC_WARNING_END(4244 4267) +#define __THRUST_DISABLE_MSVC_FORCING_VALUE_TO_BOOL(x) \ +__THRUST_DISABLE_MSVC_WARNING_BEGIN(4800) \ +x;\ +__THRUST_DISABLE_MSVC_WARNING_END(4800) +#define __THRUST_DISABLE_MSVC_FORCING_VALUE_TO_BOOL_BEGIN \ +__THRUST_DISABLE_MSVC_WARNING_BEGIN(4800) +#define __THRUST_DISABLE_MSVC_FORCING_VALUE_TO_BOOL_END \ +__THRUST_DISABLE_MSVC_WARNING_END(4800) diff --git a/compat/thrust/detail/config/compiler_fence.h 
b/compat/thrust/detail/config/compiler_fence.h new file mode 100644 index 0000000..f5cbf98 --- /dev/null +++ b/compat/thrust/detail/config/compiler_fence.h @@ -0,0 +1,52 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// msvc case +#if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC + +#ifndef _DEBUG + +#include +#pragma intrinsic(_ReadWriteBarrier) +#define __thrust_compiler_fence() _ReadWriteBarrier() +#else + +#define __thrust_compiler_fence() do {} while (0) + +#endif // _DEBUG + +// gcc case +#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC + +#if THRUST_GCC_VERSION >= 40200 // atomic built-ins were introduced ~4.2 +#define __thrust_compiler_fence() __sync_synchronize() +#else +// allow the code to compile without any guarantees +#define __thrust_compiler_fence() do {} while (0) +#endif // THRUST_GCC_VERSION + +// unknown case +#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_UNKNOWN + +// allow the code to compile without any guarantees +#define __thrust_compiler_fence() do {} while (0) + +#endif + diff --git a/compat/thrust/detail/config/config.h b/compat/thrust/detail/config/config.h new file mode 100644 index 0000000..f3498ac --- /dev/null +++ b/compat/thrust/detail/config/config.h @@ -0,0 +1,36 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file config.h + * \brief Defines platform configuration. + */ + +#pragma once + +// XXX the order of these #includes matters + +#include +#include +// host_system.h & device_system.h must be #included as early as possible +// because other config headers depend on it +#include +#include +#include +#include +#include +#include +#include + diff --git a/compat/thrust/detail/config/debug.h b/compat/thrust/detail/config/debug.h new file mode 100644 index 0000000..56c1bad --- /dev/null +++ b/compat/thrust/detail/config/debug.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#ifndef THRUST_DEBUG +# ifndef NDEBUG +# if (DEBUG || _DEBUG) +# define THRUST_DEBUG 1 +# endif // (DEBUG || _DEBUG) +# endif // NDEBUG +#endif // THRUST_DEBUG + +#if THRUST_DEBUG +# ifndef __THRUST_SYNCHRONOUS +# define __THRUST_SYNCHRONOUS 1 +# endif // __THRUST_SYNCHRONOUS +#endif // THRUST_DEBUG + diff --git a/compat/thrust/detail/config/device_system.h b/compat/thrust/detail/config/device_system.h new file mode 100644 index 0000000..a104906 --- /dev/null +++ b/compat/thrust/detail/config/device_system.h @@ -0,0 +1,61 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +// reserve 0 for undefined +#define THRUST_DEVICE_SYSTEM_CUDA 1 +#define THRUST_DEVICE_SYSTEM_OMP 2 +#define THRUST_DEVICE_SYSTEM_TBB 3 +#define THRUST_DEVICE_SYSTEM_CPP 4 + +#ifndef THRUST_DEVICE_SYSTEM +#define THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_CUDA +#endif // THRUST_DEVICE_SYSTEM + +// XXX make the use of THRUST_DEVICE_BACKEND an error in Thrust 1.7 +// XXX eliminate the following in Thrust 1.7 + +#define THRUST_DEVICE_BACKEND_CUDA THRUST_DEVICE_SYSTEM_CUDA +#define THRUST_DEVICE_BACKEND_OMP THRUST_DEVICE_SYSTEM_OMP +#define THRUST_DEVICE_BACKEND_TBB THRUST_DEVICE_SYSTEM_TBB + +#ifdef THRUST_DEVICE_BACKEND +# if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC +# pragma message("----------------------------------------------------------------------------------") +# pragma message("| WARNING: THRUST_DEVICE_BACKEND is deprecated; use THRUST_DEVICE_SYSTEM instead |") +# pragma message("----------------------------------------------------------------------------------") +# else +# warning ---------------------------------------------------------------------------------- +# warning | WARNING: THRUST_DEVICE_BACKEND is deprecated; use THRUST_DEVICE_SYSTEM instead | +# warning ---------------------------------------------------------------------------------- +# endif // THRUST_HOST_COMPILER +# undef THRUST_DEVICE_SYSTEM +# define THRUST_DEVICE_SYSTEM THRUST_DEVICE_BACKEND +#endif // THRUST_DEVICE_BACKEND + +#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA +#define __THRUST_DEVICE_SYSTEM_NAMESPACE cuda +#elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_OMP +#define __THRUST_DEVICE_SYSTEM_NAMESPACE omp +#elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_TBB +#define __THRUST_DEVICE_SYSTEM_NAMESPACE tbb +#elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CPP +#define __THRUST_DEVICE_SYSTEM_NAMESPACE cpp +#endif + +#define __THRUST_DEVICE_SYSTEM_ROOT thrust/system/__THRUST_DEVICE_SYSTEM_NAMESPACE + diff --git 
a/compat/thrust/detail/config/forceinline.h b/compat/thrust/detail/config/forceinline.h new file mode 100644 index 0000000..620769b --- /dev/null +++ b/compat/thrust/detail/config/forceinline.h @@ -0,0 +1,36 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file forceinline.h + * \brief Defines __thrust_forceinline__ + */ + +#pragma once + +#include + +#if defined(__CUDACC__) + +#define __thrust_forceinline__ __forceinline__ + +#else + +// TODO add + +#define __thrust_forceinline__ + +#endif + diff --git a/compat/thrust/detail/config/hd_warning_disable.h b/compat/thrust/detail/config/hd_warning_disable.h new file mode 100644 index 0000000..b993ef2 --- /dev/null +++ b/compat/thrust/detail/config/hd_warning_disable.h @@ -0,0 +1,35 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! 
\file hd_warning_disable.h + * \brief Defines __thrust_hd_warning_disable__ + */ + +#pragma once + +#include + +#if defined(__CUDACC__) + +#define __thrust_hd_warning_disable__ \ +#pragma hd_warning_disable +#else + +#define __thrust_hd_warning_disable__ + +#endif + + diff --git a/compat/thrust/detail/config/host_device.h b/compat/thrust/detail/config/host_device.h new file mode 100644 index 0000000..5d0975d --- /dev/null +++ b/compat/thrust/detail/config/host_device.h @@ -0,0 +1,44 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! 
\file host_device.h + * \brief Defines __host__ and __device__ and other CUDA-isms + */ + +#pragma once + +#include + +#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA + +#include + +#else + +// since __host__ & __device__ might have already been defined, only +// #define them if not defined already +// XXX this will break if the client does #include later + +#ifndef __host__ +#define __host__ +#endif // __host__ + +#ifndef __device__ +#define __device__ +#endif // __device__ + +#endif + diff --git a/compat/thrust/detail/config/host_system.h b/compat/thrust/detail/config/host_system.h new file mode 100644 index 0000000..fb8edab --- /dev/null +++ b/compat/thrust/detail/config/host_system.h @@ -0,0 +1,58 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#pragma once + +// reserve 0 for undefined +#define THRUST_HOST_SYSTEM_CPP 1 +#define THRUST_HOST_SYSTEM_OMP 2 +#define THRUST_HOST_SYSTEM_TBB 3 + +#ifndef THRUST_HOST_SYSTEM +#define THRUST_HOST_SYSTEM THRUST_HOST_SYSTEM_CPP +#endif // THRUST_HOST_SYSTEM + +// XXX make the use of THRUST_HOST_BACKEND an error in Thrust 1.7 +// XXX eliminate the following in Thrust 1.7 + +#define THRUST_HOST_BACKEND_CPP THRUST_HOST_SYSTEM_CPP +#define THRUST_HOST_BACKEND_OMP THRUST_HOST_SYSTEM_OMP +#define THRUST_HOST_BACKEND_TBB THRUST_HOST_SYSTEM_TBB + +#ifdef THRUST_HOST_BACKEND +# if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC +# pragma message("------------------------------------------------------------------------------") +# pragma message("| WARNING: THRUST_HOST_BACKEND is deprecated; use THRUST_HOST_SYSTEM instead |") +# pragma message("------------------------------------------------------------------------------") +# else +# warning ------------------------------------------------------------------------------ +# warning | WARNING: THRUST_HOST_BACKEND is deprecated; use THRUST_HOST_SYSTEM instead | +# warning ------------------------------------------------------------------------------ +# endif // THRUST_HOST_COMPILER +# undef THRUST_HOST_SYSTEM +# define THRUST_HOST_SYSTEM THRUST_HOST_BACKEND +#endif // THRUST_HOST_BACKEND + +#if THRUST_HOST_SYSTEM == THRUST_HOST_SYSTEM_CPP +#define __THRUST_HOST_SYSTEM_NAMESPACE cpp +#elif THRUST_HOST_SYSTEM == THRUST_HOST_SYSTEM_OMP +#define __THRUST_HOST_SYSTEM_NAMESPACE omp +#elif THRUST_HOST_SYSTEM == THRUST_HOST_SYSTEM_TBB +#define __THRUST_HOST_SYSTEM_NAMESPACE tbb +#endif + +#define __THRUST_HOST_SYSTEM_ROOT thrust/system/__THRUST_HOST_SYSTEM_NAMESPACE + diff --git a/compat/thrust/detail/config/simple_defines.h b/compat/thrust/detail/config/simple_defines.h new file mode 100644 index 0000000..f9510ee --- /dev/null +++ b/compat/thrust/detail/config/simple_defines.h @@ -0,0 +1,28 @@ +/* + * Copyright 2008-2012 NVIDIA 
Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file simple_defines.h + * \brief Primitive macros without dependencies. + */ + +#pragma once + +#define THRUST_UNKNOWN 0 +#define THRUST_FALSE 0 +#define THRUST_TRUE 1 + +#define THRUST_PREVENT_MACRO_SUBSTITUTION + diff --git a/compat/thrust/detail/contiguous_storage.h b/compat/thrust/detail/contiguous_storage.h new file mode 100644 index 0000000..fe72bce --- /dev/null +++ b/compat/thrust/detail/contiguous_storage.h @@ -0,0 +1,129 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ + +namespace detail +{ + +// XXX parameter T is redundant with parameter Alloc +template + class contiguous_storage +{ + private: + typedef thrust::detail::allocator_traits alloc_traits; + + public: + typedef Alloc allocator_type; + typedef T value_type; + typedef typename alloc_traits::pointer pointer; + typedef typename alloc_traits::const_pointer const_pointer; + typedef typename alloc_traits::size_type size_type; + typedef typename alloc_traits::difference_type difference_type; + + // XXX we should bring reference & const_reference into allocator_traits + // at the moment, it's unclear how -- we have nothing analogous to + // rebind_pointer for references + // we either need to add reference_traits or extend the existing + // pointer_traits to support wrapped references + typedef typename Alloc::reference reference; + typedef typename Alloc::const_reference const_reference; + + typedef thrust::detail::normal_iterator iterator; + typedef thrust::detail::normal_iterator const_iterator; + + explicit contiguous_storage(const allocator_type &alloc = allocator_type()); + + explicit contiguous_storage(size_type n, const allocator_type &alloc = allocator_type()); + + ~contiguous_storage(void); + + size_type size(void) const; + + size_type max_size(void) const; + + iterator begin(void); + + const_iterator begin(void) const; + + iterator end(void); + + const_iterator end(void) const; + + reference operator[](size_type n); + + const_reference operator[](size_type n) const; + + allocator_type get_allocator(void) const; + + // note that allocate does *not* automatically call deallocate + void allocate(size_type n); + + void deallocate(void); + + void swap(contiguous_storage &x); + + void default_construct_n(iterator first, size_type n); + + void uninitialized_fill_n(iterator first, size_type n, const value_type &value); + + template + iterator uninitialized_copy(InputIterator first, InputIterator 
last, iterator result); + + template + iterator uninitialized_copy(thrust::execution_policy &from_system, + InputIterator first, + InputIterator last, + iterator result); + + template + iterator uninitialized_copy_n(InputIterator first, Size n, iterator result); + + template + iterator uninitialized_copy_n(thrust::execution_policy &from_system, + InputIterator first, + Size n, + iterator result); + + void destroy(iterator first, iterator last); + + private: + // XXX we could inherit from this to take advantage of empty base class optimization + allocator_type m_allocator; + + iterator m_begin; + + size_type m_size; + + // disallow assignment + contiguous_storage &operator=(const contiguous_storage &x); +}; // end contiguous_storage + +} // end detail + +template void swap(detail::contiguous_storage &lhs, detail::contiguous_storage &rhs); + +} // end thrust + +#include + diff --git a/compat/thrust/detail/contiguous_storage.inl b/compat/thrust/detail/contiguous_storage.inl new file mode 100644 index 0000000..7e26c26 --- /dev/null +++ b/compat/thrust/detail/contiguous_storage.inl @@ -0,0 +1,245 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include // for use of std::swap in the WAR below + +namespace thrust +{ + +namespace detail +{ + +template + contiguous_storage + ::contiguous_storage(const Alloc &alloc) + :m_allocator(alloc), + m_begin(pointer(static_cast(0))), + m_size(0) +{ + ; +} // end contiguous_storage::contiguous_storage() + +template + contiguous_storage + ::contiguous_storage(size_type n, const Alloc &alloc) + :m_allocator(alloc), + m_begin(pointer(static_cast(0))), + m_size(0) +{ + allocate(n); +} // end contiguous_storage::contiguous_storage() + +template + contiguous_storage + ::~contiguous_storage(void) +{ + deallocate(); +} // end contiguous_storage::~contiguous_storage() + +template + typename contiguous_storage::size_type + contiguous_storage + ::size(void) const +{ + return m_size; +} // end contiguous_storage::size() + +template + typename contiguous_storage::size_type + contiguous_storage + ::max_size(void) const +{ + return alloc_traits::max_size(m_allocator); +} // end contiguous_storage::max_size() + +template + typename contiguous_storage::iterator + contiguous_storage + ::begin(void) +{ + return m_begin; +} // end contiguous_storage::begin() + +template + typename contiguous_storage::const_iterator + contiguous_storage + ::begin(void) const +{ + return m_begin; +} // end contiguous_storage::begin() + +template + typename contiguous_storage::iterator + contiguous_storage + ::end(void) +{ + return m_begin + size(); +} // end contiguous_storage::end() + +template + typename contiguous_storage::const_iterator + contiguous_storage + ::end(void) const +{ + return m_begin + size(); +} // end contiguous_storage::end() + +template + typename contiguous_storage::reference + contiguous_storage + ::operator[](size_type n) +{ + return m_begin[n]; +} // end contiguous_storage::operator[]() + +template + typename contiguous_storage::const_reference + contiguous_storage + ::operator[](size_type 
n) const +{ + return m_begin[n]; +} // end contiguous_storage::operator[]() + +template + typename contiguous_storage::allocator_type + contiguous_storage + ::get_allocator(void) const +{ + return m_allocator; +} // end contiguous_storage::get_allocator() + +template + void contiguous_storage + ::allocate(size_type n) +{ + if(n > 0) + { + m_begin = iterator(m_allocator.allocate(n)); + m_size = n; + } // end if + else + { + m_begin = iterator(pointer(static_cast(0))); + m_size = 0; + } // end else +} // end contiguous_storage::allocate() + +template + void contiguous_storage + ::deallocate(void) +{ + if(size() > 0) + { + m_allocator.deallocate(m_begin.base(), size()); + m_begin = iterator(pointer(static_cast(0))); + m_size = 0; + } // end if +} // end contiguous_storage::deallocate() + +template + void contiguous_storage + ::swap(contiguous_storage &x) +{ + thrust::swap(m_begin, x.m_begin); + thrust::swap(m_size, x.m_size); + + // XXX WAR nvcc 4.0's "calling a __host__ function from a __host__ __device__ function is not allowed" warning + //thrust::swap(m_allocator, x.m_allocator); + std::swap(m_allocator, x.m_allocator); +} // end contiguous_storage::swap() + +template + void contiguous_storage + ::default_construct_n(iterator first, size_type n) +{ + default_construct_range(m_allocator, first.base(), n); +} // end contiguous_storage::default_construct_n() + +template + void contiguous_storage + ::uninitialized_fill_n(iterator first, size_type n, const value_type &x) +{ + fill_construct_range(m_allocator, first.base(), n, x); +} // end contiguous_storage::uninitialized_fill() + +template + template + typename contiguous_storage::iterator + contiguous_storage + ::uninitialized_copy(thrust::execution_policy &from_system, InputIterator first, InputIterator last, iterator result) +{ + return iterator(copy_construct_range(from_system, m_allocator, first, last, result.base())); +} // end contiguous_storage::uninitialized_copy() + +template + template + typename 
contiguous_storage::iterator + contiguous_storage + ::uninitialized_copy(InputIterator first, InputIterator last, iterator result) +{ + // XXX assumes InputIterator's associated System is default-constructible + typename thrust::iterator_system::type from_system; + + return iterator(copy_construct_range(from_system, m_allocator, first, last, result.base())); +} // end contiguous_storage::uninitialized_copy() + +template + template + typename contiguous_storage::iterator + contiguous_storage + ::uninitialized_copy_n(thrust::execution_policy &from_system, InputIterator first, Size n, iterator result) +{ + return iterator(copy_construct_range_n(from_system, m_allocator, first, n, result.base())); +} // end contiguous_storage::uninitialized_copy_n() + +template + template + typename contiguous_storage::iterator + contiguous_storage + ::uninitialized_copy_n(InputIterator first, Size n, iterator result) +{ + // XXX assumes InputIterator's associated System is default-constructible + typename thrust::iterator_system::type from_system; + + return iterator(copy_construct_range_n(from_system, m_allocator, first, n, result.base())); +} // end contiguous_storage::uninitialized_copy_n() + +template + void contiguous_storage + ::destroy(iterator first, iterator last) +{ + destroy_range(m_allocator, first.base(), last - first); +} // end contiguous_storage::destroy() + +} // end detail + +template + void swap(detail::contiguous_storage &lhs, detail::contiguous_storage &rhs) +{ + lhs.swap(rhs); +} // end swap() + +} // end thrust + diff --git a/compat/thrust/detail/copy.h b/compat/thrust/detail/copy.h new file mode 100644 index 0000000..8ed3abd --- /dev/null +++ b/compat/thrust/detail/copy.h @@ -0,0 +1,87 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ + +template + OutputIterator copy(const thrust::detail::execution_policy_base &system, + InputIterator first, + InputIterator last, + OutputIterator result); + +template + OutputIterator copy_n(const thrust::detail::execution_policy_base &system, + InputIterator first, + Size n, + OutputIterator result); + +template + OutputIterator copy(InputIterator first, + InputIterator last, + OutputIterator result); + +template + OutputIterator copy_n(InputIterator first, + Size n, + OutputIterator result); + + +namespace detail +{ + + +template + OutputIterator two_system_copy(thrust::execution_policy &from_system, + thrust::execution_policy &two_system, + InputIterator first, + InputIterator last, + OutputIterator result); + + +template + OutputIterator two_system_copy_n(thrust::execution_policy &from_system, + thrust::execution_policy &two_system, + InputIterator first, + Size n, + OutputIterator result); + + +} // end detail +} // end thrust + +#include + diff --git a/compat/thrust/detail/copy.inl b/compat/thrust/detail/copy.inl new file mode 100644 index 0000000..9ac4807 --- /dev/null +++ b/compat/thrust/detail/copy.inl @@ -0,0 +1,124 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + OutputIterator copy(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result) +{ + using thrust::system::detail::generic::copy; + return copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result); +} // end copy() + + +template + OutputIterator copy_n(const thrust::detail::execution_policy_base &exec, + InputIterator first, + Size n, + OutputIterator result) +{ + using thrust::system::detail::generic::copy_n; + return copy_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, result); +} // end copy_n() + + +namespace detail +{ + + +template + OutputIterator two_system_copy(thrust::execution_policy &system1, + thrust::execution_policy &system2, + InputIterator first, + InputIterator last, + OutputIterator result) +{ + using thrust::system::detail::generic::select_system; + + return thrust::copy(select_system(thrust::detail::derived_cast(thrust::detail::strip_const(system1)), thrust::detail::derived_cast(thrust::detail::strip_const(system2))), first, last, result); +} // end two_system_copy() + + +template + OutputIterator two_system_copy_n(thrust::execution_policy &system1, + thrust::execution_policy &system2, + InputIterator first, + Size n, + OutputIterator result) +{ + using thrust::system::detail::generic::select_system; + + return thrust::copy_n(select_system(thrust::detail::derived_cast(thrust::detail::strip_const(system1)), 
thrust::detail::derived_cast(thrust::detail::strip_const(system2))), first, n, result); +} // end two_system_copy_n() + + +} // end detail + + +template + OutputIterator copy(InputIterator first, + InputIterator last, + OutputIterator result) +{ + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::detail::two_system_copy(system1, system2, first, last, result); +} // end copy() + + +template + OutputIterator copy_n(InputIterator first, + Size n, + OutputIterator result) +{ + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::detail::two_system_copy_n(system1, system2, first, n, result); +} // end copy_n() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/copy_if.h b/compat/thrust/detail/copy_if.h new file mode 100644 index 0000000..54e1ef4 --- /dev/null +++ b/compat/thrust/detail/copy_if.h @@ -0,0 +1,68 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ + +template + OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred); + +template + OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred); + +template + OutputIterator copy_if(InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred); + +template + OutputIterator copy_if(InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred); + +} // end thrust + +#include + diff --git a/compat/thrust/detail/copy_if.inl b/compat/thrust/detail/copy_if.inl new file mode 100644 index 0000000..e443bb7 --- /dev/null +++ b/compat/thrust/detail/copy_if.inl @@ -0,0 +1,105 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred) +{ + using thrust::system::detail::generic::copy_if; + return copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, pred); +} // end copy_if() + + +template + OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred) +{ + using thrust::system::detail::generic::copy_if; + return copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, result, pred); +} // end copy_if() + + +template + OutputIterator copy_if(InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::copy_if(select_system(system1,system2), first, last, result, pred); +} // end copy_if() + + +template + OutputIterator copy_if(InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::copy_if(select_system(system1,system2,system3), first, last, stencil, result, pred); +} // end copy_if() + + +} // end thrust + diff --git a/compat/thrust/detail/count.inl b/compat/thrust/detail/count.inl new file mode 100644 index 
0000000..d2856ae --- /dev/null +++ b/compat/thrust/detail/count.inl @@ -0,0 +1,80 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file count.inl + * \brief Inline file for count.h. + */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + typename thrust::iterator_traits::difference_type + count(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, const EqualityComparable& value) +{ + using thrust::system::detail::generic::count; + return count(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value); +} // end count() + + +template + typename thrust::iterator_traits::difference_type + count_if(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred) +{ + using thrust::system::detail::generic::count_if; + return count_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); +} // end count_if() + + +template +typename thrust::iterator_traits::difference_type +count(InputIterator first, InputIterator last, const EqualityComparable& value) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::count(select_system(system), first, last, value); +} // end count() + + +template +typename 
thrust::iterator_traits::difference_type +count_if(InputIterator first, InputIterator last, Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::count_if(select_system(system), first, last, pred); +} // end count_if() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/cstdint.h b/compat/thrust/detail/cstdint.h new file mode 100644 index 0000000..25d30fd --- /dev/null +++ b/compat/thrust/detail/cstdint.h @@ -0,0 +1,79 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC) +#include +#endif + +namespace thrust +{ +namespace detail +{ + +#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) + +#if (_MSC_VER < 1300) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; +#else + typedef signed __int8 int8_t; + typedef signed __int16 int16_t; + typedef signed __int32 int32_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + +#else + +typedef ::int8_t int8_t; +typedef ::int16_t int16_t; +typedef ::int32_t int32_t; +typedef ::int64_t int64_t; +typedef ::uint8_t uint8_t; +typedef ::uint16_t uint16_t; +typedef ::uint32_t uint32_t; +typedef ::uint64_t uint64_t; + +#endif + + +// an oracle to tell us how to define intptr_t +template struct divine_intptr_t; +template struct divine_uintptr_t; + +// 32b platforms +template<> struct divine_intptr_t<4> { typedef thrust::detail::int32_t type; }; +template<> struct divine_uintptr_t<4> { typedef thrust::detail::uint32_t type; }; + +// 64b platforms +template<> struct divine_intptr_t<8> { typedef thrust::detail::int64_t type; }; +template<> struct divine_uintptr_t<8> { typedef thrust::detail::uint64_t type; }; + +typedef divine_intptr_t<>::type intptr_t; +typedef divine_uintptr_t<>::type uintptr_t; + +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/device_delete.inl b/compat/thrust/detail/device_delete.inl new file mode 100644 index 0000000..dd70d76 --- /dev/null +++ b/compat/thrust/detail/device_delete.inl @@ -0,0 +1,47 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file device_delete.inl + * \brief Inline file for device_delete.h. + */ + +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + +// define an empty allocator class to use below +struct device_delete_allocator {}; + +} // end detail + +template + void device_delete(device_ptr ptr, + const size_t n) +{ + // destroy the n elements at ptr using the empty device_delete_allocator, then free the storage + thrust::detail::device_delete_allocator a; + thrust::detail::destroy_range(a, ptr, n); + thrust::device_free(ptr); +} // end device_delete() + +} // end thrust + diff --git a/compat/thrust/detail/device_free.inl b/compat/thrust/detail/device_free.inl new file mode 100644 index 0000000..ab8db9f --- /dev/null +++ b/compat/thrust/detail/device_free.inl @@ -0,0 +1,44 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file device_free.inl + * \brief Inline file for device_free.h. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ + +void device_free(thrust::device_ptr ptr) +{ + using thrust::system::detail::generic::select_system; + + typedef thrust::iterator_system< thrust::device_ptr >::type system; + + // XXX lower to select_system(system) here + system s; + + thrust::free(s, ptr); +} // end device_free() + +} // end thrust + diff --git a/compat/thrust/detail/device_malloc.inl b/compat/thrust/detail/device_malloc.inl new file mode 100644 index 0000000..76d0029 --- /dev/null +++ b/compat/thrust/detail/device_malloc.inl @@ -0,0 +1,60 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file device_malloc.inl + * \brief Inline file for device_malloc.h. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ + + +thrust::device_ptr device_malloc(const std::size_t n) +{ + using thrust::system::detail::generic::select_system; + + typedef thrust::iterator_system< thrust::device_ptr >::type system; + + // XXX lower to select_system(system) here + system s; + + return thrust::device_ptr(thrust::malloc(s, n).get()); +} // end device_malloc() + + +template + thrust::device_ptr device_malloc(const std::size_t n) +{ + using thrust::system::detail::generic::select_system; + + typedef thrust::iterator_system< thrust::device_ptr >::type system; + + // XXX lower to select_system(system) here + system s; + + return thrust::device_ptr(thrust::malloc(s,n).get()); +} // end device_malloc() + + +} // end thrust + diff --git a/compat/thrust/detail/device_new.inl b/compat/thrust/detail/device_new.inl new file mode 100644 index 0000000..1f00a97 --- /dev/null +++ b/compat/thrust/detail/device_new.inl @@ -0,0 +1,60 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file device_new.inl + * \brief Inline file for device_new.h. 
+ */ + +#include +#include +#include + +namespace thrust +{ + +template + device_ptr device_new(device_ptr p, + const size_t n) +{ + // XXX TODO dispatch n null device constructors at p here + // in the meantime, dispatch 1 null host constructor here + // and dispatch n copy constructors + return device_new(p, T(), n); +} // end device_new() + +template + device_ptr device_new(device_ptr p, + const T &exemplar, + const size_t n) +{ + device_ptr result(reinterpret_cast(p.get())); + + // run copy constructors at p here + thrust::uninitialized_fill(result, result + n, exemplar); + + return result; +} // end device_new() + +template + device_ptr device_new(const size_t n) +{ + // allocate storage for n elements, then call placement new; NOTE(review): n is not forwarded to device_new(p, n) here -- confirm that all n elements get constructed + return device_new(thrust::device_malloc(n)); +} // end device_new() + +} // end thrust + diff --git a/compat/thrust/detail/device_ptr.inl b/compat/thrust/detail/device_ptr.inl new file mode 100644 index 0000000..0afe8a1 --- /dev/null +++ b/compat/thrust/detail/device_ptr.inl @@ -0,0 +1,74 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file device_ptr.inl + * \brief Inline file for device_ptr.h. 
+ */ + +#include +#include +#include + +#include +#include + +namespace thrust +{ + +template + device_ptr device_pointer_cast(T *ptr) +{ + return device_ptr(ptr); +} // end device_pointer_cast() + +template + device_ptr device_pointer_cast(const device_ptr &ptr) +{ + return ptr; +} // end device_pointer_cast() + +// output to ostream +template + std::basic_ostream &operator<<(std::basic_ostream &os, const device_ptr &p) +{ + return os << p.get(); +} // end operator<<() + + +namespace detail +{ + +template + struct is_device_ptr< thrust::device_ptr > + : public true_type +{ +}; // end is_device_ptr + +#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && (_MSC_VER <= 1400) +// XXX WAR MSVC 2005 problem with correctly implementing +// pointer_raw_pointer for device_ptr by specializing it here +template + struct pointer_raw_pointer< thrust::device_ptr > +{ + typedef typename device_ptr::raw_pointer type; +}; // end pointer_raw_pointer +#endif + + +} // end namespace detail +} // end namespace thrust + diff --git a/compat/thrust/detail/device_reference.inl b/compat/thrust/detail/device_reference.inl new file mode 100644 index 0000000..ad5cb76 --- /dev/null +++ b/compat/thrust/detail/device_reference.inl @@ -0,0 +1,53 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file device_reference.inl + * \brief Inline file for device_reference.h. 
+ */ + +#include +#include + +namespace thrust +{ + +template + template + device_reference & + device_reference + ::operator=(const device_reference &other) +{ + return super_t::operator=(other); +} // end operator=() + +template + device_reference & + device_reference + ::operator=(const value_type &x) +{ + return super_t::operator=(x); +} // end operator=() + +template +__host__ __device__ +void swap(device_reference &a, device_reference &b) +{ + a.swap(b); +} // end swap() + +} // end thrust + diff --git a/compat/thrust/detail/device_vector.inl b/compat/thrust/detail/device_vector.inl new file mode 100644 index 0000000..f6bafba --- /dev/null +++ b/compat/thrust/detail/device_vector.inl @@ -0,0 +1,37 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file device_vector.inl + * \brief Inline file for device_vector.h. 
+ */ + +#include + +namespace thrust +{ + +template + template + device_vector + ::device_vector(const host_vector &v) + :Parent(v) +{ + ; +} // end device_vector::device_vector() + +} // end namespace thrust + diff --git a/compat/thrust/detail/dispatch/is_trivial_copy.h b/compat/thrust/detail/dispatch/is_trivial_copy.h new file mode 100644 index 0000000..2bedf1f --- /dev/null +++ b/compat/thrust/detail/dispatch/is_trivial_copy.h @@ -0,0 +1,59 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file is_trivial_copy.h + * \brief Type trait that detects whether a copy between two iterators is trivial. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ + +namespace detail +{ + +namespace dispatch +{ + + +// a trivial copy's iterator's value_types match, +// the iterators themselves are normal_iterators +// and the ToIterator's value_type has_trivial_assign +template + struct is_trivial_copy : + integral_constant< + bool, + is_same< + typename thrust::iterator_value::type, + typename thrust::iterator_value::type + >::value + && is_trivial_iterator::value + && is_trivial_iterator::value + && has_trivial_assign::type>::value + > {}; + +} // end namespace dispatch + +} // end namespace detail + +} // end namespace thrust + diff --git a/compat/thrust/detail/distance.inl b/compat/thrust/detail/distance.inl new file mode 100644 index 0000000..f37595f --- /dev/null +++ b/compat/thrust/detail/distance.inl @@ -0,0 +1,39 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! 
\file distance.inl + * \brief Inline file for distance.h + */ + +#include +#include +#include + +namespace thrust +{ + + +template + inline typename thrust::iterator_traits::difference_type + distance(InputIterator first, InputIterator last) +{ + return thrust::system::detail::generic::distance(first, last); +} // end distance() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/equal.inl b/compat/thrust/detail/equal.inl new file mode 100644 index 0000000..ca6fecc --- /dev/null +++ b/compat/thrust/detail/equal.inl @@ -0,0 +1,82 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file equal.inl + * \brief Inline file for equal.h. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template +bool equal(const thrust::detail::execution_policy_base &system, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2) +{ + using thrust::system::detail::generic::equal; + return equal(thrust::detail::derived_cast(thrust::detail::strip_const(system)), first1, last1, first2); +} // end equal() + + +template +bool equal(const thrust::detail::execution_policy_base &system, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate binary_pred) +{ + using thrust::system::detail::generic::equal; + return equal(thrust::detail::derived_cast(thrust::detail::strip_const(system)), first1, last1, first2, binary_pred); +} // end equal() + + +template +bool equal(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::equal(select_system(system1,system2), first1, last1, first2); +} + + +template +bool equal(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, BinaryPredicate binary_pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::equal(select_system(system1,system2), first1, last1, first2, binary_pred); +} + + +} // end namespace thrust + diff --git a/compat/thrust/detail/execute_with_allocator.h b/compat/thrust/detail/execute_with_allocator.h new file mode 100644 index 0000000..9d3c1ba --- /dev/null +++ b/compat/thrust/detail/execute_with_allocator.h @@ -0,0 +1,84 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may 
not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + +template +__host__ __device__ +ToPointer reinterpret_pointer_cast(FromPointer ptr) +{ + typedef typename thrust::detail::pointer_element::type to_element; + return ToPointer(reinterpret_cast(thrust::raw_pointer_cast(ptr))); +} + + +template class BaseSystem> + struct execute_with_allocator + : BaseSystem > +{ + Allocator &m_alloc; + + execute_with_allocator(Allocator &alloc) + : m_alloc(alloc) + {} + + template + friend thrust::pair + get_temporary_buffer(execute_with_allocator &system, std::ptrdiff_t n) + { + typedef typename thrust::detail::allocator_traits alloc_traits; + typedef typename alloc_traits::void_pointer void_pointer; + typedef typename alloc_traits::size_type size_type; + typedef typename alloc_traits::value_type value_type; + + // how many elements of type value_type do we need to accommodate n elements of type T? 
+ size_type num_elements = thrust::detail::util::divide_ri(sizeof(T) * n, sizeof(value_type)); + + // allocate that many + void_pointer ptr = alloc_traits::allocate(system.m_alloc, num_elements); + + // return the pointer and the number of elements of type T allocated + return thrust::make_pair(thrust::detail::reinterpret_pointer_cast(ptr),n); + } + + template + friend void return_temporary_buffer(execute_with_allocator &system, Pointer p) + { + typedef typename thrust::detail::allocator_traits alloc_traits; + typedef typename alloc_traits::pointer pointer; + + // return the pointer to the allocator; only p is available here, so 0 is passed as the size + pointer to_ptr = thrust::detail::reinterpret_pointer_cast(p); + alloc_traits::deallocate(system.m_alloc, to_ptr, 0); + } +}; + + +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/execution_policy.h b/compat/thrust/detail/execution_policy.h new file mode 100644 index 0000000..28e77f2 --- /dev/null +++ b/compat/thrust/detail/execution_policy.h @@ -0,0 +1,78 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +namespace thrust +{ +namespace detail +{ + + +// execution_policy_base serves as a guard against +// infinite recursion in thrust entry points: +// +// template +// void foo(const thrust::detail::execution_policy_base &s) +// { +// using thrust::system::detail::generic::foo; +// +// foo(thrust::detail::derived_cast(thrust::detail::strip_const(s))); +// } +// +// foo is not recursive when +// 1. DerivedPolicy is derived from thrust::execution_policy below +// 2. generic::foo takes thrust::execution_policy as a parameter +template struct execution_policy_base {}; + + +template +__host__ __device__ +inline execution_policy_base &strip_const(const execution_policy_base &x) +{ + return const_cast&>(x); +} + + +template +__host__ __device__ +inline DerivedPolicy &derived_cast(execution_policy_base &x) +{ + return static_cast(x); +} + + +template +__host__ __device__ +inline const DerivedPolicy &derived_cast(const execution_policy_base &x) +{ + return static_cast(x); +} + + +} // end detail + + +template + struct execution_policy + : thrust::detail::execution_policy_base +{}; + + +} // end thrust + diff --git a/compat/thrust/detail/extrema.inl b/compat/thrust/detail/extrema.inl new file mode 100644 index 0000000..4bcd0bd --- /dev/null +++ b/compat/thrust/detail/extrema.inl @@ -0,0 +1,160 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template +ForwardIterator min_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last) +{ + using thrust::system::detail::generic::min_element; + return min_element(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); +} // end min_element() + + +template +ForwardIterator min_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp) +{ + using thrust::system::detail::generic::min_element; + return min_element(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp); +} // end min_element() + + +template +ForwardIterator max_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last) +{ + using thrust::system::detail::generic::max_element; + return max_element(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); +} // end max_element() + + +template +ForwardIterator max_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp) +{ + using thrust::system::detail::generic::max_element; + return max_element(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp); +} // end max_element() + + +template +thrust::pair minmax_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last) +{ + using thrust::system::detail::generic::minmax_element; + return minmax_element(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); +} // end minmax_element() + + +template +thrust::pair minmax_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp) +{ + using thrust::system::detail::generic::minmax_element; + return 
minmax_element(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp); +} // end minmax_element() + + +template +ForwardIterator min_element(ForwardIterator first, ForwardIterator last) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::min_element(select_system(system), first, last); +} // end min_element() + + +template +ForwardIterator min_element(ForwardIterator first, ForwardIterator last, + BinaryPredicate comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::min_element(select_system(system), first, last, comp); +} // end min_element() + + +template +ForwardIterator max_element(ForwardIterator first, ForwardIterator last) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::max_element(select_system(system), first, last); +} // end max_element() + + +template +ForwardIterator max_element(ForwardIterator first, ForwardIterator last, + BinaryPredicate comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::max_element(select_system(system), first, last, comp); +} // end max_element() + + +template +thrust::pair +minmax_element(ForwardIterator first, ForwardIterator last) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::minmax_element(select_system(system), first, last); +} // end minmax_element() + + +template +thrust::pair +minmax_element(ForwardIterator first, ForwardIterator last, BinaryPredicate comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type 
System; + + System system; + + return thrust::minmax_element(select_system(system), first, last, comp); +} // end minmax_element() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/fill.inl b/compat/thrust/detail/fill.inl new file mode 100644 index 0000000..c60e4a0 --- /dev/null +++ b/compat/thrust/detail/fill.inl @@ -0,0 +1,85 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file fill.inl + * \brief Inline file for fill.h. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + void fill(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const T &value) +{ + using thrust::system::detail::generic::fill; + return fill(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value); +} // end fill() + + +template + OutputIterator fill_n(const thrust::detail::execution_policy_base &exec, + OutputIterator first, + Size n, + const T &value) +{ + using thrust::system::detail::generic::fill_n; + return fill_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, value); +} // end fill_n() + + +template + void fill(ForwardIterator first, + ForwardIterator last, + const T &value) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + thrust::fill(select_system(system), first, last, value); +} // end fill() + + +template + OutputIterator fill_n(OutputIterator first, + Size n, + const T &value) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::fill_n(select_system(system), first, n, value); +} // end fill() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/find.inl b/compat/thrust/detail/find.inl new file mode 100644 index 0000000..465c937 --- /dev/null +++ b/compat/thrust/detail/find.inl @@ -0,0 +1,109 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file find.inl + * \brief Inline file for find.h + */ + +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template +InputIterator find(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + const T& value) +{ + using thrust::system::detail::generic::find; + return find(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value); +} // end find() + + +template +InputIterator find_if(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + Predicate pred) +{ + using thrust::system::detail::generic::find_if; + return find_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); +} // end find_if() + + +template +InputIterator find_if_not(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + Predicate pred) +{ + using thrust::system::detail::generic::find_if_not; + return find_if_not(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); +} // end find_if_not() + + +template +InputIterator find(InputIterator first, + InputIterator last, + const T& value) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::find(select_system(system), first, last, value); +} + +template +InputIterator find_if(InputIterator first, + InputIterator last, + Predicate pred) +{ + using 
thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::find_if(select_system(system), first, last, pred); +} + +template +InputIterator find_if_not(InputIterator first, + InputIterator last, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::find_if_not(select_system(system), first, last, pred); +} + + +} // end namespace thrust + diff --git a/compat/thrust/detail/for_each.inl b/compat/thrust/detail/for_each.inl new file mode 100644 index 0000000..7c9dc17 --- /dev/null +++ b/compat/thrust/detail/for_each.inl @@ -0,0 +1,90 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file for_each.inl + * \brief Inline file for for_each.h. 
+ */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + InputIterator for_each(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + UnaryFunction f) +{ + using thrust::system::detail::generic::for_each; + + return for_each(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, f); +} + + +template +InputIterator for_each(InputIterator first, + InputIterator last, + UnaryFunction f) +{ + using thrust::system::detail::generic::select_system; + typedef typename thrust::iterator_system::type System; + + System system; + return thrust::for_each(select_system(system), first, last, f); +} // end for_each() + + +template + InputIterator for_each_n(const thrust::detail::execution_policy_base &exec, + InputIterator first, + Size n, + UnaryFunction f) +{ + using thrust::system::detail::generic::for_each_n; + + return for_each_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, f); +} // end for_each_n() + + +template +InputIterator for_each_n(InputIterator first, + Size n, + UnaryFunction f) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + return thrust::for_each_n(select_system(system), first, n, f); +} // end for_each_n() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/function.h b/compat/thrust/detail/function.h new file mode 100644 index 0000000..36b76c2 --- /dev/null +++ b/compat/thrust/detail/function.h @@ -0,0 +1,226 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace detail +{ + + +template + struct host_function +{ + // mutable because Function::operator() might be const + mutable Function m_f; + + inline host_function() + : m_f() + {} + + inline host_function(const Function &f) + : m_f(f) + {} + + template + inline Result operator()(Argument &x) const + { + // we static cast to Result to handle void Result without error + // in case Function's result is non-void + return static_cast(m_f(thrust::raw_reference_cast(x))); + } + + template + inline Result operator()(const Argument &x) const + { + // we static cast to Result to handle void Result without error + // in case Function's result is non-void + return static_cast(m_f(thrust::raw_reference_cast(x))); + } + + template + inline Result operator()(Argument1 &x, Argument2 &y) const + { + // we static cast to Result to handle void Result without error + // in case Function's result is non-void + return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); + } + + template + inline Result operator()(const Argument1 &x, Argument2 &y) const + { + // we static cast to Result to handle void Result without error + // in case Function's result is non-void + return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); + } + + template + inline Result operator()(const Argument1 &x, const Argument2 &y) const + { + // we static cast to Result to handle void Result without error + // in case Function's result is non-void + return 
static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); + } + + template + inline Result operator()(Argument1 &x, const Argument2 &y) const + { + // we static cast to Result to handle void Result without error + // in case Function's result is non-void + return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); + } +}; // end host_function + + +template + struct device_function +{ + // mutable because Function::operator() might be const + mutable Function m_f; + + inline __device__ device_function() + : m_f() + {} + + inline __device__ device_function(const Function &f) + : m_f(f) + {} + + template + inline __device__ Result operator()(Argument &x) const + { + // we static cast to Result to handle void Result without error + // in case Function's result is non-void + return static_cast(m_f(thrust::raw_reference_cast(x))); + } + + template + inline __device__ Result operator()(const Argument &x) const + { + // we static cast to Result to handle void Result without error + // in case Function's result is non-void + return static_cast(m_f(thrust::raw_reference_cast(x))); + } + + template + inline __device__ Result operator()(Argument1 &x, Argument2 &y) const + { + // we static cast to Result to handle void Result without error + // in case Function's result is non-void + return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); + } + + template + inline __device__ Result operator()(const Argument1 &x, Argument2 &y) const + { + // we static cast to Result to handle void Result without error + // in case Function's result is non-void + return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); + } + + template + inline __device__ Result operator()(const Argument1 &x, const Argument2 &y) const + { + // we static cast to Result to handle void Result without error + // in case Function's result is non-void + return static_cast(m_f(thrust::raw_reference_cast(x), 
thrust::raw_reference_cast(y))); + } + + template + inline __device__ Result operator()(Argument1 &x, const Argument2 &y) const + { + // we static cast to Result to handle void Result without error + // in case Function's result is non-void + return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); + } +}; // end device_function + + +template + struct host_device_function +{ + // mutable because Function::operator() might be const + mutable Function m_f; + + inline __host__ __device__ + host_device_function() + : m_f() + {} + + inline __host__ __device__ + host_device_function(const Function &f) + : m_f(f) + {} + + __thrust_hd_warning_disable__ + template + inline __host__ __device__ + Result operator()(Argument &x) const + { + // we static cast to Result to handle void Result without error + // in case Function's result is non-void + return static_cast(m_f(thrust::raw_reference_cast(x))); + } + + template + inline __host__ __device__ Result operator()(const Argument &x) const + { + // we static cast to Result to handle void Result without error + // in case Function's result is non-void + return static_cast(m_f(thrust::raw_reference_cast(x))); + } + + template + inline __host__ __device__ Result operator()(Argument1 &x, Argument2 &y) const + { + // we static cast to Result to handle void Result without error + // in case Function's result is non-void + return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); + } + + template + inline __host__ __device__ Result operator()(const Argument1 &x, Argument2 &y) const + { + // we static cast to Result to handle void Result without error + // in case Function's result is non-void + return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); + } + + template + inline __host__ __device__ Result operator()(const Argument1 &x, const Argument2 &y) const + { + // we static cast to Result to handle void Result without error + // in case 
Function's result is non-void + return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); + } + + template + inline __host__ __device__ Result operator()(Argument1 &x, const Argument2 &y) const + { + // we static cast to Result to handle void Result without error + // in case Function's result is non-void + return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); + } +}; // end host_device_function + + +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/functional.inl b/compat/thrust/detail/functional.inl new file mode 100644 index 0000000..4024585 --- /dev/null +++ b/compat/thrust/detail/functional.inl @@ -0,0 +1,122 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +namespace thrust +{ + +namespace detail +{ + +template + struct unary_traits_imp; + +template + struct unary_traits_imp +{ + typedef Operation function_type; + typedef const function_type & param_type; + typedef typename Operation::result_type result_type; + typedef typename Operation::argument_type argument_type; +}; // end unary_traits_imp + +template + struct unary_traits_imp +{ + typedef Result (*function_type)(Argument); + typedef Result (*param_type)(Argument); + typedef Result result_type; + typedef Argument argument_type; +}; // end unary_traits_imp + +template + struct binary_traits_imp; + +template + struct binary_traits_imp +{ + typedef Operation function_type; + typedef const function_type & param_type; + typedef typename Operation::result_type result_type; + typedef typename Operation::first_argument_type first_argument_type; + typedef typename Operation::second_argument_type second_argument_type; +}; // end binary_traits_imp + +template + struct binary_traits_imp +{ + typedef Result (*function_type)(Argument1, Argument2); + typedef Result (*param_type)(Argument1, Argument2); + typedef Result result_type; + typedef Argument1 first_argument_type; + typedef Argument2 second_argument_type; +}; // end binary_traits_imp + +} // end detail + +template + struct unary_traits +{ + typedef typename detail::unary_traits_imp::function_type function_type; + typedef typename detail::unary_traits_imp::param_type param_type; + typedef typename detail::unary_traits_imp::result_type result_type; + typedef typename detail::unary_traits_imp::argument_type argument_type; +}; // end unary_traits + +template + struct unary_traits +{ + typedef Result (*function_type)(Argument); + typedef Result (*param_type)(Argument); + typedef Result result_type; + typedef Argument argument_type; +}; // end unary_traits + +template + struct binary_traits +{ + typedef typename detail::binary_traits_imp::function_type function_type; + typedef typename 
detail::binary_traits_imp::param_type param_type; + typedef typename detail::binary_traits_imp::result_type result_type; + typedef typename detail::binary_traits_imp::first_argument_type first_argument_type; + typedef typename detail::binary_traits_imp::second_argument_type second_argument_type; +}; // end binary_traits + +template + struct binary_traits +{ + typedef Result (*function_type)(Argument1, Argument2); + typedef Result (*param_type)(Argument1, Argument2); + typedef Result result_type; + typedef Argument1 first_argument_type; + typedef Argument2 second_argument_type; +}; // end binary_traits + +template + unary_negate not1(const Predicate &pred) +{ + return unary_negate(pred); +} // end not1() + +template + binary_negate not2(const BinaryPredicate &pred) +{ + return binary_negate(pred); +} // end not2() + +} // end thrust + diff --git a/compat/thrust/detail/functional/actor.h b/compat/thrust/detail/functional/actor.h new file mode 100644 index 0000000..0b95a6b --- /dev/null +++ b/compat/thrust/detail/functional/actor.h @@ -0,0 +1,192 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Portions of this code are derived from +// +// Manjunath Kudlur's Carbon library +// +// and +// +// Based on Boost.Phoenix v1.2 +// Copyright (c) 2001-2002 Joel de Guzman + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace functional +{ + +template + struct apply_actor +{ + typedef typename Action::template result::type type; +}; + +template + struct actor + : Eval +{ + typedef Eval eval_type; + + __host__ __device__ + actor(void); + + __host__ __device__ + actor(const Eval &base); + + __host__ __device__ + typename apply_actor::type + operator()(void) const; + + template + __host__ __device__ + typename apply_actor >::type + operator()(T0 &_0) const; + + template + __host__ __device__ + typename apply_actor >::type + operator()(T0 &_0, T1 &_1) const; + + template + __host__ __device__ + typename apply_actor >::type + operator()(T0 &_0, T1 &_1, T2 &_2) const; + + template + __host__ __device__ + typename apply_actor >::type + operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3) const; + + template + __host__ __device__ + typename apply_actor >::type + operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4) const; + + template + __host__ __device__ + typename apply_actor >::type + operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5) const; + + template + __host__ __device__ + typename apply_actor >::type + operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6) const; + + template + __host__ __device__ + typename apply_actor >::type + operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6, T7 &_7) const; + + template + __host__ __device__ + typename apply_actor >::type + operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6, T7 &_7, T8 &_8) const; + + template + __host__ __device__ + typename apply_actor >::type + operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6, T7 &_7, T8 &_8, T9 &_9) const; + + template + __host__ __device__ + 
typename assign_result::type + operator=(const T &_1) const; +}; // end actor + +// in general, as_actor should turn things into values +template + struct as_actor +{ + typedef value type; + + static inline __host__ __device__ type convert(const T &x) + { + return val(x); + } // end convert() +}; // end as_actor + +// specialization for things which are already actors +template + struct as_actor > +{ + typedef actor type; + + static inline __host__ __device__ const type &convert(const actor &x) + { + return x; + } // end convert() +}; // end as_actor + +template + typename as_actor::type + __host__ __device__ + make_actor(const T &x) +{ + return as_actor::convert(x); +} // end make_actor() + +} // end functional + +// provide specializations for result_of for nullary, unary, and binary invocations of actor +template + struct result_of< + thrust::detail::functional::actor() + > +{ + typedef typename thrust::detail::functional::apply_actor< + thrust::detail::functional::actor, + thrust::null_type + >::type type; +}; // end result_of + +template + struct result_of< + thrust::detail::functional::actor(Arg1) + > +{ + typedef typename thrust::detail::functional::apply_actor< + thrust::detail::functional::actor, + thrust::tuple + >::type type; +}; // end result_of + +template + struct result_of< + thrust::detail::functional::actor(Arg1,Arg2) + > +{ + typedef typename thrust::detail::functional::apply_actor< + thrust::detail::functional::actor, + thrust::tuple + >::type type; +}; // end result_of + +} // end detail +} // end thrust + +#include + diff --git a/compat/thrust/detail/functional/actor.inl b/compat/thrust/detail/functional/actor.inl new file mode 100644 index 0000000..84347be --- /dev/null +++ b/compat/thrust/detail/functional/actor.inl @@ -0,0 +1,194 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Portions of this code are derived from +// +// Manjunath Kudlur's Carbon library +// +// and +// +// Based on Boost.Phoenix v1.2 +// Copyright (c) 2001-2002 Joel de Guzman + +#include +#include +#include +#include + +namespace thrust +{ + +namespace detail +{ +namespace functional +{ + +template + actor + ::actor(void) + : eval_type() +{} + +template + actor + ::actor(const Eval &base) + : eval_type(base) +{} + +template + typename apply_actor< + typename actor::eval_type, + typename thrust::null_type + >::type + actor + ::operator()(void) const +{ + return eval_type::eval(thrust::null_type()); +} // end basic_environment::operator() + +template + template + typename apply_actor< + typename actor::eval_type, + typename thrust::tuple + >::type + actor + ::operator()(T0 &_0) const +{ + return eval_type::eval(thrust::tie(_0)); +} // end basic_environment::operator() + +template + template + typename apply_actor< + typename actor::eval_type, + typename thrust::tuple + >::type + actor + ::operator()(T0 &_0, T1 &_1) const +{ + return eval_type::eval(thrust::tie(_0,_1)); +} // end basic_environment::operator() + +template + template + typename apply_actor< + typename actor::eval_type, + typename thrust::tuple + >::type + actor + ::operator()(T0 &_0, T1 &_1, T2 &_2) const +{ + return eval_type::eval(thrust::tie(_0,_1,_2)); +} // end basic_environment::operator() + +template + template + typename apply_actor< + typename actor::eval_type, + typename thrust::tuple + >::type + actor + ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3) const +{ + return 
eval_type::eval(thrust::tie(_0,_1,_2,_3)); +} // end basic_environment::operator() + +template + template + typename apply_actor< + typename actor::eval_type, + typename thrust::tuple + >::type + actor + ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4) const +{ + return eval_type::eval(thrust::tie(_0,_1,_2,_3,_4)); +} // end basic_environment::operator() + +template + template + typename apply_actor< + typename actor::eval_type, + typename thrust::tuple + >::type + actor + ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5) const +{ + return eval_type::eval(thrust::tie(_0,_1,_2,_3,_4,_5)); +} // end basic_environment::operator() + +template + template + typename apply_actor< + typename actor::eval_type, + typename thrust::tuple + >::type + actor + ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6) const +{ + return eval_type::eval(thrust::tie(_0,_1,_2,_3,_4,_5,_6)); +} // end basic_environment::operator() + +template + template + typename apply_actor< + typename actor::eval_type, + typename thrust::tuple + >::type + actor + ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6, T7 &_7) const +{ + return eval_type::eval(thrust::tie(_0,_1,_2,_3,_4,_5,_6,_7)); +} // end basic_environment::operator() + +template + template + typename apply_actor< + typename actor::eval_type, + typename thrust::tuple + >::type + actor + ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6, T7 &_7, T8 &_8) const +{ + return eval_type::eval(thrust::tie(_0,_1,_2,_3,_4,_5,_6,_7,_8)); +} // end basic_environment::operator() + +template + template + typename apply_actor< + typename actor::eval_type, + typename thrust::tuple + >::type + actor + ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6, T7 &_7, T8 &_8, T9 &_9) const +{ + return eval_type::eval(thrust::tie(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9)); +} // end basic_environment::operator() + +template + template + typename assign_result::type + actor + ::operator=(const 
T& _1) const +{ + return do_assign(*this,_1); +} // end actor::operator=() + +} // end functional +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/functional/argument.h b/compat/thrust/detail/functional/argument.h new file mode 100644 index 0000000..96a20be --- /dev/null +++ b/compat/thrust/detail/functional/argument.h @@ -0,0 +1,75 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Portions of this code are derived from +// +// Manjunath Kudlur's Carbon library +// +// and +// +// Based on Boost.Phoenix v1.2 +// Copyright (c) 2001-2002 Joel de Guzman + +#pragma once + +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace functional +{ + +template + struct argument_helper +{ + typedef typename thrust::tuple_element::type type; +}; + +template + struct argument_helper +{ + typedef thrust::null_type type; +}; + + +template + class argument +{ + public: + template + struct result + : argument_helper + { + }; + + __host__ __device__ + argument(void){} + + template + __host__ __device__ + typename result::type eval(const Env &e) const + { + return thrust::get(e); + } // end eval() +}; // end argument + +} // end functional +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/functional/composite.h b/compat/thrust/detail/functional/composite.h new file mode 100644 index 0000000..1d5fde3 --- /dev/null +++ 
b/compat/thrust/detail/functional/composite.h @@ -0,0 +1,163 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Portions of this code are derived from +// +// Manjunath Kudlur's Carbon library +// +// and +// +// Based on Boost.Phoenix v1.2 +// Copyright (c) 2001-2002 Joel de Guzman + +#pragma once + +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace functional +{ + +// XXX we should just take a single EvalTuple +template + class composite; + +template + class composite< + Eval0, + Eval1, + thrust::null_type, + thrust::null_type, + thrust::null_type, + thrust::null_type, + thrust::null_type, + thrust::null_type, + thrust::null_type, + thrust::null_type + > +{ + public: + template + struct result + { + typedef typename Eval0::template result< + thrust::tuple< + typename Eval1::template result::type + > + >::type type; + }; + + __host__ __device__ + composite(const Eval0 &e0, const Eval1 &e1) + : m_eval0(e0), + m_eval1(e1) + {} + + template + __host__ __device__ + typename result::type + eval(const Env &x) const + { + typename Eval1::template result::type result1 = m_eval1.eval(x); + return m_eval0.eval(thrust::tie(result1)); + } + + private: + Eval0 m_eval0; + Eval1 m_eval1; +}; // end composite + +template + class composite< + Eval0, + Eval1, + Eval2, + thrust::null_type, + thrust::null_type, + thrust::null_type, + thrust::null_type, + thrust::null_type, + thrust::null_type, + 
thrust::null_type + > +{ + public: + template + struct result + { + typedef typename Eval0::template result< + thrust::tuple< + typename Eval1::template result::type, + typename Eval2::template result::type + > + >::type type; + }; + + __host__ __device__ + composite(const Eval0 &e0, const Eval1 &e1, const Eval2 &e2) + : m_eval0(e0), + m_eval1(e1), + m_eval2(e2) + {} + + template + __host__ __device__ + typename result::type + eval(const Env &x) const + { + typename Eval1::template result::type result1 = m_eval1.eval(x); + typename Eval2::template result::type result2 = m_eval2.eval(x); + return m_eval0.eval(thrust::tie(result1,result2)); + } + + private: + Eval0 m_eval0; + Eval1 m_eval1; + Eval2 m_eval2; +}; // end composite + +template +__host__ __device__ + actor > compose(const Eval0 &e0, const Eval1 &e1) +{ + return actor >(composite(e0,e1)); +} + +template +__host__ __device__ + actor > compose(const Eval0 &e0, const Eval1 &e1, const Eval2 &e2) +{ + return actor >(composite(e0,e1,e2)); +} + +} // end functional +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/functional/operators.h b/compat/thrust/detail/functional/operators.h new file mode 100644 index 0000000..0fc3539 --- /dev/null +++ b/compat/thrust/detail/functional/operators.h @@ -0,0 +1,25 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include + diff --git a/compat/thrust/detail/functional/operators/arithmetic_operators.h b/compat/thrust/detail/functional/operators/arithmetic_operators.h new file mode 100644 index 0000000..a11e7ac --- /dev/null +++ b/compat/thrust/detail/functional/operators/arithmetic_operators.h @@ -0,0 +1,394 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace functional +{ + +template +__host__ __device__ +actor< + composite< + unary_operator, + actor + > +> +__host__ __device__ +operator-(const actor &_1) +{ + return compose(unary_operator(), _1); +} // end operator-() + +// there's no standard unary_plus functional, so roll an ad hoc one here +template + struct unary_plus + : public thrust::unary_function +{ + __host__ __device__ T operator()(const T &x) const {return +x;} +}; // end unary_plus + +template +__host__ __device__ +actor< + composite< + unary_operator, + actor + > +> +operator+(const actor &_1) +{ + return compose(unary_operator(), _1); +} // end operator+() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator+(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator+() 
+ +template +__host__ __device__ +actor< + composite< + binary_operator, + typename as_actor::type, + actor + > +> +operator+(const T1 &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator+() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator+(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator+() + +template +__host__ __device__ +actor< + composite< + binary_operator, + typename as_actor::type, + actor + > +> +operator-(const T1 &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator-() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator-(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator-() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator-(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator-() + +template +__host__ __device__ +actor< + composite< + binary_operator, + typename as_actor::type, + actor + > +> +operator*(const T1 &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator*() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator*(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator*() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator*(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator*() + 
+template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator/(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator/() + +template +__host__ __device__ +actor< + composite< + binary_operator, + typename as_actor::type, + actor + > +> +operator/(const T1 &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator/() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator/(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator/() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator%(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator%() + +template +__host__ __device__ +actor< + composite< + binary_operator, + typename as_actor::type, + actor + > +> +operator%(const T1 &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator%() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator%(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator%() + +// there's no standard prefix_increment functional, so roll an ad hoc one here +template + struct prefix_increment + : public thrust::unary_function +{ + __host__ __device__ T& operator()(T &x) const { return ++x; } +}; // end prefix_increment + +template +__host__ __device__ +actor< + composite< + unary_operator, + actor + > +> +operator++(const actor &_1) +{ + return compose(unary_operator(), _1); +} // end operator++() + +// there's no standard suffix_increment functional, so roll an ad hoc 
one here +template + struct suffix_increment + : public thrust::unary_function +{ + __host__ __device__ T operator()(T &x) const { return x++; } +}; // end suffix_increment + +template +__host__ __device__ +actor< + composite< + unary_operator, + actor + > +> +operator++(const actor &_1, int) +{ + return compose(unary_operator(), _1); +} // end operator++() + +// there's no standard prefix_decrement functional, so roll an ad hoc one here +template + struct prefix_decrement + : public thrust::unary_function +{ + __host__ __device__ T& operator()(T &x) const { return --x; } +}; // end prefix_decrement + +template +__host__ __device__ +actor< + composite< + unary_operator, + actor + > +> +operator--(const actor &_1) +{ + return compose(unary_operator(), _1); +} // end operator--() + +// there's no standard suffix_decrement functional, so roll an ad hoc one here +template + struct suffix_decrement + : public thrust::unary_function +{ + __host__ __device__ T operator()(T &x) const { return x--; } +}; // end suffix_decrement + +template +__host__ __device__ +actor< + composite< + unary_operator, + actor + > +> +operator--(const actor &_1, int) +{ + return compose(unary_operator(), _1); +} // end operator--() + +} // end functional +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/functional/operators/assignment_operator.h b/compat/thrust/detail/functional/operators/assignment_operator.h new file mode 100644 index 0000000..e5d6620 --- /dev/null +++ b/compat/thrust/detail/functional/operators/assignment_operator.h @@ -0,0 +1,72 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ + +// XXX WAR circular inclusion with this forward declaration +template struct binary_function; + +namespace detail +{ +namespace functional +{ + +// XXX WAR circular inclusion with this forward declaration +template struct as_actor; + +// there's no standard assign functional, so roll an ad hoc one here +template + struct assign + : thrust::binary_function +{ + __host__ __device__ T& operator()(T &lhs, const T &rhs) const { return lhs = rhs; } +}; // end assign + +template + struct assign_result +{ + typedef actor< + composite< + binary_operator, + actor, + typename as_actor::type + > + > type; +}; // end assign_result + +template + __host__ __device__ + typename assign_result::type + do_assign(const actor &_1, const T &_2) +{ + return compose(binary_operator(), + _1, + as_actor::convert(_2)); +} // end do_assign() + +} // end functional +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/functional/operators/bitwise_operators.h b/compat/thrust/detail/functional/operators/bitwise_operators.h new file mode 100644 index 0000000..c89c5d4 --- /dev/null +++ b/compat/thrust/detail/functional/operators/bitwise_operators.h @@ -0,0 +1,313 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace functional +{ + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator&(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator&() + +template +__host__ __device__ +actor< + composite< + binary_operator, + typename as_actor::type, + actor + > +> +operator&(const T1 &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator&() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator&(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator&() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator|(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator|() + +template +__host__ __device__ +actor< + composite< + binary_operator, + typename as_actor::type, + actor + > +> +operator|(const T1 &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator|() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator|(const actor &_1, const actor &_2) +{ + return 
compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator|() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator^(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator^() + +template +__host__ __device__ +actor< + composite< + binary_operator, + typename as_actor::type, + actor + > +> +operator^(const T1 &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator^() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator^(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator^() + +// there's no standard bit_not functional, so roll an ad hoc one here +template + struct bit_not + : public thrust::unary_function +{ + __host__ __device__ T operator()(const T &x) const {return ~x;} +}; // end bit_not + +template +__host__ __device__ +actor< + composite< + unary_operator, + actor + > +> +__host__ __device__ +operator~(const actor &_1) +{ + return compose(unary_operator(), _1); +} // end operator~() + +// there's no standard bit_lshift functional, so roll an ad hoc one here +template + struct bit_lshift + : public thrust::binary_function +{ + __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs << rhs;} +}; // end bit_lshift + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator<<(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator<<() + +template +__host__ __device__ +actor< + composite< + binary_operator, + typename as_actor::type, + actor + > +> +operator<<(const T1 &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + 
make_actor(_2)); +} // end operator<<() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator<<(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator<<() + +// there's no standard bit_rshift functional, so roll an ad hoc one here +template + struct bit_rshift + : public thrust::binary_function +{ + __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs >> rhs;} +}; // end bit_rshift + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator>>(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator>>() + +template +__host__ __device__ +actor< + composite< + binary_operator, + typename as_actor::type, + actor + > +> +operator>>(const T1 &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator>>() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator>>(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator>>() + +} // end functional +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/functional/operators/compound_assignment_operators.h b/compat/thrust/detail/functional/operators/compound_assignment_operators.h new file mode 100644 index 0000000..ef7389b --- /dev/null +++ b/compat/thrust/detail/functional/operators/compound_assignment_operators.h @@ -0,0 +1,424 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace functional +{ + +template + struct plus_equal + : public thrust::binary_function +{ + __host__ __device__ T& operator()(T &lhs, const T &rhs) const { return lhs += rhs; } +}; // end plus_equal + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator+=(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator+=() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator+=(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator+=() + +template + struct minus_equal + : public thrust::binary_function +{ + __host__ __device__ T& operator()(T &lhs, const T &rhs) const { return lhs -= rhs; } +}; // end minus_equal + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator-=(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator-=() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator-=(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator-=() + +template + struct multiplies_equal + : public thrust::binary_function +{ + 
__host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs *= rhs; } +}; // end multiplies_equal + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator*=(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator*=() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator*=(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator*=() + +template + struct divides_equal + : public thrust::binary_function +{ + __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs /= rhs; } +}; // end divides_equal + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator/=(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator/=() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator/=(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator/=() + +template + struct modulus_equal + : public thrust::binary_function +{ + __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs %= rhs; } +}; // end modulus_equal + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator%=(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator%=() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator%=(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator%=() + +template + struct 
bit_and_equal + : public thrust::binary_function +{ + __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs &= rhs; } +}; // end bit_and_equal + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator&=(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator&=() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator&=(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator&=() + +template + struct bit_or_equal + : public thrust::binary_function +{ + __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs |= rhs; } +}; // end bit_or_equal + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator|=(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator|=() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator|=(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator|=() + +template + struct bit_xor_equal + : public thrust::binary_function +{ + __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs ^= rhs; } +}; // end bit_xor_equal + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator^=(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator^=() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator^=(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), +
make_actor(_2)); +} // end operator^=() + +template + struct bit_lshift_equal + : public thrust::binary_function +{ + __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs <<= rhs; } +}; // end bit_lshift_equal + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator<<=(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator<<=() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator<<=(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator<<=() + +template + struct bit_rshift_equal + : public thrust::binary_function +{ + __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs >>= rhs; } +}; // end bit_rshift_equal + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator>>=(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator>>=() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator>>=(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator>>=() + +} // end functional +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/functional/operators/logical_operators.h b/compat/thrust/detail/functional/operators/logical_operators.h new file mode 100644 index 0000000..9c95262 --- /dev/null +++ b/compat/thrust/detail/functional/operators/logical_operators.h @@ -0,0 +1,144 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace functional +{ + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator&&(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator&&() + +template +__host__ __device__ +actor< + composite< + binary_operator, + typename as_actor::type, + actor + > +> +operator&&(const T1 &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator&&() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator&&(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator&&() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator||(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator||() + +template +__host__ __device__ +actor< + composite< + binary_operator, + typename as_actor::type, + actor + > +> +operator||(const T1 &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator||() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator||(const actor &_1, const actor &_2) +{ + return
compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator||() + +template +__host__ __device__ +actor< + composite< + unary_operator, + actor + > +> +operator!(const actor &_1) +{ + return compose(unary_operator(), _1); +} // end operator!() + +} // end functional +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/functional/operators/operator_adaptors.h b/compat/thrust/detail/functional/operators/operator_adaptors.h new file mode 100644 index 0000000..d35fe97 --- /dev/null +++ b/compat/thrust/detail/functional/operators/operator_adaptors.h @@ -0,0 +1,115 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace functional +{ + +// this thing (which models Eval) is an adaptor for the unary +// functors inside functional.h +template class UnaryOperator> + struct unary_operator +{ + template + struct argument + : thrust::detail::eval_if< + (thrust::tuple_size::value == 0), + thrust::detail::identity_, + thrust::tuple_element<0,Env> + > + { + }; + + template + struct operator_type + { + typedef UnaryOperator< + typename thrust::detail::remove_reference< + typename argument::type + >::type + > type; + }; + + template + struct result + { + typedef typename operator_type::type op_type; + typedef typename op_type::result_type type; + }; + + template + __host__ __device__ + typename result::type eval(const Env &e) const + { + typename operator_type::type op; + return op(thrust::get<0>(e)); + } // end eval() +}; // end unary_operator + +// this thing (which models Eval) is an adaptor for the binary +// functors inside functional.h +template class BinaryOperator> + struct binary_operator +{ + template + struct first_argument + : thrust::detail::eval_if< + (thrust::tuple_size::value == 0), + thrust::detail::identity_, + thrust::tuple_element<0,Env> + > + { + }; + + template + struct operator_type + { + typedef BinaryOperator< + typename thrust::detail::remove_reference< + typename first_argument::type + >::type + > type; + }; + + template + struct result + { + typedef typename operator_type::type op_type; + typedef typename op_type::result_type type; + }; + + template + __host__ __device__ + typename result::type eval(const Env &e) const + { + typename operator_type::type op; + return op(thrust::get<0>(e), thrust::get<1>(e)); + } // end eval() +}; // end binary_operator + +} // end functional +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/functional/operators/relational_operators.h b/compat/thrust/detail/functional/operators/relational_operators.h new 
file mode 100644 index 0000000..6b26534 --- /dev/null +++ b/compat/thrust/detail/functional/operators/relational_operators.h @@ -0,0 +1,323 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace functional +{ + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator==(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator==() + +template +__host__ __device__ +actor< + composite< + binary_operator, + typename as_actor::type, + actor + > +> +operator==(const T1 &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator==() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator==(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator==() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator!=(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator!=() + +template +__host__ __device__ +actor< + composite< + binary_operator, + typename 
as_actor::type, + actor + > +> +operator!=(const T1 &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator!=() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator!=(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator!=() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator>(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator>() + +template +__host__ __device__ +actor< + composite< + binary_operator, + typename as_actor::type, + actor + > +> +operator>(const T1 &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator>() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator>(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator>() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator<(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator<() + +template +__host__ __device__ +actor< + composite< + binary_operator, + typename as_actor::type, + actor + > +> +operator<(const T1 &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator<() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator<(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator<() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + 
typename as_actor::type + > +> +operator>=(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator>=() + +template +__host__ __device__ +actor< + composite< + binary_operator, + typename as_actor::type, + actor + > +> +operator>=(const T1 &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator>=() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator>=(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator>=() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + typename as_actor::type + > +> +operator<=(const actor &_1, const T2 &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator<=() + +template +__host__ __device__ +actor< + composite< + binary_operator, + typename as_actor::type, + actor + > +> +operator<=(const T1 &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator<=() + +template +__host__ __device__ +actor< + composite< + binary_operator, + actor, + actor + > +> +operator<=(const actor &_1, const actor &_2) +{ + return compose(binary_operator(), + make_actor(_1), + make_actor(_2)); +} // end operator<=() + +} // end functional +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/functional/placeholder.h b/compat/thrust/detail/functional/placeholder.h new file mode 100644 index 0000000..9acf6da --- /dev/null +++ b/compat/thrust/detail/functional/placeholder.h @@ -0,0 +1,39 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace functional +{ + +template + struct placeholder +{ + typedef actor > type; +}; + +} // end functional +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/functional/value.h b/compat/thrust/detail/functional/value.h new file mode 100644 index 0000000..27e2802 --- /dev/null +++ b/compat/thrust/detail/functional/value.h @@ -0,0 +1,80 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Portions of this code are derived from +// +// Manjunath Kudlur's Carbon library +// +// and +// +// Based on Boost.Phoenix v1.2 +// Copyright (c) 2001-2002 Joel de Guzman + +#pragma once + +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace functional +{ + + +template struct actor; + + +template + class value +{ + public: + + template + struct result + { + typedef T type; + }; + + __host__ __device__ + value(const T &arg) + : m_val(arg) + {} + + template + __host__ __device__ + T eval(const Env &) const + { + return m_val; + } + + private: + T m_val; +}; // end value + +template +__host__ __device__ +actor > val(const T &x) +{ + return value(x); +} // end val() + + +} // end functional +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/gather.inl b/compat/thrust/detail/gather.inl new file mode 100644 index 0000000..4edecd0 --- /dev/null +++ b/compat/thrust/detail/gather.inl @@ -0,0 +1,160 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file gather.inl + * \brief Inline file for gather.h. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + OutputIterator gather(const thrust::detail::execution_policy_base &exec, + InputIterator map_first, + InputIterator map_last, + RandomAccessIterator input_first, + OutputIterator result) +{ + using thrust::system::detail::generic::gather; + return gather(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), map_first, map_last, input_first, result); +} // end gather() + + +template + OutputIterator gather_if(const thrust::detail::execution_policy_base &exec, + InputIterator1 map_first, + InputIterator1 map_last, + InputIterator2 stencil, + RandomAccessIterator input_first, + OutputIterator result) +{ + using thrust::system::detail::generic::gather_if; + return gather_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), map_first, map_last, stencil, input_first, result); +} // end gather_if() + + +template + OutputIterator gather_if(const thrust::detail::execution_policy_base &exec, + InputIterator1 map_first, + InputIterator1 map_last, + InputIterator2 stencil, + RandomAccessIterator input_first, + OutputIterator result, + Predicate pred) +{ + using thrust::system::detail::generic::gather_if; + return gather_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), map_first, map_last, stencil, input_first, result, pred); +} // end gather_if() + + +template + OutputIterator gather(InputIterator map_first, + InputIterator map_last, + RandomAccessIterator input_first, + OutputIterator result) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::gather(select_system(system1,system2,system3), map_first, map_last, input_first, result); +} // end gather() + + +template + OutputIterator 
gather_if(InputIterator1 map_first, + InputIterator1 map_last, + InputIterator2 stencil, + RandomAccessIterator input_first, + OutputIterator result) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + + return thrust::gather_if(select_system(system1,system2,system3,system4), map_first, map_last, stencil, input_first, result); +} // end gather_if() + + +template + OutputIterator gather_if(InputIterator1 map_first, + InputIterator1 map_last, + InputIterator2 stencil, + RandomAccessIterator input_first, + OutputIterator result, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + + return thrust::gather_if(select_system(system1,system2,system3,system4), map_first, map_last, stencil, input_first, result, pred); +} // end gather_if() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/generate.inl b/compat/thrust/detail/generate.inl new file mode 100644 index 0000000..c125804 --- /dev/null +++ b/compat/thrust/detail/generate.inl @@ -0,0 +1,94 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file generate.inl + * \author Jared Hoberock + * \brief Inline file for generate.h. + */ + +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + void generate(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + Generator gen) +{ + using thrust::system::detail::generic::generate; + return generate(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, gen); +} // end generate() + + +template + OutputIterator generate_n(const thrust::detail::execution_policy_base &exec, + OutputIterator first, + Size n, + Generator gen) +{ + using thrust::system::detail::generic::generate_n; + return generate_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, gen); +} // end generate_n() + + +template + void generate(ForwardIterator first, + ForwardIterator last, + Generator gen) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::generate(select_system(system), first, last, gen); +} // end generate() + + +template + OutputIterator generate_n(OutputIterator first, + Size n, + Generator gen) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::generate_n(select_system(system), first, n, gen); +} // end generate_n() + + +} // end thrust + diff --git a/compat/thrust/detail/host_vector.inl 
b/compat/thrust/detail/host_vector.inl new file mode 100644 index 0000000..e5c60ab --- /dev/null +++ b/compat/thrust/detail/host_vector.inl @@ -0,0 +1,37 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file host_vector.inl + * \brief Inline file for host_vector.h. + */ + +#include + +namespace thrust +{ + +template + template + host_vector + ::host_vector(const device_vector &v) + :Parent(v) +{ + ; +} // end host_vector::host_vector() + +} // end namespace thrust + diff --git a/compat/thrust/detail/inner_product.inl b/compat/thrust/detail/inner_product.inl new file mode 100644 index 0000000..f7773d8 --- /dev/null +++ b/compat/thrust/detail/inner_product.inl @@ -0,0 +1,104 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file inner_product.inl + * \brief Inline file for inner_product.h. 
+ */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template +OutputType inner_product(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputType init) +{ + using thrust::system::detail::generic::inner_product; + return inner_product(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, init); +} // end inner_product() + + +template +OutputType inner_product(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputType init, + BinaryFunction1 binary_op1, + BinaryFunction2 binary_op2) +{ + using thrust::system::detail::generic::inner_product; + return inner_product(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, init, binary_op1, binary_op2); +} // end inner_product() + + +template +OutputType +inner_product(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputType init) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::inner_product(select_system(system1,system2), first1, last1, first2, init); +} // end inner_product() + + +template +OutputType +inner_product(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputType init, + BinaryFunction1 binary_op1, BinaryFunction2 binary_op2) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::inner_product(select_system(system1,system2), first1, last1, first2, init, binary_op1, binary_op2); +} // end inner_product() + + +} // end namespace thrust + diff 
--git a/compat/thrust/detail/integer_traits.h b/compat/thrust/detail/integer_traits.h new file mode 100644 index 0000000..e4cf5d1 --- /dev/null +++ b/compat/thrust/detail/integer_traits.h @@ -0,0 +1,132 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ + +namespace detail +{ + +template + class integer_traits +{ + public: + static const bool is_integral = false; +}; + +template + class integer_traits_base +{ + public: + static const bool is_integral = true; + static const T const_min = min_val; + static const T const_max = max_val; +}; + + +template<> + class integer_traits + : public std::numeric_limits, + public integer_traits_base +{}; + + +template<> + class integer_traits + : public std::numeric_limits, + public integer_traits_base +{}; + + +template<> + class integer_traits + : public std::numeric_limits, + public integer_traits_base +{}; + + +template<> + class integer_traits + : public std::numeric_limits, + public integer_traits_base +{}; + + +template<> + class integer_traits + : public std::numeric_limits, + public integer_traits_base +{}; + + +template<> + class integer_traits + : public std::numeric_limits, + public integer_traits_base +{}; + + +template<> + class integer_traits + : public std::numeric_limits, + public integer_traits_base +{}; + + +template<> + class integer_traits + : public std::numeric_limits, + public 
integer_traits_base +{}; + + +template<> + class integer_traits + : public std::numeric_limits, + public integer_traits_base +{}; + + +template<> + class integer_traits + : public std::numeric_limits, + public integer_traits_base +{}; + + +template<> + class integer_traits + : public std::numeric_limits, + public integer_traits_base +{}; + + +template<> + class integer_traits + : public std::numeric_limits, + public integer_traits_base +{}; + +} // end detail + +} // end thrust + diff --git a/compat/thrust/detail/internal_functional.h b/compat/thrust/detail/internal_functional.h new file mode 100644 index 0000000..6d5264a --- /dev/null +++ b/compat/thrust/detail/internal_functional.h @@ -0,0 +1,678 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file internal_functional.inl + * \brief Non-public functionals used to implement algorithm internals. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include // for ::new + +namespace thrust +{ +namespace detail +{ + +// unary_negate does not need to know argument_type +template +struct unary_negate +{ + typedef bool result_type; + + Predicate pred; + + __host__ __device__ + explicit unary_negate(const Predicate& pred) : pred(pred) {} + + template + __host__ __device__ + bool operator()(const T& x) + { + return !bool(pred(x)); + } +}; + +// binary_negate does not need to know first_argument_type or second_argument_type +template +struct binary_negate +{ + typedef bool result_type; + + Predicate pred; + + __host__ __device__ + explicit binary_negate(const Predicate& pred) : pred(pred) {} + + template + __host__ __device__ + bool operator()(const T1& x, const T2& y) + { + return !bool(pred(x,y)); + } +}; + +template + __host__ __device__ + thrust::detail::unary_negate not1(const Predicate &pred) +{ + return thrust::detail::unary_negate(pred); +} + +template + __host__ __device__ + thrust::detail::binary_negate not2(const Predicate &pred) +{ + return thrust::detail::binary_negate(pred); +} + + +// convert a predicate to a 0 or 1 integral value +template +struct predicate_to_integral +{ + Predicate pred; + + __host__ __device__ + explicit predicate_to_integral(const Predicate& pred) : pred(pred) {} + + template + __host__ __device__ + bool operator()(const T& x) + { + return pred(x) ? 
IntegralType(1) : IntegralType(0); + } +}; + + +// note that detail::equal_to does not force conversion from T2 -> T1 as equal_to does +template +struct equal_to +{ + typedef bool result_type; + + template + __host__ __device__ + bool operator()(const T1& lhs, const T2& rhs) const + { + return lhs == rhs; + } +}; + +// note that equal_to_value does not force conversion from T2 -> T1 as equal_to does +template +struct equal_to_value +{ + T2 rhs; + + equal_to_value(const T2& rhs) : rhs(rhs) {} + + template + __host__ __device__ + bool operator()(const T1& lhs) const + { + return lhs == rhs; + } +}; + +template +struct tuple_binary_predicate +{ + typedef bool result_type; + + __host__ __device__ + tuple_binary_predicate(const Predicate& p) : pred(p) {} + + template + __host__ __device__ + bool operator()(const Tuple& t) const + { + return pred(thrust::get<0>(t), thrust::get<1>(t)); + } + + Predicate pred; +}; + +template +struct tuple_not_binary_predicate +{ + typedef bool result_type; + + __host__ __device__ + tuple_not_binary_predicate(const Predicate& p) : pred(p) {} + + template + __host__ __device__ + bool operator()(const Tuple& t) const + { + return !pred(thrust::get<0>(t), thrust::get<1>(t)); + } + + Predicate pred; +}; + +template + struct host_generate_functor +{ + typedef void result_type; + + __host__ __device__ + host_generate_functor(Generator g) + : gen(g) {} + + // operator() does not take an lvalue reference because some iterators + // produce temporary proxy references when dereferenced. for example, + // consider the temporary tuple of references produced by zip_iterator. + // such temporaries cannot bind to an lvalue reference. + // + // to WAR this, accept a const reference (which is bindable to a temporary), + // and const_cast in the implementation. 
+ // + // XXX change to an rvalue reference upon c++0x (which either a named variable + // or temporary can bind to) + template + __host__ + void operator()(const T &x) + { + // we have to be naughty and const_cast this to get it to work + T &lvalue = const_cast(x); + + // this assigns correctly whether x is a true reference or proxy + lvalue = gen(); + } + + Generator gen; +}; + +template + struct device_generate_functor +{ + typedef void result_type; + + __host__ __device__ + device_generate_functor(Generator g) + : gen(g) {} + + // operator() does not take an lvalue reference because some iterators + // produce temporary proxy references when dereferenced. for example, + // consider the temporary tuple of references produced by zip_iterator. + // such temporaries cannot bind to an lvalue reference. + // + // to WAR this, accept a const reference (which is bindable to a temporary), + // and const_cast in the implementation. + // + // XXX change to an rvalue reference upon c++0x (which either a named variable + // or temporary can bind to) + template + __host__ __device__ + void operator()(const T &x) + { + // we have to be naughty and const_cast this to get it to work + T &lvalue = const_cast(x); + + // this assigns correctly whether x is a true reference or proxy + lvalue = gen(); + } + + Generator gen; +}; + +template + struct generate_functor + : thrust::detail::eval_if< + thrust::detail::is_convertible::value, + thrust::detail::identity_ >, + thrust::detail::identity_ > + > +{}; + + +template + struct zipped_binary_op +{ + typedef ResultType result_type; + + __host__ __device__ + zipped_binary_op(BinaryFunction binary_op) + : m_binary_op(binary_op) {} + + template + __host__ __device__ + inline result_type operator()(Tuple t) + { + return m_binary_op(thrust::get<0>(t), thrust::get<1>(t)); + } + + BinaryFunction m_binary_op; +}; + + +template + struct is_non_const_reference + : thrust::detail::and_< + thrust::detail::not_ >, + thrust::detail::is_reference + > 
+{}; + +template struct is_tuple_of_iterator_references : thrust::detail::false_type {}; + +template + struct is_tuple_of_iterator_references< + thrust::detail::tuple_of_iterator_references< + T1,T2,T3,T4,T5,T6,T7,T8,T9,T10 + > + > + : thrust::detail::true_type +{}; + +// use this enable_if to avoid assigning to temporaries in the transform functors below +// XXX revisit this problem with c++11 perfect forwarding +template + struct enable_if_non_const_reference_or_tuple_of_iterator_references + : thrust::detail::enable_if< + is_non_const_reference::value || is_tuple_of_iterator_references::value + > +{}; + + +template + struct host_unary_transform_functor +{ + typedef void result_type; + + UnaryFunction f; + + host_unary_transform_functor(UnaryFunction f_) + :f(f_) {} + + template + inline __host__ + typename enable_if_non_const_reference_or_tuple_of_iterator_references< + typename thrust::tuple_element<1,Tuple>::type + >::type + operator()(Tuple t) + { + thrust::get<1>(t) = f(thrust::get<0>(t)); + } +}; + +template + struct device_unary_transform_functor +{ + typedef void result_type; + + UnaryFunction f; + + device_unary_transform_functor(UnaryFunction f_) + :f(f_) {} + + // add __host__ to allow the omp backend compile with nvcc + template + inline __host__ __device__ + typename enable_if_non_const_reference_or_tuple_of_iterator_references< + typename thrust::tuple_element<1,Tuple>::type + >::type + operator()(Tuple t) + { + thrust::get<1>(t) = f(thrust::get<0>(t)); + } +}; + + +template + struct unary_transform_functor + : thrust::detail::eval_if< + thrust::detail::is_convertible::value, + thrust::detail::identity_ >, + thrust::detail::identity_ > + > +{}; + + +template + struct host_binary_transform_functor +{ + BinaryFunction f; + + host_binary_transform_functor(BinaryFunction f_) + :f(f_) + {} + + template + __host__ + void operator()(Tuple t) + { + thrust::get<2>(t) = f(thrust::get<0>(t), thrust::get<1>(t)); + } +}; // end binary_transform_functor + + 
+template + struct device_binary_transform_functor +{ + BinaryFunction f; + + device_binary_transform_functor(BinaryFunction f_) + :f(f_) + {} + + // add __host__ to allow the omp backend compile with nvcc + template + inline __host__ __device__ + typename enable_if_non_const_reference_or_tuple_of_iterator_references< + typename thrust::tuple_element<2,Tuple>::type + >::type + operator()(Tuple t) + { + thrust::get<2>(t) = f(thrust::get<0>(t), thrust::get<1>(t)); + } +}; // end binary_transform_functor + + +template + struct binary_transform_functor + : thrust::detail::eval_if< + thrust::detail::is_convertible::value, + thrust::detail::identity_ >, + thrust::detail::identity_ > + > +{}; + + +template +struct host_unary_transform_if_functor +{ + UnaryFunction unary_op; + Predicate pred; + + host_unary_transform_if_functor(UnaryFunction unary_op_, Predicate pred_) + : unary_op(unary_op_), pred(pred_) {} + + template + inline __host__ + typename enable_if_non_const_reference_or_tuple_of_iterator_references< + typename thrust::tuple_element<1,Tuple>::type + >::type + operator()(Tuple t) + { + if(pred(thrust::get<0>(t))) + { + thrust::get<1>(t) = unary_op(thrust::get<0>(t)); + } + } +}; // end host_unary_transform_if_functor + + +template +struct device_unary_transform_if_functor +{ + UnaryFunction unary_op; + Predicate pred; + + device_unary_transform_if_functor(UnaryFunction unary_op_, Predicate pred_) + : unary_op(unary_op_), pred(pred_) {} + + template + inline __host__ __device__ + typename enable_if_non_const_reference_or_tuple_of_iterator_references< + typename thrust::tuple_element<1,Tuple>::type + >::type + operator()(Tuple t) + { + if(pred(thrust::get<0>(t))) + { + thrust::get<1>(t) = unary_op(thrust::get<0>(t)); + } + } +}; // end device_unary_transform_if_functor + + +template + struct unary_transform_if_functor + : thrust::detail::eval_if< + thrust::detail::is_convertible::value, + thrust::detail::identity_ >, + thrust::detail::identity_ > + > +{}; + + 
+template +struct host_unary_transform_if_with_stencil_functor +{ + UnaryFunction unary_op; + Predicate pred; + + host_unary_transform_if_with_stencil_functor(UnaryFunction _unary_op, Predicate _pred) + : unary_op(_unary_op), pred(_pred) {} + + template + inline __host__ + typename enable_if_non_const_reference_or_tuple_of_iterator_references< + typename thrust::tuple_element<2,Tuple>::type + >::type + operator()(Tuple t) + { + if(pred(thrust::get<1>(t))) + thrust::get<2>(t) = unary_op(thrust::get<0>(t)); + } +}; // end host_unary_transform_if_with_stencil_functor + + +template +struct device_unary_transform_if_with_stencil_functor +{ + UnaryFunction unary_op; + Predicate pred; + + device_unary_transform_if_with_stencil_functor(UnaryFunction _unary_op, Predicate _pred) + : unary_op(_unary_op), pred(_pred) {} + + // add __host__ to allow the omp backend compile with nvcc + template + inline __host__ __device__ + typename enable_if_non_const_reference_or_tuple_of_iterator_references< + typename thrust::tuple_element<2,Tuple>::type + >::type + operator()(Tuple t) + { + if(pred(thrust::get<1>(t))) + thrust::get<2>(t) = unary_op(thrust::get<0>(t)); + } +}; // end device_unary_transform_if_with_stencil_functor + + +template + struct unary_transform_if_with_stencil_functor + : thrust::detail::eval_if< + thrust::detail::is_convertible::value, + thrust::detail::identity_ >, + thrust::detail::identity_ > + > +{}; + + +template +struct host_binary_transform_if_functor +{ + BinaryFunction binary_op; + Predicate pred; + + host_binary_transform_if_functor(BinaryFunction _binary_op, Predicate _pred) + : binary_op(_binary_op), pred(_pred) {} + + template + inline __host__ + typename enable_if_non_const_reference_or_tuple_of_iterator_references< + typename thrust::tuple_element<3,Tuple>::type + >::type + operator()(Tuple t) + { + if(pred(thrust::get<2>(t))) + thrust::get<3>(t) = binary_op(thrust::get<0>(t), thrust::get<1>(t)); + } +}; // end host_binary_transform_if_functor + + 
+template +struct device_binary_transform_if_functor +{ + BinaryFunction binary_op; + Predicate pred; + + device_binary_transform_if_functor(BinaryFunction _binary_op, Predicate _pred) + : binary_op(_binary_op), pred(_pred) {} + + // add __host__ to allow the omp backend compile with nvcc + template + inline __host__ __device__ + typename enable_if_non_const_reference_or_tuple_of_iterator_references< + typename thrust::tuple_element<3,Tuple>::type + >::type + operator()(Tuple t) + { + if(pred(thrust::get<2>(t))) + thrust::get<3>(t) = binary_op(thrust::get<0>(t), thrust::get<1>(t)); + } +}; // end device_binary_transform_if_functor + + +template + struct binary_transform_if_functor + : thrust::detail::eval_if< + thrust::detail::is_convertible::value, + thrust::detail::identity_ >, + thrust::detail::identity_ > + > +{}; + + +template + struct host_destroy_functor +{ + __host__ + void operator()(T &x) const + { + x.~T(); + } // end operator()() +}; // end host_destroy_functor + + +template + struct device_destroy_functor +{ + // add __host__ to allow the omp backend to compile with nvcc + __host__ __device__ + void operator()(T &x) const + { + x.~T(); + } // end operator()() +}; // end device_destroy_functor + + +template + struct destroy_functor + : thrust::detail::eval_if< + thrust::detail::is_convertible::value, + thrust::detail::identity_ >, + thrust::detail::identity_ > + > +{}; + + +template +struct fill_functor +{ + const T exemplar; + + fill_functor(const T& _exemplar) + : exemplar(_exemplar) {} + + __host__ __device__ + T operator()(void) const + { + return exemplar; + } +}; + + +template + struct uninitialized_fill_functor +{ + T exemplar; + + uninitialized_fill_functor(T x):exemplar(x){} + + __host__ __device__ + void operator()(T &x) + { + ::new(static_cast(&x)) T(exemplar); + } // end operator()() +}; // end uninitialized_fill_functor + + +// this predicate tests two two-element tuples +// we first use a Compare for the first element +// if the first 
elements are equivalent, we use +// < for the second elements +template + struct compare_first_less_second +{ + compare_first_less_second(Compare c) + : comp(c) {} + + template + __host__ __device__ + bool operator()(T1 lhs, T2 rhs) + { + return comp(thrust::get<0>(lhs), thrust::get<0>(rhs)) || (!comp(thrust::get<0>(rhs), thrust::get<0>(lhs)) && thrust::get<1>(lhs) < thrust::get<1>(rhs)); + } + + Compare comp; +}; // end compare_first_less_second + + +template + struct compare_first +{ + Compare comp; + + compare_first(Compare comp) + : comp(comp) + {} + + template + __host__ __device__ + bool operator()(const Tuple1 &x, const Tuple2 &y) + { + return comp(thrust::raw_reference_cast(thrust::get<0>(x)), thrust::raw_reference_cast(thrust::get<0>(y))); + } +}; // end compare_first + + +} // end namespace detail +} // end namespace thrust + diff --git a/compat/thrust/detail/logical.inl b/compat/thrust/detail/logical.inl new file mode 100644 index 0000000..126a3e3 --- /dev/null +++ b/compat/thrust/detail/logical.inl @@ -0,0 +1,96 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file logical.inl + * \brief Inline file for logical.h. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template +bool all_of(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred) +{ + using thrust::system::detail::generic::all_of; + return all_of(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); +} // end all_of() + + +template +bool any_of(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred) +{ + using thrust::system::detail::generic::any_of; + return any_of(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); +} // end any_of() + + +template +bool none_of(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred) +{ + using thrust::system::detail::generic::none_of; + return none_of(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); +} // end none_of() + + +template +bool all_of(InputIterator first, InputIterator last, Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::all_of(select_system(system), first, last, pred); +} + + +template +bool any_of(InputIterator first, InputIterator last, Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::any_of(select_system(system), first, last, pred); +} + + +template +bool none_of(InputIterator first, InputIterator last, Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::none_of(select_system(system), first, last, pred); +} + + +} // end namespace thrust + diff --git a/compat/thrust/detail/malloc_and_free.h 
b/compat/thrust/detail/malloc_and_free.h new file mode 100644 index 0000000..57b1685 --- /dev/null +++ b/compat/thrust/detail/malloc_and_free.h @@ -0,0 +1,79 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + +template +pointer malloc(const thrust::detail::execution_policy_base &exec, std::size_t n) +{ + using thrust::system::detail::generic::malloc; + + // XXX should use a hypothetical thrust::static_pointer_cast here + void *raw_ptr = static_cast(thrust::raw_pointer_cast(malloc(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), n))); + + return pointer(raw_ptr); +} + +template +pointer malloc(const thrust::detail::execution_policy_base &exec, std::size_t n) +{ + using thrust::system::detail::generic::malloc; + + T *raw_ptr = static_cast(thrust::raw_pointer_cast(malloc(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), n))); + + return pointer(raw_ptr); +} + + +// XXX WAR nvbug 992955 +#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC +#if CUDA_VERSION < 5000 + +// cudafe generates unqualified calls to free(int *volatile) +// which get confused with thrust::free +// spoof a thrust::free which simply maps to ::free +inline __host__ __device__ +void free(int *volatile ptr) +{ + ::free(ptr); +} + +#endif // CUDA_VERSION +#endif // THRUST_DEVICE_COMPILER + +template +void 
free(const thrust::detail::execution_policy_base &exec, Pointer ptr) +{ + using thrust::system::detail::generic::free; + + free(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), ptr); +} + +// XXX consider another form of free which does not take a system argument and +// instead infers the system from the pointer + +} // end namespace thrust + diff --git a/compat/thrust/detail/merge.inl b/compat/thrust/detail/merge.inl new file mode 100644 index 0000000..77f09f5 --- /dev/null +++ b/compat/thrust/detail/merge.inl @@ -0,0 +1,217 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file merge.inl + * \brief Inline file for merge.h. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + OutputIterator merge(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result) +{ + using thrust::system::detail::generic::merge; + return merge(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result); +} // end merge() + + +template + OutputIterator merge(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakCompare comp) +{ + using thrust::system::detail::generic::merge; + return merge(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result, comp); +} // end merge() + + +template + thrust::pair + merge_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first1, InputIterator1 keys_last1, + InputIterator2 keys_first2, InputIterator2 keys_last2, + InputIterator3 values_first1, InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result) +{ + using thrust::system::detail::generic::merge_by_key; + return merge_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result); +} // end merge_by_key() + + +template + thrust::pair + merge_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first1, InputIterator1 keys_last1, + InputIterator2 keys_first2, InputIterator2 keys_last2, + InputIterator3 values_first1, InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + Compare comp) +{ + using thrust::system::detail::generic::merge_by_key; + return 
merge_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp); +} // end merge_by_key() + + +template + OutputIterator merge(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::merge(select_system(system1,system2,system3), first1, last1, first2, last2, result, comp); +} // end merge() + + +template + OutputIterator merge(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::merge(select_system(system1,system2,system3), first1, last1, first2, last2, result); +} // end merge() + + +template + thrust::pair + merge_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type 
System4; + typedef typename thrust::iterator_system::type System5; + typedef typename thrust::iterator_system::type System6; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + System5 system5; + System6 system6; + + return thrust::merge_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp); +} // end merge_by_key() + + +template + thrust::pair + merge_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + typedef typename thrust::iterator_system::type System5; + typedef typename thrust::iterator_system::type System6; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + System5 system5; + System6 system6; + + return thrust::merge_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result); +} // end merge_by_key() + + +} // end thrust + diff --git a/compat/thrust/detail/minmax.h b/compat/thrust/detail/minmax.h new file mode 100644 index 0000000..a560ea1 --- /dev/null +++ b/compat/thrust/detail/minmax.h @@ -0,0 +1,55 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace thrust +{ + + +template +__host__ __device__ + T min THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs, BinaryPredicate comp) +{ + return comp(rhs, lhs) ? rhs : lhs; +} // end min() + +template +__host__ __device__ + T min THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs) +{ + return rhs < lhs ? rhs : lhs; +} // end min() + +template +__host__ __device__ + T max THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs, BinaryPredicate comp) +{ + return comp(lhs,rhs) ? rhs : lhs; +} // end max() + +template +__host__ __device__ + T max THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs) +{ + return lhs < rhs ? rhs : lhs; +} // end max() + + +} // end thrust + diff --git a/compat/thrust/detail/mismatch.inl b/compat/thrust/detail/mismatch.inl new file mode 100644 index 0000000..37ac663 --- /dev/null +++ b/compat/thrust/detail/mismatch.inl @@ -0,0 +1,93 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +/*! \file mismatch.inl + * \brief Inline file for mismatch.h + */ + + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template +thrust::pair mismatch(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2) +{ + using thrust::system::detail::generic::mismatch; + return mismatch(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2); +} // end mismatch() + + +template +thrust::pair mismatch(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + BinaryPredicate pred) +{ + using thrust::system::detail::generic::mismatch; + return mismatch(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, pred); +} // end mismatch() + + +template +thrust::pair mismatch(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::mismatch(select_system(system1,system2), first1, last1, first2); +} // end mismatch() + + +template +thrust::pair mismatch(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + BinaryPredicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::mismatch(select_system(system1,system2), first1, last1, first2, pred); +} // end mismatch() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/mpl/math.h b/compat/thrust/detail/mpl/math.h new file mode 100644 index 0000000..80adfc1 --- /dev/null +++ b/compat/thrust/detail/mpl/math.h @@ -0,0 +1,174 @@ 
+/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file math.h + * \brief Math-related metaprogramming functionality. + */ + + +#pragma once + +namespace thrust +{ + +namespace detail +{ + +namespace mpl +{ + +namespace math +{ + +namespace detail +{ + +// compute the log base-2 of an integer at compile time +template +struct log2 +{ + static const unsigned int value = log2::value; +}; + +template +struct log2<1, Cur> +{ + static const unsigned int value = Cur; +}; + +template +struct log2<0, Cur> +{ + // undefined +}; + +} // end namespace detail + + +template +struct log2 +{ + static const unsigned int value = detail::log2::value; +}; + + +template +struct min +{ + static const T value = (lhs < rhs) ? lhs : rhs; +}; + + +template +struct max +{ + static const T value = (!(lhs < rhs)) ? 
lhs : rhs; +}; + + +template + struct mul +{ + static const result_type value = x * y; +}; + + +template + struct mod +{ + static const result_type value = x % y; +}; + + +template + struct div +{ + static const result_type value = x / y; +}; + + +template + struct geq +{ + static const bool value = x >= y; +}; + + +template + struct lt +{ + static const bool value = x < y; +}; + + +template + struct gt +{ + static const bool value = x > y; +}; + + +template + struct or_ +{ + static const bool value = (x || y); +}; + + +template + struct bit_and +{ + static const result_type value = x & y; +}; + + +template + struct plus +{ + static const result_type value = x + y; +}; + + +template + struct minus +{ + static const result_type value = x - y; +}; + + +template + struct equal +{ + static const bool value = x == y; +}; + + +template + struct is_odd +{ + static const bool value = x & 1; +}; + + +} // end namespace math + +} // end namespace mpl + +} // end namespace detail + +} // end namespace thrust + diff --git a/compat/thrust/detail/numeric_traits.h b/compat/thrust/detail/numeric_traits.h new file mode 100644 index 0000000..a3bc56c --- /dev/null +++ b/compat/thrust/detail/numeric_traits.h @@ -0,0 +1,130 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +//#include // for intmax_t (not provided on MSVS 2005) + +namespace thrust +{ + +namespace detail +{ + +// XXX good enough for the platforms we care about +typedef long long intmax_t; + +template + struct is_signed + : integral_constant::is_signed> +{}; // end is_signed + + +template + struct num_digits + : eval_if< + std::numeric_limits::is_specialized, + integral_constant< + int, + std::numeric_limits::digits + >, + integral_constant< + int, + sizeof(T) * std::numeric_limits::digits - (is_signed::value ? 1 : 0) + > + >::type +{}; // end num_digits + + +template + struct integer_difference + //: eval_if< + // sizeof(Integer) >= sizeof(intmax_t), + // eval_if< + // is_signed::value, + // identity_, + // identity_ + // >, + // eval_if< + // sizeof(Integer) < sizeof(std::ptrdiff_t), + // identity_, + // identity_ + // > + // > +{ + private: + // XXX workaround a pedantic warning in old versions of g++ + // which complains about &&ing with a constant value + template + struct and_ + { + static const bool value = false; + }; + + template + struct and_ + { + static const bool value = y; + }; + + public: + typedef typename + eval_if< + and_< + std::numeric_limits::is_signed, + // digits is the number of no-sign bits + (!std::numeric_limits::is_bounded || (int(std::numeric_limits::digits) + 1 >= num_digits::value)) + >::value, + identity_, + eval_if< + int(std::numeric_limits::digits) + 1 < num_digits::value, + identity_, + eval_if< + int(std::numeric_limits::digits) + 1 < num_digits::value, + identity_, + identity_ + > + > + >::type type; +}; // end integer_difference + + +template + struct numeric_difference + : eval_if< + is_integral::value, + integer_difference, + identity_ + > +{}; // end numeric_difference + + +template +__host__ __device__ +typename numeric_difference::type +numeric_distance(Number x, Number y) +{ + typedef typename numeric_difference::type difference_type; + return difference_type(y) - 
difference_type(x); +} // end numeric_distance + +} // end detail + +} // end thrust + diff --git a/compat/thrust/detail/overlapped_copy.h b/compat/thrust/detail/overlapped_copy.h new file mode 100644 index 0000000..a5540b8 --- /dev/null +++ b/compat/thrust/detail/overlapped_copy.h @@ -0,0 +1,131 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + + +template + OutputIterator sequential_copy(InputIterator first, + InputIterator last, + OutputIterator result) +{ + for(; first != last; ++first, ++result) + { + *result = *first; + } // end for + + return result; +} // end sequential_copy() + + +template + BidirectionalIterator2 sequential_copy_backward(BidirectionalIterator1 first, + BidirectionalIterator1 last, + BidirectionalIterator2 result) +{ + // yes, we predecrement + // the ranges are open on the right, i.e. 
[first, last) + while(first != last) + { + *--result = *--last; + } // end while + + return result; +} // end sequential_copy_backward() + + +namespace dispatch +{ + + +template + RandomAccessIterator2 overlapped_copy(thrust::system::cpp::detail::execution_policy &, + RandomAccessIterator1 first, + RandomAccessIterator1 last, + RandomAccessIterator2 result) +{ + if(first < last && first <= result && result < last) + { + // result lies in [first, last) + // it's safe to use sequential_copy_backward here + thrust::detail::sequential_copy_backward(first, last, result + (last - first)); + result += (last - first); + } // end if + else + { + // result lies outside [first, last) (or the range is empty) + // it's safe to use sequential_copy here + result = thrust::detail::sequential_copy(first, last, result); + } // end else + + return result; +} // end overlapped_copy() + + +template + RandomAccessIterator2 overlapped_copy(thrust::execution_policy &exec, + RandomAccessIterator1 first, + RandomAccessIterator1 last, + RandomAccessIterator2 result) +{ + typedef typename thrust::iterator_value::type value_type; + + // make a temporary copy of [first,last), and copy into it first + thrust::detail::temporary_array temp(exec, first, last); + return thrust::copy(exec, temp.begin(), temp.end(), result); +} // end overlapped_copy() + +} // end dispatch + + +template + RandomAccessIterator2 overlapped_copy(RandomAccessIterator1 first, + RandomAccessIterator1 last, + RandomAccessIterator2 result) +{ + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + typedef typename thrust::detail::minimum_system::type System; + + // XXX presumes System is default constructible + System system; + + return thrust::detail::dispatch::overlapped_copy(system, first, last, result); +} // end overlapped_copy() + +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/pair.inl b/compat/thrust/detail/pair.inl new file mode 100644 index 
0000000..776bdc2 --- /dev/null +++ b/compat/thrust/detail/pair.inl @@ -0,0 +1,225 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +namespace thrust +{ + +template + pair + ::pair(void) + :first(),second() +{ + ; +} // end pair::pair() + + +template + pair + ::pair(const T1 &x, const T2 &y) + :first(x),second(y) +{ + ; +} // end pair::pair() + + +template + template + pair + ::pair(const pair &p) + :first(p.first),second(p.second) +{ + ; +} // end pair::pair() + + +template + template + pair + ::pair(const std::pair &p) + :first(p.first),second(p.second) +{ + ; +} // end pair::pair() + + +template + inline __host__ __device__ + void pair + ::swap(thrust::pair &p) +{ + using thrust::swap; + + swap(first, p.first); + swap(second, p.second); +} // end pair::swap() + + +template + inline __host__ __device__ + bool operator==(const pair &x, const pair &y) +{ + return x.first == y.first && x.second == y.second; +} // end operator==() + + +template + inline __host__ __device__ + bool operator<(const pair &x, const pair &y) +{ + return x.first < y.first || (!(y.first < x.first) && x.second < y.second); +} // end operator<() + + +template + inline __host__ __device__ + bool operator!=(const pair &x, const pair &y) +{ + return !(x == y); +} // end operator!=() + + +template + inline __host__ __device__ + bool operator>(const pair &x, const pair &y) +{ + return y < x; +} // end operator>() + 
+ +template + inline __host__ __device__ + bool operator<=(const pair &x, const pair &y) +{ + return !(y < x); +} // end operator<=() + + +template + inline __host__ __device__ + bool operator>=(const pair &x, const pair &y) +{ + return !(x < y); +} // end operator>=() + + +template + inline __host__ __device__ + void swap(pair &x, pair &y) +{ + return x.swap(y); +} // end swap() + + +template + inline __host__ __device__ + pair make_pair(T1 x, T2 y) +{ + return pair(x,y); +} // end make_pair() + + +// specializations of tuple_element for pair +template + struct tuple_element<0, pair > +{ + typedef T1 type; +}; // end tuple_element + +template + struct tuple_element<1, pair > +{ + typedef T2 type; +}; // end tuple_element + + +// specialization of tuple_size for pair +template + struct tuple_size< pair > +{ + static const unsigned int value = 2; +}; // end tuple_size + + + +namespace detail +{ + + +template struct pair_get {}; + +template + struct pair_get<0, Pair> +{ + inline __host__ __device__ + const typename tuple_element<0, Pair>::type & + operator()(const Pair &p) const + { + return p.first; + } // end operator()() + + inline __host__ __device__ + typename tuple_element<0, Pair>::type & + operator()(Pair &p) const + { + return p.first; + } // end operator()() +}; // end pair_get + + +template + struct pair_get<1, Pair> +{ + inline __host__ __device__ + const typename tuple_element<1, Pair>::type & + operator()(const Pair &p) const + { + return p.second; + } // end operator()() + + inline __host__ __device__ + typename tuple_element<1, Pair>::type & + operator()(Pair &p) const + { + return p.second; + } // end operator()() +}; // end pair_get + +} // end detail + + + +template + inline __host__ __device__ + typename tuple_element >::type & + get(pair &p) +{ + return detail::pair_get >()(p); +} // end get() + +template + inline __host__ __device__ + const typename tuple_element >::type & + get(const pair &p) +{ + return detail::pair_get >()(p); +} // end get() 
+ + +} // end thrust + diff --git a/compat/thrust/detail/partition.inl b/compat/thrust/detail/partition.inl new file mode 100644 index 0000000..19ef08a --- /dev/null +++ b/compat/thrust/detail/partition.inl @@ -0,0 +1,398 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file partition.inl + * \brief Inline file for partition.h. + */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + ForwardIterator partition(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + using thrust::system::detail::generic::partition; + return partition(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); +} // end partition() + + +template + ForwardIterator partition(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred) +{ + using thrust::system::detail::generic::partition; + return partition(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, pred); +} // end partition() + + +template + thrust::pair + partition_copy(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + using thrust::system::detail::generic::partition_copy; + 
return partition_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, out_true, out_false, pred); +} // end partition_copy() + + +template + thrust::pair + partition_copy(const thrust::detail::execution_policy_base &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + using thrust::system::detail::generic::partition_copy; + return partition_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, out_true, out_false, pred); +} // end partition_copy() + + +template + ForwardIterator stable_partition(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + using thrust::system::detail::generic::stable_partition; + return stable_partition(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); +} // end stable_partition() + + +template + ForwardIterator stable_partition(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred) +{ + using thrust::system::detail::generic::stable_partition; + return stable_partition(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, pred); +} // end stable_partition() + + +template + thrust::pair + stable_partition_copy(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + using thrust::system::detail::generic::stable_partition_copy; + return stable_partition_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, out_true, out_false, pred); +} // end stable_partition_copy() + + +template + thrust::pair + stable_partition_copy(const thrust::detail::execution_policy_base &exec, + InputIterator1 first, + 
InputIterator1 last, + InputIterator2 stencil, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + using thrust::system::detail::generic::stable_partition_copy; + return stable_partition_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, out_true, out_false, pred); +} // end stable_partition_copy() + + +template + ForwardIterator partition_point(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + using thrust::system::detail::generic::partition_point; + return partition_point(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); +} // end partition_point() + + +template + bool is_partitioned(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + Predicate pred) +{ + using thrust::system::detail::generic::is_partitioned; + return is_partitioned(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); +} // end is_partitioned() + + +template + ForwardIterator partition(ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::partition(select_system(system), first, last, pred); +} // end partition() + + +template + ForwardIterator partition(ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::partition(select_system(system1,system2), first, last, stencil, pred); +} // end partition() + + +template + ForwardIterator stable_partition(ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + 
using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::stable_partition(select_system(system), first, last, pred); +} // end stable_partition() + + +template + ForwardIterator stable_partition(ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::stable_partition(select_system(system1,system2), first, last, stencil, pred); +} // end stable_partition() + + +template + thrust::pair + partition_copy(InputIterator first, + InputIterator last, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::partition_copy(select_system(system1,system2,system3), first, last, out_true, out_false, pred); +} // end partition_copy() + + +template + thrust::pair + partition_copy(InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + + return thrust::partition_copy(select_system(system1,system2,system3,system4), first, last, stencil, out_true, 
out_false, pred); +} // end partition_copy() + + +template + thrust::pair + stable_partition_copy(InputIterator first, + InputIterator last, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::stable_partition_copy(select_system(system1,system2,system3), first, last, out_true, out_false, pred); +} // end stable_partition_copy() + + +template + thrust::pair + stable_partition_copy(InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + + return thrust::stable_partition_copy(select_system(system1,system2,system3,system4), first, last, stencil, out_true, out_false, pred); +} // end stable_partition_copy() + + +template + ForwardIterator partition_point(ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::partition_point(select_system(system), first, last, pred); +} // end partition_point() + + +template + bool is_partitioned(InputIterator first, + InputIterator last, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; 
+ + return thrust::is_partitioned(select_system(system), first, last, pred); +} // end is_partitioned() + + +} // end thrust + diff --git a/compat/thrust/detail/pointer.h b/compat/thrust/detail/pointer.h new file mode 100644 index 0000000..bc97939 --- /dev/null +++ b/compat/thrust/detail/pointer.h @@ -0,0 +1,184 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + +// declare pointer with default values of template parameters +template class pointer; + +} // end thrust + + +// specialize std::iterator_traits to avoid problems with the name of +// pointer's constructor shadowing its nested pointer type +// do this before pointer is defined so the specialization is correctly +// used inside the definition +namespace std +{ + +template + struct iterator_traits > +{ + private: + typedef thrust::pointer ptr; + + public: + typedef typename ptr::iterator_category iterator_category; + typedef typename ptr::value_type value_type; + typedef typename ptr::difference_type difference_type; + // XXX implement this type (the result of operator->) later + typedef void pointer; + typedef typename ptr::reference reference; +}; // end iterator_traits + +} // end std + + +namespace thrust +{ + +namespace detail +{ + +// this metafunction computes the type of iterator_adaptor thrust::pointer should inherit from +template + 
struct pointer_base +{ + // void pointers should have no element type + // note that we remove_cv from the Element type to get the value_type + typedef typename thrust::detail::eval_if< + thrust::detail::is_void::type>::value, + thrust::detail::identity_, + thrust::detail::remove_cv + >::type value_type; + + // if no Derived type is given, just use pointer + typedef typename thrust::detail::eval_if< + thrust::detail::is_same::value, + thrust::detail::identity_ >, + thrust::detail::identity_ + >::type derived_type; + + // void pointers should have no reference type + // if no Reference type is given, just use reference + typedef typename thrust::detail::eval_if< + thrust::detail::is_void::type>::value, + thrust::detail::identity_, + thrust::detail::eval_if< + thrust::detail::is_same::value, + thrust::detail::identity_ >, + thrust::detail::identity_ + > + >::type reference_arg; + + typedef thrust::iterator_adaptor< + derived_type, // pass along the type of our Derived class to iterator_adaptor + Element *, // we adapt a raw pointer + value_type, // the value type + Tag, // system tag + thrust::random_access_traversal_tag, // pointers have random access traversal + reference_arg, // pass along our Reference type + std::ptrdiff_t + > type; +}; // end pointer_base + + +} // end detail + + +// the base type for all of thrust's tagged pointers. +// for reasonable pointer-like semantics, derived types should reimplement the following: +// 1. no-argument constructor +// 2. constructor from OtherElement * +// 3. constructor from OtherPointer related by convertibility +// 4. assignment from OtherPointer related by convertibility +// These should just call the corresponding members of pointer. 
+template + class pointer + : public thrust::detail::pointer_base::type +{ + private: + typedef typename thrust::detail::pointer_base::type super_t; + + typedef typename thrust::detail::pointer_base::derived_type derived_type; + + // friend iterator_core_access to give it access to dereference + friend class thrust::iterator_core_access; + + __host__ __device__ + typename super_t::reference dereference() const; + + // don't provide access to this part of super_t's interface + using super_t::base; + using typename super_t::base_type; + + public: + typedef typename super_t::base_type raw_pointer; + + // constructors + + __host__ __device__ + pointer(); + + // OtherValue shall be convertible to Value + // XXX consider making the pointer implementation a template parameter which defaults to Element * + template + __host__ __device__ + explicit pointer(OtherElement *ptr); + + // OtherPointer's element_type shall be convertible to Element + // OtherPointer's system shall be convertible to Tag + template + __host__ __device__ + pointer(const OtherPointer &other, + typename thrust::detail::enable_if_pointer_is_convertible< + OtherPointer, + pointer + >::type * = 0); + + // assignment + + // OtherPointer's element_type shall be convertible to Element + // OtherPointer's system shall be convertible to Tag + template + __host__ __device__ + typename thrust::detail::enable_if_pointer_is_convertible< + OtherPointer, + pointer, + derived_type & + >::type + operator=(const OtherPointer &other); + + // observers + + __host__ __device__ + Element *get() const; +}; // end pointer + +} // end thrust + +#include + diff --git a/compat/thrust/detail/pointer.inl b/compat/thrust/detail/pointer.inl new file mode 100644 index 0000000..1d066b0 --- /dev/null +++ b/compat/thrust/detail/pointer.inl @@ -0,0 +1,143 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the 
License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + + +namespace thrust +{ + + +template + pointer + ::pointer() + : super_t(static_cast(0)) +{} // end pointer::pointer + + +template + template + pointer + ::pointer(OtherElement *other) + : super_t(other) +{} // end pointer::pointer + + +template + template + pointer + ::pointer(const OtherPointer &other, + typename thrust::detail::enable_if_pointer_is_convertible< + OtherPointer, + pointer + >::type *) + : super_t(thrust::detail::pointer_traits::get(other)) +{} // end pointer::pointer + + +template + template + typename thrust::detail::enable_if_pointer_is_convertible< + OtherPointer, + pointer, + typename pointer::derived_type & + >::type + pointer + ::operator=(const OtherPointer &other) +{ + super_t::base_reference() = thrust::detail::pointer_traits::get(other); + return static_cast(*this); +} // end pointer::operator= + + +template + typename pointer::super_t::reference + pointer + ::dereference() const +{ + return typename super_t::reference(static_cast(*this)); +} // end pointer::dereference + + +template + Element *pointer + ::get() const +{ + return super_t::base(); +} // end pointer::get + + +namespace detail +{ + +#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && (_MSC_VER <= 1400) +// XXX WAR MSVC 2005 problem with correctly implementing +// pointer_raw_pointer for pointer by specializing it here +template + struct pointer_raw_pointer< thrust::pointer > +{ + typedef typename pointer::raw_pointer type; +}; // end pointer_raw_pointer +#endif + + +#if (THRUST_HOST_COMPILER == 
THRUST_HOST_COMPILER_GCC) && (THRUST_GCC_VERSION < 40200) +// XXX WAR g++-4.1 problem with correctly implementing +// pointer_element for pointer by specializing it here +template + struct pointer_element< thrust::pointer > +{ + typedef Element type; +}; // end pointer_element + +template + struct pointer_element< thrust::pointer > + : pointer_element< thrust::pointer > +{}; // end pointer_element + +template + struct pointer_element< thrust::pointer > + : pointer_element< thrust::pointer > +{}; // end pointer_element + + + +// XXX WAR g++-4.1 problem with correctly implementing +// rebind_pointer for pointer by specializing it here +template + struct rebind_pointer, NewElement> +{ + // XXX note we don't attempt to rebind the pointer's Reference type (or Derived) + typedef thrust::pointer type; +}; + +template + struct rebind_pointer, NewElement> + : rebind_pointer, NewElement> +{}; + +template + struct rebind_pointer, NewElement> + : rebind_pointer, NewElement> +{}; +#endif + +} // end namespace detail + + +} // end thrust + diff --git a/compat/thrust/detail/range/tail_flags.h b/compat/thrust/detail/range/tail_flags.h new file mode 100644 index 0000000..06fd9f8 --- /dev/null +++ b/compat/thrust/detail/range/tail_flags.h @@ -0,0 +1,124 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + + +template::type>, + typename IndexType = typename thrust::iterator_difference::type> + class tail_flags +{ + private: + struct tail_flag_functor + { + BinaryPredicate binary_pred; // this must be the first member for performance reasons + IndexType n; + + typedef bool result_type; + + tail_flag_functor(IndexType n) + : binary_pred(), n(n) + {} + + tail_flag_functor(IndexType n, BinaryPredicate binary_pred) + : binary_pred(binary_pred), n(n) + {} + + template + __host__ __device__ __thrust_forceinline__ + result_type operator()(const Tuple &t) + { + const IndexType i = thrust::get<0>(t); + + // note that we do not dereference the tuple's 2nd element when i >= n + // and therefore do not dereference a bad location at the boundary + return (i == (n - 1) || !binary_pred(thrust::get<1>(t), thrust::get<2>(t))); + } + }; + + typedef thrust::counting_iterator counting_iterator; + + public: + typedef thrust::transform_iterator< + tail_flag_functor, + thrust::zip_iterator > + > iterator; + + tail_flags(RandomAccessIterator first, RandomAccessIterator last) + : m_begin(thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(thrust::counting_iterator(0), first, first + 1)), + tail_flag_functor(last - first))), + m_end(m_begin + (last - first)) + {} + + tail_flags(RandomAccessIterator first, RandomAccessIterator last, BinaryPredicate binary_pred) + : m_begin(thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(thrust::counting_iterator(0), first, first + 1)), + tail_flag_functor(last - first, binary_pred))), + m_end(m_begin + (last - first)) + {} + + iterator begin() const + { + return m_begin; + } + + iterator end() const + { + return m_end; + } + + template + typename iterator::reference operator[](OtherIndex i) + { + return *(begin() + i); + } + + private: + iterator m_begin, m_end; +}; + + +template + 
tail_flags + make_tail_flags(RandomAccessIterator first, RandomAccessIterator last, BinaryPredicate binary_pred) +{ + return tail_flags(first, last, binary_pred); +} + + +template + tail_flags + make_tail_flags(RandomAccessIterator first, RandomAccessIterator last) +{ + return tail_flags(first, last); +} + + +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/raw_pointer_cast.h b/compat/thrust/detail/raw_pointer_cast.h new file mode 100644 index 0000000..05e1e6b --- /dev/null +++ b/compat/thrust/detail/raw_pointer_cast.h @@ -0,0 +1,33 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ + +template + inline __host__ __device__ typename thrust::detail::pointer_traits::raw_pointer + raw_pointer_cast(const Pointer &ptr) +{ + return thrust::detail::pointer_traits::get(ptr); +} // end raw_pointer_cast() + +} // end thrust + diff --git a/compat/thrust/detail/raw_reference_cast.h b/compat/thrust/detail/raw_reference_cast.h new file mode 100644 index 0000000..1ffd7e5 --- /dev/null +++ b/compat/thrust/detail/raw_reference_cast.h @@ -0,0 +1,121 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + + +__THRUST_DEFINE_HAS_NESTED_TYPE(is_wrapped_reference, wrapped_reference_hint) + +namespace raw_reference_detail +{ + +template + struct raw_reference + : add_reference +{}; + + +// XXX consider making raw_reference an error + + +template + struct raw_reference< + T, + typename thrust::detail::enable_if< + is_wrapped_reference< + typename remove_cv::type + >::value + >::type + > +{ + typedef typename add_reference< + typename pointer_element::type + >::type type; +}; + +} // end raw_reference_ns + +template + struct raw_reference : + raw_reference_detail::raw_reference +{}; + + +// wrapped reference-like things which aren't strictly wrapped references +// (e.g. 
tuples of wrapped references) are considered unwrappable +template + struct is_unwrappable + : is_wrapped_reference +{}; + + +template + struct enable_if_unwrappable + : enable_if< + is_unwrappable::value, + Result + > +{}; + + +} // end detail + + +template + inline __host__ __device__ typename detail::raw_reference::type raw_reference_cast(T &ref) +{ + return *thrust::raw_pointer_cast(&ref); +} // end raw_reference_cast + + +template + inline __host__ __device__ typename detail::raw_reference::type raw_reference_cast(const T &ref) +{ + return *thrust::raw_pointer_cast(&ref); +} // end raw_reference_cast + + +template< + typename T0, typename T1, typename T2, + typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, + typename T9 +> +inline __host__ __device__ +typename detail::enable_if_unwrappable< + thrust::detail::tuple_of_iterator_references, + typename detail::raw_reference< + thrust::detail::tuple_of_iterator_references + >::type +>::type +raw_reference_cast(detail::tuple_of_iterator_references t); + + +} // end thrust + +#include + diff --git a/compat/thrust/detail/raw_reference_cast.inl b/compat/thrust/detail/raw_reference_cast.inl new file mode 100644 index 0000000..ea619ec --- /dev/null +++ b/compat/thrust/detail/raw_reference_cast.inl @@ -0,0 +1,277 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + +// specialize is_unwrappable +// a tuple is_unwrappable if any of its elements is_unwrappable +template< + typename T0, typename T1, typename T2, + typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, + typename T9 +> + struct is_unwrappable< + thrust::tuple + > + : or_< + is_unwrappable, + is_unwrappable, + is_unwrappable, + is_unwrappable, + is_unwrappable, + is_unwrappable, + is_unwrappable, + is_unwrappable, + is_unwrappable, + is_unwrappable + > +{}; + + +// specialize is_unwrappable +// a tuple_of_iterator_references is_unwrappable if any of its elements is_unwrappable +template< + typename T0, typename T1, typename T2, + typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, + typename T9 +> + struct is_unwrappable< + thrust::detail::tuple_of_iterator_references + > + : or_< + is_unwrappable, + is_unwrappable, + is_unwrappable, + is_unwrappable, + is_unwrappable, + is_unwrappable, + is_unwrappable, + is_unwrappable, + is_unwrappable, + is_unwrappable + > +{}; + + +namespace raw_reference_detail +{ + +// unlike raw_reference, +// raw_reference_tuple_helper needs to return a value +// when it encounters one, rather than a reference +// upon encountering tuple, recurse +// +// we want the following behavior: +// 1. T -> T +// 2. T& -> T& +// 3. null_type -> null_type +// 4. reference -> T& +// 5. 
tuple_of_iterator_references -> tuple_of_iterator_references::type> + + +// wrapped references are unwrapped using raw_reference, otherwise, return T +template + struct raw_reference_tuple_helper + : eval_if< + is_unwrappable< + typename remove_cv::type + >::value, + raw_reference, + identity_ + > +{}; + + +// recurse on tuples +template < + typename T0, typename T1, typename T2, + typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, + typename T9 +> + struct raw_reference_tuple_helper< + thrust::tuple + > +{ + typedef thrust::tuple< + typename raw_reference_tuple_helper::type, + typename raw_reference_tuple_helper::type, + typename raw_reference_tuple_helper::type, + typename raw_reference_tuple_helper::type, + typename raw_reference_tuple_helper::type, + typename raw_reference_tuple_helper::type, + typename raw_reference_tuple_helper::type, + typename raw_reference_tuple_helper::type, + typename raw_reference_tuple_helper::type, + typename raw_reference_tuple_helper::type + > type; +}; + + +template < + typename T0, typename T1, typename T2, + typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, + typename T9 +> + struct raw_reference_tuple_helper< + thrust::detail::tuple_of_iterator_references + > +{ + typedef thrust::detail::tuple_of_iterator_references< + typename raw_reference_tuple_helper::type, + typename raw_reference_tuple_helper::type, + typename raw_reference_tuple_helper::type, + typename raw_reference_tuple_helper::type, + typename raw_reference_tuple_helper::type, + typename raw_reference_tuple_helper::type, + typename raw_reference_tuple_helper::type, + typename raw_reference_tuple_helper::type, + typename raw_reference_tuple_helper::type, + typename raw_reference_tuple_helper::type + > type; +}; + + +} // end raw_reference_detail + + +// if a tuple "tuple_type" is_unwrappable, +// then the raw_reference of tuple_type is a tuple of its members' raw_references +// else the raw_reference of 
tuple_type is tuple_type & +template < + typename T0, typename T1, typename T2, + typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, + typename T9 +> + struct raw_reference< + thrust::tuple + > +{ + private: + typedef thrust::tuple tuple_type; + + public: + typedef typename eval_if< + is_unwrappable::value, + raw_reference_detail::raw_reference_tuple_helper, + add_reference + >::type type; +}; + + +template < + typename T0, typename T1, typename T2, + typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, + typename T9 +> + struct raw_reference< + thrust::detail::tuple_of_iterator_references + > +{ + private: + typedef detail::tuple_of_iterator_references tuple_type; + + public: + typedef typename raw_reference_detail::raw_reference_tuple_helper::type type; + + // XXX figure out why is_unwrappable seems to be broken for tuple_of_iterator_references + //typedef typename eval_if< + // is_unwrappable::value, + // raw_reference_detail::raw_reference_tuple_helper, + // add_reference + //>::type type; +}; + + +struct raw_reference_caster +{ + template + __host__ __device__ + typename detail::raw_reference::type operator()(T &ref) + { + return thrust::raw_reference_cast(ref); + } + + template + __host__ __device__ + typename detail::raw_reference::type operator()(const T &ref) + { + return thrust::raw_reference_cast(ref); + } + + template< + typename T0, typename T1, typename T2, + typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, + typename T9 + > + __host__ __device__ + typename detail::raw_reference< + thrust::detail::tuple_of_iterator_references + >::type + operator()(thrust::detail::tuple_of_iterator_references t, + typename enable_if< + is_unwrappable >::value + >::type * = 0) + { + return thrust::raw_reference_cast(t); + } +}; // end raw_reference_caster + + +} // end detail + + +template< + typename T0, typename T1, typename T2, + typename T3, typename T4, typename T5, + typename 
T6, typename T7, typename T8, + typename T9 +> +__host__ __device__ +typename detail::enable_if_unwrappable< + thrust::detail::tuple_of_iterator_references, + typename detail::raw_reference< + thrust::detail::tuple_of_iterator_references + >::type +>::type +raw_reference_cast(thrust::detail::tuple_of_iterator_references t) +{ + thrust::detail::raw_reference_caster f; + + // note that we pass raw_reference_tuple_helper, not raw_reference as the unary metafunction + // the subtle difference is important + return thrust::detail::tuple_host_device_transform(t, f); +} // end raw_reference_cast + + +} // end thrust + diff --git a/compat/thrust/detail/reduce.inl b/compat/thrust/detail/reduce.inl new file mode 100644 index 0000000..ba84423 --- /dev/null +++ b/compat/thrust/detail/reduce.inl @@ -0,0 +1,261 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file reduce.inl + * \brief Inline file for reduce.h. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + typename thrust::iterator_traits::value_type + reduce(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last) +{ + using thrust::system::detail::generic::reduce; + return reduce(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); +} // end reduce() + + +template + T reduce(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + T init) +{ + using thrust::system::detail::generic::reduce; + return reduce(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, init); +} // end reduce() + + +template + T reduce(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + T init, + BinaryFunction binary_op) +{ + using thrust::system::detail::generic::reduce; + return reduce(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, init, binary_op); +} // end reduce() + + +template + thrust::pair + reduce_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output) +{ + using thrust::system::detail::generic::reduce_by_key; + return reduce_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, keys_output, values_output); +} // end reduce_by_key() + + +template + thrust::pair + reduce_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred) +{ + using thrust::system::detail::generic::reduce_by_key; + return reduce_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, 
keys_last, values_first, keys_output, values_output, binary_pred); +} // end reduce_by_key() + + +template + thrust::pair + reduce_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred, + BinaryFunction binary_op) +{ + using thrust::system::detail::generic::reduce_by_key; + return reduce_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op); +} // end reduce_by_key() + + +template +typename thrust::iterator_traits::value_type + reduce(InputIterator first, + InputIterator last) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::reduce(select_system(system), first, last); +} + +template + T reduce(InputIterator first, + InputIterator last, + T init) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::reduce(select_system(system), first, last, init); +} + + +template + T reduce(InputIterator first, + InputIterator last, + T init, + BinaryFunction binary_op) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::reduce(select_system(system), first, last, init, binary_op); +} + +template + thrust::pair + reduce_by_key(InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename 
thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + + return thrust::reduce_by_key(select_system(system1,system2,system3,system4), keys_first, keys_last, values_first, keys_output, values_output); +} + +template + thrust::pair + reduce_by_key(InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + + return thrust::reduce_by_key(select_system(system1,system2,system3,system4), keys_first, keys_last, values_first, keys_output, values_output, binary_pred); +} + +template + thrust::pair + reduce_by_key(InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred, + BinaryFunction binary_op) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + + return thrust::reduce_by_key(select_system(system1,system2,system3,system4), keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op); +} + +} // end namespace thrust + diff --git a/compat/thrust/detail/reference.h b/compat/thrust/detail/reference.h new 
file mode 100644 index 0000000..8c0b061 --- /dev/null +++ b/compat/thrust/detail/reference.h @@ -0,0 +1,167 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + + +namespace thrust +{ +namespace detail +{ + /* Forward declaration only; class reference below names this trait as a friend so it can reach the private wrapped_reference_hint tag. */ +template struct is_wrapped_reference; + +} + +// the base type for all of thrust's system-annotated references. +// for reasonable reference-like semantics, derived types must reimplement the following: +// 1. constructor from pointer +// 2. copy constructor +// 3. templated copy constructor from other reference +// 4. templated assignment from other reference +// 5.
assignment from value_type /* reference is a proxy for the object addressed by m_ptr: conversion to value_type reads it and the operator= overloads write it (definitions are in reference.inl). */ +template + class reference +{ + private: + /* derived_type is the type every mutating operator returns a reference to. NOTE(review): the arguments of is_same and identity_ are missing in this copy, so which type each branch selects cannot be confirmed here. */ typedef typename thrust::detail::eval_if< + thrust::detail::is_same::value, + thrust::detail::identity_, + thrust::detail::identity_ + >::type derived_type; + + // hint for is_wrapped_reference lets it know that this type (or a derived type) + // is a wrapped reference + struct wrapped_reference_hint {}; + template friend struct thrust::detail::is_wrapped_reference; + + public: + typedef Pointer pointer; + typedef typename thrust::detail::remove_const::type value_type; + + __host__ __device__ + explicit reference(const pointer &ptr); + + template + __host__ __device__ + reference(const reference &other, + typename thrust::detail::enable_if_convertible< + typename reference::pointer, + pointer + >::type * = 0); + + __host__ __device__ + derived_type &operator=(const reference &other); + + // XXX this may need an enable_if + template + __host__ __device__ + derived_type &operator=(const reference &other); + + __host__ __device__ + derived_type &operator=(const value_type &x); + + __host__ __device__ + pointer operator&() const; + + __host__ __device__ + operator value_type () const; + + __host__ __device__ + void swap(derived_type &other); + + /* NOTE(review): from here on the arithmetic and bitwise operators carry no __host__ __device__ annotation, unlike the members above - confirm that is intentional. */ derived_type &operator++(); + + value_type operator++(int); + + // XXX parameterize the type of rhs + derived_type &operator+=(const value_type &rhs); + + derived_type &operator--(); + + value_type operator--(int); + + // XXX parameterize the type of rhs + derived_type &operator-=(const value_type &rhs); + + // XXX parameterize the type of rhs + derived_type &operator*=(const value_type &rhs); + + // XXX parameterize the type of rhs + derived_type &operator/=(const value_type &rhs); + + // XXX parameterize the type of rhs + derived_type &operator%=(const value_type &rhs); + + // XXX parameterize the type of rhs + derived_type &operator<<=(const value_type &rhs); + + // XXX parameterize the type of rhs + derived_type &operator>>=(const value_type &rhs); + +
// XXX parameterize the type of rhs + derived_type &operator&=(const value_type &rhs); + + // XXX parameterize the type of rhs + derived_type &operator|=(const value_type &rhs); + + // XXX parameterize the type of rhs + derived_type &operator^=(const value_type &rhs); + + private: + /* the wrapped pointer; declared const, so it is fixed at construction */ const pointer m_ptr; + + // allow access to m_ptr for other references + template friend class reference; + + template + __host__ __device__ + inline value_type strip_const_get_value(const System &system) const; + + template + __host__ __device__ + inline void assign_from(OtherPointer src); + + // XXX this helper exists only to avoid warnings about null references from the other assign_from + template + inline __host__ __device__ + void assign_from(System1 *system1, System2 *system2, OtherPointer src); + + template + __host__ __device__ + inline void strip_const_assign_value(const System &system, OtherPointer src); + + // XXX this helper exists only to avoid warnings about null references from the other swap + template + inline __host__ __device__ + void swap(System *system, derived_type &other); + + // XXX this helper exists only to avoid warnings about null references from operator value_type () + template + inline __host__ __device__ + value_type convert_to_value_type(System *system) const; +}; // end reference + + +} // end thrust + +#include + diff --git a/compat/thrust/detail/reference.inl b/compat/thrust/detail/reference.inl new file mode 100644 index 0000000..8b55edb --- /dev/null +++ b/compat/thrust/detail/reference.inl @@ -0,0 +1,361 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace thrust +{ + + /* Converting constructor: copies other.m_ptr; the unnamed pointer parameter exists only to carry the enable_if_convertible constraint. */ +template + template + reference + ::reference(const reference &other, + typename thrust::detail::enable_if_convertible< + typename reference::pointer, + pointer + >::type *) + : m_ptr(other.m_ptr) +{} + + /* Wraps ptr; nothing is read or written through it here. */ +template + reference + ::reference(const pointer &ptr) + : m_ptr(ptr) +{} + + /* Address-of yields the wrapped pointer, not the address of this proxy object. */ +template + typename reference::pointer + reference + ::operator&() const +{ + return m_ptr; +} // end reference::operator&() + + /* Assignment from a value: hands the address of v to assign_from, then returns *this through the static_cast. */ +template + typename reference::derived_type & + reference + ::operator=(const value_type &v) +{ + assign_from(&v); + return static_cast(*this); +} // end reference::operator=() + + /* Assignment from another reference: &other goes through the overloaded operator& above, so assign_from receives other's wrapped pointer. */ +template + typename reference::derived_type & + reference + ::operator=(const reference &other) +{ + assign_from(&other); + return static_cast(*this); +} // end reference::operator=() + + /* Same body as the previous overload, for a reference with different template arguments (not visible in this copy). */ +template + template + typename reference::derived_type & + reference + ::operator=(const reference &other) +{ + assign_from(&other); + return static_cast(*this); +} // end reference::operator=() + + /* Helper for the conversion operator: dereferences the system pointer only to pass it to select_system, then reads via strip_const_get_value. */ +template + template + typename reference::value_type + reference + ::convert_to_value_type(System *system) const +{ + using thrust::system::detail::generic::select_system; + return strip_const_get_value(select_system(*system)); +} // end convert_to_value_type() + + /* Conversion to value_type: the read path. It passes a null System pointer to convert_to_value_type. */ +template + reference + ::operator typename reference::value_type () const +{ + typedef typename thrust::iterator_system::type System; + + // XXX avoid default-constructing a system + // XXX use
a null reference for dispatching + // XXX this assumes that the eventual invocation of + // XXX get_value will not access system state + System *system = 0; + + return convert_to_value_type(system); +} // end reference::operator value_type () + + /* Casts away the const on system, then calls get_value unqualified with the derived_cast system and m_ptr. */ +template + template + typename reference::value_type + reference + ::strip_const_get_value(const System &system) const +{ + System &non_const_system = const_cast(system); + + using thrust::system::detail::generic::get_value; + + return get_value(thrust::detail::derived_cast(non_const_system), m_ptr); +} // end reference::strip_const_get_value() + + /* Dereferences both system pointers for select_system and forwards the result together with src to strip_const_assign_value. */ +template + template + void reference + ::assign_from(System1 *system1, System2 *system2, OtherPointer src) +{ + using thrust::system::detail::generic::select_system; + + strip_const_assign_value(select_system(*system1, *system2), src); +} // end assign_from() + + /* Write-path entry point used by every operator=: names two system types via iterator_system (their template arguments are missing in this copy) and calls the three-argument overload with null pointers of those types. */ +template + template + void reference + ::assign_from(OtherPointer src) +{ + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + // XXX avoid default-constructing a system + // XXX use null references for dispatching + // XXX this assumes that the eventual invocation of + // XXX assign_value will not access system state + System1 *system1 = 0; + System2 *system2 = 0; + + assign_from(system1, system2, src); +} // end assign_from() + + /* Casts away the const on system, then calls assign_value unqualified with the derived_cast system, m_ptr as destination and src as source. */ +template + template + void reference + ::strip_const_assign_value(const System &system, OtherPointer src) +{ + System &non_const_system = const_cast(system); + + using thrust::system::detail::generic::assign_value; + + assign_value(thrust::detail::derived_cast(non_const_system), m_ptr, src); +} // end strip_const_assign_value() + + /* Swap helper: calls iter_swap on the two wrapped pointers; the same dereferenced system is passed twice to select_system. */ +template + template + void reference + ::swap(System *system, derived_type &other) +{ + using thrust::system::detail::generic::select_system; + using thrust::system::detail::generic::iter_swap; + + iter_swap(select_system(*system, *system), m_ptr, other.m_ptr); +} // end reference::swap() + +
+template + void reference + ::swap(derived_type &other) +{ + /* Public swap: forwards to the two-argument swap helper with a null System pointer. */ typedef typename thrust::iterator_system::type System; + + // XXX avoid default-constructing a system + // XXX use null references for dispatching + // XXX this assumes that the eventual invocation + // XXX of iter_swap will not access system state + System *system = 0; + + swap(system, other); +} // end reference::swap() + + /* Pre-increment: copies the referenced value out through the conversion operator, increments the copy, assigns it back through operator=, and returns *this through the static_cast. */ +template + typename reference::derived_type & + reference + ::operator++(void) +{ + value_type temp = *this; + ++temp; + *this = temp; + return static_cast(*this); +} // end reference::operator++() + + /* Post-increment: same copy-out and assign-back, but returns the pre-increment value by value. */ +template + typename reference::value_type + reference + ::operator++(int) +{ + value_type temp = *this; + value_type result = temp++; + *this = temp; + return result; +} // end reference::operator++() + + /* Every compound assignment from here to the end of the namespace has the same shape: copy the value out, apply the operator with rhs to the copy, assign the copy back, return *this through the static_cast. */ +template + typename reference::derived_type & + reference + ::operator+=(const value_type &rhs) +{ + value_type temp = *this; + temp += rhs; + *this = temp; + return static_cast(*this); +} // end reference::operator+=() + /* Pre-decrement: mirrors pre-increment. */ +template + typename reference::derived_type & + reference + ::operator--(void) +{ + value_type temp = *this; + --temp; + *this = temp; + return static_cast(*this); +} // end reference::operator--() + /* Post-decrement: returns the pre-decrement value by value. */ +template + typename reference::value_type + reference + ::operator--(int) +{ + value_type temp = *this; + value_type result = temp--; + *this = temp; + return result; +} // end reference::operator--() + +template + typename reference::derived_type & + reference + ::operator-=(const value_type &rhs) +{ + value_type temp = *this; + temp -= rhs; + *this = temp; + return static_cast(*this); +} // end reference::operator-=() + +template + typename reference::derived_type & + reference + ::operator*=(const value_type &rhs) +{ + value_type temp = *this; + temp *= rhs; + *this = temp; + return static_cast(*this); +} // end reference::operator*=() + +template + typename reference::derived_type & + reference + ::operator/=(const value_type &rhs) +{ + value_type temp = *this;
+ temp /= rhs; + *this = temp; + return static_cast(*this); +} // end reference::operator/=() + +template + typename reference::derived_type & + reference + ::operator%=(const value_type &rhs) +{ + value_type temp = *this; + temp %= rhs; + *this = temp; + return static_cast(*this); +} // end reference::operator%=() + +template + typename reference::derived_type & + reference + ::operator<<=(const value_type &rhs) +{ + value_type temp = *this; + temp <<= rhs; + *this = temp; + return static_cast(*this); +} // end reference::operator<<=() + +template + typename reference::derived_type & + reference + ::operator>>=(const value_type &rhs) +{ + value_type temp = *this; + temp >>= rhs; + *this = temp; + return static_cast(*this); +} // end reference::operator>>=() + +template + typename reference::derived_type & + reference + ::operator&=(const value_type &rhs) +{ + value_type temp = *this; + temp &= rhs; + *this = temp; + return static_cast(*this); +} // end reference::operator&=() + +template + typename reference::derived_type & + reference + ::operator|=(const value_type &rhs) +{ + value_type temp = *this; + temp |= rhs; + *this = temp; + return static_cast(*this); +} // end reference::operator|=() + +template + typename reference::derived_type & + reference + ::operator^=(const value_type &rhs) +{ + value_type temp = *this; + temp ^= rhs; + *this = temp; + return static_cast(*this); +} // end reference::operator^=() + + +} // end thrust + diff --git a/compat/thrust/detail/reference_forward_declaration.h b/compat/thrust/detail/reference_forward_declaration.h new file mode 100644 index 0000000..60524d3 --- /dev/null +++ b/compat/thrust/detail/reference_forward_declaration.h @@ -0,0 +1,28 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ + /* Declaration only: this header introduces the name thrust::reference without defining the class. NOTE(review): the template parameter list is missing in this copy. */ +template class reference; + +} // end thrust + diff --git a/compat/thrust/detail/remove.inl b/compat/thrust/detail/remove.inl new file mode 100644 index 0000000..5675243 --- /dev/null +++ b/compat/thrust/detail/remove.inl @@ -0,0 +1,238 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file remove.inl + * \brief Inline file for remove.h.
+ */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + + /* Execution-policy overloads (first parameter exec): each one strips const from exec, casts it to its derived policy type, and makes an unqualified call after a using-declaration for the generic implementation of the same name. */ +template + ForwardIterator remove(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const T &value) +{ + using thrust::system::detail::generic::remove; + return remove(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value); +} // end remove() + + +template + OutputIterator remove_copy(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + const T &value) +{ + using thrust::system::detail::generic::remove_copy; + return remove_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, value); +} // end remove_copy() + + +template + ForwardIterator remove_if(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + using thrust::system::detail::generic::remove_if; + return remove_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); +} // end remove_if() + + +template + OutputIterator remove_copy_if(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred) +{ + using thrust::system::detail::generic::remove_copy_if; + return remove_copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, pred); +} // end remove_copy_if() + + /* Stencil variant: identical dispatch, with the extra stencil iterator passed through. */ +template + ForwardIterator remove_if(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred) +{ + using thrust::system::detail::generic::remove_if; + return remove_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, pred); +} // end remove_if() + + +template + OutputIterator remove_copy_if(const thrust::detail::execution_policy_base &exec,
+ InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred) +{ + using thrust::system::detail::generic::remove_copy_if; + return remove_copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, result, pred); +} // end remove_copy_if() + + /* Overloads without a policy argument: each default-constructs local system objects whose types come from iterator_system (its template arguments are missing in this copy, presumably the iterator types), combines them with select_system, and passes the result as the first argument of the thrust:: call of the same name. */ +template + ForwardIterator remove(ForwardIterator first, + ForwardIterator last, + const T &value) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::remove(select_system(system), first, last, value); +} // end remove() + + +template + OutputIterator remove_copy(InputIterator first, + InputIterator last, + OutputIterator result, + const T &value) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::remove_copy(select_system(system1,system2), first, last, result, value); +} // end remove_copy() + + +template + ForwardIterator remove_if(ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::remove_if(select_system(system), first, last, pred); +} // end remove_if() + + +template + ForwardIterator remove_if(ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::remove_if(select_system(system1,system2), first, last, stencil, pred); +} // end remove_if() + + +template + OutputIterator remove_copy_if(InputIterator first, +
InputIterator last, + OutputIterator result, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::remove_copy_if(select_system(system1,system2), first, last, result, pred); +} // end remove_copy_if() + + /* Stencil variant: three system objects are combined because three iterator arguments take part. */ +template + OutputIterator remove_copy_if(InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::remove_copy_if(select_system(system1,system2,system3), first, last, stencil, result, pred); +} // end remove_copy_if() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/replace.inl b/compat/thrust/detail/replace.inl new file mode 100644 index 0000000..1eaf24d --- /dev/null +++ b/compat/thrust/detail/replace.inl @@ -0,0 +1,210 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file replace.inl + * \brief Inline file for replace.h.
+ */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + + /* Execution-policy overloads (first parameter exec): each one strips const from exec, casts it to its derived policy type, and makes an unqualified call after a using-declaration for the generic implementation of the same name. The void overloads still write "return f(...)", which only compiles if the callee returns void as well. */ +template + void replace(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, ForwardIterator last, + const T &old_value, + const T &new_value) +{ + using thrust::system::detail::generic::replace; + return replace(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, old_value, new_value); +} // end replace() + + +template + void replace_if(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, ForwardIterator last, + Predicate pred, + const T &new_value) +{ + using thrust::system::detail::generic::replace_if; + return replace_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred, new_value); +} // end replace_if() + + /* Stencil variant: identical dispatch, with the extra stencil iterator passed through. */ +template + void replace_if(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, ForwardIterator last, + InputIterator stencil, + Predicate pred, + const T &new_value) +{ + using thrust::system::detail::generic::replace_if; + return replace_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, pred, new_value); +} // end replace_if() + + +template + OutputIterator replace_copy(const thrust::detail::execution_policy_base &exec, + InputIterator first, InputIterator last, + OutputIterator result, + const T &old_value, + const T &new_value) +{ + using thrust::system::detail::generic::replace_copy; + return replace_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, old_value, new_value); +} // end replace_copy() + + +template + OutputIterator replace_copy_if(const thrust::detail::execution_policy_base &exec, + InputIterator first, InputIterator last, + OutputIterator result, + Predicate pred, + const T &new_value) +{ + using thrust::system::detail::generic::replace_copy_if; + return replace_copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)),
first, last, result, pred, new_value); +} // end replace_copy_if() + + +template + OutputIterator replace_copy_if(const thrust::detail::execution_policy_base &exec, + InputIterator1 first, InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred, + const T &new_value) +{ + using thrust::system::detail::generic::replace_copy_if; + return replace_copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, result, pred, new_value); +} // end replace_copy_if() + + /* Overloads without a policy argument: each default-constructs local system objects whose types come from iterator_system (its template arguments are missing in this copy, presumably the iterator types), combines them with select_system, and passes the result as the first argument of the thrust:: call of the same name. */ +template + OutputIterator replace_copy_if(InputIterator first, InputIterator last, + OutputIterator result, + Predicate pred, + const T &new_value) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::replace_copy_if(select_system(system1,system2), first, last, result, pred, new_value); +} // end replace_copy_if() + + +template + OutputIterator replace_copy_if(InputIterator1 first, InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred, + const T &new_value) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::replace_copy_if(select_system(system1,system2,system3), first, last, stencil, result, pred, new_value); +} // end replace_copy_if() + + +template + OutputIterator replace_copy(InputIterator first, InputIterator last, + OutputIterator result, + const T &old_value, + const T &new_value) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + +
System1 system1; + System2 system2; + + return thrust::replace_copy(select_system(system1,system2), first, last, result, old_value, new_value); +} // end replace_copy() + + /* In-place variants: a single system object is enough unless a stencil iterator is also involved. */ +template + void replace_if(ForwardIterator first, ForwardIterator last, + Predicate pred, + const T &new_value) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::replace_if(select_system(system), first, last, pred, new_value); +} // end replace_if() + + +template + void replace_if(ForwardIterator first, ForwardIterator last, + InputIterator stencil, + Predicate pred, + const T &new_value) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::replace_if(select_system(system1,system2), first, last, stencil, pred, new_value); +} // end replace_if() + + +template + void replace(ForwardIterator first, ForwardIterator last, + const T &old_value, + const T &new_value) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::replace(select_system(system), first, last, old_value, new_value); +} // end replace() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/reverse.inl b/compat/thrust/detail/reverse.inl new file mode 100644 index 0000000..18c26c0 --- /dev/null +++ b/compat/thrust/detail/reverse.inl @@ -0,0 +1,87 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file reverse.inl + * \brief Inline file for reverse.h. + */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + void reverse(const thrust::detail::execution_policy_base &exec, + BidirectionalIterator first, + BidirectionalIterator last) +{ + using thrust::system::detail::generic::reverse; + return reverse(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); +} // end reverse() + + +template + OutputIterator reverse_copy(const thrust::detail::execution_policy_base &exec, + BidirectionalIterator first, + BidirectionalIterator last, + OutputIterator result) +{ + using thrust::system::detail::generic::reverse_copy; + return reverse_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result); +} // end reverse_copy() + + +template + void reverse(BidirectionalIterator first, + BidirectionalIterator last) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::reverse(select_system(system), first, last); +} // end reverse() + + +template + OutputIterator reverse_copy(BidirectionalIterator first, + BidirectionalIterator last, + OutputIterator result) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::reverse_copy(select_system(system1,system2), first, last, 
result); +} // end reverse_copy() + + +} // end thrust + diff --git a/compat/thrust/detail/scan.inl b/compat/thrust/detail/scan.inl new file mode 100644 index 0000000..3e5fd9b --- /dev/null +++ b/compat/thrust/detail/scan.inl @@ -0,0 +1,502 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file scan.inl + * \brief Inline file for scan.h. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + OutputIterator inclusive_scan(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result) +{ + using thrust::system::detail::generic::inclusive_scan; + return inclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result); +} // end inclusive_scan() + + +template + OutputIterator inclusive_scan(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + AssociativeOperator binary_op) +{ + using thrust::system::detail::generic::inclusive_scan; + return inclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, binary_op); +} // end inclusive_scan() + + +template + OutputIterator exclusive_scan(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result) +{ + using 
thrust::system::detail::generic::exclusive_scan; + return exclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result); +} // end exclusive_scan() + + +template + OutputIterator exclusive_scan(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + T init) +{ + using thrust::system::detail::generic::exclusive_scan; + return exclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, init); +} // end exclusive_scan() + + +template + OutputIterator exclusive_scan(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + T init, + AssociativeOperator binary_op) +{ + using thrust::system::detail::generic::exclusive_scan; + return exclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, init, binary_op); +} // end exclusive_scan() + + +template + OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result) +{ + using thrust::system::detail::generic::inclusive_scan_by_key; + return inclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result); +} // end inclusive_scan_by_key() + + +template + OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryPredicate binary_pred) +{ + using thrust::system::detail::generic::inclusive_scan_by_key; + return inclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, binary_pred); +} // end inclusive_scan_by_key() + + +template + OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base 
&exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryPredicate binary_pred, + AssociativeOperator binary_op) +{ + using thrust::system::detail::generic::inclusive_scan_by_key; + return inclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, binary_pred, binary_op); +} // end inclusive_scan_by_key() + + +template + OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result) +{ + using thrust::system::detail::generic::exclusive_scan_by_key; + return exclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result); +} // end exclusive_scan_by_key() + + +template + OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + T init) +{ + using thrust::system::detail::generic::exclusive_scan_by_key; + return exclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, init); +} // end exclusive_scan_by_key() + + +template + OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + T init, + BinaryPredicate binary_pred) +{ + using thrust::system::detail::generic::exclusive_scan_by_key; + return exclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, init, binary_pred); +} // end exclusive_scan_by_key() + + +template + OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + 
T init, + BinaryPredicate binary_pred, + AssociativeOperator binary_op) +{ + using thrust::system::detail::generic::exclusive_scan_by_key; + return exclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, init, binary_pred, binary_op); +} // end exclusive_scan_by_key() + + +template + OutputIterator inclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::inclusive_scan(select_system(system1,system2), first, last, result); +} // end inclusive_scan() + + +template + OutputIterator inclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result, + BinaryFunction binary_op) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::inclusive_scan(select_system(system1,system2), first, last, result, binary_op); +} // end inclusive_scan() + + +template + OutputIterator exclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::exclusive_scan(select_system(system1,system2), first, last, result); +} // end exclusive_scan() + + +template + OutputIterator exclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result, + T init) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type 
System2; + + System1 system1; + System2 system2; + + return thrust::exclusive_scan(select_system(system1,system2), first, last, result, init); +} // end exclusive_scan() + + +template + OutputIterator exclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result, + T init, + BinaryFunction binary_op) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::exclusive_scan(select_system(system1,system2), first, last, result, init, binary_op); +} // end exclusive_scan() + + +template + OutputIterator inclusive_scan_by_key(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::inclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result); +} + + +template + OutputIterator inclusive_scan_by_key(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryPredicate binary_pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::inclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result, binary_pred); +} + + +template + OutputIterator inclusive_scan_by_key(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + 
BinaryPredicate binary_pred, + AssociativeOperator binary_op) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::inclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result, binary_pred, binary_op); +} + + +template + OutputIterator exclusive_scan_by_key(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::exclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result); +} + + +template + OutputIterator exclusive_scan_by_key(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + T init) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::exclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result, init); +} + + +template + OutputIterator exclusive_scan_by_key(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + T init, + BinaryPredicate binary_pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; 
+ typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::exclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result, init, binary_pred); +} + + +template + OutputIterator exclusive_scan_by_key(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + T init, + BinaryPredicate binary_pred, + AssociativeOperator binary_op) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::exclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result, init, binary_pred, binary_op); +} + + +} // end namespace thrust + diff --git a/compat/thrust/detail/scatter.inl b/compat/thrust/detail/scatter.inl new file mode 100644 index 0000000..934addb --- /dev/null +++ b/compat/thrust/detail/scatter.inl @@ -0,0 +1,159 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file scatter.inl + * \brief Inline file for scatter.h. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + void scatter(const thrust::detail::execution_policy_base &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 map, + RandomAccessIterator output) +{ + using thrust::system::detail::generic::scatter; + return scatter(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, map, output); +} // end scatter() + + +template + void scatter_if(const thrust::detail::execution_policy_base &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 map, + InputIterator3 stencil, + RandomAccessIterator output) +{ + using thrust::system::detail::generic::scatter_if; + return scatter_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, map, stencil, output); +} // end scatter_if() + + +template + void scatter_if(const thrust::detail::execution_policy_base &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 map, + InputIterator3 stencil, + RandomAccessIterator output, + Predicate pred) +{ + using thrust::system::detail::generic::scatter_if; + return scatter_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, map, stencil, output, pred); +} // end scatter_if() + + +template + void scatter(InputIterator1 first, + InputIterator1 last, + InputIterator2 map, + RandomAccessIterator output) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::scatter(select_system(system1,system2,system3), first, last, map, output); +} // end scatter() + + +template + void scatter_if(InputIterator1 first, + InputIterator1 last, + InputIterator2 map, + InputIterator3 stencil, + RandomAccessIterator output) +{ + using 
thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + + return thrust::scatter_if(select_system(system1,system2,system3,system4), first, last, map, stencil, output); +} // end scatter_if() + + +template + void scatter_if(InputIterator1 first, + InputIterator1 last, + InputIterator2 map, + InputIterator3 stencil, + RandomAccessIterator output, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + + return thrust::scatter_if(select_system(system1,system2,system3,system4), first, last, map, stencil, output, pred); +} // end scatter_if() + +} // end namespace thrust + diff --git a/compat/thrust/detail/sequence.inl b/compat/thrust/detail/sequence.inl new file mode 100644 index 0000000..f174187 --- /dev/null +++ b/compat/thrust/detail/sequence.inl @@ -0,0 +1,112 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file sequence.inl + * \brief Inline file for sequence.h. + */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + void sequence(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last) +{ + using thrust::system::detail::generic::sequence; + return sequence(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); +} // end sequence() + + +template + void sequence(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + T init) +{ + using thrust::system::detail::generic::sequence; + return sequence(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, init); +} // end sequence() + + +template + void sequence(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + T init, + T step) +{ + using thrust::system::detail::generic::sequence; + return sequence(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, init, step); +} // end sequence() + + +template + void sequence(ForwardIterator first, + ForwardIterator last) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::sequence(select_system(system), first, last); +} // end sequence() + + +template + void sequence(ForwardIterator first, + ForwardIterator last, + T init) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::sequence(select_system(system), first, last, init); +} // end sequence() + + +template + void sequence(ForwardIterator first, + ForwardIterator last, + T init, + T step) +{ + using thrust::system::detail::generic::select_system; + + 
typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::sequence(select_system(system), first, last, init, step); +} // end sequence() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/set_operations.inl b/compat/thrust/detail/set_operations.inl new file mode 100644 index 0000000..daec461 --- /dev/null +++ b/compat/thrust/detail/set_operations.inl @@ -0,0 +1,836 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file set_operations.inl + * \brief Inline file for set_operations.h. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + OutputIterator set_difference(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result) +{ + using thrust::system::detail::generic::set_difference; + return set_difference(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result); +} // end set_difference() + + +template + OutputIterator set_difference(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakCompare comp) +{ + using thrust::system::detail::generic::set_difference; + return set_difference(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result, comp); +} // end set_difference() + + +template + thrust::pair + set_difference_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result) +{ + using thrust::system::detail::generic::set_difference_by_key; + return set_difference_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result); +} // end set_difference_by_key() + + +template + thrust::pair + set_difference_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + 
StrictWeakCompare comp) +{ + using thrust::system::detail::generic::set_difference_by_key; + return set_difference_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp); +} // end set_difference_by_key() + + +template + OutputIterator set_intersection(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result) +{ + using thrust::system::detail::generic::set_intersection; + return set_intersection(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result); +} // end set_intersection() + + +template + OutputIterator set_intersection(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakCompare comp) +{ + using thrust::system::detail::generic::set_intersection; + return set_intersection(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result, comp); +} // end set_intersection() + + +template + thrust::pair + set_intersection_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + OutputIterator1 keys_result, + OutputIterator2 values_result) +{ + using thrust::system::detail::generic::set_intersection_by_key; + return set_intersection_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, keys_result, values_result); +} // end set_intersection_by_key() + + +template + thrust::pair + set_intersection_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 
keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakCompare comp) +{ + using thrust::system::detail::generic::set_intersection_by_key; + return set_intersection_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, keys_result, values_result, comp); +} // end set_intersection_by_key() + + +template + OutputIterator set_symmetric_difference(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result) +{ + using thrust::system::detail::generic::set_symmetric_difference; + return set_symmetric_difference(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result); +} // end set_symmetric_difference() + + +template + OutputIterator set_symmetric_difference(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakCompare comp) +{ + using thrust::system::detail::generic::set_symmetric_difference; + return set_symmetric_difference(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result, comp); +} // end set_symmetric_difference() + + +template + thrust::pair + set_symmetric_difference_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result) +{ + using thrust::system::detail::generic::set_symmetric_difference_by_key; + return 
set_symmetric_difference_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result); +} // end set_symmetric_difference_by_key() + + +template + thrust::pair + set_symmetric_difference_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakCompare comp) +{ + using thrust::system::detail::generic::set_symmetric_difference_by_key; + return set_symmetric_difference_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp); +} // end set_symmetric_difference_by_key() + + +template + OutputIterator set_union(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result) +{ + using thrust::system::detail::generic::set_union; + return set_union(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result); +} // end set_union() + + +template + OutputIterator set_union(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakCompare comp) +{ + using thrust::system::detail::generic::set_union; + return set_union(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result, comp); +} // end set_union() + + +template + thrust::pair + set_union_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + 
InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result) +{ + using thrust::system::detail::generic::set_union_by_key; + return set_union_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result); +} // end set_union_by_key() + + +template + thrust::pair + set_union_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakCompare comp) +{ + using thrust::system::detail::generic::set_union_by_key; + return set_union_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp); +} // end set_union_by_key() + + +template + OutputIterator set_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::set_difference(select_system(system1,system2,system3), first1, last1, first2, last2, result, comp); +} // end set_difference() + + +template + OutputIterator set_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result) +{ + using 
thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::set_difference(select_system(system1,system2,system3), first1, last1, first2, last2, result); +} // end set_difference() + + +template + thrust::pair + set_difference_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + typedef typename thrust::iterator_system::type System5; + typedef typename thrust::iterator_system::type System6; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + System5 system5; + System6 system6; + + return thrust::set_difference_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp); +} // end set_difference_by_key() + + +template + thrust::pair + set_difference_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; 
+ typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + typedef typename thrust::iterator_system::type System5; + typedef typename thrust::iterator_system::type System6; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + System5 system5; + System6 system6; + + return thrust::set_difference_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result); +} // end set_difference_by_key() + + +template + OutputIterator set_intersection(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::set_intersection(select_system(system1,system2,system3), first1, last1, first2, last2, result, comp); +} // end set_intersection() + + +template + OutputIterator set_intersection(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::set_intersection(select_system(system1,system2,system3), first1, last1, first2, last2, result); +} // end set_intersection() + + +template + thrust::pair + set_intersection_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + 
InputIterator2 keys_last2, + InputIterator3 values_first1, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + typedef typename thrust::iterator_system::type System5; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + System5 system5; + + return thrust::set_intersection_by_key(select_system(system1,system2,system3,system4,system5), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, keys_result, values_result, comp); +} // end set_intersection_by_key() + + +template + thrust::pair + set_intersection_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + OutputIterator1 keys_result, + OutputIterator2 values_result) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + typedef typename thrust::iterator_system::type System5; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + System5 system5; + + return thrust::set_intersection_by_key(select_system(system1,system2,system3,system4,system5), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, keys_result, values_result); +} // end set_intersection_by_key() + + +template + OutputIterator set_symmetric_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + using 
thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::set_symmetric_difference(select_system(system1,system2,system3), first1, last1, first2, last2, result, comp); +} // end set_symmetric_difference() + + +template + OutputIterator set_symmetric_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::set_symmetric_difference(select_system(system1,system2,system3), first1, last1, first2, last2, result); +} // end set_symmetric_difference() + + +template + thrust::pair + set_symmetric_difference_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + typedef typename thrust::iterator_system::type System5; + typedef typename thrust::iterator_system::type System6; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + System5 system5; + System6 system6; + + return 
thrust::set_symmetric_difference_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp); +} // end set_symmetric_difference_by_key() + + +template + thrust::pair + set_symmetric_difference_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + typedef typename thrust::iterator_system::type System5; + typedef typename thrust::iterator_system::type System6; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + System5 system5; + System6 system6; + + return thrust::set_symmetric_difference_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result); +} // end set_symmetric_difference_by_key() + + +template + OutputIterator set_union(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::set_union(select_system(system1,system2,system3), first1, last1, first2, last2, result, comp); +} // end set_union() + + +template + 
OutputIterator set_union(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::set_union(select_system(system1,system2,system3), first1, last1, first2, last2, result); +} // end set_union() + + +template + thrust::pair + set_union_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + typedef typename thrust::iterator_system::type System5; + typedef typename thrust::iterator_system::type System6; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + System5 system5; + System6 system6; + + return thrust::set_union_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp); +} // end set_union_by_key() + + +template + thrust::pair + set_union_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result) +{ + using 
thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + typedef typename thrust::iterator_system::type System5; + typedef typename thrust::iterator_system::type System6; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + System5 system5; + System6 system6; + + return thrust::set_union_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result); +} // end set_union_by_key() + + +} // end thrust + diff --git a/compat/thrust/detail/sort.inl b/compat/thrust/detail/sort.inl new file mode 100644 index 0000000..08be55a --- /dev/null +++ b/compat/thrust/detail/sort.inl @@ -0,0 +1,383 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file sort.inl + * \brief Inline file for sort.h. 
+ */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + void sort(const thrust::detail::execution_policy_base &exec, + RandomAccessIterator first, + RandomAccessIterator last) +{ + using thrust::system::detail::generic::sort; + return sort(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); +} // end sort() + + +template + void sort(const thrust::detail::execution_policy_base &exec, + RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::sort; + return sort(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp); +} // end sort() + + +template + void stable_sort(const thrust::detail::execution_policy_base &exec, + RandomAccessIterator first, + RandomAccessIterator last) +{ + using thrust::system::detail::generic::stable_sort; + return stable_sort(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); +} // end stable_sort() + + +template + void stable_sort(const thrust::detail::execution_policy_base &exec, + RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::stable_sort; + return stable_sort(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp); +} // end stable_sort() + + +template + void sort_by_key(const thrust::detail::execution_policy_base &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first) +{ + using thrust::system::detail::generic::sort_by_key; + return sort_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first); +} // end sort_by_key() + + +template + void sort_by_key(const thrust::detail::execution_policy_base &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + 
StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::sort_by_key; + return sort_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, comp); +} // end sort_by_key() + + +template + void stable_sort_by_key(const thrust::detail::execution_policy_base &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first) +{ + using thrust::system::detail::generic::stable_sort_by_key; + return stable_sort_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first); +} // end stable_sort_by_key() + + +template + void stable_sort_by_key(const thrust::detail::execution_policy_base &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::stable_sort_by_key; + return stable_sort_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, comp); +} // end stable_sort_by_key() + + +template + bool is_sorted(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last) +{ + using thrust::system::detail::generic::is_sorted; + return is_sorted(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); +} // end is_sorted() + + +template + bool is_sorted(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + Compare comp) +{ + using thrust::system::detail::generic::is_sorted; + return is_sorted(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp); +} // end is_sorted() + + +template + ForwardIterator is_sorted_until(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last) +{ + using thrust::system::detail::generic::is_sorted_until; + return 
is_sorted_until(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); +} // end is_sorted_until() + + +template + ForwardIterator is_sorted_until(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + Compare comp) +{ + using thrust::system::detail::generic::is_sorted_until; + return is_sorted_until(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp); +} // end is_sorted_until() + + +/////////////// +// Key Sorts // +/////////////// + +template + void sort(RandomAccessIterator first, + RandomAccessIterator last) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::sort(select_system(system), first, last); +} // end sort() + + +template + void sort(RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::sort(select_system(system), first, last, comp); +} // end sort() + + +template + void stable_sort(RandomAccessIterator first, + RandomAccessIterator last) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::stable_sort(select_system(system), first, last); +} // end stable_sort() + + +template + void stable_sort(RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::stable_sort(select_system(system), first, last, comp); +} // end stable_sort() + + + +///////////////////// +// Key-Value Sorts // +///////////////////// + +template + void sort_by_key(RandomAccessIterator1 keys_first, + 
RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::sort_by_key(select_system(system1,system2), keys_first, keys_last, values_first); +} // end sort_by_key() + + +template + void sort_by_key(RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::sort_by_key(select_system(system1,system2), keys_first, keys_last, values_first, comp); +} // end sort_by_key() + + +template + void stable_sort_by_key(RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::stable_sort_by_key(select_system(system1,system2), keys_first, keys_last, values_first); +} // end stable_sort_by_key() + + +template + void stable_sort_by_key(RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::stable_sort_by_key(select_system(system1,system2), keys_first, keys_last, values_first, comp); +} // end stable_sort_by_key() + + +template + bool 
is_sorted(ForwardIterator first, + ForwardIterator last) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::is_sorted(select_system(system), first, last); +} // end is_sorted() + + +template + bool is_sorted(ForwardIterator first, + ForwardIterator last, + Compare comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::is_sorted(select_system(system), first, last, comp); +} // end is_sorted() + + +template + ForwardIterator is_sorted_until(ForwardIterator first, + ForwardIterator last) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::is_sorted_until(select_system(system), first, last); +} // end is_sorted_until() + + +template + ForwardIterator is_sorted_until(ForwardIterator first, + ForwardIterator last, + Compare comp) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::is_sorted_until(select_system(system), first, last, comp); +} // end is_sorted_until() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/static_assert.h b/compat/thrust/detail/static_assert.h new file mode 100644 index 0000000..ccc0842 --- /dev/null +++ b/compat/thrust/detail/static_assert.h @@ -0,0 +1,71 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +/* + * (C) Copyright John Maddock 2000. + * + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying NOTICE file for the complete license) + * + * For more information, see http://www.boost.org + */ + +// +// Helper macro THRUST_JOIN (based on BOOST_JOIN): +// The following piece of macro magic joins the two +// arguments together, even when one of the arguments is +// itself a macro (see 16.3.1 in C++ standard). The key +// is that macro expansion of macro arguments does not +// occur in THRUST_DO_JOIN2 but does in THRUST_DO_JOIN. 
+// +#define THRUST_JOIN( X, Y ) THRUST_DO_JOIN( X, Y ) +#define THRUST_DO_JOIN( X, Y ) THRUST_DO_JOIN2(X,Y) +#define THRUST_DO_JOIN2( X, Y ) X##Y + +namespace thrust +{ + +namespace detail +{ + +// HP aCC cannot deal with missing names for template value parameters +template struct STATIC_ASSERTION_FAILURE; + +template <> struct STATIC_ASSERTION_FAILURE { enum { value = 1 }; }; + +// HP aCC cannot deal with missing names for template value parameters +template struct static_assert_test{}; + +template + struct depend_on_instantiation +{ + static const bool value = x; +}; + +} // end detail + +} // end thrust + +#define THRUST_STATIC_ASSERT( B ) \ + typedef ::thrust::detail::static_assert_test<\ + sizeof(::thrust::detail::STATIC_ASSERTION_FAILURE< (bool)( B ) >)>\ + THRUST_JOIN(thrust_static_assert_typedef_, __LINE__) + diff --git a/compat/thrust/detail/swap.h b/compat/thrust/detail/swap.h new file mode 100644 index 0000000..9f82ac2 --- /dev/null +++ b/compat/thrust/detail/swap.h @@ -0,0 +1,35 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include + +namespace thrust +{ + +template +__host__ __device__ +inline void swap(Assignable1 &a, Assignable2 &b) +{ + Assignable1 temp = a; + a = b; + b = temp; +} // end swap() + +} // end namespace thrust + diff --git a/compat/thrust/detail/swap.inl b/compat/thrust/detail/swap.inl new file mode 100644 index 0000000..eafd70a --- /dev/null +++ b/compat/thrust/detail/swap.inl @@ -0,0 +1,21 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include +#include +#include + diff --git a/compat/thrust/detail/swap_ranges.inl b/compat/thrust/detail/swap_ranges.inl new file mode 100644 index 0000000..e3b06de --- /dev/null +++ b/compat/thrust/detail/swap_ranges.inl @@ -0,0 +1,64 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file swap_ranges.inl + * \brief Inline file for swap_ranges.h. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + ForwardIterator2 swap_ranges(const thrust::detail::execution_policy_base &exec, + ForwardIterator1 first1, + ForwardIterator1 last1, + ForwardIterator2 first2) +{ + using thrust::system::detail::generic::swap_ranges; + return swap_ranges(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2); +} // end swap_ranges() + + +template + ForwardIterator2 swap_ranges(ForwardIterator1 first1, + ForwardIterator1 last1, + ForwardIterator2 first2) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::swap_ranges(select_system(system1,system2), first1, last1, first2); +} // end swap_ranges() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/tabulate.inl b/compat/thrust/detail/tabulate.inl new file mode 100644 index 0000000..961c76e --- /dev/null +++ b/compat/thrust/detail/tabulate.inl @@ -0,0 +1,55 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + void tabulate(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + UnaryOperation unary_op) +{ + using thrust::system::detail::generic::tabulate; + return tabulate(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, unary_op); +} // end tabulate() + + +template + void tabulate(ForwardIterator first, + ForwardIterator last, + UnaryOperation unary_op) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::tabulate(select_system(system), first, last, unary_op); +} // end tabulate() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/temporary_array.h b/compat/thrust/detail/temporary_array.h new file mode 100644 index 0000000..3a9e084 --- /dev/null +++ b/compat/thrust/detail/temporary_array.h @@ -0,0 +1,158 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file temporary_array.h + * \brief Container-like class temporary storage inside algorithms. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + + +template + class temporary_array + : public contiguous_storage< + T, + no_throw_allocator< + temporary_allocator + > + > +{ + private: + typedef contiguous_storage< + T, + no_throw_allocator< + temporary_allocator + > + > super_t; + + // to help out the constructor + typedef no_throw_allocator > alloc_type; + + public: + typedef typename super_t::size_type size_type; + + temporary_array(thrust::execution_policy &system, size_type n); + + // provide a kill-switch to explicitly avoid initialization + temporary_array(int uninit, thrust::execution_policy &system, size_type n); + + template + temporary_array(thrust::execution_policy &system, + InputIterator first, + size_type n); + + template + temporary_array(thrust::execution_policy &system, + thrust::execution_policy &input_system, + InputIterator first, + size_type n); + + template + temporary_array(thrust::execution_policy &system, + InputIterator first, + InputIterator last); + + template + temporary_array(thrust::execution_policy &system, + thrust::execution_policy &input_system, + InputIterator first, + InputIterator last); + + ~temporary_array(); +}; // end temporary_array + + +// XXX eliminate this when we do ranges for real +template + class tagged_iterator_range +{ + public: + typedef thrust::detail::tagged_iterator iterator; + + template + tagged_iterator_range(const Ignored1 &, const Ignored2 &, Iterator first, Iterator last) + : m_begin(reinterpret_tag(first)), + m_end(reinterpret_tag(last)) + {} + + iterator begin(void) const { return m_begin; } + iterator end(void) const { return m_end; } + + private: + iterator m_begin, m_end; +}; + + +// if FromSystem is convertible to ToSystem, then just make a shallow +// copy of the range. 
else, use a temporary_array +// note that the resulting iterator is explicitly tagged with ToSystem either way +template + struct move_to_system_base + : public eval_if< + is_convertible< + FromSystem, + ToSystem + >::value, + identity_< + tagged_iterator_range + >, + identity_< + temporary_array< + typename thrust::iterator_value::type, + ToSystem + > + > + > +{}; + + +template + class move_to_system + : public move_to_system_base< + Iterator, + FromSystem, + ToSystem + >::type +{ + typedef typename move_to_system_base::type super_t; + + public: + move_to_system(thrust::execution_policy &from_system, + thrust::execution_policy &to_system, + Iterator first, + Iterator last) + : super_t(to_system, from_system, first, last) {} +}; + + +} // end detail +} // end thrust + +#include + diff --git a/compat/thrust/detail/temporary_array.inl b/compat/thrust/detail/temporary_array.inl new file mode 100644 index 0000000..36ed167 --- /dev/null +++ b/compat/thrust/detail/temporary_array.inl @@ -0,0 +1,148 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + + +namespace thrust +{ + +namespace detail +{ +namespace temporary_array_detail +{ + + +template struct avoid_initialization : thrust::detail::has_trivial_copy_constructor {}; + + +template +typename thrust::detail::enable_if< + avoid_initialization::value +>::type + construct_values(TemporaryArray &, + Size) +{ + // avoid the overhead of initialization +} // end construct_values() + + +template +typename thrust::detail::disable_if< + avoid_initialization::value +>::type + construct_values(TemporaryArray &a, + Size n) +{ + a.default_construct_n(a.begin(), n); +} // end construct_values() + + +} // end temporary_array_detail + + +template + temporary_array + ::temporary_array(thrust::execution_policy &system, size_type n) + :super_t(n, alloc_type(temporary_allocator(system))) +{ + temporary_array_detail::construct_values(*this, n); +} // end temporary_array::temporary_array() + + +template + temporary_array + ::temporary_array(int, thrust::execution_policy &system, size_type n) + :super_t(n, alloc_type(temporary_allocator(system))) +{ + // avoid initialization + ; +} // end temporary_array::temporary_array() + + +template + template + temporary_array + ::temporary_array(thrust::execution_policy &system, + InputIterator first, + size_type n) + : super_t(alloc_type(temporary_allocator(system))) +{ + super_t::allocate(n); + + super_t::uninitialized_copy_n(system, first, n, super_t::begin()); +} // end temporary_array::temporary_array() + + +template + template + temporary_array + ::temporary_array(thrust::execution_policy &system, + thrust::execution_policy &input_system, + InputIterator first, + size_type n) + : super_t(alloc_type(temporary_allocator(system))) +{ + super_t::allocate(n); + + super_t::uninitialized_copy_n(input_system, first, n, super_t::begin()); +} // end temporary_array::temporary_array() + + +template + template + temporary_array + ::temporary_array(thrust::execution_policy &system, + InputIterator 
first, + InputIterator last) + : super_t(alloc_type(temporary_allocator(system))) +{ + super_t::allocate(thrust::distance(first,last)); + + super_t::uninitialized_copy(system, first, last, super_t::begin()); +} // end temporary_array::temporary_array() + + +template + template + temporary_array + ::temporary_array(thrust::execution_policy &system, + thrust::execution_policy &input_system, + InputIterator first, + InputIterator last) + : super_t(alloc_type(temporary_allocator(system))) +{ + super_t::allocate(thrust::distance(first,last)); + + super_t::uninitialized_copy(input_system, first, last, super_t::begin()); +} // end temporary_array::temporary_array() + + +template + temporary_array + ::~temporary_array() +{ + // note that super_t::destroy will ignore trivial destructors automatically + super_t::destroy(super_t::begin(), super_t::end()); +} // end temporary_array::~temporary_array() + +} // end detail + +} // end thrust + diff --git a/compat/thrust/detail/temporary_buffer.h b/compat/thrust/detail/temporary_buffer.h new file mode 100644 index 0000000..046a3b3 --- /dev/null +++ b/compat/thrust/detail/temporary_buffer.h @@ -0,0 +1,71 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace get_temporary_buffer_detail +{ + + +template + thrust::pair, typename thrust::pointer::difference_type> + down_cast_pair(Pair p) +{ + // XXX should use a hypothetical thrust::static_pointer_cast here + thrust::pointer ptr = thrust::pointer(static_cast(thrust::raw_pointer_cast(p.first))); + + typedef thrust::pair, typename thrust::pointer::difference_type> result_type; + return result_type(ptr, p.second); +} // end down_cast_pair() + + +} // end get_temporary_buffer_detail +} // end detail + + +template + thrust::pair, typename thrust::pointer::difference_type> + get_temporary_buffer(const thrust::detail::execution_policy_base &exec, typename thrust::pointer::difference_type n) +{ + using thrust::system::detail::generic::get_temporary_buffer; + + return thrust::detail::get_temporary_buffer_detail::down_cast_pair(get_temporary_buffer(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), n)); +} // end get_temporary_buffer() + + +template + void return_temporary_buffer(const thrust::detail::execution_policy_base &exec, Pointer p) +{ + using thrust::system::detail::generic::return_temporary_buffer; + + return return_temporary_buffer(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), p); +} // end return_temporary_buffer() + + +} // end thrust + diff --git a/compat/thrust/detail/transform.inl b/compat/thrust/detail/transform.inl new file mode 100644 index 0000000..ae303bc --- /dev/null +++ b/compat/thrust/detail/transform.inl @@ -0,0 +1,239 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file transform.inl + * \brief Inline file for transform.h. + */ + +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + OutputIterator transform(const thrust::detail::execution_policy_base &exec, + InputIterator first, InputIterator last, + OutputIterator result, + UnaryFunction op) +{ + using thrust::system::detail::generic::transform; + return transform(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, op); +} // end transform() + + +template + OutputIterator transform(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryFunction op) +{ + using thrust::system::detail::generic::transform; + return transform(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, op); +} // end transform() + + +template + ForwardIterator transform_if(const thrust::detail::execution_policy_base &exec, + InputIterator first, InputIterator last, + ForwardIterator result, + UnaryFunction op, + Predicate pred) +{ + using thrust::system::detail::generic::transform_if; + return transform_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, op, pred); +} // end transform_if() + + +template + ForwardIterator transform_if(const thrust::detail::execution_policy_base &exec, + InputIterator1 first, InputIterator1 last, + InputIterator2 stencil, + ForwardIterator result, + UnaryFunction op, + Predicate pred) +{ + using 
thrust::system::detail::generic::transform_if; + return transform_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, result, op, pred); +} // end transform_if() + + +template + ForwardIterator transform_if(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, + InputIterator3 stencil, + ForwardIterator result, + BinaryFunction binary_op, + Predicate pred) +{ + using thrust::system::detail::generic::transform_if; + return transform_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, stencil, result, binary_op, pred); +} // end transform_if() + + +template + OutputIterator transform(InputIterator first, + InputIterator last, + OutputIterator result, + UnaryFunction op) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::transform(select_system(system1,system2), first, last, result, op); +} // end transform() + + +template + OutputIterator transform(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryFunction op) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::transform(select_system(system1,system2,system3), first1, last1, first2, result, op); +} // end transform() + + +template + ForwardIterator transform_if(InputIterator first, + InputIterator last, + ForwardIterator result, + UnaryFunction unary_op, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type 
System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::transform_if(select_system(system1,system2), first, last, result, unary_op, pred); +} // end transform_if() + + +template + ForwardIterator transform_if(InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + ForwardIterator result, + UnaryFunction unary_op, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + + System1 system1; + System2 system2; + System3 system3; + + return thrust::transform_if(select_system(system1,system2,system3), first, last, stencil, result, unary_op, pred); +} // end transform_if() + + +template + ForwardIterator transform_if(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator3 stencil, + ForwardIterator result, + BinaryFunction binary_op, + Predicate pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + + return thrust::transform_if(select_system(system1,system2,system3,system4), first1, last1, first2, stencil, result, binary_op, pred); +} // end transform_if() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/transform_reduce.inl b/compat/thrust/detail/transform_reduce.inl new file mode 100644 index 0000000..ede6503 --- /dev/null +++ b/compat/thrust/detail/transform_reduce.inl @@ -0,0 +1,70 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not 
use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file transform_reduce.inl + * \brief Inline file for transform_reduce.h. + */ + +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + OutputType transform_reduce(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + UnaryFunction unary_op, + OutputType init, + BinaryFunction binary_op) +{ + using thrust::system::detail::generic::transform_reduce; + return transform_reduce(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, unary_op, init, binary_op); +} // end transform_reduce() + + +template + OutputType transform_reduce(InputIterator first, + InputIterator last, + UnaryFunction unary_op, + OutputType init, + BinaryFunction binary_op) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::transform_reduce(select_system(system), first, last, unary_op, init, binary_op); +} // end transform_reduce() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/transform_scan.inl b/compat/thrust/detail/transform_scan.inl new file mode 100644 index 0000000..0187c4b --- /dev/null +++ b/compat/thrust/detail/transform_scan.inl @@ -0,0 +1,115 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file transform_scan.inl + * \brief Inline file for transform_scan.h. + */ + +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + OutputIterator transform_inclusive_scan(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + UnaryFunction unary_op, + AssociativeOperator binary_op) +{ + using thrust::system::detail::generic::transform_inclusive_scan; + return transform_inclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, unary_op, binary_op); +} // end transform_inclusive_scan() + + +template + OutputIterator transform_exclusive_scan(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + UnaryFunction unary_op, + T init, + AssociativeOperator binary_op) +{ + using thrust::system::detail::generic::transform_exclusive_scan; + return transform_exclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, unary_op, init, binary_op); +} // end transform_exclusive_scan() + + +template + OutputIterator transform_inclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result, + UnaryFunction unary_op, + BinaryFunction binary_op) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return 
thrust::transform_inclusive_scan(select_system(system1,system2), first, last, result, unary_op, binary_op); +} // end transform_inclusive_scan() + + +template + OutputIterator transform_exclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result, + UnaryFunction unary_op, + T init, + AssociativeOperator binary_op) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::transform_exclusive_scan(select_system(system1,system2), first, last, result, unary_op, init, binary_op); +} // end transform_exclusive_scan() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/trivial_sequence.h b/compat/thrust/detail/trivial_sequence.h new file mode 100644 index 0000000..cc7e32b --- /dev/null +++ b/compat/thrust/detail/trivial_sequence.h @@ -0,0 +1,87 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file trivial_sequence.h + * \brief Container-like class for wrapping sequences. The wrapped + * sequence always has trivial iterators, even when the input + * sequence does not. 
+ */ + + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +namespace detail +{ + +// primary template: never instantiated; it is empty and only the two cases below carry members +template struct _trivial_sequence { }; + +// trivial case: the caller's iterators are stored and handed back unchanged by begin()/end() +template +struct _trivial_sequence +{ + typedef Iterator iterator_type; + Iterator first, last; + + _trivial_sequence(thrust::execution_policy &, Iterator _first, Iterator _last) : first(_first), last(_last) + { +// nothing to copy here; the execution policy argument is unused + } + + iterator_type begin() { return first; } + iterator_type end() { return last; } +}; + +// non-trivial case: [first, last) is copied into a temporary_array and begin()/end() refer to that copy +template +struct _trivial_sequence +{ + typedef typename thrust::iterator_value::type iterator_value; + typedef typename thrust::detail::temporary_array::iterator iterator_type; + + thrust::detail::temporary_array buffer; + + _trivial_sequence(thrust::execution_policy &exec, Iterator first, Iterator last) + : buffer(exec, first, last) + { +// the copy is performed by buffer's constructor in the initializer list above + } + + iterator_type begin() { return buffer.begin(); } + iterator_type end() { return buffer.end(); } +}; + +template +struct trivial_sequence + : detail::_trivial_sequence::type> +{ + typedef _trivial_sequence::type> super_t; + + trivial_sequence(thrust::execution_policy &exec, Iterator first, Iterator last) : super_t(exec, first, last) { } // forwards all arguments to the selected _trivial_sequence base +}; + +} // end namespace detail + +} // end namespace thrust + diff --git a/compat/thrust/detail/tuple.inl b/compat/thrust/detail/tuple.inl new file mode 100644 index 0000000..067ad63 --- /dev/null +++ b/compat/thrust/detail/tuple.inl @@ -0,0 +1,948 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +namespace thrust +{ + +// define null_type +struct null_type {}; + +// null_type comparisons +__host__ __device__ inline +bool operator==(const null_type&, const null_type&) { return true; } + +__host__ __device__ inline +bool operator>=(const null_type&, const null_type&) { return true; } + +__host__ __device__ inline +bool operator<=(const null_type&, const null_type&) { return true; } + +__host__ __device__ inline +bool operator!=(const null_type&, const null_type&) { return false; } + +__host__ __device__ inline +bool operator<(const null_type&, const null_type&) { return false; } + +__host__ __device__ inline +bool operator>(const null_type&, const null_type&) { return false; } + +// forward declaration for tuple +template < + class T0 = null_type, class T1 = null_type, class T2 = null_type, + class T3 = null_type, class T4 = null_type, class T5 = null_type, + class T6 = null_type, class T7 = null_type, class T8 = null_type, + class T9 = null_type> +class tuple; + +// forward declaration of tuple_element +template struct tuple_element; + +// specializations for tuple_element +template + struct tuple_element<0,T> +{ + typedef typename T::head_type type; +}; // end tuple_element<0,T> + +template + struct tuple_element +{ + private: + typedef typename T::tail_type Next; + typedef typename tuple_element::type unqualified_type; + + public: + typedef typename thrust::detail::add_const::type type; +}; // end tuple_element + +template + struct tuple_element<0,const T> +{ + typedef typename thrust::detail::add_const::type 
type; +}; // end tuple_element<0,const T> + + + +// forward declaration of tuple_size +template struct tuple_size; + +// specializations for tuple_size +template<> + struct tuple_size< tuple<> > +{ + static const int value = 0; +}; // end tuple_size< tuple<> > + +template<> + struct tuple_size +{ + static const int value = 0; +}; // end tuple_size + + + +// forward declaration of detail::cons +namespace detail +{ + +template struct cons; + +} // end detail + + +// -- some traits classes for get functions +template struct access_traits +{ + typedef const T& const_type; + typedef T& non_const_type; + + typedef const typename thrust::detail::remove_cv::type& parameter_type; + +// used as the tuple constructors parameter types +// Rationale: non-reference tuple element types can be cv-qualified. +// It should be possible to initialize such types with temporaries, +// and when binding temporaries to references, the reference must +// be non-volatile and const. 8.5.3. (5) +}; // end access_traits + +template struct access_traits +{ + typedef T& const_type; + typedef T& non_const_type; + + typedef T& parameter_type; +}; // end access_traits + +// forward declarations of get() +template +__host__ __device__ +inline typename access_traits< + typename tuple_element >::type + >::non_const_type +// XXX we probably don't need to do this for any compiler we care about -jph +//get(cons& c BOOST_APPEND_EXPLICIT_TEMPLATE_NON_TYPE(int, N)); +get(detail::cons& c); + +template +__host__ __device__ +inline typename access_traits< + typename tuple_element >::type + >::const_type +// XXX we probably don't need to do this for any compiler we care about -jph +//get(const cons& c BOOST_APPEND_EXPLICIT_TEMPLATE_NON_TYPE(int, N)); +get(const detail::cons& c); + +namespace detail +{ + +// -- generate error template, referencing to non-existing members of this +// template is used to produce compilation errors intentionally +template +class generate_error; + +// - cons getters 
-------------------------------------------------------- +// called: get_class::get(aTuple) + +template< int N > +struct get_class +{ + template + __host__ __device__ + inline static RET get(const cons& t) + { + // XXX we may not need to deal with this for any compiler we care about -jph + //return get_class::BOOST_NESTED_TEMPLATE get(t.tail); + return get_class::template get(t.tail); // general case: keep looking in the tail of the cons list + + // gcc 4.3 couldn't compile this: + //return get_class::get(t.tail); + } + + template + __host__ __device__ + inline static RET get(cons& t) + { + // XXX we may not need to deal with this for any compiler we care about -jph + //return get_class::BOOST_NESTED_TEMPLATE get(t.tail); + return get_class::template get(t.tail); // non-const overload of the same tail recursion + + // gcc 4.3 couldn't compile this: + //return get_class::get(t.tail); + } +}; // end get_class + +template<> +struct get_class<0> +{ + template + __host__ __device__ + inline static RET get(const cons& t) + { + return t.head; // base case: element 0 is the head of the cons list + } + + template + __host__ __device__ + inline static RET get(cons& t) + { + return t.head; // base case, non-const overload + } +}; // end get_class<0> + + +template struct IF +{ + typedef Then RET; // this form of IF selects the Then type +}; + +template struct IF +{ + typedef Else RET; // this form of IF selects the Else type +}; + +// These helper templates wrap void types and plain function types. +// The rationale is to allow one to write tuple types with those types +// as elements, even though it is not possible to instantiate such an object. 
+// E.g: typedef tuple some_type; // ok +// but: some_type x; // fails + +template class non_storeable_type +{ + __host__ __device__ + non_storeable_type(); +}; + +template struct wrap_non_storeable_type +{ + // XXX is_function looks complicated; punt for now -jph + //typedef typename IF< + // ::thrust::detail::is_function::value, non_storeable_type, T + //>::RET type; + + typedef T type; +}; + +template <> struct wrap_non_storeable_type +{ + typedef non_storeable_type type; +}; + + +template + struct cons +{ + typedef HT head_type; + typedef TT tail_type; + + typedef typename + wrap_non_storeable_type::type stored_head_type; + + stored_head_type head; + tail_type tail; + + inline __host__ __device__ + typename access_traits::non_const_type + get_head() { return head; } + + inline __host__ __device__ + typename access_traits::non_const_type + get_tail() { return tail; } + + inline __host__ __device__ + typename access_traits::const_type + get_head() const { return head; } + + inline __host__ __device__ + typename access_traits::const_type + get_tail() const { return tail; } + + inline __host__ __device__ + cons(void) : head(), tail() {} + // cons() : head(detail::default_arg::f()), tail() {} + + // the argument for head is not strictly needed, but it prevents + // array type elements. This is good, since array type elements + // cannot be supported properly in any case (no assignment, + // copy works only if the tails are exactly the same type, ...) 
+ + inline __host__ __device__ + cons(typename access_traits::parameter_type h, + const tail_type& t) + : head (h), tail(t) {} + + template + inline __host__ __device__ + cons( T1& t1, T2& t2, T3& t3, T4& t4, T5& t5, + T6& t6, T7& t7, T8& t8, T9& t9, T10& t10 ) + : head (t1), + tail (t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast(null_type())) + {} + + template + inline __host__ __device__ + cons( const null_type& /*t1*/, T2& t2, T3& t3, T4& t4, T5& t5, + T6& t6, T7& t7, T8& t8, T9& t9, T10& t10 ) + : head (), + tail (t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast(null_type())) + {} + + + template + inline __host__ __device__ + cons( const cons& u ) : head(u.head), tail(u.tail) {} + + template + inline __host__ __device__ + cons& operator=( const cons& u ) { + head=u.head; tail=u.tail; return *this; + } + + // must define assignment operator explicitly, implicit version is + // illformed if HT is a reference (12.8. (12)) + inline __host__ __device__ + cons& operator=(const cons& u) { + head = u.head; tail = u.tail; return *this; + } + + // XXX enable when we support std::pair -jph + //template + //__host__ __device__ + //cons& operator=( const std::pair& u ) { + // //BOOST_STATIC_ASSERT(length::value == 2); // check length = 2 + // head = u.first; tail.head = u.second; return *this; + //} + + // get member functions (non-const and const) + template + __host__ __device__ + typename access_traits< + typename tuple_element >::type + >::non_const_type + get() { + return thrust::get(*this); // delegate to non-member get + } + + template + __host__ __device__ + typename access_traits< + typename tuple_element >::type + >::const_type + get() const { + return thrust::get(*this); // delegate to non-member get + } + + inline __host__ __device__ + void swap(cons &c) + { + using thrust::swap; + + swap(head, c.head); + tail.swap(c.tail); + } +}; + +template + struct cons +{ + typedef HT head_type; + typedef null_type tail_type; + typedef cons self_type; + + typedef typename 
+ wrap_non_storeable_type::type stored_head_type; + stored_head_type head; + + typename access_traits::non_const_type + inline __host__ __device__ + get_head() { return head; } + + inline __host__ __device__ + null_type get_tail() { return null_type(); } + + inline __host__ __device__ + typename access_traits::const_type + get_head() const { return head; } + + inline __host__ __device__ + null_type get_tail() const { return null_type(); } + + inline __host__ __device__ + cons() : head() {} + + inline __host__ __device__ + cons(typename access_traits::parameter_type h, + const null_type& = null_type()) + : head (h) {} + + template + inline __host__ __device__ + cons(T1& t1, const null_type&, const null_type&, const null_type&, + const null_type&, const null_type&, const null_type&, + const null_type&, const null_type&, const null_type&) + : head (t1) {} + + inline __host__ __device__ + cons(const null_type&, + const null_type&, const null_type&, const null_type&, + const null_type&, const null_type&, const null_type&, + const null_type&, const null_type&, const null_type&) + : head () {} + + template + inline __host__ __device__ + cons( const cons& u ) : head(u.head) {} + + template + inline __host__ __device__ + cons& operator=(const cons& u ) + { + head = u.head; + return *this; + } + + // must define assignment operator explicitly, implicit version + // is illformed if HT is a reference + inline __host__ __device__ + cons& operator=(const cons& u) { head = u.head; return *this; } + + template + inline __host__ __device__ + typename access_traits< + typename tuple_element::type + >::non_const_type + // XXX we probably don't need this for the compilers we care about -jph + //get(BOOST_EXPLICIT_TEMPLATE_NON_TYPE(int, N)) + get(void) + { + return thrust::get(*this); + } + + template + inline __host__ __device__ + typename access_traits< + typename tuple_element::type + >::const_type + // XXX we probably don't need this for the compilers we care about -jph + 
//get(BOOST_EXPLICIT_TEMPLATE_NON_TYPE(int, N)) const + get(void) const + { + return thrust::get(*this); + } + + inline __host__ __device__ + void swap(cons &c) + { + using thrust::swap; + + swap(head, c.head); + } +}; // end cons + +template + struct map_tuple_to_cons +{ + typedef cons::type + > type; +}; // end map_tuple_to_cons + +// The empty tuple is a null_type +template <> + struct map_tuple_to_cons +{ + typedef null_type type; +}; // end map_tuple_to_cons<...> + + + +// --------------------------------------------------------------------------- +// The call_traits for make_tuple + +// Must be instantiated with plain or const plain types (not with references) + +// from template foo(const T& t) : make_tuple_traits::type +// from template foo(T& t) : make_tuple_traits::type + +// Conversions: +// T -> T, +// references -> compile_time_error +// array -> const ref array + + +template +struct make_tuple_traits { + typedef T type; + + // commented away, see below (JJ) + // typedef typename IF< + // boost::is_function::value, + // T&, + // T>::RET type; + +}; + +// The is_function test was there originally for plain function types, +// which can't be stored as such (we must either store them as references or +// pointers). Such a type could be formed if make_tuple was called with a +// reference to a function. +// But this would mean that a const qualified function type was formed in +// the make_tuple function and hence make_tuple can't take a function +// reference as a parameter, and thus T can't be a function type. +// So is_function test was removed. +// (14.8.3. says that type deduction fails if a cv-qualified function type +// is created. (It only applies for the case of explicitly specifying template +// args, though?)) (JJ) + +template +struct make_tuple_traits { + typedef typename + detail::generate_error:: + do_not_use_with_reference_type error; +}; + +// Arrays can't be stored as plain types; convert them to references. 
+// All arrays are converted to const. This is because make_tuple takes its +// parameters as const T& and thus the knowledge of the potential +// non-constness of actual argument is lost. +template struct make_tuple_traits { + typedef const T (&type)[n]; +}; + +template +struct make_tuple_traits { + typedef const T (&type)[n]; +}; + +template struct make_tuple_traits { + typedef const volatile T (&type)[n]; +}; + +template +struct make_tuple_traits { + typedef const volatile T (&type)[n]; +}; + +// XXX enable these if we ever care about reference_wrapper -jph +//template +//struct make_tuple_traits >{ +// typedef T& type; +//}; +// +//template +//struct make_tuple_traits >{ +// typedef T& type; +//}; + + +// a helper traits to make the make_tuple functions shorter (Vesa Karvonen's +// suggestion) +template < + class T0 = null_type, class T1 = null_type, class T2 = null_type, + class T3 = null_type, class T4 = null_type, class T5 = null_type, + class T6 = null_type, class T7 = null_type, class T8 = null_type, + class T9 = null_type +> +struct make_tuple_mapper { + typedef + tuple::type, + typename make_tuple_traits::type, + typename make_tuple_traits::type, + typename make_tuple_traits::type, + typename make_tuple_traits::type, + typename make_tuple_traits::type, + typename make_tuple_traits::type, + typename make_tuple_traits::type, + typename make_tuple_traits::type, + typename make_tuple_traits::type> type; +}; + +} // end detail + + +template +__host__ __device__ +inline typename access_traits< + typename tuple_element >::type + >::non_const_type +get(detail::cons& c) +{ + //return detail::get_class::BOOST_NESTED_TEMPLATE + + // gcc 4.3 couldn't compile this: + //return detail::get_class:: + + return detail::get_class::template + get< + typename access_traits< + typename tuple_element >::type + >::non_const_type, + HT,TT + >(c); +} + + +// get function for const cons-lists, returns a const reference to +// the element. 
If the element is a reference, returns the reference +// as such (that is, can return a non-const reference) +template +__host__ __device__ +inline typename access_traits< + typename tuple_element >::type + >::const_type +get(const detail::cons& c) +{ + //return detail::get_class::BOOST_NESTED_TEMPLATE + + // gcc 4.3 couldn't compile this: + //return detail::get_class:: + + return detail::get_class::template + get< + typename access_traits< + typename tuple_element >::type + >::const_type, + HT,TT + >(c); +} + + +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const T0& t0) +{ + typedef typename detail::make_tuple_mapper::type t; + return t(t0); +} // end make_tuple() + +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const T0& t0, const T1& t1) +{ + typedef typename detail::make_tuple_mapper::type t; + return t(t0,t1); +} // end make_tuple() + +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const T0& t0, const T1& t1, const T2& t2) +{ + typedef typename detail::make_tuple_mapper::type t; + return t(t0,t1,t2); +} // end make_tuple() + +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3) +{ + typedef typename detail::make_tuple_mapper::type t; + return t(t0,t1,t2,t3); +} // end make_tuple() + +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4) +{ + typedef typename detail::make_tuple_mapper::type t; + return t(t0,t1,t2,t3,t4); +} // end make_tuple() + +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5) +{ + typedef typename detail::make_tuple_mapper::type t; + return t(t0,t1,t2,t3,t4,t5); +} // end 
make_tuple() + +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6) +{ + typedef typename detail::make_tuple_mapper::type t; + return t(t0,t1,t2,t3,t4,t5,t6); +} // end make_tuple() + +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7) +{ + typedef typename detail::make_tuple_mapper::type t; + return t(t0,t1,t2,t3,t4,t5,t6,t7); +} // end make_tuple() + +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7, const T8& t8) +{ + typedef typename detail::make_tuple_mapper::type t; + return t(t0,t1,t2,t3,t4,t5,t6,t7,t8); +} // end make_tuple() + +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7, const T8& t8, const T9& t9) +{ + typedef typename detail::make_tuple_mapper::type t; + return t(t0,t1,t2,t3,t4,t5,t6,t7,t8,t9); +} // end make_tuple() + + +template +__host__ __device__ inline +tuple tie(T0 &t0) +{ + return tuple(t0); +} + +template +__host__ __device__ inline +tuple tie(T0 &t0, T1 &t1) +{ + return tuple(t0,t1); +} + +template +__host__ __device__ inline +tuple tie(T0 &t0, T1 &t1, T2 &t2) +{ + return tuple(t0,t1,t2); +} + +template +__host__ __device__ inline +tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3) +{ + return tuple(t0,t1,t2,t3); +} + +template +__host__ __device__ inline +tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4) +{ + return tuple(t0,t1,t2,t3,t4); +} + +template +__host__ __device__ inline +tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5) +{ + return 
tuple(t0,t1,t2,t3,t4,t5); +} + +template +__host__ __device__ inline +tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6) +{ + return tuple(t0,t1,t2,t3,t4,t5,t6); +} + +template +__host__ __device__ inline +tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7) +{ + return tuple(t0,t1,t2,t3,t4,t5,t6,t7); +} + +template +__host__ __device__ inline +tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7, T8 &t8) +{ + return tuple(t0,t1,t2,t3,t4,t5,t6,t7,t8); +} + +template +__host__ __device__ inline +tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7, T8 &t8, T9 &t9) +{ + return tuple(t0,t1,t2,t3,t4,t5,t6,t7,t8,t9); +} + +template< + typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9, + typename U0, typename U1, typename U2, typename U3, typename U4, typename U5, typename U6, typename U7, typename U8, typename U9 +> +__host__ __device__ inline +void swap(thrust::tuple &x, + thrust::tuple &y) +{ + return x.swap(y); +} + + + +namespace detail +{ + +template +__host__ __device__ +inline bool eq(const T1& lhs, const T2& rhs) { + return lhs.get_head() == rhs.get_head() && + eq(lhs.get_tail(), rhs.get_tail()); +} +template<> +inline bool eq(const null_type&, const null_type&) { return true; } + +template +__host__ __device__ +inline bool neq(const T1& lhs, const T2& rhs) { + return lhs.get_head() != rhs.get_head() || + neq(lhs.get_tail(), rhs.get_tail()); +} +template<> +__host__ __device__ +inline bool neq(const null_type&, const null_type&) { return false; } + +template +__host__ __device__ +inline bool lt(const T1& lhs, const T2& rhs) { + return (lhs.get_head() < rhs.get_head()) || + (!(rhs.get_head() < lhs.get_head()) && + lt(lhs.get_tail(), rhs.get_tail())); +} +template<> +__host__ __device__ +inline bool lt(const null_type&, const null_type&) { return false; } + +template +__host__ __device__ +inline bool gt(const 
T1& lhs, const T2& rhs) { + return (lhs.get_head() > rhs.get_head()) || + (!(rhs.get_head() > lhs.get_head()) && + gt(lhs.get_tail(), rhs.get_tail())); +} +template<> +__host__ __device__ +inline bool gt(const null_type&, const null_type&) { return false; } + +template +__host__ __device__ +inline bool lte(const T1& lhs, const T2& rhs) { + return lhs.get_head() <= rhs.get_head() && + ( !(rhs.get_head() <= lhs.get_head()) || + lte(lhs.get_tail(), rhs.get_tail())); +} +template<> +__host__ __device__ +inline bool lte(const null_type&, const null_type&) { return true; } + +template +__host__ __device__ +inline bool gte(const T1& lhs, const T2& rhs) { + return lhs.get_head() >= rhs.get_head() && + ( !(rhs.get_head() >= lhs.get_head()) || + gte(lhs.get_tail(), rhs.get_tail())); +} +template<> +__host__ __device__ +inline bool gte(const null_type&, const null_type&) { return true; } + +} // end detail + + + +// equal ---- + +template +__host__ __device__ +inline bool operator==(const detail::cons& lhs, const detail::cons& rhs) +{ + // XXX support this eventually -jph + //// check that tuple lengths are equal + //BOOST_STATIC_ASSERT(tuple_size::value == tuple_size::value); + + return detail::eq(lhs, rhs); +} // end operator==() + +// not equal ----- + +template +__host__ __device__ +inline bool operator!=(const detail::cons& lhs, const detail::cons& rhs) +{ + // XXX support this eventually -jph + //// check that tuple lengths are equal + //BOOST_STATIC_ASSERT(tuple_size::value == tuple_size::value); + + return detail::neq(lhs, rhs); +} // end operator!=() + +// < +template +__host__ __device__ +inline bool operator<(const detail::cons& lhs, const detail::cons& rhs) +{ + // XXX support this eventually -jph + //// check that tuple lengths are equal + //BOOST_STATIC_ASSERT(tuple_size::value == tuple_size::value); + + return detail::lt(lhs, rhs); +} // end operator<() + +// > +template +__host__ __device__ +inline bool operator>(const detail::cons& lhs, const detail::cons& 
rhs) +{ + // XXX support this eventually -jph + //// check that tuple lengths are equal + //BOOST_STATIC_ASSERT(tuple_size::value == tuple_size::value); + + return detail::gt(lhs, rhs); +} // end operator>() + +// <= +template +__host__ __device__ +inline bool operator<=(const detail::cons& lhs, const detail::cons& rhs) +{ + // XXX support this eventually -jph + //// check that tuple lengths are equal + //BOOST_STATIC_ASSERT(tuple_size::value == tuple_size::value); + + return detail::lte(lhs, rhs); +} // end operator<=() + +// >= +template +__host__ __device__ +inline bool operator>=(const detail::cons& lhs, const detail::cons& rhs) +{ + // XXX support this eventually -jph + //// check that tuple lengths are equal + //BOOST_STATIC_ASSERT(tuple_size::value == tuple_size::value); + + return detail::gte(lhs, rhs); +} // end operator>=() + +} // end thrust + diff --git a/compat/thrust/detail/tuple_meta_transform.h b/compat/thrust/detail/tuple_meta_transform.h new file mode 100644 index 0000000..ff99709 --- /dev/null +++ b/compat/thrust/detail/tuple_meta_transform.h @@ -0,0 +1,177 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +namespace thrust +{ + +namespace detail +{ + +template class UnaryMetaFunction, + unsigned int sz = thrust::tuple_size::value> + struct tuple_meta_transform; + +template class UnaryMetaFunction> + struct tuple_meta_transform +{ + typedef null_type type; +}; + +template class UnaryMetaFunction> + struct tuple_meta_transform +{ + typedef thrust::tuple< + typename UnaryMetaFunction::type>::type + > type; +}; + +template class UnaryMetaFunction> + struct tuple_meta_transform +{ + typedef thrust::tuple< + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type + > type; +}; + +template class UnaryMetaFunction> + struct tuple_meta_transform +{ + typedef thrust::tuple< + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type + > type; +}; + +template class UnaryMetaFunction> + struct tuple_meta_transform +{ + typedef thrust::tuple< + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type + > type; +}; + +template class UnaryMetaFunction> + struct tuple_meta_transform +{ + typedef thrust::tuple< + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type + > type; +}; + +template class UnaryMetaFunction> + struct tuple_meta_transform +{ + typedef thrust::tuple< + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type + > type; +}; + +template class UnaryMetaFunction> + struct tuple_meta_transform +{ + typedef thrust::tuple< + typename UnaryMetaFunction::type>::type, + typename 
UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type + > type; +}; + +template class UnaryMetaFunction> + struct tuple_meta_transform +{ + typedef thrust::tuple< + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type + > type; +}; + +template class UnaryMetaFunction> + struct tuple_meta_transform +{ + typedef thrust::tuple< + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type + > type; +}; + +template class UnaryMetaFunction> + struct tuple_meta_transform +{ + typedef thrust::tuple< + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type, + typename UnaryMetaFunction::type>::type + > type; +}; + +} // end detail + +} // end thrust + diff --git a/compat/thrust/detail/tuple_transform.h b/compat/thrust/detail/tuple_transform.h new file mode 100644 index 0000000..f18b872 --- /dev/null +++ b/compat/thrust/detail/tuple_transform.h @@ -0,0 +1,418 @@ 
+/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ + +namespace detail +{ + +template class UnaryMetaFunction, + typename UnaryFunction, + unsigned int sz = thrust::tuple_size::value> + struct tuple_transform_functor; + + +template class UnaryMetaFunction, + typename UnaryFunction> + struct tuple_transform_functor +{ + static __host__ + typename tuple_meta_transform::type + do_it_on_the_host(const Tuple &t, UnaryFunction f) + { + return thrust::null_type(); + } + + static __host__ __device__ + typename tuple_meta_transform::type + do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) + { + return thrust::null_type(); + } +}; + + +template class UnaryMetaFunction, + typename UnaryFunction> + struct tuple_transform_functor +{ + static __host__ + typename tuple_meta_transform::type + do_it_on_the_host(const Tuple &t, UnaryFunction f) + { + typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t))); + } + + static __host__ __device__ + typename tuple_meta_transform::type + do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) + { + typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t))); + } +}; + + +template class UnaryMetaFunction, + typename UnaryFunction> + struct tuple_transform_functor +{ + static __host__ + typename tuple_meta_transform::type + 
do_it_on_the_host(const Tuple &t, UnaryFunction f) + { + typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t)), + f(thrust::get<1>(t))); + } + + static __host__ __device__ + typename tuple_meta_transform::type + do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) + { + typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t)), + f(thrust::get<1>(t))); + } +}; + + +template class UnaryMetaFunction, + typename UnaryFunction> + struct tuple_transform_functor +{ + static __host__ + typename tuple_meta_transform::type + do_it_on_the_host(const Tuple &t, UnaryFunction f) + { + typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t)), + f(thrust::get<1>(t)), + f(thrust::get<2>(t))); + } + + static __host__ __device__ + typename tuple_meta_transform::type + do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) + { + typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t)), + f(thrust::get<1>(t)), + f(thrust::get<2>(t))); + } +}; + + +template class UnaryMetaFunction, + typename UnaryFunction> + struct tuple_transform_functor +{ + static __host__ + typename tuple_meta_transform::type + do_it_on_the_host(const Tuple &t, UnaryFunction f) + { + typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t)), + f(thrust::get<1>(t)), + f(thrust::get<2>(t)), + f(thrust::get<3>(t))); + } + + static __host__ __device__ + typename tuple_meta_transform::type + do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) + { + typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t)), + f(thrust::get<1>(t)), + f(thrust::get<2>(t)), + f(thrust::get<3>(t))); + } +}; + + +template class UnaryMetaFunction, + typename UnaryFunction> + struct tuple_transform_functor +{ + static __host__ + typename tuple_meta_transform::type + 
do_it_on_the_host(const Tuple &t, UnaryFunction f) + { + typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t)), + f(thrust::get<1>(t)), + f(thrust::get<2>(t)), + f(thrust::get<3>(t)), + f(thrust::get<4>(t))); + } + + static __host__ __device__ + typename tuple_meta_transform::type + do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) + { + typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t)), + f(thrust::get<1>(t)), + f(thrust::get<2>(t)), + f(thrust::get<3>(t)), + f(thrust::get<4>(t))); + } +}; + + +template class UnaryMetaFunction, + typename UnaryFunction> + struct tuple_transform_functor +{ + static __host__ + typename tuple_meta_transform::type + do_it_on_the_host(const Tuple &t, UnaryFunction f) + { + typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t)), + f(thrust::get<1>(t)), + f(thrust::get<2>(t)), + f(thrust::get<3>(t)), + f(thrust::get<4>(t)), + f(thrust::get<5>(t))); + } + + static __host__ __device__ + typename tuple_meta_transform::type + do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) + { + typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t)), + f(thrust::get<1>(t)), + f(thrust::get<2>(t)), + f(thrust::get<3>(t)), + f(thrust::get<4>(t)), + f(thrust::get<5>(t))); + } +}; + + +template class UnaryMetaFunction, + typename UnaryFunction> + struct tuple_transform_functor +{ + static __host__ + typename tuple_meta_transform::type + do_it_on_the_host(const Tuple &t, UnaryFunction f) + { + typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t)), + f(thrust::get<1>(t)), + f(thrust::get<2>(t)), + f(thrust::get<3>(t)), + f(thrust::get<4>(t)), + f(thrust::get<5>(t)), + f(thrust::get<6>(t))); + } + + static __host__ __device__ + typename tuple_meta_transform::type + do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) + { 
+ typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t)), + f(thrust::get<1>(t)), + f(thrust::get<2>(t)), + f(thrust::get<3>(t)), + f(thrust::get<4>(t)), + f(thrust::get<5>(t)), + f(thrust::get<6>(t))); + } +}; + + +template class UnaryMetaFunction, + typename UnaryFunction> + struct tuple_transform_functor +{ + static __host__ + typename tuple_meta_transform::type + do_it_on_the_host(const Tuple &t, UnaryFunction f) + { + typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t)), + f(thrust::get<1>(t)), + f(thrust::get<2>(t)), + f(thrust::get<3>(t)), + f(thrust::get<4>(t)), + f(thrust::get<5>(t)), + f(thrust::get<6>(t)), + f(thrust::get<7>(t))); + } + + static __host__ __device__ + typename tuple_meta_transform::type + do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) + { + typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t)), + f(thrust::get<1>(t)), + f(thrust::get<2>(t)), + f(thrust::get<3>(t)), + f(thrust::get<4>(t)), + f(thrust::get<5>(t)), + f(thrust::get<6>(t)), + f(thrust::get<7>(t))); + } +}; + + +template class UnaryMetaFunction, + typename UnaryFunction> + struct tuple_transform_functor +{ + static __host__ + typename tuple_meta_transform::type + do_it_on_the_host(const Tuple &t, UnaryFunction f) + { + typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t)), + f(thrust::get<1>(t)), + f(thrust::get<2>(t)), + f(thrust::get<3>(t)), + f(thrust::get<4>(t)), + f(thrust::get<5>(t)), + f(thrust::get<6>(t)), + f(thrust::get<7>(t)), + f(thrust::get<8>(t))); + } + + static __host__ __device__ + typename tuple_meta_transform::type + do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) + { + typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t)), + f(thrust::get<1>(t)), + f(thrust::get<2>(t)), + f(thrust::get<3>(t)), + f(thrust::get<4>(t)), + 
f(thrust::get<5>(t)), + f(thrust::get<6>(t)), + f(thrust::get<7>(t)), + f(thrust::get<8>(t))); + } +}; + + +template class UnaryMetaFunction, + typename UnaryFunction> + struct tuple_transform_functor +{ + static __host__ + typename tuple_meta_transform::type + do_it_on_the_host(const Tuple &t, UnaryFunction f) + { + typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t)), + f(thrust::get<1>(t)), + f(thrust::get<2>(t)), + f(thrust::get<3>(t)), + f(thrust::get<4>(t)), + f(thrust::get<5>(t)), + f(thrust::get<6>(t)), + f(thrust::get<7>(t)), + f(thrust::get<8>(t)), + f(thrust::get<9>(t))); + } + + static __host__ __device__ + typename tuple_meta_transform::type + do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) + { + typedef typename tuple_meta_transform::type XfrmTuple; + + return XfrmTuple(f(thrust::get<0>(t)), + f(thrust::get<1>(t)), + f(thrust::get<2>(t)), + f(thrust::get<3>(t)), + f(thrust::get<4>(t)), + f(thrust::get<5>(t)), + f(thrust::get<6>(t)), + f(thrust::get<7>(t)), + f(thrust::get<8>(t)), + f(thrust::get<9>(t))); + } +}; + + +template class UnaryMetaFunction, + typename Tuple, + typename UnaryFunction> +typename tuple_meta_transform::type +tuple_host_transform(const Tuple &t, UnaryFunction f) +{ + return tuple_transform_functor::do_it_on_the_host(t,f); +} + +template class UnaryMetaFunction, + typename Tuple, + typename UnaryFunction> +typename tuple_meta_transform::type +__host__ __device__ +tuple_host_device_transform(const Tuple &t, UnaryFunction f) +{ + return tuple_transform_functor::do_it_on_the_host_or_device(t,f); +} + +} // end detail + +} // end thrust + diff --git a/compat/thrust/detail/type_traits.h b/compat/thrust/detail/type_traits.h new file mode 100644 index 0000000..5dbeb90 --- /dev/null +++ b/compat/thrust/detail/type_traits.h @@ -0,0 +1,641 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this 
file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file type_traits.h + * \brief Temporarily define some type traits + * until nvcc can compile tr1::type_traits. + */ + +#pragma once + +#include + +// XXX nvcc 2.2 closed beta can't compile type_traits +//// find type_traits +// +//#ifdef __GNUC__ +// +//#if __GNUC__ == 4 && __GNUC_MINOR__ == 2 +//#include +//#elif __GNUC__ == 4 && __GNUC_MINOR__ > 2 +//#include +//#endif // GCC version +// +//#endif // GCC +// +//#ifdef _MSC_VER +//#include +//#endif // MSVC + + +namespace thrust +{ + +// forward declaration of device_reference +template class device_reference; + +namespace detail +{ + /// helper classes [4.3]. 
+ template + struct integral_constant + { + static const _Tp value = __v; + typedef _Tp value_type; + typedef integral_constant<_Tp, __v> type; + }; + + /// typedef for true_type + typedef integral_constant true_type; + + /// typedef for true_type + typedef integral_constant false_type; + +//template struct is_integral : public std::tr1::is_integral {}; +template struct is_integral : public false_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; +template<> struct is_integral : public true_type {}; + +template struct is_floating_point : public false_type {}; +template<> struct is_floating_point : public true_type {}; +template<> struct is_floating_point : public true_type {}; +template<> struct is_floating_point : public true_type {}; + +template struct is_arithmetic : public is_integral {}; +template<> struct is_arithmetic : public 
true_type {}; +template<> struct is_arithmetic : public true_type {}; +template<> struct is_arithmetic : public true_type {}; +template<> struct is_arithmetic : public true_type {}; + +template struct is_pointer : public false_type {}; +template struct is_pointer : public true_type {}; + +template struct is_device_ptr : public false_type {}; + +template struct is_void : public false_type {}; +template<> struct is_void : public true_type {}; +template<> struct is_void : public true_type {}; + + +namespace tt_detail +{ + + +} // end tt_detail + +template struct is_pod + : public integral_constant< + bool, + is_void::value || is_pointer::value || is_arithmetic::value +#if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC +// use intrinsic type traits + || __is_pod(T) +#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC +// only use the intrinsic for >= 4.3 +#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 3) + || __is_pod(T) +#endif // GCC VERSION +#endif // THRUST_HOST_COMPILER + > + {}; + + +template struct has_trivial_constructor + : public integral_constant< + bool, + is_pod::value +#if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC + || __has_trivial_constructor(T) +#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC +// only use the intrinsic for >= 4.3 +#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 3) + || __has_trivial_constructor(T) +#endif // GCC VERSION +#endif // THRUST_HOST_COMPILER + > +{}; + +template struct has_trivial_copy_constructor + : public integral_constant< + bool, + is_pod::value +#if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC + || __has_trivial_copy(T) +#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC +// only use the intrinsic for >= 4.3 +#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 3) + || __has_trivial_copy(T) +#endif // GCC VERSION +#endif // THRUST_HOST_COMPILER + > +{}; + +template struct has_trivial_destructor : public is_pod {}; + +template struct is_const : public false_type {}; +template struct is_const : public true_type 
{}; + +template struct is_volatile : public false_type {}; +template struct is_volatile : public true_type {}; + +template + struct add_const +{ + typedef T const type; +}; // end add_const + +template + struct remove_const +{ + typedef T type; +}; // end remove_const + +template + struct remove_const +{ + typedef T type; +}; // end remove_const + +template + struct add_volatile +{ + typedef volatile T type; +}; // end add_volatile + +template + struct remove_volatile +{ + typedef T type; +}; // end remove_volatile + +template + struct remove_volatile +{ + typedef T type; +}; // end remove_volatile + +template + struct add_cv +{ + typedef const volatile T type; +}; // end add_cv + +template + struct remove_cv +{ + typedef typename remove_const::type>::type type; +}; // end remove_cv + + +template struct is_reference : public false_type {}; +template struct is_reference : public true_type {}; + +template struct is_device_reference : public false_type {}; +template struct is_device_reference< thrust::device_reference > : public true_type {}; + + +// NB: Careful with reference to void. 
+template::value || is_reference<_Tp>::value)> + struct __add_reference_helper + { typedef _Tp& type; }; + +template + struct __add_reference_helper<_Tp, true> + { typedef _Tp type; }; + +template + struct add_reference + : public __add_reference_helper<_Tp>{}; + +template + struct remove_reference +{ + typedef T type; +}; // end remove_reference + +template + struct remove_reference +{ + typedef T type; +}; // end remove_reference + +template + struct is_same + : public false_type +{ +}; // end is_same + +template + struct is_same + : public true_type +{ +}; // end is_same + +template + struct lazy_is_same + : is_same +{ +}; // end lazy_is_same + +template + struct is_different + : public true_type +{ +}; // end is_different + +template + struct is_different + : public false_type +{ +}; // end is_different + +template + struct lazy_is_different + : is_different +{ +}; // end lazy_is_different + +namespace tt_detail +{ + +template + struct is_int_or_cref +{ + typedef typename remove_reference::type type_sans_ref; + static const bool value = (is_integral::value + || (is_integral::value + && is_const::value + && !is_volatile::value)); +}; // end is_int_or_cref + + +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN +__THRUST_DISABLE_MSVC_FORCING_VALUE_TO_BOOL_BEGIN + + +template + struct is_convertible_sfinae +{ + private: + typedef char one_byte; + typedef struct { char two_chars[2]; } two_bytes; + + static one_byte test(To); + static two_bytes test(...); + static From m_from; + + public: + static const bool value = sizeof(test(m_from)) == sizeof(one_byte); +}; // end is_convertible_sfinae + + +__THRUST_DISABLE_MSVC_FORCING_VALUE_TO_BOOL_END +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END + + +template + struct is_convertible_needs_simple_test +{ + static const bool from_is_void = is_void::value; + static const bool to_is_void = is_void::value; + static const bool from_is_float = is_floating_point::type>::value; + static const bool to_is_int_or_cref 
= is_int_or_cref::value; + + static const bool value = (from_is_void || to_is_void || (from_is_float && to_is_int_or_cref)); +}; // end is_convertible_needs_simple_test + + +template::value> + struct is_convertible +{ + static const bool value = (is_void::value + || (is_int_or_cref::value + && !is_void::value)); +}; // end is_convertible + + +template + struct is_convertible +{ + static const bool value = (is_convertible_sfinae::type, To>::value); +}; // end is_convertible + + +} // end tt_detail + +template + struct is_convertible + : public integral_constant::value> +{ +}; // end is_convertible + + +template + struct is_one_convertible_to_the_other + : public integral_constant< + bool, + is_convertible::value || is_convertible::value + > +{}; + + +// mpl stuff + +template + struct or_ + : public integral_constant< + bool, + Condition1::value || Condition2::value || Condition3::value || Condition4::value || Condition5::value || Condition6::value || Condition7::value || Condition8::value || Condition9::value || Condition10::value + > +{ +}; // end or_ + +template + struct and_ + : public integral_constant +{ +}; // end and_ + +template + struct not_ + : public integral_constant +{ +}; // end not_ + +template + struct eval_if +{ +}; // end eval_if + +template + struct eval_if +{ + typedef typename Then::type type; +}; // end eval_if + +template + struct eval_if +{ + typedef typename Else::type type; +}; // end eval_if + +template +// struct identity +// XXX WAR nvcc's confusion with thrust::identity + struct identity_ +{ + typedef T type; +}; // end identity + +template struct enable_if {}; +template struct enable_if {typedef T type;}; + +template struct lazy_enable_if {}; +template struct lazy_enable_if {typedef typename T::type type;}; + +template struct disable_if : enable_if {}; +template struct lazy_disable_if : lazy_enable_if {}; + + +template + struct enable_if_convertible + : enable_if< is_convertible::value, T > +{}; + + +template + struct 
disable_if_convertible + : disable_if< is_convertible::value, T > +{}; + + +template + struct enable_if_different + : enable_if::value, Result> +{}; + + +template + struct is_numeric + : and_< + is_convertible, + is_convertible + > +{ +}; // end is_numeric + + +template struct is_reference_to_const : false_type {}; +template struct is_reference_to_const : true_type {}; + + +// make_unsigned follows + +namespace tt_detail +{ + +template struct make_unsigned_simple; + +template<> struct make_unsigned_simple { typedef unsigned char type; }; +template<> struct make_unsigned_simple { typedef signed char type; }; +template<> struct make_unsigned_simple { typedef unsigned char type; }; +template<> struct make_unsigned_simple { typedef unsigned short type; }; +template<> struct make_unsigned_simple { typedef unsigned short type; }; +template<> struct make_unsigned_simple { typedef unsigned int type; }; +template<> struct make_unsigned_simple { typedef unsigned int type; }; +template<> struct make_unsigned_simple { typedef unsigned long int type; }; +template<> struct make_unsigned_simple { typedef unsigned long int type; }; +template<> struct make_unsigned_simple { typedef unsigned long long int type; }; +template<> struct make_unsigned_simple { typedef unsigned long long int type; }; + +template + struct make_unsigned_base +{ + // remove cv + typedef typename remove_cv::type remove_cv_t; + + // get the simple unsigned type + typedef typename make_unsigned_simple::type unsigned_remove_cv_t; + + // add back const, volatile, both, or neither to the simple result + typedef typename eval_if< + is_const::value && is_volatile::value, + // add cv back + add_cv, + // check const & volatile individually + eval_if< + is_const::value, + // add c back + add_const, + eval_if< + is_volatile::value, + // add v back + add_volatile, + // original type was neither cv, return the simple unsigned result + identity_ + > + > + >::type type; +}; + +} // end tt_detail + +template + struct 
make_unsigned + : tt_detail::make_unsigned_base +{}; + +struct largest_available_float +{ +#if defined(__CUDA_ARCH__) +# if (__CUDA_ARCH__ < 130) + typedef float type; +# else + typedef double type; +# endif +#else + typedef double type; +#endif +}; + +// T1 wins if they are both the same size +template + struct larger_type + : thrust::detail::eval_if< + (sizeof(T2) > sizeof(T1)), + thrust::detail::identity_, + thrust::detail::identity_ + > +{}; + + +namespace is_base_of_ns +{ + +typedef char yes; +typedef struct { char two_chars[2]; } no; + +template + struct host +{ + operator Base*() const; + operator Derived*(); +}; // end host + +template + struct impl +{ + template static yes check(Derived *, T); + static no check(Base*, int); + + static const bool value = sizeof(check(host(), int())) == sizeof(yes); +}; // end impl + +} // end is_base_of_ns + + +template + struct is_base_of + : integral_constant< + bool, + is_base_of_ns::impl::value + > +{}; + +template + struct enable_if_base_of + : enable_if< + is_base_of::value, + Result + > +{}; + + +namespace is_assignable_ns +{ + +template + class is_assignable +{ + typedef char yes_type; + typedef struct { char array[2]; } no_type; + + template static typename add_reference::type declval(); + + template struct helper { typedef void * type; }; + + template static yes_type test(typename helper() = declval())>::type); + + template static no_type test(...); + + public: + static const bool value = sizeof(test(0)) == 1; +}; // end is_assignable + +} // end is_assignable_ns + + +template + struct is_assignable + : integral_constant< + bool, + is_assignable_ns::is_assignable::value + > +{}; + + +template + struct is_copy_assignable + : is_assignable< + typename add_reference::type, + typename add_reference::type>::type + > +{}; + + +} // end detail + +} // end thrust + +#include + diff --git a/compat/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h 
b/compat/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h new file mode 100644 index 0000000..92767b5 --- /dev/null +++ b/compat/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h @@ -0,0 +1,61 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +namespace detail +{ + +// this trait reports what type should be used as a temporary in certain algorithms +// which aggregate intermediate results from a function before writing to an output iterator + +// the pseudocode for deducing the type of the temporary used below: +// +// if Function is an AdaptableFunction +// result = Function::result_type +// else if OutputIterator2 is a "pure" output iterator +// result = InputIterator2::value_type +// else +// result = OutputIterator2::value_type +// +// XXX upon c++0x, TemporaryType needs to be: +// result_of::type +template + struct intermediate_type_from_function_and_iterators + : eval_if< + has_result_type::value, + result_type, + eval_if< + is_output_iterator::value, + thrust::iterator_value, + thrust::iterator_value + > + > +{ +}; // end intermediate_type_from_function_and_iterators + +} // end detail + +} // end thrust + diff --git a/compat/thrust/detail/type_traits/function_traits.h b/compat/thrust/detail/type_traits/function_traits.h new file mode 100644 index 
0000000..39015c6 --- /dev/null +++ b/compat/thrust/detail/type_traits/function_traits.h @@ -0,0 +1,96 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ + +// forward definitions for is_commutative +template struct plus; +template struct multiplies; +template struct minimum; +template struct maximum; +template struct logical_or; +template struct logical_and; +template struct bit_or; +template struct bit_and; +template struct bit_xor; + +namespace detail +{ + + +// some metafunctions which check for the nested types of the adaptable functions + +__THRUST_DEFINE_HAS_NESTED_TYPE(has_result_type, result_type) + +__THRUST_DEFINE_HAS_NESTED_TYPE(has_argument_type, argument_type) + +__THRUST_DEFINE_HAS_NESTED_TYPE(has_first_argument_type, first_argument_type) + +__THRUST_DEFINE_HAS_NESTED_TYPE(has_second_argument_type, second_argument_type) + + +template + struct result_type +{ + typedef typename AdaptableBinaryFunction::result_type type; +}; + + +template + struct is_adaptable_unary_function + : thrust::detail::and_< + has_result_type, + has_argument_type + > +{}; + + +template + struct is_adaptable_binary_function + : thrust::detail::and_< + has_result_type, + thrust::detail::and_< + has_first_argument_type, + has_second_argument_type + > + > +{}; + + +template + struct is_commutative + : public thrust::detail::false_type +{}; + +template struct 
is_commutative< typename thrust::plus > : public thrust::detail::is_arithmetic {}; +template struct is_commutative< typename thrust::multiplies > : public thrust::detail::is_arithmetic {}; +template struct is_commutative< typename thrust::minimum > : public thrust::detail::is_arithmetic {}; +template struct is_commutative< typename thrust::maximum > : public thrust::detail::is_arithmetic {}; +template struct is_commutative< typename thrust::logical_or > : public thrust::detail::is_arithmetic {}; +template struct is_commutative< typename thrust::logical_and > : public thrust::detail::is_arithmetic {}; +template struct is_commutative< typename thrust::bit_or > : public thrust::detail::is_arithmetic {}; +template struct is_commutative< typename thrust::bit_and > : public thrust::detail::is_arithmetic {}; +template struct is_commutative< typename thrust::bit_xor > : public thrust::detail::is_arithmetic {}; + +} // end namespace detail +} // end namespace thrust + diff --git a/compat/thrust/detail/type_traits/has_member_function.h b/compat/thrust/detail/type_traits/has_member_function.h new file mode 100644 index 0000000..117f4cb --- /dev/null +++ b/compat/thrust/detail/type_traits/has_member_function.h @@ -0,0 +1,118 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +#define __THRUST_DEFINE_HAS_MEMBER_FUNCTION(trait_name, member_function_name) \ +template class trait_name; \ + \ +template \ +class trait_name \ +{ \ + class yes { char m; }; \ + class no { yes m[2]; }; \ + struct base_mixin \ + { \ + Result member_function_name(); \ + }; \ + struct base : public T, public base_mixin {}; \ + template class helper{}; \ + template \ + static no deduce(U*, helper* = 0); \ + static yes deduce(...); \ +public: \ + static const bool value = sizeof(yes) == sizeof(deduce(static_cast(0))); \ + typedef thrust::detail::integral_constant type; \ +}; \ + \ +template \ +class trait_name \ +{ \ + class yes { char m; }; \ + class no { yes m[2]; }; \ + struct base_mixin \ + { \ + Result member_function_name(Arg); \ + }; \ + struct base : public T, public base_mixin {}; \ + template class helper{}; \ + template \ + static no deduce(U*, helper* = 0); \ + static yes deduce(...); \ +public: \ + static const bool value = sizeof(yes) == sizeof(deduce(static_cast(0))); \ + typedef thrust::detail::integral_constant type; \ +}; \ + \ +template \ +class trait_name \ +{ \ + class yes { char m; }; \ + class no { yes m[2]; }; \ + struct base_mixin \ + { \ + Result member_function_name(Arg1,Arg2); \ + }; \ + struct base : public T, public base_mixin {}; \ + template class helper{}; \ + template \ + static no deduce(U*, helper* = 0); \ + static yes deduce(...); \ +public: \ + static const bool value = sizeof(yes) == sizeof(deduce(static_cast(0))); \ + typedef thrust::detail::integral_constant type; \ +}; \ + \ +template \ +class trait_name \ +{ \ + class yes { char m; }; \ + class no { yes m[2]; }; \ + struct base_mixin \ + { \ + Result member_function_name(Arg1,Arg2,Arg3); \ + }; \ + struct base : public T, public base_mixin {}; \ + template class helper{}; \ + template \ + static no deduce(U*, helper* = 0); \ + static yes deduce(...); \ +public: \ + static const bool value = sizeof(yes) == sizeof(deduce(static_cast(0))); \ + 
typedef thrust::detail::integral_constant type; \ +}; \ + \ +template \ +class trait_name \ +{ \ + class yes { char m; }; \ + class no { yes m[2]; }; \ + struct base_mixin \ + { \ + Result member_function_name(Arg1,Arg2,Arg3,Arg4); \ + }; \ + struct base : public T, public base_mixin {}; \ + template class helper{}; \ + template \ + static no deduce(U*, helper* = 0); \ + static yes deduce(...); \ +public: \ + static const bool value = sizeof(yes) == sizeof(deduce(static_cast(0))); \ + typedef thrust::detail::integral_constant type; \ +}; + diff --git a/compat/thrust/detail/type_traits/has_nested_type.h b/compat/thrust/detail/type_traits/has_nested_type.h new file mode 100644 index 0000000..98c9460 --- /dev/null +++ b/compat/thrust/detail/type_traits/has_nested_type.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +#define __THRUST_DEFINE_HAS_NESTED_TYPE(trait_name, nested_type_name) \ +template \ + struct trait_name \ +{ \ + typedef char yes_type; \ + typedef int no_type; \ + template static yes_type test(typename S::nested_type_name *); \ + template static no_type test(...); \ + static bool const value = sizeof(test(0)) == sizeof(yes_type);\ + typedef thrust::detail::integral_constant type;\ +}; + diff --git a/compat/thrust/detail/type_traits/has_trivial_assign.h b/compat/thrust/detail/type_traits/has_trivial_assign.h new file mode 100644 index 0000000..d248245 --- /dev/null +++ b/compat/thrust/detail/type_traits/has_trivial_assign.h @@ -0,0 +1,52 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file type_traits.h + * \brief Temporarily define some type traits + * until nvcc can compile tr1::type_traits. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ + +namespace detail +{ + +template struct has_trivial_assign + : public integral_constant< + bool, + (is_pod::value && !is_const::value) +#if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC + || __has_trivial_assign(T) +#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC +// only use the intrinsic for GCC >= 4.3; compare major and minor together so 5.0, 6.1, etc. also qualify +#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 3)) + || __has_trivial_assign(T) +#endif // GCC VERSION +#endif // THRUST_HOST_COMPILER + > +{}; + +} // end detail + +} // end thrust + diff --git a/compat/thrust/detail/type_traits/is_call_possible.h b/compat/thrust/detail/type_traits/is_call_possible.h new file mode 100644 index 0000000..41b9539 --- /dev/null +++ b/compat/thrust/detail/type_traits/is_call_possible.h @@ -0,0 +1,161 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +// inspired by Roman Perepelitsa's presentation from comp.lang.c++.moderated +// based on the implementation here: http://www.rsdn.ru/forum/cpp/2759773.1.aspx + +namespace thrust +{ +namespace detail +{ +namespace is_call_possible_detail +{ + +template class void_exp_result {}; + +template +U const& operator,(U const&, void_exp_result); + +template +U& operator,(U&, void_exp_result); + +template +struct clone_constness +{ + typedef dest_type type; +}; + +template +struct clone_constness +{ + typedef const dest_type type; +}; + +} // end is_call_possible_detail +} // end detail +} // end thrust + +#define __THRUST_DEFINE_IS_CALL_POSSIBLE(trait_name, member_function_name) \ +__THRUST_DEFINE_HAS_MEMBER_FUNCTION(trait_name##_has_member, member_function_name) \ + \ +template \ +struct trait_name \ +{ \ + private: \ + struct yes {}; \ + struct no { yes m[2]; }; \ + struct derived : public T \ + { \ + using T::member_function_name; \ + no member_function_name(...) 
const; \ + }; \ + \ + typedef typename thrust::detail::is_call_possible_detail::clone_constness::type derived_type; \ + \ + template \ + struct return_value_check \ + { \ + static yes deduce(Result); \ + static no deduce(...); \ + static no deduce(no); \ + static no deduce(thrust::detail::is_call_possible_detail::void_exp_result); \ + }; \ + \ + template \ + struct return_value_check \ + { \ + static yes deduce(...); \ + static no deduce(no); \ + }; \ + \ + template \ + struct impl \ + { \ + static const bool value = false; \ + }; \ + \ + template \ + struct impl \ + { \ + static typename add_reference::type test_me; \ + static typename add_reference::type arg; \ + \ + static const bool value = \ + sizeof( \ + return_value_check::deduce( \ + (test_me.member_function_name(arg), thrust::detail::is_call_possible_detail::void_exp_result()) \ + ) \ + ) == sizeof(yes); \ + }; \ + \ + template \ + struct impl \ + { \ + static typename add_reference::type test_me; \ + static typename add_reference::type arg1; \ + static typename add_reference::type arg2; \ + \ + static const bool value = \ + sizeof( \ + return_value_check::deduce( \ + (test_me.member_function_name(arg1,arg2), thrust::detail::is_call_possible_detail::void_exp_result()) \ + ) \ + ) == sizeof(yes); \ + }; \ + \ + template \ + struct impl \ + { \ + static typename add_reference::type test_me; \ + static typename add_reference::type arg1; \ + static typename add_reference::type arg2; \ + static typename add_reference::type arg3; \ + \ + static const bool value = \ + sizeof( \ + return_value_check::deduce( \ + (test_me.member_function_name(arg1,arg2,arg3), thrust::detail::is_call_possible_detail::void_exp_result()) \ + ) \ + ) == sizeof(yes); \ + }; \ + \ + template \ + struct impl \ + { \ + static typename add_reference::type test_me; \ + static typename add_reference::type arg1; \ + static typename add_reference::type arg2; \ + static typename add_reference::type arg3; \ + static typename add_reference::type 
arg4; \ + \ + static const bool value = \ + sizeof( \ + return_value_check::deduce( \ + (test_me.member_function_name(arg1,arg2,arg3,arg4), thrust::detail::is_call_possible_detail::void_exp_result()) \ + ) \ + ) == sizeof(yes); \ + }; \ + \ + public: \ + static const bool value = impl::value, Signature>::value; \ + typedef thrust::detail::integral_constant type; \ +}; + diff --git a/compat/thrust/detail/type_traits/is_metafunction_defined.h b/compat/thrust/detail/type_traits/is_metafunction_defined.h new file mode 100644 index 0000000..fba0811 --- /dev/null +++ b/compat/thrust/detail/type_traits/is_metafunction_defined.h @@ -0,0 +1,41 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ + +namespace detail +{ + +__THRUST_DEFINE_HAS_NESTED_TYPE(is_metafunction_defined, type) + +template + struct enable_if_defined + : thrust::detail::lazy_enable_if< + is_metafunction_defined::value, + Metafunction + > +{}; + +} // end detail + +} // end thrust + diff --git a/compat/thrust/detail/type_traits/iterator/is_discard_iterator.h b/compat/thrust/detail/type_traits/iterator/is_discard_iterator.h new file mode 100644 index 0000000..cca59da --- /dev/null +++ b/compat/thrust/detail/type_traits/iterator/is_discard_iterator.h @@ -0,0 +1,40 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + +template +struct is_discard_iterator + : public thrust::detail::false_type +{}; + +template +struct is_discard_iterator< thrust::discard_iterator > + : public thrust::detail::true_type +{}; + +} // end namespace detail +} // end namespace thrust + diff --git a/compat/thrust/detail/type_traits/iterator/is_output_iterator.h b/compat/thrust/detail/type_traits/iterator/is_output_iterator.h new file mode 100644 index 0000000..4cefe63 --- /dev/null +++ b/compat/thrust/detail/type_traits/iterator/is_output_iterator.h @@ -0,0 +1,66 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ + +namespace detail +{ + + +template + struct is_void_like + : thrust::detail::or_< + thrust::detail::is_void, + thrust::detail::is_same + > +{}; // end is_void_like + + +template + struct lazy_is_void_like + : is_void_like +{}; // end lazy_is_void_like + + +// XXX this meta function should first check that T is actually an iterator +// +// if thrust::iterator_value is defined and thrust::iterator_value::type is not void-like +// return false +// else +// return true +template + struct is_output_iterator + : eval_if< + is_metafunction_defined >::value, + lazy_is_void_like >, + thrust::detail::true_type + >::type +{ +}; // end is_output_iterator + +} // end detail + +} // end thrust + diff --git a/compat/thrust/detail/type_traits/minimum_type.h b/compat/thrust/detail/type_traits/minimum_type.h new file mode 100644 index 0000000..aaa011e --- /dev/null +++ b/compat/thrust/detail/type_traits/minimum_type.h @@ -0,0 +1,162 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace thrust +{ + +namespace detail +{ + +namespace minimum_type_detail +{ + +// +// Returns the minimum type or is empty +// if T1 and T2 are unrelated. 
+// +template struct minimum_type_impl {}; + +template +struct minimum_type_impl +{ + typedef T2 type; +}; // end minimum_type_impl + +template +struct minimum_type_impl +{ + typedef T1 type; +}; // end minimum_type_impl + +template +struct minimum_type_impl +{ + typedef T1 type; +}; // end minimum_type_impl + +template +struct primitive_minimum_type + : minimum_type_detail::minimum_type_impl< + T1, + T2, + ::thrust::detail::is_convertible::value, + ::thrust::detail::is_convertible::value + > +{ +}; // end primitive_minimum_type + +// because some types are not convertible (even to themselves) +// specialize primitive_minimum_type for when both types are identical +template +struct primitive_minimum_type +{ + typedef T type; +}; // end primitive_minimum_type + +// XXX this belongs somewhere more general +struct any_conversion +{ + template operator T (void); +}; + +} // end minimum_type_detail + +template + struct minimum_type; + +// base case +template + struct minimum_type + : minimum_type_detail::primitive_minimum_type +{}; + +template + struct lazy_minimum_type + : minimum_type< + typename T1::type, + typename T2::type + > +{}; + +// carefully avoid referring to a nested ::type which may not exist +template + struct minimum_type + : lazy_minimum_type< + lazy_minimum_type< + lazy_minimum_type< + minimum_type< + T1,T2 + >, + minimum_type< + T3,T4 + > + >, + lazy_minimum_type< + minimum_type< + T5,T6 + >, + minimum_type< + T7,T8 + > + > + >, + lazy_minimum_type< + lazy_minimum_type< + minimum_type< + T9,T10 + >, + minimum_type< + T11,T12 + > + >, + lazy_minimum_type< + minimum_type< + T13,T14 + >, + minimum_type< + T15,T16 + > + > + > + > +{}; + +} // end detail + +} // end thrust + diff --git a/compat/thrust/detail/type_traits/pointer_traits.h b/compat/thrust/detail/type_traits/pointer_traits.h new file mode 100644 index 0000000..a0b5dc6 --- /dev/null +++ b/compat/thrust/detail/type_traits/pointer_traits.h @@ -0,0 +1,276 @@ +/* + * Copyright 2008-2012 NVIDIA 
Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + +template struct pointer_element; + +template class Ptr, typename Arg> + struct pointer_element > +{ + typedef Arg type; +}; + +template class Ptr, typename Arg1, typename Arg2> + struct pointer_element > +{ + typedef Arg1 type; +}; + +template class Ptr, typename Arg1, typename Arg2, typename Arg3> + struct pointer_element > +{ + typedef Arg1 type; +}; + +template class Ptr, typename Arg1, typename Arg2, typename Arg3, typename Arg4> + struct pointer_element > +{ + typedef Arg1 type; +}; + +template class Ptr, typename Arg1, typename Arg2, typename Arg3, typename Arg4, typename Arg5> + struct pointer_element > +{ + typedef Arg1 type; +}; + +template + struct pointer_element +{ + typedef T type; +}; + +template + struct pointer_difference +{ + typedef typename Ptr::difference_type type; +}; + +template + struct pointer_difference +{ + typedef std::ptrdiff_t type; +}; + +template struct rebind_pointer; + +template + struct rebind_pointer +{ + typedef U* type; +}; + +template class Ptr, typename Arg, typename T> + struct rebind_pointer,T> +{ + typedef Ptr type; +}; + +template class Ptr, typename Arg1, typename Arg2, typename T> + struct rebind_pointer,T> +{ + typedef Ptr type; +}; + +template class Ptr, typename Arg1, typename Arg2, typename Arg3, typename T> + struct 
rebind_pointer,T> +{ + typedef Ptr type; +}; + +template class Ptr, typename Arg1, typename Arg2, typename Arg3, typename Arg4, typename T> + struct rebind_pointer,T> +{ + typedef Ptr type; +}; + +// XXX this should probably be renamed native_type or similar +__THRUST_DEFINE_HAS_NESTED_TYPE(has_raw_pointer, raw_pointer) + +namespace pointer_traits_detail +{ + +template struct pointer_raw_pointer_impl {}; + +template + struct pointer_raw_pointer_impl +{ + typedef T* type; +}; + +template + struct pointer_raw_pointer_impl::value>::type> +{ + typedef typename Ptr::raw_pointer type; +}; + +} // end pointer_traits_detail + +template + struct pointer_raw_pointer + : pointer_traits_detail::pointer_raw_pointer_impl +{}; + +namespace pointer_traits_detail +{ + +template + struct capture_address +{ + template + __host__ __device__ + capture_address(T &r) + : m_addr(&r) + {} + + inline __host__ __device__ + Void *operator&() const + { + return m_addr; + } + + Void *m_addr; +}; + +// metafunction to compute the type of pointer_to's parameter below +template + struct pointer_to_param + : thrust::detail::eval_if< + thrust::detail::is_void::value, + thrust::detail::identity_ >, + thrust::detail::add_reference + > +{}; + +} + +template + struct pointer_traits +{ + typedef Ptr pointer; + typedef typename pointer_element::type element_type; + typedef typename pointer_difference::type difference_type; + + template + struct rebind + { + typedef typename rebind_pointer::type other; + }; + + __host__ __device__ + inline static pointer pointer_to(typename pointer_traits_detail::pointer_to_param::type r) + { + // XXX this is supposed to be pointer::pointer_to(&r); (i.e., call a static member function of pointer called pointer_to) + // assume that pointer has a constructor from raw pointer instead + + return pointer(&r); + } + + // thrust additions follow + typedef typename pointer_raw_pointer::type raw_pointer; + + __host__ __device__ + inline static raw_pointer get(pointer ptr) + { + 
return ptr.get(); + } +}; + +template + struct pointer_traits +{ + typedef T* pointer; + typedef T element_type; + typedef typename pointer_difference::type difference_type; + + template + struct rebind + { + typedef U* other; + }; + + __host__ __device__ + inline static pointer pointer_to(typename pointer_traits_detail::pointer_to_param::type r) + { + return &r; + } + + // thrust additions follow + typedef typename pointer_raw_pointer::type raw_pointer; + + __host__ __device__ + inline static raw_pointer get(pointer ptr) + { + return ptr; + } +}; + +template + struct is_pointer_convertible + : thrust::detail::and_< + thrust::detail::is_convertible< + typename pointer_element::type *, + typename pointer_element::type * + >, + thrust::detail::is_convertible< + typename iterator_system::type, + typename iterator_system::type + > + > +{}; + +// this could be a lot better, but for our purposes, it's probably +// sufficient just to check if pointer_raw_pointer has meaning +template + struct is_thrust_pointer + : is_metafunction_defined > +{}; + +// avoid inspecting traits of the arguments if they aren't known to be pointers +template + struct lazy_is_pointer_convertible + : thrust::detail::eval_if< + is_thrust_pointer::value && is_thrust_pointer::value, + is_pointer_convertible, + thrust::detail::identity_ + > +{}; + +template + struct enable_if_pointer_is_convertible + : thrust::detail::enable_if< + lazy_is_pointer_convertible::type::value, + T + > +{}; + + +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/type_traits/result_of.h b/compat/thrust/detail/type_traits/result_of.h new file mode 100644 index 0000000..e30b4fd --- /dev/null +++ b/compat/thrust/detail/type_traits/result_of.h @@ -0,0 +1,52 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + +template struct result_of; + +// specialization for unary invocations of things which have result_type +template + struct result_of< + Functor(Arg1), + typename thrust::detail::enable_if::value>::type + > +{ + typedef typename Functor::result_type type; +}; // end result_of + +// specialization for binary invocations of things which have result_type +template + struct result_of< + Functor(Arg1,Arg2), + typename thrust::detail::enable_if::value>::type + > +{ + typedef typename Functor::result_type type; +}; + +} // end detail +} // end thrust + diff --git a/compat/thrust/detail/uninitialized_copy.inl b/compat/thrust/detail/uninitialized_copy.inl new file mode 100644 index 0000000..a01dca5 --- /dev/null +++ b/compat/thrust/detail/uninitialized_copy.inl @@ -0,0 +1,93 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! 
\file uninitialized_copy.inl + * \brief Inline file for uninitialized_copy.h. + */ + +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + ForwardIterator uninitialized_copy(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + ForwardIterator result) +{ + using thrust::system::detail::generic::uninitialized_copy; + return uninitialized_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result); +} // end uninitialized_copy() + + +template + ForwardIterator uninitialized_copy_n(const thrust::detail::execution_policy_base &exec, + InputIterator first, + Size n, + ForwardIterator result) +{ + using thrust::system::detail::generic::uninitialized_copy_n; + return uninitialized_copy_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, result); +} // end uninitialized_copy_n() + + +template + ForwardIterator uninitialized_copy(InputIterator first, + InputIterator last, + ForwardIterator result) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::uninitialized_copy(select_system(system1,system2), first, last, result); +} // end uninitialized_copy() + + +template + ForwardIterator uninitialized_copy_n(InputIterator first, + Size n, + ForwardIterator result) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::uninitialized_copy_n(select_system(system1,system2), first, n, result); +} // end uninitialized_copy_n() + + +} // end thrust + + diff --git a/compat/thrust/detail/uninitialized_fill.inl b/compat/thrust/detail/uninitialized_fill.inl new file mode 100644 index 0000000..3545de5 
--- /dev/null +++ b/compat/thrust/detail/uninitialized_fill.inl @@ -0,0 +1,88 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file uninitialized_fill.inl + * \brief Inline file for uninitialized_fill.h. + */ + +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template + void uninitialized_fill(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const T &x) +{ + using thrust::system::detail::generic::uninitialized_fill; + return uninitialized_fill(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, x); +} // end uninitialized_fill() + + +template + ForwardIterator uninitialized_fill_n(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + Size n, + const T &x) +{ + using thrust::system::detail::generic::uninitialized_fill_n; + return uninitialized_fill_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, x); +} // end uninitialized_fill_n() + + +template + void uninitialized_fill(ForwardIterator first, + ForwardIterator last, + const T &x) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + thrust::uninitialized_fill(select_system(system), first, last, x); +} // end uninitialized_fill() + + +template + ForwardIterator uninitialized_fill_n(ForwardIterator 
first, + Size n, + const T &x) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::uninitialized_fill_n(select_system(system), first, n, x); +} // end uninitialized_fill_n() + + +} // end thrust + diff --git a/compat/thrust/detail/unique.inl b/compat/thrust/detail/unique.inl new file mode 100644 index 0000000..e90187d --- /dev/null +++ b/compat/thrust/detail/unique.inl @@ -0,0 +1,320 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file unique.inl + * \brief Inline file for unique.h. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + + +template +ForwardIterator unique(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last) +{ + using thrust::system::detail::generic::unique; + return unique(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); +} // end unique() + + +template +ForwardIterator unique(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate binary_pred) +{ + using thrust::system::detail::generic::unique; + return unique(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, binary_pred); +} // end unique() + + +template +OutputIterator unique_copy(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator output) +{ + using thrust::system::detail::generic::unique_copy; + return unique_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, output); +} // end unique_copy() + + +template +OutputIterator unique_copy(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator output, + BinaryPredicate binary_pred) +{ + using thrust::system::detail::generic::unique_copy; + return unique_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, output, binary_pred); +} // end unique_copy() + + +template + thrust::pair + unique_by_key(const thrust::detail::execution_policy_base &exec, + ForwardIterator1 keys_first, + ForwardIterator1 keys_last, + ForwardIterator2 values_first) +{ + using thrust::system::detail::generic::unique_by_key; + return unique_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first); +} // end unique_by_key() + + +template + thrust::pair + unique_by_key(const thrust::detail::execution_policy_base 
&exec, + ForwardIterator1 keys_first, + ForwardIterator1 keys_last, + ForwardIterator2 values_first, + BinaryPredicate binary_pred) +{ + using thrust::system::detail::generic::unique_by_key; + return unique_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, binary_pred); +} // end unique_by_key() + + +template + thrust::pair + unique_by_key_copy(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output) +{ + using thrust::system::detail::generic::unique_by_key_copy; + return unique_by_key_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, keys_output, values_output); +} // end unique_by_key_copy() + + +template + thrust::pair + unique_by_key_copy(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred) +{ + using thrust::system::detail::generic::unique_by_key_copy; + return unique_by_key_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, keys_output, values_output, binary_pred); +} // end unique_by_key_copy() + + +template + ForwardIterator unique(ForwardIterator first, + ForwardIterator last) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return thrust::unique(select_system(system), first, last); +} // end unique() + + +template + ForwardIterator unique(ForwardIterator first, + ForwardIterator last, + BinaryPredicate binary_pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System; + + System system; + + return 
thrust::unique(select_system(system), first, last, binary_pred); +} // end unique() + + +template + OutputIterator unique_copy(InputIterator first, + InputIterator last, + OutputIterator output) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::unique_copy(select_system(system1,system2), first, last, output); +} // end unique_copy() + + +template + OutputIterator unique_copy(InputIterator first, + InputIterator last, + OutputIterator output, + BinaryPredicate binary_pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::unique_copy(select_system(system1,system2), first, last, output, binary_pred); +} // end unique_copy() + + +template + thrust::pair + unique_by_key(ForwardIterator1 keys_first, + ForwardIterator1 keys_last, + ForwardIterator2 values_first) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::unique_by_key(select_system(system1,system2), keys_first, keys_last, values_first); +} // end unique_by_key() + + +template + thrust::pair + unique_by_key(ForwardIterator1 keys_first, + ForwardIterator1 keys_last, + ForwardIterator2 values_first, + BinaryPredicate binary_pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + System1 system1; + System2 system2; + + return thrust::unique_by_key(select_system(system1,system2), keys_first, keys_last, values_first, binary_pred); +} // end 
unique_by_key() + + +template + thrust::pair + unique_by_key_copy(InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + + return thrust::unique_by_key_copy(select_system(system1,system2,system3,system4), keys_first, keys_last, values_first, keys_output, values_output); +} // end unique_by_key_copy() + + +template + thrust::pair + unique_by_key_copy(InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred) +{ + using thrust::system::detail::generic::select_system; + + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + typedef typename thrust::iterator_system::type System3; + typedef typename thrust::iterator_system::type System4; + + System1 system1; + System2 system2; + System3 system3; + System4 system4; + + return thrust::unique_by_key_copy(select_system(system1,system2,system3,system4), keys_first, keys_last, values_first, keys_output, values_output, binary_pred); +} // end unique_by_key_copy() + + +} // end namespace thrust + diff --git a/compat/thrust/detail/use_default.h b/compat/thrust/detail/use_default.h new file mode 100644 index 0000000..c6eb66e --- /dev/null +++ b/compat/thrust/detail/use_default.h @@ -0,0 +1,27 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace thrust +{ + +struct use_default {}; + +} // end thrust + diff --git a/compat/thrust/detail/util/align.h b/compat/thrust/detail/util/align.h new file mode 100644 index 0000000..10f107a --- /dev/null +++ b/compat/thrust/detail/util/align.h @@ -0,0 +1,52 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include + +// functions to handle memory alignment + +namespace thrust +{ +namespace detail +{ +namespace util +{ + +template +T * align_up(T * ptr, detail::uintptr_t bytes) +{ + return (T *) ( bytes * (((detail::uintptr_t) ptr + (bytes - 1)) / bytes) ); +} + +template +T * align_down(T * ptr, detail::uintptr_t bytes) +{ + return (T *) ( bytes * (detail::uintptr_t(ptr) / bytes) ); +} + +template +bool is_aligned(T * ptr, detail::uintptr_t bytes = sizeof(T)) +{ + return detail::uintptr_t(ptr) % bytes == 0; +} + +} // end namespace util +} // end namespace detail +} // end namespace thrust + diff --git a/compat/thrust/detail/util/blocking.h b/compat/thrust/detail/util/blocking.h new file mode 100644 index 0000000..3bb78a6 --- /dev/null +++ b/compat/thrust/detail/util/blocking.h @@ -0,0 +1,58 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#pragma once + +//functions to support blocking + +namespace thrust +{ + +namespace detail +{ + +namespace util +{ + +// x/y rounding towards +infinity for integers, used to determine # of blocks/warps etc. +template + inline __host__ __device__ L divide_ri(const L x, const R y) +{ + return (x + (y - 1)) / y; +} + +// x/y rounding towards zero for integers, used to determine # of blocks/warps etc. 
+template + inline __host__ __device__ L divide_rz(const L x, const R y) +{ + return x / y; +} + +// round x towards infinity to the next multiple of y +template + inline __host__ __device__ L round_i(const L x, const R y){ return y * divide_ri(x, y); } + +// round x towards zero to the next multiple of y +template + inline __host__ __device__ L round_z(const L x, const R y){ return y * divide_rz(x, y); } + +} // end namespace util + +} // end namespace detail + +} // end namespace thrust + diff --git a/compat/thrust/detail/vector_base.h b/compat/thrust/detail/vector_base.h new file mode 100644 index 0000000..6974eab --- /dev/null +++ b/compat/thrust/detail/vector_base.h @@ -0,0 +1,534 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file vector_base.h + * \brief Defines the interface to a base class for + * host_vector & device_vector. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + +namespace detail +{ + +template + class vector_base +{ + private: + typedef thrust::detail::contiguous_storage storage_type; + + public: + // typedefs + typedef typename storage_type::value_type value_type; + typedef typename storage_type::pointer pointer; + typedef typename storage_type::const_pointer const_pointer; + typedef typename storage_type::reference reference; + typedef typename storage_type::const_reference const_reference; + typedef typename storage_type::size_type size_type; + typedef typename storage_type::difference_type difference_type; + typedef typename storage_type::allocator_type allocator_type; + + typedef typename storage_type::iterator iterator; + typedef typename storage_type::const_iterator const_iterator; + + typedef thrust::reverse_iterator reverse_iterator; + typedef thrust::reverse_iterator const_reverse_iterator; + + /*! This constructor creates an empty vector_base. + */ + vector_base(void); + + /*! This constructor creates a vector_base with default-constructed + * elements. + * \param n The number of elements to create. + */ + explicit vector_base(size_type n); + + /*! This constructor creates a vector_base with copies + * of an exemplar element. + * \param n The number of elements to initially create. + * \param value An element to copy. + */ + explicit vector_base(size_type n, const value_type &value); + + /*! Copy constructor copies from an exemplar vector_base. + * \param v The vector_base to copy. + */ + vector_base(const vector_base &v); + + /*! assign operator makes a copy of an exemplar vector_base. + * \param v The vector_base to copy. + */ + vector_base &operator=(const vector_base &v); + + /*! Copy constructor copies from an exemplar vector_base with different + * type. + * \param v The vector_base to copy. + */ + template + vector_base(const vector_base &v); + + /*! 
assign operator makes a copy of an exemplar vector_base with different + * type. + * \param v The vector_base to copy. + */ + template + vector_base &operator=(const vector_base &v); + + /*! Copy constructor copies from an exemplar std::vector. + * \param v The std::vector to copy. + * XXX TODO: Make this method redundant with a properly templatized constructor. + * We would like to copy from a vector whose element type is anything + * assignable to value_type. + */ + template + vector_base(const std::vector &v); + + /*! assign operator makes a copy of an exemplar std::vector. + * \param v The vector to copy. + * XXX TODO: Templatize this assign on the type of the vector to copy from. + * We would like to copy from a vector whose element type is anything + * assignable to value_type. + */ + template + vector_base &operator=(const std::vector &v); + + /*! This constructor builds a vector_base from a range. + * \param first The beginning of the range. + * \param last The end of the range. + */ + template + vector_base(InputIterator first, InputIterator last); + + /*! The destructor erases the elements. + */ + ~vector_base(void); + + /*! \brief Resizes this vector_base to the specified number of elements. + * \param new_size Number of elements this vector_base should contain. + * \throw std::length_error If new_size exceeds max_size(). + * + * This method will resize this vector_base to the specified number of + * elements. If the number is smaller than this vector_base's current + * size this vector_base is truncated, otherwise this vector_base is + * extended and new elements are default constructed. + */ + void resize(size_type new_size); + + /*! \brief Resizes this vector_base to the specified number of elements. + * \param new_size Number of elements this vector_base should contain. + * \param x Data with which new elements should be populated. + * \throw std::length_error If new_size exceeds max_size(). 
+ * + * This method will resize this vector_base to the specified number of + * elements. If the number is smaller than this vector_base's current + * size this vector_base is truncated, otherwise this vector_base is + * extended and new elements are populated with given data. + */ + void resize(size_type new_size, const value_type &x); + + /*! Returns the number of elements in this vector_base. + */ + size_type size(void) const; + + /*! Returns the size() of the largest possible vector_base. + * \return The largest possible return value of size(). + */ + size_type max_size(void) const; + + /*! \brief If n is less than or equal to capacity(), this call has no effect. + * Otherwise, this method is a request for allocation of additional memory. If + * the request is successful, then capacity() is greater than or equal to + * n; otherwise, capacity() is unchanged. In either case, size() is unchanged. + * \throw std::length_error If n exceeds max_size(). + */ + void reserve(size_type n); + + /*! Returns the number of elements which have been reserved in this + * vector_base. + */ + size_type capacity(void) const; + + /*! This method shrinks the capacity of this vector_base to exactly + * fit its elements. + */ + void shrink_to_fit(void); + + /*! \brief Subscript access to the data contained in this vector_base. + * \param n The index of the element for which data should be accessed. + * \return Read/write reference to data. + * + * This operator allows for easy, array-style, data access. + * Note that data access with this operator is unchecked and + * out_of_range lookups are not defined. + */ + reference operator[](size_type n); + + /*! \brief Subscript read access to the data contained in this vector_base. + * \param n The index of the element for which data should be accessed. + * \return Read reference to data. + * + * This operator allows for easy, array-style, data access. 
+ * Note that data access with this operator is unchecked and + * out_of_range lookups are not defined. + */ + const_reference operator[](size_type n) const; + + /*! This method returns an iterator pointing to the beginning of + * this vector_base. + * \return mStart + */ + iterator begin(void); + + /*! This method returns a const_iterator pointing to the beginning + * of this vector_base. + * \return mStart + */ + const_iterator begin(void) const; + + /*! This method returns a const_iterator pointing to the beginning + * of this vector_base. + * \return mStart + */ + const_iterator cbegin(void) const; + + /*! This method returns a reverse_iterator pointing to the beginning of + * this vector_base's reversed sequence. + * \return A reverse_iterator pointing to the beginning of this + * vector_base's reversed sequence. + */ + reverse_iterator rbegin(void); + + /*! This method returns a const_reverse_iterator pointing to the beginning of + * this vector_base's reversed sequence. + * \return A const_reverse_iterator pointing to the beginning of this + * vector_base's reversed sequence. + */ + const_reverse_iterator rbegin(void) const; + + /*! This method returns a const_reverse_iterator pointing to the beginning of + * this vector_base's reversed sequence. + * \return A const_reverse_iterator pointing to the beginning of this + * vector_base's reversed sequence. + */ + const_reverse_iterator crbegin(void) const; + + /*! This method returns an iterator pointing to one element past the + * last of this vector_base. + * \return begin() + size(). + */ + iterator end(void); + + /*! This method returns a const_iterator pointing to one element past the + * last of this vector_base. + * \return begin() + size(). + */ + const_iterator end(void) const; + + /*! This method returns a const_iterator pointing to one element past the + * last of this vector_base. + * \return begin() + size(). + */ + const_iterator cend(void) const; + + /*! 
This method returns a reverse_iterator pointing to one element past the + * last of this vector_base's reversed sequence. + * \return rbegin() + size(). + */ + reverse_iterator rend(void); + + /*! This method returns a const_reverse_iterator pointing to one element past the + * last of this vector_base's reversed sequence. + * \return rbegin() + size(). + */ + const_reverse_iterator rend(void) const; + + /*! This method returns a const_reverse_iterator pointing to one element past the + * last of this vector_base's reversed sequence. + * \return rbegin() + size(). + */ + const_reverse_iterator crend(void) const; + + /*! This method returns a const_reference referring to the first element of this + * vector_base. + * \return The first element of this vector_base. + */ + const_reference front(void) const; + + /*! This method returns a reference pointing to the first element of this + * vector_base. + * \return The first element of this vector_base. + */ + reference front(void); + + /*! This method returns a const reference pointing to the last element of + * this vector_base. + * \return The last element of this vector_base. + */ + const_reference back(void) const; + + /*! This method returns a reference referring to the last element of + * this vector_base. + * \return The last element of this vector_base. + */ + reference back(void); + + /*! This method returns a pointer to this vector_base's first element. + * \return A pointer to the first element of this vector_base. + */ + pointer data(void); + + /*! This method returns a const_pointer to this vector_base's first element. + * \return a const_pointer to the first element of this vector_base. + */ + const_pointer data(void) const; + + /*! This method resizes this vector_base to 0. + */ + void clear(void); + + /*! This method returns true iff size() == 0. + * \return true if size() == 0; false, otherwise. + */ + bool empty(void) const; + + /*! This method appends the given element to the end of this vector_base. 
+ * \param x The element to append. + */ + void push_back(const value_type &x); + + /*! This method erases the last element of this vector_base, invalidating + * all iterators and references to it. + */ + void pop_back(void); + + /*! This method swaps the contents of this vector_base with another vector_base. + * \param v The vector_base with which to swap. + */ + void swap(vector_base &v); + + /*! This method removes the element at position pos. + * \param pos The position of the element of interest. + * \return An iterator pointing to the new location of the element that followed the element + * at position pos. + */ + iterator erase(iterator pos); + + /*! This method removes the range of elements [first,last) from this vector_base. + * \param first The beginning of the range of elements to remove. + * \param last The end of the range of elements to remove. + * \return An iterator pointing to the new location of the element that followed the last + * element in the sequence [first,last). + */ + iterator erase(iterator first, iterator last); + + /*! This method inserts a single copy of a given exemplar value at the + * specified position in this vector_base. + * \param position The insertion position. + * \param x The exemplar element to copy & insert. + * \return An iterator pointing to the newly inserted element. + */ + iterator insert(iterator position, const T &x); + + /*! This method inserts a copy of an exemplar value to a range at the + * specified position in this vector_base. + * \param position The insertion position + * \param n The number of insertions to perform. + * \param x The value to replicate and insert. + */ + void insert(iterator position, size_type n, const T &x); + + /*! This method inserts a copy of an input range at the specified position + * in this vector_base. + * \param position The insertion position. + * \param first The beginning of the range to copy. + * \param last The end of the range to copy. 
+ * + * \tparam InputIterator is a model of Assignable. + */ + template + void insert(iterator position, InputIterator first, InputIterator last); + + /*! This version of \p assign replicates a given exemplar + * \p n times into this vector_base. + * \param n The number of times to copy \p x. + * \param x The exemplar element to replicate. + */ + void assign(size_type n, const T &x); + + /*! This version of \p assign makes this vector_base a copy of a given input range. + * \param first The beginning of the range to copy. + * \param last The end of the range to copy. + * + * \tparam InputIterator is a model of Input Iterator. + */ + template + void assign(InputIterator first, InputIterator last); + + /*! This method returns a copy of this vector's allocator. + * \return A copy of the allocator used by this vector. + */ + allocator_type get_allocator(void) const; + + protected: + // Our storage + storage_type m_storage; + + // The size of this vector_base, in number of elements. + size_type m_size; + + private: + // these methods resolve the ambiguity of the constructor template of form (Iterator, Iterator) + template + void init_dispatch(IteratorOrIntegralType begin, IteratorOrIntegralType end, false_type); + + template + void init_dispatch(IteratorOrIntegralType n, IteratorOrIntegralType value, true_type); + + template + void range_init(InputIterator first, InputIterator last); + + template + void range_init(InputIterator first, InputIterator last, thrust::incrementable_traversal_tag); + + template + void range_init(ForwardIterator first, ForwardIterator last, thrust::random_access_traversal_tag); + + void default_init(size_type n); + + void fill_init(size_type n, const T &x); + + // these methods resolve the ambiguity of the insert() template of form (iterator, InputIterator, InputIterator) + template + void insert_dispatch(iterator position, InputIteratorOrIntegralType first, InputIteratorOrIntegralType last, false_type); + + // these methods resolve the 
ambiguity of the insert() template of form (iterator, InputIterator, InputIterator) + template + void insert_dispatch(iterator position, InputIteratorOrIntegralType n, InputIteratorOrIntegralType x, true_type); + + // this method appends n default-constructed elements at the end + void append(size_type n); + + // this method performs insertion from a fill value + void fill_insert(iterator position, size_type n, const T &x); + + // this method performs insertion from a range + template + void copy_insert(iterator position, InputIterator first, InputIterator last); + + // these methods resolve the ambiguity of the assign() template of form (InputIterator, InputIterator) + template + void assign_dispatch(InputIterator first, InputIterator last, false_type); + + // these methods resolve the ambiguity of the assign() template of form (InputIterator, InputIterator) + template + void assign_dispatch(Integral n, Integral x, true_type); + + // this method performs assignment from a range + template + void range_assign(InputIterator first, InputIterator last); + + // this method performs assignment from a range of RandomAccessIterators + template + void range_assign(RandomAccessIterator first, RandomAccessIterator last, thrust::random_access_traversal_tag); + + // this method performs assignment from a range of InputIterators + template + void range_assign(InputIterator first, InputIterator last, thrust::incrementable_traversal_tag); + + // this method performs assignment from a fill value + void fill_assign(size_type n, const T &x); + + // this method allocates new storage and construct copies the given range + template + void allocate_and_copy(size_type requested_size, + ForwardIterator first, ForwardIterator last, + storage_type &new_storage); +}; // end vector_base + +} // end detail + +/*! This function assigns the contents of vector a to vector b and the + * contents of vector b to vector a. + * + * \param a The first vector of interest. 
After completion, the contents + * of b will be returned here. + * \param b The second vector of interest. After completion, the contents + * of a will be returned here. + */ +template + void swap(detail::vector_base &a, + detail::vector_base &b); + + +/*! This operator allows comparison between two vectors. + * \param lhs The first \p vector to compare. + * \param rhs The second \p vector to compare. + * \return \c true if and only if each corresponding element in either + * \p vector equals the other; \c false, otherwise. + */ +template +bool operator==(const detail::vector_base& lhs, + const detail::vector_base& rhs); + +template +bool operator==(const detail::vector_base& lhs, + const std::vector& rhs); + +template +bool operator==(const std::vector& lhs, + const detail::vector_base& rhs); + +/*! This operator allows comparison between two vectors. + * \param lhs The first \p vector to compare. + * \param rhs The second \p vector to compare. + * \return \c false if and only if each corresponding element in either + * \p vector equals the other; \c true, otherwise. + */ +template +bool operator!=(const detail::vector_base& lhs, + const detail::vector_base& rhs); + +template +bool operator!=(const detail::vector_base& lhs, + const std::vector& rhs); + +template +bool operator!=(const std::vector& lhs, + const detail::vector_base& rhs); + +} // end thrust + +#include + diff --git a/compat/thrust/detail/vector_base.inl b/compat/thrust/detail/vector_base.inl new file mode 100644 index 0000000..24e6466 --- /dev/null +++ b/compat/thrust/detail/vector_base.inl @@ -0,0 +1,1203 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file vector_base.inl + * \brief Inline file for vector_base.h. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace thrust +{ + +namespace detail +{ + +template + vector_base + ::vector_base(void) + :m_storage(), + m_size(0) +{ + ; +} // end vector_base::vector_base() + +template + vector_base + ::vector_base(size_type n) + :m_storage(), + m_size(0) +{ + default_init(n); +} // end vector_base::vector_base() + +template + vector_base + ::vector_base(size_type n, const value_type &value) + :m_storage(), + m_size(0) +{ + fill_init(n,value); +} // end vector_base::vector_base() + +template + vector_base + ::vector_base(const vector_base &v) + :m_storage(), + m_size(0) +{ + range_init(v.begin(), v.end()); +} // end vector_base::vector_base() + +template + vector_base & + vector_base + ::operator=(const vector_base &v) +{ + if(this != &v) + { + assign(v.begin(), v.end()); + } // end if + + return *this; +} // end vector_base::operator=() + +template + template + vector_base + ::vector_base(const vector_base &v) + :m_storage(), + m_size(0) +{ + range_init(v.begin(), v.end()); +} // end vector_base::vector_base() + +template + template + vector_base & + vector_base + ::operator=(const vector_base &v) +{ + assign(v.begin(), v.end()); + + return *this; +} // end vector_base::operator=() + +template + template + vector_base + ::vector_base(const std::vector &v) + :m_storage(), + m_size(0) +{ + range_init(v.begin(), v.end()); +} // end vector_base::vector_base() + 
+template + template + vector_base & + vector_base + ::operator=(const std::vector &v) +{ + assign(v.begin(), v.end()); + + return *this; +} // end vector_base::operator=() + +template + template + void vector_base + ::init_dispatch(IteratorOrIntegralType n, + IteratorOrIntegralType value, + true_type) +{ + fill_init(n,value); +} // end vector_base::init_dispatch() + +template + void vector_base + ::default_init(size_type n) +{ + if(n > 0) + { + m_storage.allocate(n); + m_size = n; + + m_storage.default_construct_n(begin(), size()); + } // end if +} // end vector_base::default_init() + +template + void vector_base + ::fill_init(size_type n, const T &x) +{ + if(n > 0) + { + m_storage.allocate(n); + m_size = n; + + m_storage.uninitialized_fill_n(begin(), size(), x); + } // end if +} // end vector_base::fill_init() + +template + template + void vector_base + ::init_dispatch(InputIterator first, + InputIterator last, + false_type) +{ + range_init(first, last); +} // end vector_base::init_dispatch() + +template + template + void vector_base + ::range_init(InputIterator first, + InputIterator last) +{ + range_init(first, last, + typename thrust::iterator_traversal::type()); +} // end vector_base::range_init() + +template + template + void vector_base + ::range_init(InputIterator first, + InputIterator last, + thrust::incrementable_traversal_tag) +{ + for(; first != last; ++first) + push_back(*first); +} // end vector_base::range_init() + +template + template + void vector_base + ::range_init(ForwardIterator first, + ForwardIterator last, + thrust::random_access_traversal_tag) +{ + size_type new_size = thrust::distance(first, last); + + allocate_and_copy(new_size, first, last, m_storage); + m_size = new_size; +} // end vector_base::range_init() + +template + template + vector_base + ::vector_base(InputIterator first, + InputIterator last) + :m_storage(), + m_size(0) +{ + // check the type of InputIterator: if it's an integral type, + // we need to interpret this call as 
(size_type, value_type) + typedef thrust::detail::is_integral Integer; + + init_dispatch(first, last, Integer()); +} // end vector_basee::vector_base() + +template + void vector_base + ::resize(size_type new_size) +{ + if(new_size < size()) + { + iterator new_end = begin(); + thrust::advance(new_end, new_size); + erase(new_end, end()); + } // end if + else + { + append(new_size - size()); + } // end else +} // end vector_base::resize() + +template + void vector_base + ::resize(size_type new_size, const value_type &x) +{ + if(new_size < size()) + { + iterator new_end = begin(); + thrust::advance(new_end, new_size); + erase(new_end, end()); + } // end if + else + { + insert(end(), new_size - size(), x); + } // end else +} // end vector_base::resize() + +template + typename vector_base::size_type + vector_base + ::size(void) const +{ + return m_size; +} // end vector_base::size() + +template + typename vector_base::size_type + vector_base + ::max_size(void) const +{ + return m_storage.max_size(); +} // end vector_base::max_size() + +template + void vector_base + ::reserve(size_type n) +{ + if(n > capacity()) + { + allocate_and_copy(n, begin(), end(), m_storage); + } // end if +} // end vector_base::reserve() + +template + typename vector_base::size_type + vector_base + ::capacity(void) const +{ + return m_storage.size(); +} // end vector_base::capacity() + +template + void vector_base + ::shrink_to_fit(void) +{ + // use the swap trick + vector_base(*this).swap(*this); +} // end vector_base::shrink_to_fit() + +template + typename vector_base::reference + vector_base + ::operator[](const size_type n) +{ + return m_storage[n]; +} // end vector_base::operator[] + +template + typename vector_base::const_reference + vector_base + ::operator[](const size_type n) const +{ + return m_storage[n]; +} // end vector_base::operator[] + +template + typename vector_base::iterator + vector_base + ::begin(void) +{ + return m_storage.begin(); +} // end vector_base::begin() + +template + 
typename vector_base::const_iterator + vector_base + ::begin(void) const +{ + return m_storage.begin(); +} // end vector_base::begin() + +template + typename vector_base::const_iterator + vector_base + ::cbegin(void) const +{ + return begin(); +} // end vector_base::cbegin() + +template + typename vector_base::reverse_iterator + vector_base + ::rbegin(void) +{ + return reverse_iterator(end()); +} // end vector_base::rbegin() + +template + typename vector_base::const_reverse_iterator + vector_base + ::rbegin(void) const +{ + return const_reverse_iterator(end()); +} // end vector_base::rbegin() + +template + typename vector_base::const_reverse_iterator + vector_base + ::crbegin(void) const +{ + return rbegin(); +} // end vector_base::crbegin() + +template + typename vector_base::iterator + vector_base + ::end(void) +{ + iterator result = begin(); + thrust::advance(result, size()); + return result; +} // end vector_base::end() + +template + typename vector_base::const_iterator + vector_base + ::end(void) const +{ + const_iterator result = begin(); + thrust::advance(result, size()); + return result; +} // end vector_base::end() + +template + typename vector_base::const_iterator + vector_base + ::cend(void) const +{ + return end(); +} // end vector_base::cend() + +template + typename vector_base::reverse_iterator + vector_base + ::rend(void) +{ + return reverse_iterator(begin()); +} // end vector_base::rend() + +template + typename vector_base::const_reverse_iterator + vector_base + ::rend(void) const +{ + return const_reverse_iterator(begin()); +} // end vector_base::rend() + +template + typename vector_base::const_reverse_iterator + vector_base + ::crend(void) const +{ + return rend(); +} // end vector_base::crend() + +template + typename vector_base::const_reference + vector_base + ::front(void) const +{ + return *begin(); +} // end vector_base::front() + +template + typename vector_base::reference + vector_base + ::front(void) +{ + return *begin(); +} // end 
vector_base::front() + +template + typename vector_base::const_reference + vector_base + ::back(void) const +{ + const_iterator ptr_to_back = end(); + --ptr_to_back; + return *ptr_to_back; +} // end vector_base::vector_base + +template + typename vector_base::reference + vector_base + ::back(void) +{ + iterator ptr_to_back = end(); + --ptr_to_back; + return *ptr_to_back; +} // end vector_base::vector_base + +template + typename vector_base::pointer + vector_base + ::data(void) +{ + return &front(); +} // end vector_base::data() + +template + typename vector_base::const_pointer + vector_base + ::data(void) const +{ + return &front(); +} // end vector_base::data() + +template + vector_base + ::~vector_base(void) +{ + // destroy every living thing + m_storage.destroy(begin(),end()); +} // end vector_base::~vector_base() + +template + void vector_base + ::clear(void) +{ + resize(0); +} // end vector_base::~vector_dev() + +template + bool vector_base + ::empty(void) const +{ + return size() == 0; +} // end vector_base::empty(); + +template + void vector_base + ::push_back(const value_type &x) +{ + insert(end(), x); +} // end vector_base::push_back() + +template + void vector_base + ::pop_back(void) +{ + iterator e = end(); + iterator ptr_to_back = e; + --ptr_to_back; + m_storage.destroy(ptr_to_back, e); + --m_size; +} // end vector_base::pop_back() + +template + typename vector_base::iterator vector_base + ::erase(iterator pos) +{ + iterator end = pos; + ++end; + return erase(pos,end); +} // end vector_base::erase() + +template + typename vector_base::iterator vector_base + ::erase(iterator first, iterator last) +{ + // overlap copy the range [last,end()) to first + // XXX this copy only potentially overlaps + iterator i = thrust::detail::overlapped_copy(last, end(), first); + + // destroy everything after i + m_storage.destroy(i, end()); + + // modify our size + m_size -= (last - first); + + // return an iterator pointing to the position of the first element + // 
following the erased range + return first; +} // end vector_base::erase() + +template + void vector_base + ::swap(vector_base &v) +{ + thrust::swap(m_storage, v.m_storage); + thrust::swap(m_size, v.m_size); +} // end vector_base::swap() + +template + void vector_base + ::assign(size_type n, const T &x) +{ + fill_assign(n, x); +} // end vector_base::assign() + +template + template + void vector_base + ::assign(InputIterator first, InputIterator last) +{ + // we could have received assign(n, x), so disambiguate on the + // type of InputIterator + typedef typename thrust::detail::is_integral integral; + + assign_dispatch(first, last, integral()); +} // end vector_base::assign() + +template + typename vector_base::allocator_type + vector_base + ::get_allocator(void) const +{ + return m_storage.get_allocator(); +} // end vector_base::get_allocator() + +template + typename vector_base::iterator + vector_base + ::insert(iterator position, const T &x) +{ + // find the index of the insertion + size_type index = thrust::distance(begin(), position); + + // make the insertion + insert(position, 1, x); + + // return an iterator pointing back to position + iterator result = begin(); + thrust::advance(result, index); + return result; +} // end vector_base::insert() + +template + void vector_base + ::insert(iterator position, size_type n, const T &x) +{ + fill_insert(position, n, x); +} // end vector_base::insert() + +template + template + void vector_base + ::insert(iterator position, InputIterator first, InputIterator last) +{ + // we could have received insert(position, n, x), so disambiguate on the + // type of InputIterator + typedef typename thrust::detail::is_integral integral; + + insert_dispatch(position, first, last, integral()); +} // end vector_base::insert() + +template + template + void vector_base + ::assign_dispatch(InputIterator first, InputIterator last, false_type) +{ + range_assign(first, last); +} // end vector_base::assign_dispatch() + +template + template + 
void vector_base + ::assign_dispatch(Integral n, Integral x, true_type) +{ + fill_assign(n, x); +} // end vector_base::assign_dispatch() + +template + template + void vector_base + ::insert_dispatch(iterator position, InputIterator first, InputIterator last, false_type) +{ + copy_insert(position, first, last); +} // end vector_base::insert_dispatch() + +template + template + void vector_base + ::insert_dispatch(iterator position, Integral n, Integral x, true_type) +{ + fill_insert(position, n, x); +} // end vector_base::insert_dispatch() + +template + template + void vector_base + ::copy_insert(iterator position, + ForwardIterator first, + ForwardIterator last) +{ + if(first != last) + { + // how many new elements will we create? + const size_type num_new_elements = thrust::distance(first, last); + if(capacity() - size() >= num_new_elements) + { + // we've got room for all of them + // how many existing elements will we displace? + const size_type num_displaced_elements = end() - position; + iterator old_end = end(); + + if(num_displaced_elements > num_new_elements) + { + // construct copy n displaced elements to new elements + // following the insertion + m_storage.uninitialized_copy(end() - num_new_elements, end(), end()); + + // extend the size + m_size += num_new_elements; + + // copy num_displaced_elements - num_new_elements elements to existing elements + // this copy overlaps + const size_type copy_length = (old_end - num_new_elements) - position; + thrust::detail::overlapped_copy(position, old_end - num_new_elements, old_end - copy_length); + + // finally, copy the range to the insertion point + thrust::copy(first, last, position); + } // end if + else + { + ForwardIterator mid = first; + thrust::advance(mid, num_displaced_elements); + + // construct copy new elements at the end of the vector + m_storage.uninitialized_copy(mid, last, end()); + + // extend the size + m_size += num_new_elements - num_displaced_elements; + + // construct copy the displaced 
elements + m_storage.uninitialized_copy(position, old_end, end()); + + // extend the size + m_size += num_displaced_elements; + + // copy to elements which already existed + thrust::copy(first, mid, position); + } // end else + } // end if + else + { + const size_type old_size = size(); + + // compute the new capacity after the allocation + size_type new_capacity = old_size + thrust::max THRUST_PREVENT_MACRO_SUBSTITUTION (old_size, num_new_elements); + + // allocate exponentially larger new storage + new_capacity = thrust::max THRUST_PREVENT_MACRO_SUBSTITUTION (new_capacity, 2 * capacity()); + + // do not exceed maximum storage + new_capacity = thrust::min THRUST_PREVENT_MACRO_SUBSTITUTION (new_capacity, max_size()); + + if(new_capacity > max_size()) + { + throw std::length_error("insert(): insertion exceeds max_size()."); + } // end if + + storage_type new_storage(new_capacity); + + // record how many constructors we invoke in the try block below + iterator new_end = new_storage.begin(); + + try + { + // construct copy elements before the insertion to the beginning of the newly + // allocated storage + new_end = m_storage.uninitialized_copy(begin(), position, new_storage.begin()); + + // construct copy elements to insert + new_end = m_storage.uninitialized_copy(first, last, new_end); + + // construct copy displaced elements from the old storage to the new storage + // remember [position, end()) refers to the old storage + new_end = m_storage.uninitialized_copy(position, end(), new_end); + } // end try + catch(...) 
+ { + // something went wrong, so destroy & deallocate the new storage + m_storage.destroy(new_storage.begin(), new_end); + new_storage.deallocate(); + + // rethrow + throw; + } // end catch + + // call destructors on the elements in the old storage + m_storage.destroy(begin(), end()); + + // record the vector's new state + m_storage.swap(new_storage); + m_size = old_size + num_new_elements; + } // end else + } // end if +} // end vector_base::copy_insert() + +template + void vector_base + ::append(size_type n) +{ + if(n != 0) + { + if(capacity() - size() >= n) + { + // we've got room for all of them + + // default construct new elements at the end of the vector + m_storage.default_construct_n(end(), n); + + // extend the size + m_size += n; + } // end if + else + { + const size_type old_size = size(); + + // compute the new capacity after the allocation + size_type new_capacity = old_size + thrust::max THRUST_PREVENT_MACRO_SUBSTITUTION (old_size, n); + + // allocate exponentially larger new storage + new_capacity = thrust::max THRUST_PREVENT_MACRO_SUBSTITUTION (new_capacity, 2 * capacity()); + + // do not exceed maximum storage + new_capacity = thrust::min THRUST_PREVENT_MACRO_SUBSTITUTION (new_capacity, max_size()); + + // create new storage + storage_type new_storage(new_capacity); + + // record how many constructors we invoke in the try block below + iterator new_end = new_storage.begin(); + + try + { + // construct copy all elements into the newly allocated storage + new_end = m_storage.uninitialized_copy(begin(), end(), new_storage.begin()); + + // construct new elements to insert + m_storage.default_construct_n(new_end, n); + new_end += n; + } // end try + catch(...) 
+ { + // something went wrong, so destroy & deallocate the new storage + m_storage.destroy(new_storage.begin(), new_end); + new_storage.deallocate(); + + // rethrow + throw; + } // end catch + + // call destructors on the elements in the old storage + m_storage.destroy(begin(), end()); + + // record the vector's new state + m_storage.swap(new_storage); + m_size = old_size + n; + } // end else + } // end if +} // end vector_base::append() + +template + void vector_base + ::fill_insert(iterator position, size_type n, const T &x) +{ + if(n != 0) + { + if(capacity() - size() >= n) + { + // we've got room for all of them + // how many existing elements will we displace? + const size_type num_displaced_elements = end() - position; + iterator old_end = end(); + + if(num_displaced_elements > n) + { + // construct copy n displaced elements to new elements + // following the insertion + m_storage.uninitialized_copy(end() - n, end(), end()); + + // extend the size + m_size += n; + + // copy num_displaced_elements - n elements to existing elements + // this copy overlaps + const size_type copy_length = (old_end - n) - position; + thrust::detail::overlapped_copy(position, old_end - n, old_end - copy_length); + + // finally, fill the range to the insertion point + thrust::fill_n(position, n, x); + } // end if + else + { + // construct new elements at the end of the vector + m_storage.uninitialized_fill_n(end(), n - num_displaced_elements, x); + + // extend the size + m_size += n - num_displaced_elements; + + // construct copy the displaced elements + m_storage.uninitialized_copy(position, old_end, end()); + + // extend the size + m_size += num_displaced_elements; + + // fill to elements which already existed + thrust::fill(position, old_end, x); + } // end else + } // end if + else + { + const size_type old_size = size(); + + // compute the new capacity after the allocation + size_type new_capacity = old_size + thrust::max THRUST_PREVENT_MACRO_SUBSTITUTION (old_size, n); + + // 
allocate exponentially larger new storage + new_capacity = thrust::max THRUST_PREVENT_MACRO_SUBSTITUTION (new_capacity, 2 * capacity()); + + // do not exceed maximum storage + new_capacity = thrust::min THRUST_PREVENT_MACRO_SUBSTITUTION (new_capacity, max_size()); + + if(new_capacity > max_size()) + { + throw std::length_error("insert(): insertion exceeds max_size()."); + } // end if + + storage_type new_storage(new_capacity); + + // record how many constructors we invoke in the try block below + iterator new_end = new_storage.begin(); + + try + { + // construct copy elements before the insertion to the beginning of the newly + // allocated storage + new_end = m_storage.uninitialized_copy(begin(), position, new_storage.begin()); + + // construct new elements to insert + m_storage.uninitialized_fill_n(new_end, n, x); + new_end += n; + + // construct copy displaced elements from the old storage to the new storage + // remember [position, end()) refers to the old storage + new_end = m_storage.uninitialized_copy(position, end(), new_end); + } // end try + catch(...) 
+ { + // something went wrong, so destroy & deallocate the new storage + m_storage.destroy(new_storage.begin(), new_end); + new_storage.deallocate(); + + // rethrow + throw; + } // end catch + + // call destructors on the elements in the old storage + m_storage.destroy(begin(), end()); + + // record the vector's new state + m_storage.swap(new_storage); + m_size = old_size + n; + } // end else + } // end if +} // end vector_base::fill_insert() + +template + template + void vector_base + ::range_assign(InputIterator first, + InputIterator last) +{ + // dispatch on traversal + range_assign(first, last, + typename thrust::iterator_traversal::type()); +} // end range_assign() + +template + template + void vector_base + ::range_assign(InputIterator first, + InputIterator last, + thrust::incrementable_traversal_tag) +{ + iterator current(begin()); + + // assign to elements which already exist + for(; first != last && current != end(); ++current, ++first) + { + *current = *first; + } // end for + + // either just the input was exhausted or both + // the input and vector elements were exhausted + if(first == last) + { + // if we exhausted the input, erase leftover elements + erase(current, end()); + } // end if + else + { + // insert the rest of the input at the end of the vector + insert(end(), first, last); + } // end else +} // end vector_base::range_assign() + +template + template + void vector_base + ::range_assign(RandomAccessIterator first, + RandomAccessIterator last, + thrust::random_access_traversal_tag) +{ + const size_type n = thrust::distance(first, last); + + if(n > capacity()) + { + storage_type new_storage; + allocate_and_copy(n, first, last, new_storage); + + // call destructors on the elements in the old storage + m_storage.destroy(begin(), end()); + + // record the vector's new state + m_storage.swap(new_storage); + m_size = n; + } // end if + else if(size() >= n) + { + // we can already accomodate the new range + iterator new_end = thrust::copy(first, 
last, begin()); + + // destroy the elements we don't need + m_storage.destroy(new_end, end()); + + // update size + m_size = n; + } // end else if + else + { + // range fits inside allocated storage, but some elements + // have not been constructed yet + + // XXX TODO we could possibly implement this with one call + // to transform rather than copy + uninitialized_copy + + // copy to elements which already exist + RandomAccessIterator mid = first; + thrust::advance(mid, size()); + thrust::copy(first, mid, begin()); + + // uninitialize_copy to elements which must be constructed + m_storage.uninitialized_copy(mid, last, end()); + + // update size + m_size = n; + } // end else +} // end vector_base::assign() + +template + void vector_base + ::fill_assign(size_type n, const T &x) +{ + if(n > capacity()) + { + // XXX we should also include a copy of the allocator: + // vector_base temp(n, x, get_allocator()); + vector_base temp(n, x); + temp.swap(*this); + } // end if + else if(n > size()) + { + // fill to existing elements + thrust::fill(begin(), end(), x); + + // construct uninitialized elements + m_storage.uninitialized_fill_n(end(), n - size(), x); + + // adjust size + m_size += (n - size()); + } // end else if + else + { + // fill to existing elements + iterator new_end = thrust::fill_n(begin(), n, x); + + // erase the elements after the fill + erase(new_end, end()); + } // end else +} // end vector_base::fill_assign() + +template + template + void vector_base + ::allocate_and_copy(size_type requested_size, + ForwardIterator first, ForwardIterator last, + storage_type &new_storage) +{ + if(requested_size == 0) + { + new_storage.deallocate(); + return; + } // end if + + // allocate exponentially larger new storage + size_type allocated_size = thrust::max(requested_size, 2 * capacity()); + + // do not exceed maximum storage + allocated_size = thrust::min(allocated_size, max_size()); + + if(requested_size > allocated_size) + { + throw std::length_error("assignment 
exceeds max_size()."); + } // end if + + new_storage.allocate(allocated_size); + + try + { + // construct the range to the newly allocated storage + m_storage.uninitialized_copy(first, last, new_storage.begin()); + } // end try + catch(...) + { + // something went wrong, so destroy & deallocate the new storage + // XXX seems like this destroys too many elements -- should just be last - first instead of requested_size + iterator new_storage_end = new_storage.begin(); + thrust::advance(new_storage_end, requested_size); + m_storage.destroy(new_storage.begin(), new_storage_end); + new_storage.deallocate(); + + // rethrow + throw; + } // end catch +} // end vector_base::allocate_and_copy() + + +} // end detail + +template + void swap(detail::vector_base &a, + detail::vector_base &b) +{ + a.swap(b); +} // end swap() + + + +namespace detail +{ + +// iterator tags match +template +bool vector_equal(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, + thrust::detail::true_type) +{ + return thrust::equal(first1, last1, first2); +} + +// iterator tags differ +template +bool vector_equal(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, + thrust::detail::false_type) +{ + typename thrust::iterator_difference::type n = thrust::distance(first1,last1); + + typedef typename thrust::iterator_system::type FromSystem1; + typedef typename thrust::iterator_system::type FromSystem2; + + // bring both ranges to the host system + // note that these copies are no-ops if the range is already convertible to the host system + FromSystem1 from_system1; + FromSystem2 from_system2; + thrust::host_system_tag to_system; + thrust::detail::move_to_system rng1(from_system1, to_system, first1, last1); + thrust::detail::move_to_system rng2(from_system2, to_system, first2, first2 + n); + + return thrust::equal(rng1.begin(), rng1.end(), rng2.begin()); +} + +template +bool vector_equal(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2) +{ + typedef 
typename thrust::iterator_system::type system1; + typedef typename thrust::iterator_system::type system2; + + // dispatch on the sameness of the two systems + return vector_equal(first1, last1, first2, + thrust::detail::is_same()); +} + +} // end namespace detail + + + + +template +bool operator==(const detail::vector_base& lhs, + const detail::vector_base& rhs) +{ + return lhs.size() == rhs.size() && detail::vector_equal(lhs.begin(), lhs.end(), rhs.begin()); +} + +template +bool operator==(const detail::vector_base& lhs, + const std::vector& rhs) +{ + return lhs.size() == rhs.size() && detail::vector_equal(lhs.begin(), lhs.end(), rhs.begin()); +} + +template +bool operator==(const std::vector& lhs, + const detail::vector_base& rhs) +{ + return lhs.size() == rhs.size() && detail::vector_equal(lhs.begin(), lhs.end(), rhs.begin()); +} + +template +bool operator!=(const detail::vector_base& lhs, + const detail::vector_base& rhs) +{ + return !(lhs == rhs); +} + +template +bool operator!=(const detail::vector_base& lhs, + const std::vector& rhs) +{ + return !(lhs == rhs); +} + +template +bool operator!=(const std::vector& lhs, + const detail::vector_base& rhs) +{ + return !(lhs == rhs); +} + +} // end thrust + diff --git a/compat/thrust/device_allocator.h b/compat/thrust/device_allocator.h new file mode 100644 index 0000000..a5462d1 --- /dev/null +++ b/compat/thrust/device_allocator.h @@ -0,0 +1,123 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file device_allocator.h + * \brief An allocator which creates new elements in device memory + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +/*! \addtogroup memory_management_classes Memory Management Classes + * \{ + */ + +template class device_allocator; + +/*! \p device_allocator is a device memory allocator. + * This class is a specialization for \c void. + * + * \see device_ptr + * \see http://www.sgi.com/tech/stl/Allocators.html + */ +template<> + class device_allocator +{ + public: + /*! Type of element allocated, \c void. */ + typedef void value_type; + + /*! Pointer to allocation, \c device_ptr. */ + typedef device_ptr pointer; + + /*! \c const pointer to allocation, \c device_ptr. */ + typedef device_ptr const_pointer; + + /*! Type of allocation size, \c std::size_t. */ + typedef std::size_t size_type; + + /*! Type of allocation difference, \c pointer::difference_type. */ + typedef pointer::difference_type difference_type; + + /*! The \p rebind metafunction provides the type of a \p device_allocator + * instantiated with another type. + * + * \tparam U The other type to use for instantiation. + */ + template + struct rebind + { + /*! The typedef \p other gives the type of the rebound \p device_allocator. + */ + typedef device_allocator other; + }; // end rebind +}; // end device_allocator + +/*! \p device_allocator is a device memory allocator. + * This implementation inherits from \p device_new_allocator. + * + * \see device_ptr + * \see device_new_allocator + * \see http://www.sgi.com/tech/stl/Allocators.html + */ +template + class device_allocator + : public device_new_allocator +{ + public: + /*! The \p rebind metafunction provides the type of a \p device_allocator + * instantiated with another type. + * + * \tparam U The other type to use for instantiation. 
+ */ + template + struct rebind + { + /*! The typedef \p other gives the type of the rebound \p device_allocator. + */ + typedef device_allocator other; + }; // end rebind + + /*! No-argument constructor has no effect. + */ + __host__ __device__ + inline device_allocator() {} + + /*! Copy constructor has no effect. + */ + __host__ __device__ + inline device_allocator(device_allocator const&) {} + + /*! Constructor from other \p allocator has no effect. + */ + template + __host__ __device__ + inline device_allocator(device_allocator const&) {} +}; // end device_allocator + +/*! \} + */ + +} // end thrust + diff --git a/compat/thrust/device_delete.h b/compat/thrust/device_delete.h new file mode 100644 index 0000000..1df3bb6 --- /dev/null +++ b/compat/thrust/device_delete.h @@ -0,0 +1,56 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file device_delete.h + * \brief Deletes variables in device memory + */ + +#pragma once + +#include +#include + +namespace thrust +{ + +/*! \addtogroup deallocation_functions Deallocation Functions + * \ingroup memory_management_functions + * \{ + */ + +/*! \p device_delete deletes a \p device_ptr allocated with + * \p device_new. + * + * \param ptr The \p device_ptr to delete, assumed to have + * been allocated with \p device_new. + * \param n The number of objects to destroy at \p ptr. Defaults to \c 1 + * similar to \p device_new. 
+ * + * \see device_ptr + * \see device_new + */ +template + inline void device_delete(thrust::device_ptr ptr, + const size_t n = 1); + +/*! \} + */ + +} // end thrust + +#include + diff --git a/compat/thrust/device_free.h b/compat/thrust/device_free.h new file mode 100644 index 0000000..a734418 --- /dev/null +++ b/compat/thrust/device_free.h @@ -0,0 +1,68 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file device_free.h + * \brief Deallocates storage allocated by \p device_malloc + */ + +#pragma once + +#include +#include + +namespace thrust +{ + +/*! \addtogroup deallocation_functions Deallocation Functions + * \ingroup memory_management_functions + * \{ + */ + +/*! \p device_free deallocates memory allocated by the function \p device_malloc. + * + * \param ptr A \p device_ptr pointing to memory to be deallocated. + * + * The following code snippet demonstrates how to use \p device_free to + * deallocate memory allocated by \p device_malloc. + * + * \code + * #include + * #include + * ... + * // allocate some integers with device_malloc + * const int N = 100; + * thrust::device_ptr int_array = thrust::device_malloc(N); + * + * // manipulate integers + * ... + * + * // deallocate with device_free + * thrust::device_free(int_array); + * \endcode + * + * \see device_ptr + * \see device_malloc + */ +inline void device_free(thrust::device_ptr ptr); + +/*! 
\} + */ + +} // end thrust + +#include + diff --git a/compat/thrust/device_malloc.h b/compat/thrust/device_malloc.h new file mode 100644 index 0000000..a3b0723 --- /dev/null +++ b/compat/thrust/device_malloc.h @@ -0,0 +1,103 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file device_malloc.h + * \brief Allocates storage in device memory + */ + +#pragma once + +#include +#include +#include // for std::size_t + +namespace thrust +{ + +/*! \addtogroup allocation_functions Allocation Functions + * \ingroup memory_management_functions + * \{ + */ + +/*! This version of \p device_malloc allocates sequential device storage + * for bytes. + * + * \param n The number of bytes to allocate sequentially + * in device memory. + * \return A \p device_ptr to the newly allocated memory. + * + * The following code snippet demonstrates how to use \p device_malloc to + * allocate a range of device memory. + * + * \code + * #include + * #include + * ... + * // allocate some memory with device_malloc + * const int N = 100; + * thrust::device_ptr void_ptr = thrust::device_malloc(N); + * + * // manipulate memory + * ... + * + * // deallocate with device_free + * thrust::device_free(void_ptr); + * \endcode + * + * \see device_ptr + * \see device_free + */ +inline thrust::device_ptr device_malloc(const std::size_t n); + +/*! 
This version of \p device_malloc allocates sequential device storage for + * new objects of the given type. + * + * \param n The number of objects of type T to allocate + * sequentially in device memory. + * \return A \p device_ptr to the newly allocated memory. + * + * The following code snippet demonstrates how to use \p device_malloc to + * allocate a range of device memory. + * + * \code + * #include + * #include + * ... + * // allocate some integers with device_malloc + * const int N = 100; + * thrust::device_ptr int_array = thrust::device_malloc(N); + * + * // manipulate integers + * ... + * + * // deallocate with device_free + * thrust::device_free(int_array); + * \endcode + * + * \see device_ptr + * \see device_free + */ +template + inline thrust::device_ptr device_malloc(const std::size_t n); + +/*! \} + */ + +} // end thrust + +#include + diff --git a/compat/thrust/device_malloc_allocator.h b/compat/thrust/device_malloc_allocator.h new file mode 100644 index 0000000..404a6d2 --- /dev/null +++ b/compat/thrust/device_malloc_allocator.h @@ -0,0 +1,174 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! 
\file device_malloc_allocator.h + * \brief An allocator which allocates storage with \p device_malloc + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + +// forward declarations to WAR circular #includes +template class device_ptr; +template device_ptr device_malloc(const std::size_t n); + +/*! \addtogroup memory_management Memory Management + * \addtogroup memory_management_classes Memory Management Classes + * \ingroup memory_management + * \{ + */ + +/*! \p device_malloc_allocator is a device memory allocator that employs the + * \p device_malloc function for allocation. + * + * \see device_malloc + * \see device_ptr + * \see http://www.sgi.com/tech/stl/Allocators.html + */ +template + class device_malloc_allocator +{ + public: + /*! Type of element allocated, \c T. */ + typedef T value_type; + + /*! Pointer to allocation, \c device_ptr. */ + typedef device_ptr pointer; + + /*! \c const pointer to allocation, \c device_ptr. */ + typedef device_ptr const_pointer; + + /*! Reference to allocated element, \c device_reference. */ + typedef device_reference reference; + + /*! \c const reference to allocated element, \c device_reference. */ + typedef device_reference const_reference; + + /*! Type of allocation size, \c std::size_t. */ + typedef std::size_t size_type; + + /*! Type of allocation difference, \c pointer::difference_type. */ + typedef typename pointer::difference_type difference_type; + + /*! The \p rebind metafunction provides the type of a \p device_malloc_allocator + * instantiated with another type. + * + * \tparam U The other type to use for instantiation. + */ + template + struct rebind + { + /*! The typedef \p other gives the type of the rebound \p device_malloc_allocator. + */ + typedef device_malloc_allocator other; + }; // end rebind + + /*! No-argument constructor has no effect. */ + __host__ __device__ + inline device_malloc_allocator() {} + + /*! 
No-argument destructor has no effect. */ + __host__ __device__ + inline ~device_malloc_allocator() {} + + /*! Copy constructor has no effect. */ + __host__ __device__ + inline device_malloc_allocator(device_malloc_allocator const&) {} + + /*! Constructor from other \p device_malloc_allocator has no effect. */ + template + __host__ __device__ + inline device_malloc_allocator(device_malloc_allocator const&) {} + + /*! Returns the address of an allocated object. + * \return &r. + */ + __host__ __device__ + inline pointer address(reference r) { return &r; } + + /*! Returns the address of an allocated object. + * \return &r. + */ + __host__ __device__ + inline const_pointer address(const_reference r) { return &r; } + + /*! Allocates storage for \p cnt objects. + * \param cnt The number of objects to allocate. + * \return A \p pointer to uninitialized storage for \p cnt objects. + * \note Memory allocated by this function must be deallocated with \p deallocate. + */ + __host__ + inline pointer allocate(size_type cnt, + const_pointer = const_pointer(static_cast(0))) + { + if(cnt > this->max_size()) + { + throw std::bad_alloc(); + } // end if + + return pointer(device_malloc(cnt)); + } // end allocate() + + /*! Deallocates storage for objects allocated with \p allocate. + * \param p A \p pointer to the storage to deallocate. + * \param cnt The size of the previous allocation. + * \note Memory deallocated by this function must previously have been + * allocated with \p allocate. + */ + __host__ + inline void deallocate(pointer p, size_type cnt) + { + device_free(p); + } // end deallocate() + + /*! Returns the largest value \c n for which allocate(n) might succeed. + * \return The largest value \c n for which allocate(n) might succeed. + */ + inline size_type max_size() const + { + return (std::numeric_limits::max)() / sizeof(T); + } // end max_size() + + /*! Compares against another \p device_malloc_allocator for equality.
+ * \return \c true + */ + __host__ __device__ + inline bool operator==(device_malloc_allocator const&) { return true; } + + /*! Compares against another \p device_malloc_allocator for inequality. + * \return \c false + */ + __host__ __device__ + inline bool operator!=(device_malloc_allocator const &a) {return !operator==(a); } +}; // end device_malloc_allocator + +/*! \} + */ + +} // end thrust + + diff --git a/compat/thrust/device_new.h b/compat/thrust/device_new.h new file mode 100644 index 0000000..001d476 --- /dev/null +++ b/compat/thrust/device_new.h @@ -0,0 +1,88 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file device_new.h + * \brief Constructs new elements in device memory + */ + +#pragma once + +#include + +// #include this for size_t +#include +#include + +namespace thrust +{ + +/*! + * \addtogroup allocation_functions Allocation Functions + * \{ + */ + +/*! \p device_new implements the placement \c new operator for types + * resident in device memory. \p device_new calls T's null + * constructor on a array of objects in device memory. + * No memory is allocated by this function. + * + * \param p A \p device_ptr to a region of device memory into which + * to construct one or many Ts. + * \param n The number of objects to construct at \p p. + * \return p, casted to T's type. + * + * \see device_ptr + */ +template + device_ptr device_new(device_ptr p, + const size_t n = 1); + +/*! 
\p device_new implements the placement new operator for types + * resident in device memory. \p device_new calls T's copy + * constructor on an array of objects in device memory. No memory is + * allocated by this function. + * + * \param p A \p device_ptr to a region of device memory into which to + * construct one or many Ts. + * \param exemplar The value from which to copy. + * \param n The number of objects to construct at \p p. + * \return p, cast to T's type. + * + * \see device_ptr + * \see fill + */ +template + device_ptr device_new(device_ptr p, + const T &exemplar, + const size_t n = 1); + +/*! \p device_new implements the new operator for types resident in device memory. + * It allocates device memory large enough to hold \p n new objects of type \c T. + * + * \param n The number of objects to allocate. Defaults to \c 1. + * \return A \p device_ptr to the newly allocated region of device memory. + */ +template + device_ptr device_new(const size_t n = 1); + +/*! \} + */ + +} // end thrust + +#include + diff --git a/compat/thrust/device_new_allocator.h b/compat/thrust/device_new_allocator.h new file mode 100644 index 0000000..527d1fd --- /dev/null +++ b/compat/thrust/device_new_allocator.h @@ -0,0 +1,172 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*!
\file device_new_allocator.h + * \brief An allocator which allocates storage with \p device_new + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + +/*! \addtogroup memory_management Memory Management + * \addtogroup memory_management_classes Memory Management Classes + * \ingroup memory_management + * \{ + */ + +/*! \p device_new_allocator is a device memory allocator that employs the + * \p device_new function for allocation. + * + * \see device_new + * \see device_ptr + * \see http://www.sgi.com/tech/stl/Allocators.html + */ +template + class device_new_allocator +{ + public: + /*! Type of element allocated, \c T. */ + typedef T value_type; + + /*! Pointer to allocation, \c device_ptr. */ + typedef device_ptr pointer; + + /*! \c const pointer to allocation, \c device_ptr. */ + typedef device_ptr const_pointer; + + /*! Reference to allocated element, \c device_reference. */ + typedef device_reference reference; + + /*! \c const reference to allocated element, \c device_reference. */ + typedef device_reference const_reference; + + /*! Type of allocation size, \c std::size_t. */ + typedef std::size_t size_type; + + /*! Type of allocation difference, \c pointer::difference_type. */ + typedef typename pointer::difference_type difference_type; + + /*! The \p rebind metafunction provides the type of a \p device_new_allocator + * instantiated with another type. + * + * \tparam U The other type to use for instantiation. + */ + template + struct rebind + { + /*! The typedef \p other gives the type of the rebound \p device_new_allocator. + */ + typedef device_new_allocator other; + }; // end rebind + + /*! No-argument constructor has no effect. */ + __host__ __device__ + inline device_new_allocator() {} + + /*! No-argument destructor has no effect. */ + __host__ __device__ + inline ~device_new_allocator() {} + + /*! Copy constructor has no effect. 
*/ + __host__ __device__ + inline device_new_allocator(device_new_allocator const&) {} + + /*! Constructor from other \p device_new_allocator has no effect. */ + template + __host__ __device__ + inline device_new_allocator(device_new_allocator const&) {} + + /*! Returns the address of an allocated object. + * \return &r. + */ + __host__ __device__ + inline pointer address(reference r) { return &r; } + + /*! Returns the address of an allocated object. + * \return &r. + */ + __host__ __device__ + inline const_pointer address(const_reference r) { return &r; } + + /*! Allocates storage for \p cnt objects. + * \param cnt The number of objects to allocate. + * \return A \p pointer to uninitialized storage for \p cnt objects. + * \note Memory allocated by this function must be deallocated with \p deallocate. + */ + __host__ + inline pointer allocate(size_type cnt, + const_pointer = const_pointer(static_cast(0))) + { + if(cnt > this->max_size()) + { + throw std::bad_alloc(); + } // end if + + // use "::operator new" rather than keyword new + return pointer(device_new(cnt)); + } // end allocate() + + /*! Deallocates storage for objects allocated with \p allocate. + * \param p A \p pointer to the storage to deallocate. + * \param cnt The size of the previous allocation. + * \note Memory deallocated by this function must previously have been + * allocated with \p allocate. + */ + __host__ + inline void deallocate(pointer p, size_type cnt) + { + // use "::operator delete" rather than keyword delete + device_delete(p); + } // end deallocate() + + /*! Returns the largest value \c n for which allocate(n) might succeed. + * \return The largest value \c n for which allocate(n) might succeed. + */ + __host__ __device__ + inline size_type max_size() const + { + return std::numeric_limits::max THRUST_PREVENT_MACRO_SUBSTITUTION () / sizeof(T); + } // end max_size() + + /*! Compares against another \p device_new_allocator for equality.
+ * \return \c true + */ + __host__ __device__ + inline bool operator==(device_new_allocator const&) { return true; } + + /*! Compares against another \p device_new_allocator for inequality. + * \return \c false + */ + __host__ __device__ + inline bool operator!=(device_new_allocator const &a) {return !operator==(a); } +}; // end device_new_allocator + +/*! \} + */ + +} // end thrust + + diff --git a/compat/thrust/device_ptr.h b/compat/thrust/device_ptr.h new file mode 100644 index 0000000..dfc7e90 --- /dev/null +++ b/compat/thrust/device_ptr.h @@ -0,0 +1,170 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file device_ptr.h + * \brief A pointer to a variable which resides in the "device" system's memory space + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ + +/*! \addtogroup memory_management Memory Management + * \addtogroup memory_management_classes Memory Management Classes + * \ingroup memory_management + * \{ + */ + +// forward declarations +template class device_reference; + +/*! \p device_ptr stores a pointer to an object allocated in device memory. This type + * provides type safety when dispatching standard algorithms on ranges resident in + * device memory. + * + * \p device_ptr has pointer semantics: it may be dereferenced safely from the host and + * may be manipulated with pointer arithmetic.
+ * + * \p device_ptr can be created with the functions device_malloc, device_new, or + * device_pointer_cast, or by explicitly calling its constructor with a raw pointer. + * + * The raw pointer encapsulated by a \p device_ptr may be obtained by either its get + * method or the \p raw_pointer_cast free function. + * + * \note \p device_ptr is not a smart pointer; it is the programmer's responsibility to + * deallocate memory pointed to by \p device_ptr. + * + * \see device_malloc + * \see device_new + * \see device_pointer_cast + * \see raw_pointer_cast + */ +template + class device_ptr + : public thrust::pointer< + T, + thrust::device_system_tag, + thrust::device_reference, + thrust::device_ptr + > +{ + private: + typedef thrust::pointer< + T, + thrust::device_system_tag, + thrust::device_reference, + thrust::device_ptr + > super_t; + + public: + /*! \p device_ptr's null constructor initializes its raw pointer to \c 0. + */ + __host__ __device__ + device_ptr() : super_t() {} + + /*! \p device_ptr's copy constructor is templated to allow copying to a + * device_ptr from a T *. + * + * \param ptr A raw pointer to copy from, presumed to point to a location in + * device memory. + */ + template + __host__ __device__ + explicit device_ptr(OtherT *ptr) : super_t(ptr) {} + + /*! \p device_ptr's copy constructor allows copying from another device_ptr with related type. + * \param other The \p device_ptr to copy from. + */ + template + __host__ __device__ + device_ptr(const device_ptr &other) : super_t(other) {} + + /*! \p device_ptr's assignment operator allows assigning from another \p device_ptr with related type. + * \param other The other \p device_ptr to copy from. + * \return *this + */ + template + __host__ __device__ + device_ptr &operator=(const device_ptr &other) + { + super_t::operator=(other); + return *this; + } + +// declare these members for the purpose of Doxygenating them +// they actually exist in a derived-from class +#if 0 + /*! 
This method returns this \p device_ptr's raw pointer. + * \return This \p device_ptr's raw pointer. + */ + __host__ __device__ + T *get(void) const; +#endif // end doxygen-only members +}; // end device_ptr + +/*! This operator outputs the value of a \p device_ptr's raw pointer to a \p std::basic_ostream. + * + * \param os The std::basic_ostream of interest. + * \param p The device_ptr of interest. + * \return os. + */ +template +inline std::basic_ostream &operator<<(std::basic_ostream &os, const device_ptr &p); + +/*! \} + */ + + +/*! + * \addtogroup memory_management_functions Memory Management Functions + * \ingroup memory_management + * \{ + */ + +/*! \p device_pointer_cast creates a device_ptr from a raw pointer which is presumed to point + * to a location in device memory. + * + * \param ptr A raw pointer, presumed to point to a location in device memory. + * \return A device_ptr wrapping ptr. + */ +template +__host__ __device__ +inline device_ptr device_pointer_cast(T *ptr); + +/*! This version of \p device_pointer_cast creates a copy of a device_ptr from another device_ptr. + * This version is included for symmetry with \p raw_pointer_cast. + * + * \param ptr A device_ptr. + * \return A copy of \p ptr. + */ +template +__host__ __device__ +inline device_ptr device_pointer_cast(const device_ptr &ptr); + +/*! \} + */ + +} // end thrust + +#include +#include + diff --git a/compat/thrust/device_reference.h b/compat/thrust/device_reference.h new file mode 100644 index 0000000..edae2b5 --- /dev/null +++ b/compat/thrust/device_reference.h @@ -0,0 +1,969 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file device_reference.h + * \brief A reference to a variable which resides in the "device" system's memory space + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +/*! \addtogroup memory_management_classes Memory Management Classes + * \ingroup memory_management + * \{ + */ + +/*! \p device_reference acts as a reference-like object to an object stored in device memory. + * \p device_reference is not intended to be used directly; rather, this type + * is the result of dereferencing a \p device_ptr. Similarly, taking the address of + * a \p device_reference yields a \p device_ptr. + * + * \p device_reference may often be used from host code in place of operations defined on + * its associated \c value_type.
For example, when \p device_reference refers to an + * arithmetic type, arithmetic operations on it are legal: + * + * \code + * #include + * + * int main(void) + * { + * thrust::device_vector vec(1, 13); + * + * thrust::device_reference ref_to_thirteen = vec[0]; + * + * int x = ref_to_thirteen + 1; + * + * // x is 14 + * + * return 0; + * } + * \endcode + * + * Similarly, we can print the value of \c ref_to_thirteen in the above code by using an + * \c iostream: + * + * \code + * #include + * #include + * + * int main(void) + * { + * thrust::device_vector vec(1, 13); + * + * thrust::device_reference ref_to_thirteen = vec[0]; + * + * std::cout << ref_to_thirteen << std::endl; + * + * // 13 is printed + * + * return 0; + * } + * \endcode + * + * Of course, we needn't explicitly create a \p device_reference in the previous + * example, because one is returned by \p device_vector's bracket operator. A more natural + * way to print the value of a \p device_vector element might be: + * + * \code + * #include + * #include + * + * int main(void) + * { + * thrust::device_vector vec(1, 13); + * + * std::cout << vec[0] << std::endl; + * + * // 13 is printed + * + * return 0; + * } + * \endcode + * + * These kinds of operations should be used sparingly in performance-critical code, because + * they imply a potentially expensive copy between host and device space. + * + * Some operations which are possible with regular objects are impossible with their + * corresponding \p device_reference objects due to the requirements of the C++ language. For + * example, because the member access operator cannot be overloaded, member variables and functions + * of a referent object cannot be directly accessed through its \p device_reference. 
+ * + * The following code, which generates a compiler error, illustrates: + * + * \code + * #include + * + * struct foo + * { + * int x; + * }; + * + * int main(void) + * { + * thrust::device_vector foo_vec(1); + * + * thrust::device_reference foo_ref = foo_vec[0]; + * + * foo_ref.x = 13; // ERROR: x cannot be accessed through foo_ref + * + * return 0; + * } + * \endcode + * + * Instead, a host space copy must be created to access \c foo's \c x member: + * + * \code + * #include + * + * struct foo + * { + * int x; + * }; + * + * int main(void) + * { + * thrust::device_vector foo_vec(1); + * + * // create a local host-side foo object + * foo host_foo; + * host_foo.x = 13; + * + * thrust::device_reference foo_ref = foo_vec[0]; + * + * foo_ref = host_foo; + * + * // foo_ref's x member is 13 + * + * return 0; + * } + * \endcode + * + * Another common case where a \p device_reference cannot directly be used in place of + * its referent object occurs when passing them as parameters to functions like \c printf + * which have varargs parameters. Because varargs parameters must be Plain Old Data, a + * \p device_reference to a POD type requires a cast when passed to \c printf: + * + * \code + * #include + * #include + * + * int main(void) + * { + * thrust::device_vector vec(1,13); + * + * // vec[0] must be cast to int when passing to printf + * printf("%d\n", (int) vec[0]); + * + * return 0; + * } + * \endcode + * + * \see device_ptr + * \see device_vector + */ +template + class device_reference + : public thrust::reference< + T, + thrust::device_ptr, + thrust::device_reference + > +{ + private: + typedef thrust::reference< + T, + thrust::device_ptr, + thrust::device_reference + > super_t; + + public: + /*! The type of the value referenced by this type of \p device_reference. + */ + typedef typename super_t::value_type value_type; + + /*! The type of the expression &ref, where ref is a \p device_reference. + */ + typedef typename super_t::pointer pointer; + + /*! 
This copy constructor accepts a const reference to another + * \p device_reference. After this \p device_reference is constructed, + * it shall refer to the same object as \p other. + * + * \param other A \p device_reference to copy from. + * + * The following code snippet demonstrates the semantics of this + * copy constructor. + * + * \code + * #include + * #include + * ... + * thrust::device_vector v(1,0); + * thrust::device_reference ref = v[0]; + * + * // ref equals the object at v[0] + * assert(ref == v[0]); + * + * // the address of ref equals the address of v[0] + * assert(&ref == &v[0]); + * + * // modifying v[0] modifies ref + * v[0] = 13; + * assert(ref == 13); + * \endcode + * + * \note This constructor is templated primarily to allow initialization of + * device_reference from device_reference. + */ + template + __host__ __device__ + device_reference(const device_reference &other, + typename thrust::detail::enable_if_convertible< + typename device_reference::pointer, + pointer + >::type * = 0) + : super_t(other) + {} + + /*! This copy constructor initializes this \p device_reference + * to refer to an object pointed to by the given \p device_ptr. After + * this \p device_reference is constructed, it shall refer to the + * object pointed to by \p ptr. + * + * \param ptr A \p device_ptr to copy from. + * + * The following code snippet demonstrates the semantic of this + * copy constructor. + * + * \code + * #include + * #include + * ... + * thrust::device_vector v(1,0); + * thrust::device_ptr ptr = &v[0]; + * thrust::device_reference ref(ptr); + * + * // ref equals the object pointed to by ptr + * assert(ref == *ptr); + * + * // the address of ref equals ptr + * assert(&ref == ptr); + * + * // modifying *ptr modifies ref + * *ptr = 13; + * assert(ref == 13); + * \endcode + */ + __host__ __device__ + explicit device_reference(const pointer &ptr) + : super_t(ptr) + {} + + /*! 
This assignment operator assigns the value of the object referenced by + * the given \p device_reference to the object referenced by this + * \p device_reference. + * + * \param other The \p device_reference to assign from. + * \return *this + */ + template + __host__ __device__ + device_reference &operator=(const device_reference &other); + + /*! Assignment operator assigns the value of the given value to the + * value referenced by this \p device_reference. + * + * \param x The value to assign from. + * \return *this + */ + __host__ __device__ + device_reference &operator=(const value_type &x); + +// declare these members for the purpose of Doxygenating them +// they actually exist in a derived-from class +#if 0 + /*! Address-of operator returns a \p device_ptr pointing to the object + * referenced by this \p device_reference. It does not return the + * address of this \p device_reference. + * + * \return A \p device_ptr pointing to the object this + * \p device_reference references. + */ + __host__ __device__ + pointer operator&(void) const; + + /*! Conversion operator converts this \p device_reference to T + * by returning a copy of the object referenced by this + * \p device_reference. + * + * \return A copy of the object referenced by this \p device_reference. + */ + __host__ __device__ + operator value_type (void) const; + + /*! swaps the value this \p device_reference references with another. + * \p other The other \p device_reference with which to swap. + */ + __host__ __device__ + void swap(device_reference &other); + + /*! Prefix increment operator increments the object referenced by this + * \p device_reference. + * + * \return *this + * + * The following code snippet demonstrates the semantics of + * \p device_reference's prefix increment operator. + * + * \code + * #include + * #include + * ... 
+ * thrust::device_vector v(1,0); + * thrust::device_ptr ptr = &v[0]; + * thrust::device_reference ref(ptr); + * + * // ref equals 0 + * assert(ref == 0); + * + * // the object pointed to by ptr equals 0 + * assert(*ptr == 0); + * + * // v[0] equals 0 + * assert(v[0] == 0); + * + * // increment ref + * ++ref; + * + * // ref equals 1 + * assert(ref == 1); + * + * // the object pointed to by ptr equals 1 + * assert(*ptr == 1); + * + * // v[0] equals 1 + * assert(v[0] == 1); + * \endcode + * + * \note The increment executes as if it were executed on the host. + * This may change in a later version. + */ + device_reference &operator++(void); + + /*! Postfix increment operator copies the object referenced by this + * \p device_reference, increments the object referenced by this + * \p device_reference, and returns the copy. + * + * \return A copy of the object referenced by this \p device_reference + * before being incremented. + * + * The following code snippet demonstrates the semantics of + * \p device_reference's postfix increment operator. + * + * \code + * #include + * #include + * ... + * thrust::device_vector v(1,0); + * thrust::device_ptr ptr = &v[0]; + * thrust::device_reference ref(ptr); + * + * // ref equals 0 + * assert(ref == 0); + * + * // the object pointed to by ptr equals 0 + * assert(*ptr == 0); + * + * // v[0] equals 0 + * assert(v[0] == 0); + * + * // increment ref + * int x = ref++; + * + * // x equals 0 + * assert(x == 0); + * + * // ref equals 1 + * assert(ref == 1); + * + * // the object pointed to by ptr equals 1 + * assert(*ptr == 1); + * + * // v[0] equals 1 + * assert(v[0] == 1); + * \endcode + * + * \note The increment executes as if it were executed on the host. + * This may change in a later version. + */ + value_type operator++(int); + + /*! Addition assignment operator add-assigns the object referenced by this + * \p device_reference and returns this \p device_reference. + * + * \param rhs The right hand side of the add-assignment.
+ * \return *this. + * + * The following code snippet demonstrates the semantics of + * \p device_reference's addition assignment operator. + * + * \code + * #include + * #include + * ... + * thrust::device_vector v(1,0); + * thrust::device_ptr ptr = &v[0]; + * thrust::device_reference ref(ptr); + * + * // ref equals 0 + * assert(ref == 0); + * + * // the object pointed to by ptr equals 0 + * assert(*ptr == 0); + * + * // v[0] equals 0 + * assert(v[0] == 0); + * + * // add-assign ref + * ref += 5; + * + * // ref equals 5 + * assert(ref == 5); + * + * // the object pointed to by ptr equals 5 + * assert(*ptr == 5); + * + * // v[0] equals 5 + * assert(v[0] == 5); + * \endcode + * + * \note The add-assignment executes as if it were executed on the host. + * This may change in a later version. + */ + device_reference &operator+=(const T &rhs); + + /*! Prefix decrement operator decrements the object referenced by this + * \p device_reference. + * + * \return *this + * + * The following code snippet demonstrates the semantics of + * \p device_reference's prefix decrement operator. + * + * \code + * #include + * #include + * ... + * thrust::device_vector v(1,0); + * thrust::device_ptr ptr = &v[0]; + * thrust::device_reference ref(ptr); + * + * // ref equals 0 + * assert(ref == 0); + * + * // the object pointed to by ptr equals 0 + * assert(*ptr == 0); + * + * // v[0] equals 0 + * assert(v[0] == 0); + * + * // decrement ref + * --ref; + * + * // ref equals -1 + * assert(ref == -1); + * + * // the object pointed to by ptr equals -1 + * assert(*ptr == -1); + * + * // v[0] equals -1 + * assert(v[0] == -1); + * \endcode + * + * \note The decrement executes as if it were executed on the host. + * This may change in a later version. + */ + device_reference &operator--(void); + + /*! Postfix decrement operator copies the object referenced by this + * \p device_reference, decrements the object referenced by this + * \p device_reference, and returns the copy.
+ * + * \return A copy of the object referenced by this \p device_reference + * before being decremented. + * + * The following code snippet demonstrates the semantics of + * \p device_reference's postfix decrement operator. + * + * \code + * #include + * #include + * ... + * thrust::device_vector v(1,0); + * thrust::device_ptr ptr = &v[0]; + * thrust::device_reference ref(ptr); + * + * // ref equals 0 + * assert(ref == 0); + * + * // the object pointed to by ptr equals 0 + * assert(*ptr == 0); + * + * // v[0] equals 0 + * assert(v[0] == 0); + * + * // decrement ref + * int x = ref--; + * + * // x equals 0 + * assert(x == 0); + * + * // ref equals -1 + * assert(ref == -1); + * + * // the object pointed to by ptr equals -1 + * assert(*ptr == -1); + * + * // v[0] equals -1 + * assert(v[0] == -1); + * \endcode + * + * \note The decrement executes as if it were executed on the host. + * This may change in a later version. + */ + value_type operator--(int); + + /*! Subtraction assignment operator subtract-assigns the object referenced by this + * \p device_reference and returns this \p device_reference. + * + * \param rhs The right hand side of the subtraction-assignment. + * \return *this. + * + * The following code snippet demonstrates the semantics of + * \p device_reference's subtraction assignment operator. + * + * \code + * #include + * #include + * ... + * thrust::device_vector v(1,0); + * thrust::device_ptr ptr = &v[0]; + * thrust::device_reference ref(ptr); + * + * // ref equals 0 + * assert(ref == 0); + * + * // the object pointed to by ptr equals 0 + * assert(*ptr == 0); + * + * // v[0] equals 0 + * assert(v[0] == 0); + * + * // subtract-assign ref + * ref -= 5; + * + * // ref equals -5 + * assert(ref == -5); + * + * // the object pointed to by ptr equals -5 + * assert(*ptr == -5); + * + * // v[0] equals -5 + * assert(v[0] == -5); + * \endcode + * + * \note The subtract-assignment executes as if it were executed on the host.
+ * This may change in a later version. + */ + device_reference &operator-=(const T &rhs); + + /*! Multiplication assignment operator multiply-assigns the object referenced by this + * \p device_reference and returns this \p device_reference. + * + * \param rhs The right hand side of the multiply-assignment. + * \return *this. + * + * The following code snippet demonstrates the semantics of + * \p device_reference's multiply assignment operator. + * + * \code + * #include + * #include + * ... + * thrust::device_vector v(1,1); + * thrust::device_ptr ptr = &v[0]; + * thrust::device_reference ref(ptr); + * + * // ref equals 1 + * assert(ref == 1); + * + * // the object pointed to by ptr equals 1 + * assert(*ptr == 1); + * + * // v[0] equals 1 + * assert(v[0] == 1); + * + * // multiply-assign ref + * ref *= 5; + * + * // ref equals 5 + * assert(ref == 5); + * + * // the object pointed to by ptr equals 5 + * assert(*ptr == 5); + * + * // v[0] equals 5 + * assert(v[0] == 5); + * \endcode + * + * \note The multiply-assignment executes as if it were executed on the host. + * This may change in a later version. + */ + device_reference &operator*=(const T &rhs); + + /*! Division assignment operator divide-assigns the object referenced by this + * \p device_reference and returns this \p device_reference. + * + * \param rhs The right hand side of the divide-assignment. + * \return *this. + * + * The following code snippet demonstrates the semantics of + * \p device_reference's divide assignment operator. + * + * \code + * #include + * #include + * ...
+ * thrust::device_vector v(1,5); + * thrust::device_ptr ptr = &v[0]; + * thrust::device_reference ref(ptr); + * + * // ref equals 5 + * assert(ref == 5); + * + * // the object pointed to by ptr equals 5 + * assert(*ptr == 5); + * + * // v[0] equals 5 + * assert(v[0] == 5); + * + * // divide-assign ref + * ref /= 5; + * + * // ref equals 1 + * assert(ref == 1); + * + * // the object pointed to by ptr equals 1 + * assert(*ptr == 1); + * + * // v[0] equals 1 + * assert(v[0] == 1); + * \endcode + * + * \note The divide-assignment executes as if it were executed on the host. + * This may change in a later version. + */ + device_reference &operator/=(const T &rhs); + + /*! Modulus assignment operator modulus-assigns the object referenced by this + * \p device_reference and returns this \p device_reference. + * + * \param rhs The right hand side of the modulus-assignment. + * \return *this. + * + * The following code snippet demonstrates the semantics of + * \p device_reference's modulus assignment operator. + * + * \code + * #include + * #include + * ... + * thrust::device_vector v(1,5); + * thrust::device_ptr ptr = &v[0]; + * thrust::device_reference ref(ptr); + * + * // ref equals 5 + * assert(ref == 5); + * + * // the object pointed to by ptr equals 5 + * assert(*ptr == 5); + * + * // v[0] equals 5 + * assert(v[0] == 5); + * + * // modulus-assign ref + * ref %= 5; + * + * // ref equals 0 + * assert(ref == 0); + * + * // the object pointed to by ptr equals 0 + * assert(*ptr == 0); + * + * // v[0] equals 0 + * assert(v[0] == 0); + * \endcode + * + * \note The modulus-assignment executes as if it were executed on the host. + * This may change in a later version. + */ + device_reference &operator%=(const T &rhs); + + /*! Bitwise left shift assignment operator left shift-assigns the object referenced by this + * \p device_reference and returns this \p device_reference. + * + * \param rhs The right hand side of the left shift-assignment. + * \return *this.
+ * + * The following code snippet demonstrates the semantics of + * \p device_reference's left shift assignment operator. + * + * \code + * #include + * #include + * ... + * thrust::device_vector v(1,1); + * thrust::device_ptr ptr = &v[0]; + * thrust::device_reference ref(ptr); + * + * // ref equals 1 + * assert(ref == 1); + * + * // the object pointed to by ptr equals 1 + * assert(*ptr == 1); + * + * // v[0] equals 1 + * assert(v[0] == 1); + * + * // left shift-assign ref + * ref <<= 1; + * + * // ref equals 2 + * assert(ref == 2); + * + * // the object pointed to by ptr equals 2 + * assert(*ptr == 2); + * + * // v[0] equals 2 + * assert(v[0] == 2); + * \endcode + * + * \note The left shift-assignment executes as if it were executed on the host. + * This may change in a later version. + */ + device_reference &operator<<=(const T &rhs); + + /*! Bitwise right shift assignment operator right shift-assigns the object referenced by this + * \p device_reference and returns this \p device_reference. + * + * \param rhs The right hand side of the right shift-assignment. + * \return *this. + * + * The following code snippet demonstrates the semantics of + * \p device_reference's right shift assignment operator. + * + * \code + * #include + * #include + * ... + * thrust::device_vector v(1,2); + * thrust::device_ptr ptr = &v[0]; + * thrust::device_reference ref(ptr); + * + * // ref equals 2 + * assert(ref == 2); + * + * // the object pointed to by ptr equals 2 + * assert(*ptr == 2); + * + * // v[0] equals 2 + * assert(v[0] == 2); + * + * // right shift-assign ref + * ref >>= 1; + * + * // ref equals 1 + * assert(ref == 1); + * + * // the object pointed to by ptr equals 1 + * assert(*ptr == 1); + * + * // v[0] equals 1 + * assert(v[0] == 1); + * \endcode + * + * \note The right shift-assignment executes as if it were executed on the host. + * This may change in a later version. + */ + device_reference &operator>>=(const T &rhs); + + /*!
Bitwise AND assignment operator AND-assigns the object referenced by this + * \p device_reference and returns this \p device_reference. + * + * \param rhs The right hand side of the AND-assignment. + * \return *this. + * + * The following code snippet demonstrates the semantics of + * \p device_reference's AND assignment operator. + * + * \code + * #include + * #include + * ... + * thrust::device_vector v(1,1); + * thrust::device_ptr ptr = &v[0]; + * thrust::device_reference ref(ptr); + * + * // ref equals 1 + * assert(ref == 1); + * + * // the object pointed to by ptr equals 1 + * assert(*ptr == 1); + * + * // v[0] equals 1 + * assert(v[0] == 1); + * + * // AND-assign ref + * ref &= 0; + * + * // ref equals 0 + * assert(ref == 0); + * + * // the object pointed to by ptr equals 0 + * assert(*ptr == 0); + * + * // v[0] equals 0 + * assert(v[0] == 0); + * \endcode + * + * \note The AND-assignment executes as if it were executed on the host. + * This may change in a later version. + */ + device_reference &operator&=(const T &rhs); + + /*! Bitwise OR assignment operator OR-assigns the object referenced by this + * \p device_reference and returns this \p device_reference. + * + * \param rhs The right hand side of the OR-assignment. + * \return *this. + * + * The following code snippet demonstrates the semantics of + * \p device_reference's OR assignment operator. + * + * \code + * #include + * #include + * ...
+ * thrust::device_vector v(1,0); + * thrust::device_ptr ptr = &v[0]; + * thrust::device_reference ref(ptr); + * + * // ref equals 0 + * assert(ref == 0); + * + * // the object pointed to by ptr equals 0 + * assert(*ptr == 0); + * + * // v[0] equals 0 + * assert(v[0] == 0); + * + * // OR-assign ref + * ref |= 1; + * + * // ref equals 1 + * assert(ref == 1); + * + * // the object pointed to by ptr equals 1 + * assert(*ptr == 1); + * + * // v[0] equals 1 + * assert(v[0] == 1); + * \endcode + * + * \note The OR-assignment executes as if it were executed on the host. + * This may change in a later version. + */ + device_reference &operator|=(const T &rhs); + + /*! Bitwise XOR assignment operator XOR-assigns the object referenced by this + * \p device_reference and returns this \p device_reference. + * + * \param rhs The right hand side of the XOR-assignment. + * \return *this. + * + * The following code snippet demonstrates the semantics of + * \p device_reference's XOR assignment operator. + * + * \code + * #include + * #include + * ... + * thrust::device_vector v(1,1); + * thrust::device_ptr ptr = &v[0]; + * thrust::device_reference ref(ptr); + * + * // ref equals 1 + * assert(ref == 1); + * + * // the object pointed to by ptr equals 1 + * assert(*ptr == 1); + * + * // v[0] equals 1 + * assert(v[0] == 1); + * + * // XOR-assign ref + * ref ^= 1; + * + * // ref equals 0 + * assert(ref == 0); + * + * // the object pointed to by ptr equals 0 + * assert(*ptr == 0); + * + * // v[0] equals 0 + * assert(v[0] == 0); + * \endcode + * + * \note The XOR-assignment executes as if it were executed on the host. + * This may change in a later version. + */ + device_reference &operator^=(const T &rhs); +#endif // end doxygen-only members +}; // end device_reference + +/*! swaps the value of one \p device_reference with another. + * \p x The first \p device_reference of interest. + * \p y The second \p device_reference of interest.
+ */ +template +__host__ __device__ +void swap(device_reference &x, device_reference &y); + +/*! \} + */ + +} // end thrust + +#include + diff --git a/compat/thrust/device_vector.h b/compat/thrust/device_vector.h new file mode 100644 index 0000000..8c9d005 --- /dev/null +++ b/compat/thrust/device_vector.h @@ -0,0 +1,418 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file device_vector.h + * \brief A dynamically-sizable array of elements which reside in the "device" memory space + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +// forward declaration of host_vector +template class host_vector; + +/*! \addtogroup container_classes Container Classes + * \addtogroup device_containers Device Containers + * \ingroup container_classes + * \{ + */ + +/*! A \p device_vector is a container that supports random access to elements, + * constant time removal of elements at the end, and linear time insertion + * and removal of elements at the beginning or in the middle. The number of + * elements in a \p device_vector may vary dynamically; memory management is + * automatic. The memory associated with a \p device_vector resides in the memory + * space of a parallel device. 
+ * + * \see http://www.sgi.com/tech/stl/Vector.html + * \see host_vector + */ +template > + class device_vector + : public detail::vector_base +{ + private: + typedef detail::vector_base Parent; + + public: + /*! \cond */ + typedef typename Parent::size_type size_type; + typedef typename Parent::value_type value_type; + /*! \endcond */ + + /*! This constructor creates an empty \p device_vector. + */ + __host__ + device_vector(void) + :Parent() {} + + /*! This constructor creates a \p device_vector with the given + * size. + * \param n The number of elements to initially create. + */ + __host__ + explicit device_vector(size_type n) + :Parent(n) {} + + /*! This constructor creates a \p device_vector with copies + * of an exemplar element. + * \param n The number of elements to initially create. + * \param value An element to copy. + */ + __host__ + explicit device_vector(size_type n, const value_type &value) + :Parent(n,value) {} + + /*! Copy constructor copies from an exemplar \p device_vector. + * \param v The \p device_vector to copy. + */ + __host__ + device_vector(const device_vector &v) + :Parent(v) {} + + /*! Copy constructor copies from an exemplar \p device_vector with different type. + * \param v The \p device_vector to copy. + */ + template + __device__ + device_vector(const device_vector &v) + :Parent(v) {} + + /*! Assign operator copies from an exemplar \p device_vector with different type. + * \param v The \p device_vector to copy. + */ + template + __device__ + device_vector &operator=(const device_vector &v) + { Parent::operator=(v); return *this; } + + /*! Copy constructor copies from an exemplar \c std::vector. + * \param v The std::vector to copy. + */ + template + __host__ + device_vector(const std::vector &v) + :Parent(v) {} + + /*! Assign operator copies from an exemplar std::vector. + * \param v The std::vector to copy. + */ + template + __host__ + device_vector &operator=(const std::vector &v) + { Parent::operator=(v); return *this;} + + /*!
Copy constructor copies from an exemplar \p host_vector with possibly different type. + * \param v The \p host_vector to copy. + */ + template + __host__ + device_vector(const host_vector &v); + + /*! Assign operator copies from an exemplar \p host_vector. + * \param v The \p host_vector to copy. + */ + template + __host__ + device_vector &operator=(const host_vector &v) + { Parent::operator=(v); return *this; } + + /*! This constructor builds a \p device_vector from a range. + * \param first The beginning of the range. + * \param last The end of the range. + */ + template + __host__ + device_vector(InputIterator first, InputIterator last) + :Parent(first,last) {} + +// declare these members for the purpose of Doxygenating them +// they actually exist in a derived-from class +#if 0 + /*! \brief Resizes this vector to the specified number of elements. + * \param new_size Number of elements this vector should contain. + * \param x Data with which new elements should be populated. + * \throw std::length_error If n exceeds max_size(). + * + * This method will resize this vector to the specified number of + * elements. If the number is smaller than this vector's current + * size this vector is truncated, otherwise this vector is + * extended and new elements are populated with given data. + */ + void resize(size_type new_size, const value_type &x = value_type()); + + /*! Returns the number of elements in this vector. + */ + size_type size(void) const; + + /*! Returns the size() of the largest possible vector. + * \return The largest possible return value of size(). + */ + size_type max_size(void) const; + + /*! \brief If n is less than or equal to capacity(), this call has no effect. + * Otherwise, this method is a request for allocation of additional memory. If + * the request is successful, then capacity() is greater than or equal to + * n; otherwise, capacity() is unchanged. In either case, size() is unchanged. + * \throw std::length_error If n exceeds max_size().
+ */ + void reserve(size_type n); + + /*! Returns the number of elements which have been reserved in this + * vector. + */ + size_type capacity(void) const; + + /*! This method shrinks the capacity of this vector to exactly + * fit its elements. + */ + void shrink_to_fit(void); + + /*! \brief Subscript access to the data contained in this vector_dev. + * \param n The index of the element for which data should be accessed. + * \return Read/write reference to data. + * + * This operator allows for easy, array-style, data access. + * Note that data access with this operator is unchecked and + * out_of_range lookups are not defined. + */ + reference operator[](size_type n); + + /*! \brief Subscript read access to the data contained in this vector_dev. + * \param n The index of the element for which data should be accessed. + * \return Read reference to data. + * + * This operator allows for easy, array-style, data access. + * Note that data access with this operator is unchecked and + * out_of_range lookups are not defined. + */ + const_reference operator[](size_type n) const; + + /*! This method returns an iterator pointing to the beginning of + * this vector. + * \return mStart + */ + iterator begin(void); + + /*! This method returns a const_iterator pointing to the beginning + * of this vector. + * \return mStart + */ + const_iterator begin(void) const; + + /*! This method returns a const_iterator pointing to the beginning + * of this vector. + * \return mStart + */ + const_iterator cbegin(void) const; + + /*! This method returns a reverse_iterator pointing to the beginning of + * this vector's reversed sequence. + * \return A reverse_iterator pointing to the beginning of this + * vector's reversed sequence. + */ + reverse_iterator rbegin(void); + + /*! This method returns a const_reverse_iterator pointing to the beginning of + * this vector's reversed sequence. + * \return A const_reverse_iterator pointing to the beginning of this + * vector's reversed sequence. 
+ */ + const_reverse_iterator rbegin(void) const; + + /*! This method returns a const_reverse_iterator pointing to the beginning of + * this vector's reversed sequence. + * \return A const_reverse_iterator pointing to the beginning of this + * vector's reversed sequence. + */ + const_reverse_iterator crbegin(void) const; + + /*! This method returns an iterator pointing to one element past the + * last of this vector. + * \return begin() + size(). + */ + iterator end(void); + + /*! This method returns a const_iterator pointing to one element past the + * last of this vector. + * \return begin() + size(). + */ + const_iterator end(void) const; + + /*! This method returns a const_iterator pointing to one element past the + * last of this vector. + * \return begin() + size(). + */ + const_iterator cend(void) const; + + /*! This method returns a reverse_iterator pointing to one element past the + * last of this vector's reversed sequence. + * \return rbegin() + size(). + */ + reverse_iterator rend(void); + + /*! This method returns a const_reverse_iterator pointing to one element past the + * last of this vector's reversed sequence. + * \return rbegin() + size(). + */ + const_reverse_iterator rend(void) const; + + /*! This method returns a const_reverse_iterator pointing to one element past the + * last of this vector's reversed sequence. + * \return rbegin() + size(). + */ + const_reverse_iterator crend(void) const; + + /*! This method returns a const_reference referring to the first element of this + * vector. + * \return The first element of this vector. + */ + const_reference front(void) const; + + /*! This method returns a reference pointing to the first element of this + * vector. + * \return The first element of this vector. + */ + reference front(void); + + /*! This method returns a const reference pointing to the last element of + * this vector. + * \return The last element of this vector. + */ + const_reference back(void) const; + + /*! 
This method returns a reference referring to the last element of + * this vector_dev. + * \return The last element of this vector. + */ + reference back(void); + + /*! This method returns a pointer to this vector's first element. + * \return A pointer to the first element of this vector. + */ + pointer data(void); + + /*! This method returns a const_pointer to this vector's first element. + * \return a const_pointer to the first element of this vector. + */ + const_pointer data(void) const; + + /*! This method resizes this vector to 0. + */ + void clear(void); + + /*! This method returns true iff size() == 0. + * \return true if size() == 0; false, otherwise. + */ + bool empty(void) const; + + /*! This method appends the given element to the end of this vector. + * \param x The element to append. + */ + void push_back(const value_type &x); + + /*! This method erases the last element of this vector, invalidating + * all iterators and references to it. + */ + void pop_back(void); + + /*! This method swaps the contents of this vector_base with another vector. + * \param v The vector with which to swap. + */ + void swap(device_vector &v); + + /*! This method removes the element at position pos. + * \param pos The position of the element of interest. + * \return An iterator pointing to the new location of the element that followed the element + * at position pos. + */ + iterator erase(iterator pos); + + /*! This method removes the range of elements [first,last) from this vector. + * \param first The beginning of the range of elements to remove. + * \param last The end of the range of elements to remove. + * \return An iterator pointing to the new location of the element that followed the last + * element in the sequence [first,last). + */ + iterator erase(iterator first, iterator last); + + /*! This method inserts a single copy of a given exemplar value at the + * specified position in this vector. + * \param position The insertion position. 
+ * \param x The exemplar element to copy & insert. + * \return An iterator pointing to the newly inserted element. + */ + iterator insert(iterator position, const T &x); + + /*! This method inserts a copy of an exemplar value to a range at the + * specified position in this vector. + * \param position The insertion position + * \param n The number of insertions to perform. + * \param x The value to replicate and insert. + */ + void insert(iterator position, size_type n, const T &x); + + /*! This method inserts a copy of an input range at the specified position + * in this vector. + * \param position The insertion position. + * \param first The beginning of the range to copy. + * \param last The end of the range to copy. + * + * \tparam InputIterator is a model of Assignable. + */ + template + void insert(iterator position, InputIterator first, InputIterator last); + + /*! This version of \p assign replicates a given exemplar + * \p n times into this vector. + * \param n The number of times to copy \p x. + * \param x The exemplar element to replicate. + */ + void assign(size_type n, const T &x); + + /*! This version of \p assign makes this vector a copy of a given input range. + * \param first The beginning of the range to copy. + * \param last The end of the range to copy. + * + * \tparam InputIterator is a model of Input Iterator. + */ + template + void assign(InputIterator first, InputIterator last); + + /*! This method returns a copy of this vector's allocator. + * \return A copy of the allocator used by this vector. + */ + allocator_type get_allocator(void) const; +#endif // end doxygen-only members +}; // end device_vector + +/*!
\} + */ + +} // end thrust + +#include + + diff --git a/compat/thrust/distance.h b/compat/thrust/distance.h new file mode 100644 index 0000000..67b4194 --- /dev/null +++ b/compat/thrust/distance.h @@ -0,0 +1,76 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file distance.h + * \brief Computes the size of a range + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \addtogroup iterators + * \{ + */ + +/*! \p distance finds the distance between \p first and \p last, i.e. the + * number of times that \p first must be incremented until it is equal to + * \p last. + * + * \param first The beginning of an input range of interest. + * \param last The end of an input range of interest. + * \return The distance between the beginning and end of the input range. + * + * \tparam InputIterator is a model of Input Iterator. + * + * \pre If \c InputIterator meets the requirements of random access iterator, \p last shall be reachable from \p first or + * \p first shall be reachable from \p last; otherwise, \p last shall be reachable from \p first. + * + * The following code snippet demonstrates how to use \p distance to compute + * the distance to one iterator from another. + * + * \code + * #include + * #include + * ... 
+ * thrust::device_vector vec(13); + * thrust::device_vector::iterator iter1 = vec.begin(); + * thrust::device_vector::iterator iter2 = iter1 + 7; + * + * int d = thrust::distance(iter1, iter2); + * + * // d is 7 + * \endcode + * + * \see http://www.sgi.com/tech/stl/distance.html + */ +template + inline typename thrust::iterator_traits::difference_type + distance(InputIterator first, InputIterator last); + +/*! \} // end iterators + */ + +} // end thrust + +#include + diff --git a/compat/thrust/equal.h b/compat/thrust/equal.h new file mode 100644 index 0000000..e96946f --- /dev/null +++ b/compat/thrust/equal.h @@ -0,0 +1,236 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file equal.h + * \brief Equality between ranges + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \addtogroup reductions + * \{ + * \addtogroup comparisons + * \ingroup reductions + * \{ + */ + + +/*! \p equal returns \c true if the two ranges [first1, last1) + * and [first2, first2 + (last1 - first1)) are identical when + * compared element-by-element, and otherwise returns \c false. + * + * This version of \p equal returns \c true if and only if for every + * iterator \c i in [first1, last1), *i == *(first2 + (i - first1)). + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. 
+ * \param first1 The beginning of the first sequence. + * \param last1 The end of the first sequence. + * \param first2 The beginning of the second sequence. + * \return \c true, if the sequences are equal; \c false, otherwise. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * and \p InputIterator1's \c value_type is a model of Equality Comparable, + * and \p InputIterator1's \c value_type can be compared for equality with \c InputIterator2's \c value_type. + * \tparam InputIterator2 is a model of Input Iterator, + * and \p InputIterator2's \c value_type is a model of Equality Comparable, + * and \p InputIterator2's \c value_type can be compared for equality with \c InputIterator1's \c value_type. + * + * The following code snippet demonstrates how to use \p equal to test + * two ranges for equality using the \p thrust::host execution policy: + * + * \code + * #include + * #include + * ... + * int A1[7] = {3, 1, 4, 1, 5, 9, 3}; + * int A2[7] = {3, 1, 4, 2, 8, 5, 7}; + * ... + * bool result = thrust::equal(thrust::host, A1, A1 + 7, A2); + * + * // result == false + * \endcode + * + * \see http://www.sgi.com/tech/stl/equal.html + */ +template +bool equal(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2); + + +/*! \p equal returns \c true if the two ranges [first1, last1) + * and [first2, first2 + (last1 - first1)) are identical when + * compared element-by-element, and otherwise returns \c false. + * + * This version of \p equal returns \c true if and only if for every + * iterator \c i in [first1, last1), *i == *(first2 + (i - first1)). + * + * \param first1 The beginning of the first sequence. + * \param last1 The end of the first sequence. + * \param first2 The beginning of the second sequence. + * \return \c true, if the sequences are equal; \c false, otherwise. 
+ * + * \tparam InputIterator1 is a model of Input Iterator, + * and \p InputIterator1's \c value_type is a model of Equality Comparable, + * and \p InputIterator1's \c value_type can be compared for equality with \c InputIterator2's \c value_type. + * \tparam InputIterator2 is a model of Input Iterator, + * and \p InputIterator2's \c value_type is a model of Equality Comparable, + * and \p InputIterator2's \c value_type can be compared for equality with \c InputIterator1's \c value_type. + * + * The following code snippet demonstrates how to use \p equal to test + * two ranges for equality. + * + * \code + * #include + * ... + * int A1[7] = {3, 1, 4, 1, 5, 9, 3}; + * int A2[7] = {3, 1, 4, 2, 8, 5, 7}; + * ... + * bool result = thrust::equal(A1, A1 + 7, A2); + * + * // result == false + * \endcode + * + * \see http://www.sgi.com/tech/stl/equal.html + */ +template +bool equal(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2); + + +/*! \p equal returns \c true if the two ranges [first1, last1) + * and [first2, first2 + (last1 - first1)) are identical when + * compared element-by-element, and otherwise returns \c false. + * + * This version of \p equal returns \c true if and only if for every + * iterator \c i in [first1, last1), + * binary_pred(*i, *(first2 + (i - first1))) is \c true. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the first sequence. + * \param last1 The end of the first sequence. + * \param first2 The beginning of the second sequence. + * \param binary_pred Binary predicate used to test element equality. + * \return \c true, if the sequences are equal; \c false, otherwise. + * + * \tparam DerivedPolicy The name of the derived execution policy. 
+ * \tparam InputIterator1 is a model of Input Iterator, + * and \p InputIterator1's \c value_type is convertible to \p BinaryPredicate's \c first_argument_type. + * \tparam InputIterator2 is a model of Input Iterator, + * and \p InputIterator2's \c value_type is convertible to \p BinaryPredicate's \c second_argument_type. + * \tparam BinaryPredicate is a model of Binary Predicate. + * + * The following code snippet demonstrates how to use \p equal to compare the + * elements in two ranges modulo 2 using the \p thrust::host execution policy. + * + * \code + * #include + * #include + * ... + * + * struct compare_modulo_two + * { + * __host__ __device__ + * bool operator()(int x, int y) + * { + * return (x % 2) == (y % 2); + * } + * }; + * ... + * int x[6] = {0, 2, 4, 6, 8, 10}; + * int y[6] = {1, 3, 5, 7, 9, 11}; + * + * bool result = thrust::equal(thrust::host, x, x + 6, y, compare_modulo_two()); + * + * // result is false: every element of x is even and every element of y is odd + * \endcode + * + * \see http://www.sgi.com/tech/stl/equal.html + */ +template +bool equal(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate binary_pred); + + +/*! \p equal returns \c true if the two ranges [first1, last1) + * and [first2, first2 + (last1 - first1)) are identical when + * compared element-by-element, and otherwise returns \c false. + * + * This version of \p equal returns \c true if and only if for every + * iterator \c i in [first1, last1), + * binary_pred(*i, *(first2 + (i - first1))) is \c true. + * + * \param first1 The beginning of the first sequence. + * \param last1 The end of the first sequence. + * \param first2 The beginning of the second sequence. + * \param binary_pred Binary predicate used to test element equality. + * \return \c true, if the sequences are equal; \c false, otherwise. + * + * \tparam InputIterator1 is a model of Input Iterator, + * and \p InputIterator1's \c value_type is convertible to \p BinaryPredicate's \c first_argument_type.
+ * \tparam InputIterator2 is a model of Input Iterator, + * and \p InputIterator2's \c value_type is convertible to \p BinaryPredicate's \c second_argument_type. + * \tparam BinaryPredicate is a model of Binary Predicate. + * + * The following code snippet demonstrates how to use \p equal to compare the + * elements in two ranges modulo 2. + * + * \code + * #include + * + * struct compare_modulo_two + * { + * __host__ __device__ + * bool operator()(int x, int y) + * { + * return (x % 2) == (y % 2); + * } + * }; + * ... + * int x[6] = {0, 2, 4, 6, 8, 10}; + * int y[6] = {1, 3, 5, 7, 9, 11}; + * + * bool result = thrust::equal(x, x + 6, y, compare_modulo_two()); + * + * // result is false: every element of x is even and every element of y is odd + * \endcode + * + * \see http://www.sgi.com/tech/stl/equal.html + */ +template +bool equal(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, BinaryPredicate binary_pred); + + +/*! \} // end comparisons + * \} // end reductions + */ + +} // end namespace thrust + +#include + diff --git a/compat/thrust/execution_policy.h b/compat/thrust/execution_policy.h new file mode 100644 index 0000000..a5b61e9 --- /dev/null +++ b/compat/thrust/execution_policy.h @@ -0,0 +1,351 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/execution_policy.h + * \brief Thrust execution policies.
+ */ + +#pragma once + +#include + +// get the definition of thrust::execution_policy +#include + +// #include the host system's execution_policy header +#define __THRUST_HOST_SYSTEM_EXECUTION_POLICY_HEADER <__THRUST_HOST_SYSTEM_ROOT/execution_policy.h> +#include __THRUST_HOST_SYSTEM_EXECUTION_POLICY_HEADER +#undef __THRUST_HOST_SYSTEM_EXECUTION_POLICY_HEADER + +// #include the device system's execution_policy.h header +#define __THRUST_DEVICE_SYSTEM_EXECUTION_POLICY_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/execution_policy.h> +#include __THRUST_DEVICE_SYSTEM_EXECUTION_POLICY_HEADER +#undef __THRUST_DEVICE_SYSTEM_EXECUTION_POLICY_HEADER + +namespace thrust +{ + + +/*! \cond + */ + + +namespace detail +{ + + +typedef thrust::system::__THRUST_HOST_SYSTEM_NAMESPACE::detail::par_t host_t; + + +typedef thrust::system::__THRUST_DEVICE_SYSTEM_NAMESPACE::detail::par_t device_t; + + +} // end detail + + +/*! \endcond + */ + + +/*! \addtogroup execution_policies Parallel Execution Policies + * \{ + */ + + +// define execution_policy for the purpose of Doxygenating it +// it is actually defined elsewhere +#if 0 +/*! \p execution_policy is the base class for all Thrust parallel execution policies + * like \p thrust::host, \p thrust::device, and each backend system's tag type. + * + * Custom user-defined backends should derive a policy from this type in order to + * interoperate with Thrust algorithm dispatch. 
+ * + * The following code snippet demonstrates how to derive a standalone custom execution policy + * from \p thrust::execution_policy to implement a backend which only implements \p for_each: + * + * \code + * #include + * #include + * + * // define a type derived from thrust::execution_policy to distinguish our custom execution policy: + * struct my_policy : thrust::execution_policy {}; + * + * // overload for_each on my_policy + * template + * Iterator for_each(my_policy, Iterator first, Iterator last, Function f) + * { + * std::cout << "Hello, world from for_each(my_policy)!" << std::endl; + * + * for(; first < last; ++first) + * { + * f(*first); + * } + * + * return first; + * } + * + * struct ignore_argument + * { + * void operator()(int) {} + * }; + * + * int main() + * { + * int data[4]; + * + * // dispatch thrust::for_each using our custom policy: + * my_policy exec; + * thrust::for_each(exec, data, data + 4, ignore_argument()); + * + * // can't dispatch thrust::transform because no overload exists for my_policy: + * //thrust::transform(exec, data, data + 4, data, thrust::identity()); // error! + * + * return 0; + * } + * \endcode + * + * \see host_execution_policy + * \see device_execution_policy + */ +template +struct execution_policy : thrust::detail::execution_policy_base +{}; +#endif + + +/*! \p host_execution_policy is the base class for all Thrust parallel execution policies + * which are derived from Thrust's default host backend system configured with the \p THRUST_HOST_SYSTEM + * macro. + * + * Custom user-defined backends which wish to inherit the functionality of Thrust's host backend system + * should derive a policy from this type in order to interoperate with Thrust algorithm dispatch.
+ * + * The following code snippet demonstrates how to derive a standalone custom execution policy from + * \p thrust::host_execution_policy to implement a backend which specializes \p for_each while inheriting + * the behavior of every other algorithm from the host system: + * + * \code + * #include + * #include + * + * // define a type derived from thrust::host_execution_policy to distinguish our custom execution policy: + * struct my_policy : thrust::host_execution_policy {}; + * + * // overload for_each on my_policy + * template + * Iterator for_each(my_policy, Iterator first, Iterator last, Function f) + * { + * std::cout << "Hello, world from for_each(my_policy)!" << std::endl; + * + * for(; first < last; ++first) + * { + * f(*first); + * } + * + * return first; + * } + * + * struct ignore_argument + * { + * void operator()(int) {} + * }; + * + * int main() + * { + * int data[4]; + * + * // dispatch thrust::for_each using our custom policy: + * my_policy exec; + * thrust::for_each(exec, data, data + 4, ignore_argument()); + * + * // dispatch thrust::transform whose behavior our policy inherits + * thrust::transform(exec, data, data + 4, data, thrust::identity()); + * + * return 0; + * } + * \endcode + * + * \see execution_policy + * \see device_execution_policy + */ +template + struct host_execution_policy + : thrust::system::__THRUST_HOST_SYSTEM_NAMESPACE::execution_policy +{}; + + +/*! \p device_execution_policy is the base class for all Thrust parallel execution policies + * which are derived from Thrust's default device backend system configured with the \p THRUST_DEVICE_SYSTEM + * macro. + * + * Custom user-defined backends which wish to inherit the functionality of Thrust's device backend system + * should derive a policy from this type in order to interoperate with Thrust algorithm dispatch.
+ * + * The following code snippet demonstrates how to derive a standalone custom execution policy from + * \p thrust::device_execution_policy to implement a backend which specializes \p for_each while inheriting + * the behavior of every other algorithm from the device system: + * + * \code + * #include + * #include + * + * // define a type derived from thrust::device_execution_policy to distinguish our custom execution policy: + * struct my_policy : thrust::device_execution_policy {}; + * + * // overload for_each on my_policy + * template + * Iterator for_each(my_policy, Iterator first, Iterator last, Function f) + * { + * std::cout << "Hello, world from for_each(my_policy)!" << std::endl; + * + * for(; first < last; ++first) + * { + * f(*first); + * } + * + * return first; + * } + * + * struct ignore_argument + * { + * void operator()(int) {} + * }; + * + * int main() + * { + * int data[4]; + * + * // dispatch thrust::for_each using our custom policy: + * my_policy exec; + * thrust::for_each(exec, data, data + 4, ignore_argument()); + * + * // dispatch thrust::transform whose behavior our policy inherits + * thrust::transform(exec, data, data + 4, data, thrust::identity()); + * + * return 0; + * } + * \endcode + * + * \see execution_policy + * \see host_execution_policy + */ +template + struct device_execution_policy + : thrust::system::__THRUST_DEVICE_SYSTEM_NAMESPACE::execution_policy +{}; + + +/*! \p thrust::host is the default parallel execution policy associated with Thrust's host backend system + * configured by the \p THRUST_HOST_SYSTEM macro. + * + * Instead of relying on implicit algorithm dispatch through iterator system tags, users may directly target + * algorithm dispatch at Thrust's host system by providing \p thrust::host as an algorithm parameter. + * + * Explicit dispatch can be useful in avoiding the introduction of data copies into containers such as + * \p thrust::host_vector.
+ * + * Note that even though \p thrust::host targets the host CPU, it is a parallel execution policy. That is, + * the order that an algorithm invokes functors or dereferences iterators is not defined. + * + * The type of \p thrust::host is implementation-defined. + * + * The following code snippet demonstrates how to use \p thrust::host to explicitly dispatch an invocation + * of \p thrust::for_each to the host backend system: + * + * \code + * #include + * #include + * #include + * + * struct printf_functor + * { + * __host__ __device__ + * void operator()(int x) + * { + * printf("%d\n", x); + * } + * }; + * ... + * int vec[3]; + * vec[0] = 0; vec[1] = 1; vec[2] = 2; + * + * thrust::for_each(thrust::host, vec, vec + 3, printf_functor()); + * + * // 0 1 2 is printed to standard output in some unspecified order + * \endcode + * + * \see host_execution_policy + * \see thrust::device + */ +static const detail::host_t host; + + +/*! \p thrust::device is the default parallel execution policy associated with Thrust's device backend system + * configured by the \p THRUST_DEVICE_SYSTEM macro. + * + * Instead of relying on implicit algorithm dispatch through iterator system tags, users may directly target + * algorithm dispatch at Thrust's device system by providing \p thrust::device as an algorithm parameter. + * + * Explicit dispatch can be useful in avoiding the introduction of data copies into containers such as + * \p thrust::device_vector or to avoid wrapping e.g. raw pointers allocated by the CUDA API with types + * such as \p thrust::device_ptr. + * + * The user must take care to guarantee that the iterators provided to an algorithm are compatible with + * the device backend system. For example, raw pointers allocated by std::malloc typically + * cannot be dereferenced by a GPU. For this reason, raw pointers allocated by host APIs should not be mixed + * with a \p thrust::device algorithm invocation when the device backend is CUDA.
+ * + * The type of \p thrust::device is implementation-defined. + * + * The following code snippet demonstrates how to use \p thrust::device to explicitly dispatch an invocation + * of \p thrust::for_each to the device backend system: + * + * \code + * #include + * #include + * #include + * #include + * + * struct printf_functor + * { + * __host__ __device__ + * void operator()(int x) + * { + * printf("%d\n", x); + * } + * }; + * ... + * thrust::device_vector d_vec(3); + * d_vec[0] = 0; d_vec[1] = 1; d_vec[2] = 2; + * + * thrust::for_each(thrust::device, d_vec.begin(), d_vec.end(), printf_functor()); + * + * // 0 1 2 is printed to standard output in some unspecified order + * \endcode + * + * \see device_execution_policy + * \see thrust::host + */ +static const detail::device_t device; + + +/*! \} + */ + + +} // end thrust + diff --git a/compat/thrust/extrema.h b/compat/thrust/extrema.h new file mode 100644 index 0000000..335bcd1 --- /dev/null +++ b/compat/thrust/extrema.h @@ -0,0 +1,798 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file extrema.h + * \brief Functions for computing extremal values + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ + + +/*! This version of \p min returns the smaller of two values, given a comparison operation. + * \param lhs The first value to compare. + * \param rhs The second value to compare.
+ * \param comp A comparison operation. + * \return The smaller element. + * + * \tparam T is convertible to \p BinaryPredicate's first argument type and to its second argument type. + * \tparam BinaryPredicate is a model of BinaryPredicate. + * + * The following code snippet demonstrates how to use \p min to compute the smaller of two + * key-value objects. + * + * \code + * #include + * ... + * struct key_value + * { + * int key; + * int value; + * }; + * + * struct compare_key_value + * { + * __host__ __device__ + * bool operator()(key_value lhs, key_value rhs) + * { + * return lhs.key < rhs.key; + * } + * }; + * + * ... + * key_value a = {13, 0}; + * key_value b = { 7, 1}; + * + * key_value smaller = thrust::min(a, b, compare_key_value()); + * + * // smaller is {7, 1} + * \endcode + * + * \note Returns the first argument when the arguments are equivalent. + * \see max + */ +template +__host__ __device__ + T min THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs, BinaryPredicate comp); + + +/*! This version of \p min returns the smaller of two values. + * \param lhs The first value to compare. + * \param rhs The second value to compare. + * \return The smaller element. + * + * \tparam T is a model of LessThan Comparable. + * + * The following code snippet demonstrates how to use \p min to compute the smaller of two + * integers. + * + * \code + * #include + * ... + * int a = 13; + * int b = 7; + * + * int smaller = thrust::min(a, b); + * + * // smaller is 7 + * \endcode + * + * \note Returns the first argument when the arguments are equivalent. + * \see max + */ +template +__host__ __device__ + T min THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs); + + +/*! This version of \p max returns the larger of two values, given a comparison operation. + * \param lhs The first value to compare. + * \param rhs The second value to compare. + * \param comp A comparison operation. + * \return The larger element.
+ * + * \tparam T is convertible to \p BinaryPredicate's first argument type and to its second argument type. + * \tparam BinaryPredicate is a model of BinaryPredicate. + * + * The following code snippet demonstrates how to use \p max to compute the larger of two + * key-value objects. + * + * \code + * #include + * ... + * struct key_value + * { + * int key; + * int value; + * }; + * + * struct compare_key_value + * { + * __host__ __device__ + * bool operator()(key_value lhs, key_value rhs) + * { + * return lhs.key < rhs.key; + * } + * }; + * + * ... + * key_value a = {13, 0}; + * key_value b = { 7, 1}; + * + * key_value larger = thrust::max(a, b, compare_key_value()); + * + * // larger is {13, 0} + * \endcode + * + * \note Returns the first argument when the arguments are equivalent. + * \see min + */ +template +__host__ __device__ + T max THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs, BinaryPredicate comp); + + +/*! This version of \p max returns the larger of two values. + * \param lhs The first value to compare. + * \param rhs The second value to compare. + * \return The larger element. + * + * \tparam T is a model of LessThan Comparable. + * + * The following code snippet demonstrates how to use \p max to compute the larger of two + * integers. + * + * \code + * #include + * ... + * int a = 13; + * int b = 7; + * + * int larger = thrust::max(a, b); + * + * // larger is 13 + * \endcode + * + * \note Returns the first argument when the arguments are equivalent. + * \see min + */ +template +__host__ __device__ + T max THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs); + + +/*! \addtogroup reductions + * \{ + * \addtogroup extrema + * \ingroup reductions + * \{ + */ + +/*! \p min_element finds the smallest element in the range [first, last). + * It returns the first iterator \c i in [first, last) + * such that no other iterator in [first, last) points to a value smaller + * than \c *i.
The return value is \p last if and only if [first, last) is an + * empty range. + * + * The two versions of \p min_element differ in how they define whether one element is + * less than another. This version compares objects using \c operator<. Specifically, + * this version of \p min_element returns the first iterator \c i in [first, last) + * such that, for every iterator \c j in [first, last), *j < *i is + * \c false. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \return An iterator pointing to the smallest element of the range [first, last), + * if it is not an empty range; \p last, otherwise. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \c ForwardIterator's \c value_type is a model of + * LessThan Comparable. + * + * \code + * #include + * #include + * ... + * int data[6] = {1, 0, 2, 2, 1, 3}; + * int *result = thrust::min_element(thrust::host, data, data + 6); + * + * // result is data + 1 + * // *result is 0 + * \endcode + * + * \see http://www.sgi.com/tech/stl/min_element.html + */ +template +ForwardIterator min_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last); + + +/*! \p min_element finds the smallest element in the range [first, last). + * It returns the first iterator \c i in [first, last) + * such that no other iterator in [first, last) points to a value smaller + * than \c *i. The return value is \p last if and only if [first, last) is an + * empty range. + * + * The two versions of \p min_element differ in how they define whether one element is + * less than another. This version compares objects using \c operator<. 
Specifically, + * this version of \p min_element returns the first iterator \c i in [first, last) + * such that, for every iterator \c j in [first, last), *j < *i is + * \c false. + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \return An iterator pointing to the smallest element of the range [first, last), + * if it is not an empty range; \p last, otherwise. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \c ForwardIterator's \c value_type is a model of + * LessThan Comparable. + * + * \code + * #include + * ... + * int data[6] = {1, 0, 2, 2, 1, 3}; + * int *result = thrust::min_element(data, data + 6); + * + * // result is data + 1 + * // *result is 0 + * \endcode + * + * \see http://www.sgi.com/tech/stl/min_element.html + */ +template +ForwardIterator min_element(ForwardIterator first, ForwardIterator last); + + +/*! \p min_element finds the smallest element in the range [first, last). + * It returns the first iterator \c i in [first, last) + * such that no other iterator in [first, last) points to a value smaller + * than \c *i. The return value is \p last if and only if [first, last) is an + * empty range. + * + * The two versions of \p min_element differ in how they define whether one element is + * less than another. This version compares objects using a function object \p comp. + * Specifically, this version of \p min_element returns the first iterator \c i in [first, last) + * such that, for every iterator \c j in [first, last), comp(*j, *i) is + * \c false. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param comp A binary predicate used for comparison. + * \return An iterator pointing to the smallest element of the range [first, last), + * if it is not an empty range; \p last, otherwise. 
+ * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator's \c value_type is convertible to both \p comp's + * \c first_argument_type and \c second_argument_type. + * \tparam BinaryPredicate is a model of Binary Predicate. + * + * The following code snippet demonstrates how to use \p min_element to find the smallest element + * of a collection of key-value pairs using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * + * struct key_value + * { + * int key; + * int value; + * }; + * + * struct compare_key_value + * { + * __host__ __device__ + * bool operator()(key_value lhs, key_value rhs) + * { + * return lhs.key < rhs.key; + * } + * }; + * + * ... + * key_value data[4] = { {4,5}, {0,7}, {2,3}, {6,1} }; + * + * key_value *smallest = thrust::min_element(thrust::host, data, data + 4, compare_key_value()); + * + * // smallest == data + 1 + * // *smallest == {0,7} + * \endcode + * + * \see http://www.sgi.com/tech/stl/min_element.html + */ +template +ForwardIterator min_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp); + + +/*! \p min_element finds the smallest element in the range [first, last). + * It returns the first iterator \c i in [first, last) + * such that no other iterator in [first, last) points to a value smaller + * than \c *i. The return value is \p last if and only if [first, last) is an + * empty range. + * + * The two versions of \p min_element differ in how they define whether one element is + * less than another. This version compares objects using a function object \p comp. + * Specifically, this version of \p min_element returns the first iterator \c i in [first, last) + * such that, for every iterator \c j in [first, last), comp(*j, *i) is + * \c false. + * + * \param first The beginning of the sequence. 
+ * \param last The end of the sequence. + * \param comp A binary predicate used for comparison. + * \return An iterator pointing to the smallest element of the range [first, last), + * if it is not an empty range; \p last, otherwise. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator's \c value_type is convertible to both \p comp's + * \c first_argument_type and \c second_argument_type. + * \tparam BinaryPredicate is a model of Binary Predicate. + * + * The following code snippet demonstrates how to use \p min_element to find the smallest element + * of a collection of key-value pairs. + * + * \code + * #include + * + * struct key_value + * { + * int key; + * int value; + * }; + * + * struct compare_key_value + * { + * __host__ __device__ + * bool operator()(key_value lhs, key_value rhs) + * { + * return lhs.key < rhs.key; + * } + * }; + * + * ... + * key_value data[4] = { {4,5}, {0,7}, {2,3}, {6,1} }; + * + * key_value *smallest = thrust::min_element(data, data + 4, compare_key_value()); + * + * // smallest == data + 1 + * // *smallest == {0,7} + * \endcode + * + * \see http://www.sgi.com/tech/stl/min_element.html + */ +template +ForwardIterator min_element(ForwardIterator first, ForwardIterator last, + BinaryPredicate comp); + + +/*! \p max_element finds the largest element in the range [first, last). + * It returns the first iterator \c i in [first, last) + * such that no other iterator in [first, last) points to a value larger + * than \c *i. The return value is \p last if and only if [first, last) is an + * empty range. + * + * The two versions of \p max_element differ in how they define whether one element is + * greater than another. This version compares objects using \c operator<. Specifically, + * this version of \p max_element returns the first iterator \c i in [first, last) + * such that, for every iterator \c j in [first, last), *i < *j is + * \c false. 
+ * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \return An iterator pointing to the largest element of the range [first, last), + * if it is not an empty range; \p last, otherwise. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \c ForwardIterator's \c value_type is a model of + * LessThan Comparable. + * + * \code + * #include + * #include + * ... + * int data[6] = {1, 0, 2, 2, 1, 3}; + * int *result = thrust::max_element(thrust::host, data, data + 6); + * + * // *result == 3 + * \endcode + * + * \see http://www.sgi.com/tech/stl/max_element.html + */ +template +ForwardIterator max_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last); + + +/*! \p max_element finds the largest element in the range [first, last). + * It returns the first iterator \c i in [first, last) + * such that no other iterator in [first, last) points to a value larger + * than \c *i. The return value is \p last if and only if [first, last) is an + * empty range. + * + * The two versions of \p max_element differ in how they define whether one element is + * greater than another. This version compares objects using \c operator<. Specifically, + * this version of \p max_element returns the first iterator \c i in [first, last) + * such that, for every iterator \c j in [first, last), *i < *j is + * \c false. + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \return An iterator pointing to the largest element of the range [first, last), + * if it is not an empty range; \p last, otherwise. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \c ForwardIterator's \c value_type is a model of + * LessThan Comparable.
+ * + * \code + * #include + * ... + * int data[6] = {1, 0, 2, 2, 1, 3}; + * int *result = thrust::max_element(data, data + 6); + * + * // *result == 3 + * \endcode + * + * \see http://www.sgi.com/tech/stl/max_element.html + */ +template +ForwardIterator max_element(ForwardIterator first, ForwardIterator last); + + +/*! \p max_element finds the largest element in the range [first, last). + * It returns the first iterator \c i in [first, last) + * such that no other iterator in [first, last) points to a value larger + * than \c *i. The return value is \p last if and only if [first, last) is an + * empty range. + * + * The two versions of \p max_element differ in how they define whether one element is + * less than another. This version compares objects using a function object \p comp. + * Specifically, this version of \p max_element returns the first iterator \c i in [first, last) + * such that, for every iterator \c j in [first, last), comp(*i, *j) is + * \c false. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param comp A binary predicate used for comparison. + * \return An iterator pointing to the largest element of the range [first, last), + * if it is not an empty range; \p last, otherwise. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator's \c value_type is convertible to both \p comp's + * \c first_argument_type and \c second_argument_type. + * \tparam BinaryPredicate is a model of Binary Predicate. + * + * The following code snippet demonstrates how to use \p max_element to find the largest element + * of a collection of key-value pairs using the \p thrust::host execution policy for parallelization. + * + * \code + * #include + * #include + * ... 
+ * + * struct key_value + * { + * int key; + * int value; + * }; + * + * struct compare_key_value + * { + * __host__ __device__ + * bool operator()(key_value lhs, key_value rhs) + * { + * return lhs.key < rhs.key; + * } + * }; + * + * ... + * key_value data[4] = { {4,5}, {0,7}, {2,3}, {6,1} }; + * + * key_value *largest = thrust::max_element(thrust::host, data, data + 4, compare_key_value()); + * + * // largest == data + 3 + * // *largest == {6,1} + * \endcode + * + * \see http://www.sgi.com/tech/stl/max_element.html + */ +template +ForwardIterator max_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp); + + +/*! \p max_element finds the largest element in the range [first, last). + * It returns the first iterator \c i in [first, last) + * such that no other iterator in [first, last) points to a value larger + * than \c *i. The return value is \p last if and only if [first, last) is an + * empty range. + * + * The two versions of \p max_element differ in how they define whether one element is + * less than another. This version compares objects using a function object \p comp. + * Specifically, this version of \p max_element returns the first iterator \c i in [first, last) + * such that, for every iterator \c j in [first, last), comp(*i, *j) is + * \c false. + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param comp A binary predicate used for comparison. + * \return An iterator pointing to the largest element of the range [first, last), + * if it is not an empty range; \p last, otherwise. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator's \c value_type is convertible to both \p comp's + * \c first_argument_type and \c second_argument_type. + * \tparam BinaryPredicate is a model of Binary Predicate. 
+ * + * The following code snippet demonstrates how to use \p max_element to find the largest element + * of a collection of key-value pairs. + * + * \code + * #include + * + * struct key_value + * { + * int key; + * int value; + * }; + * + * struct compare_key_value + * { + * __host__ __device__ + * bool operator()(key_value lhs, key_value rhs) + * { + * return lhs.key < rhs.key; + * } + * }; + * + * ... + * key_value data[4] = { {4,5}, {0,7}, {2,3}, {6,1} }; + * + * key_value *largest = thrust::max_element(data, data + 4, compare_key_value()); + * + * // largest == data + 3 + * // *largest == {6,1} + * \endcode + * + * \see http://www.sgi.com/tech/stl/max_element.html + */ +template +ForwardIterator max_element(ForwardIterator first, ForwardIterator last, + BinaryPredicate comp); + + +/*! \p minmax_element finds the smallest and largest elements in the range [first, last). + * It returns a pair of iterators (imin, imax) where \c imin is the same iterator + * returned by \p min_element and \c imax is the same iterator returned by \p max_element. + * This function is potentially more efficient than separate calls to \p min_element and \p max_element. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \return A pair of iterator pointing to the smallest and largest elements of the range [first, last), + * if it is not an empty range; \p last, otherwise. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \c ForwardIterator's \c value_type is a model of + * LessThan Comparable. + * + * \code + * #include + * #include + * ... 
+ * int data[6] = {1, 0, 2, 2, 1, 3}; + * thrust::pair result = thrust::minmax_element(thrust::host, data, data + 6); + * + * // result.first is data + 1 + * // result.second is data + 5 + * // *result.first is 0 + * // *result.second is 3 + * \endcode + * + * \see min_element + * \see max_element + * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1840.pdf + */ +template +thrust::pair minmax_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last); + + +/*! \p minmax_element finds the smallest and largest elements in the range [first, last). + * It returns a pair of iterators (imin, imax) where \c imin is the same iterator + * returned by \p min_element and \c imax is the same iterator returned by \p max_element. + * This function is potentially more efficient than separate calls to \p min_element and \p max_element. + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \return A pair of iterator pointing to the smallest and largest elements of the range [first, last), + * if it is not an empty range; \p last, otherwise. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \c ForwardIterator's \c value_type is a model of + * LessThan Comparable. + * + * \code + * #include + * ... + * int data[6] = {1, 0, 2, 2, 1, 3}; + * thrust::pair result = thrust::minmax_element(data, data + 6); + * + * // result.first is data + 1 + * // result.second is data + 5 + * // *result.first is 0 + * // *result.second is 3 + * \endcode + * + * \see min_element + * \see max_element + * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1840.pdf + */ +template +thrust::pair minmax_element(ForwardIterator first, + ForwardIterator last); + + +/*! \p minmax_element finds the smallest and largest elements in the range [first, last). 
+ * It returns a pair of iterators (imin, imax) where \c imin is the same iterator + * returned by \p min_element and \c imax is the same iterator returned by \p max_element. + * This function is potentially more efficient than separate calls to \p min_element and \p max_element. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param comp A binary predicate used for comparison. + * \return A pair of iterator pointing to the smallest and largest elements of the range [first, last), + * if it is not an empty range; \p last, otherwise. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator's \c value_type is convertible to both \p comp's + * \c first_argument_type and \c second_argument_type. + * \tparam BinaryPredicate is a model of Binary Predicate. + * + * The following code snippet demonstrates how to use \p minmax_element to find the smallest and largest elements + * of a collection of key-value pairs using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... + * + * struct key_value + * { + * int key; + * int value; + * }; + * + * struct compare_key_value + * { + * __host__ __device__ + * bool operator()(key_value lhs, key_value rhs) + * { + * return lhs.key < rhs.key; + * } + * }; + * + * ... 
+ * key_value data[4] = { {4,5}, {0,7}, {2,3}, {6,1} }; + * + * thrust::pair extrema = thrust::minmax_element(thrust::host, data, data + 4, compare_key_value()); + * + * // extrema.first == data + 1 + * // *extrema.first == {0,7} + * // extrema.second == data + 3 + * // *extrema.second == {6,1} + * \endcode + * + * \see min_element + * \see max_element + * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1840.pdf + */ +template +thrust::pair minmax_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp); + + +/*! \p minmax_element finds the smallest and largest elements in the range [first, last). + * It returns a pair of iterators (imin, imax) where \c imin is the same iterator + * returned by \p min_element and \c imax is the same iterator returned by \p max_element. + * This function is potentially more efficient than separate calls to \p min_element and \p max_element. + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param comp A binary predicate used for comparison. + * \return A pair of iterator pointing to the smallest and largest elements of the range [first, last), + * if it is not an empty range; \p last, otherwise. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator's \c value_type is convertible to both \p comp's + * \c first_argument_type and \c second_argument_type. + * \tparam BinaryPredicate is a model of Binary Predicate. + * + * The following code snippet demonstrates how to use \p minmax_element to find the smallest and largest elements + * of a collection of key-value pairs. + * + * \code + * #include + * #include + * + * struct key_value + * { + * int key; + * int value; + * }; + * + * struct compare_key_value + * { + * __host__ __device__ + * bool operator()(key_value lhs, key_value rhs) + * { + * return lhs.key < rhs.key; + * } + * }; + * + * ... 
+ * key_value data[4] = { {4,5}, {0,7}, {2,3}, {6,1} }; + * + * thrust::pair extrema = thrust::minmax_element(data, data + 4, compare_key_value()); + * + * // extrema.first == data + 1 + * // *extrema.first == {0,7} + * // extrema.second == data + 3 + * // *extrema.second == {6,1} + * \endcode + * + * \see min_element + * \see max_element + * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1840.pdf + */ +template +thrust::pair minmax_element(ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp); + +/*! \} // end extrema + * \} // end reductions + */ + +} // end thrust + +#include +#include + diff --git a/compat/thrust/fill.h b/compat/thrust/fill.h new file mode 100644 index 0000000..b492cec --- /dev/null +++ b/compat/thrust/fill.h @@ -0,0 +1,205 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file fill.h + * \brief Fills a range with a constant value + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \addtogroup transformations + * \addtogroup filling + * \ingroup transformations + * \{ + */ + + +/*! \p fill assigns the value \p value to every element in + * the range [first, last). That is, for every + * iterator \c i in [first, last), it performs + * the assignment *i = value. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. 
+ * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param value The value to be copied. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable. + * \tparam T is a model of Assignable, + * and \p T's \c value_type is convertible to \p ForwardIterator's \c value_type. + * + * The following code snippet demonstrates how to use \p fill to set a thrust::device_vector's + * elements to a given value using the \p thrust::device execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... + * thrust::device_vector v(4); + * thrust::fill(thrust::device, v.begin(), v.end(), 137); + * + * // v[0] == 137, v[1] == 137, v[2] == 137, v[3] == 137 + * \endcode + * + * \see http://www.sgi.com/tech/stl/fill.html + * \see \c fill_n + * \see \c uninitialized_fill + */ +template + void fill(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const T &value); + + +/*! \p fill assigns the value \p value to every element in + * the range [first, last). That is, for every + * iterator \c i in [first, last), it performs + * the assignment *i = value. + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param value The value to be copied. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable. + * \tparam T is a model of Assignable, + * and \p T's \c value_type is convertible to \p ForwardIterator's \c value_type. + * + * The following code snippet demonstrates how to use \p fill to set a thrust::device_vector's + * elements to a given value. + * + * \code + * #include + * #include + * ... 
+ * thrust::device_vector v(4); + * thrust::fill(v.begin(), v.end(), 137); + * + * // v[0] == 137, v[1] == 137, v[2] == 137, v[3] == 137 + * \endcode + * + * \see http://www.sgi.com/tech/stl/fill.html + * \see \c fill_n + * \see \c uninitialized_fill + */ +template + void fill(ForwardIterator first, + ForwardIterator last, + const T &value); + + +/*! \p fill_n assigns the value \p value to every element in + * the range [first, first+n). That is, for every + * iterator \c i in [first, first+n), it performs + * the assignment *i = value. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param n The size of the sequence. + * \param value The value to be copied. + * \return first + n + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam T is a model of Assignable, + * and \p T's \c value_type is convertible to a type in \p OutputIterator's set of \c value_type. + * + * The following code snippet demonstrates how to use \p fill to set a thrust::device_vector's + * elements to a given value using the \p thrust::device execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... + * thrust::device_vector v(4); + * thrust::fill_n(thrust::device, v.begin(), v.size(), 137); + * + * // v[0] == 137, v[1] == 137, v[2] == 137, v[3] == 137 + * \endcode + * + * \see http://www.sgi.com/tech/stl/fill_n.html + * \see \c fill + * \see \c uninitialized_fill_n + */ +template + OutputIterator fill_n(const thrust::detail::execution_policy_base &exec, + OutputIterator first, + Size n, + const T &value); + + +/*! \p fill_n assigns the value \p value to every element in + * the range [first, first+n). That is, for every + * iterator \c i in [first, first+n), it performs + * the assignment *i = value. 
+ * + * \param first The beginning of the sequence. + * \param n The size of the sequence. + * \param value The value to be copied. + * \return first + n + * + * \tparam OutputIterator is a model of Output Iterator. + * \tparam T is a model of Assignable, + * and \p T's \c value_type is convertible to a type in \p OutputIterator's set of \c value_type. + * + * The following code snippet demonstrates how to use \p fill to set a thrust::device_vector's + * elements to a given value. + * + * \code + * #include + * #include + * ... + * thrust::device_vector v(4); + * thrust::fill_n(v.begin(), v.size(), 137); + * + * // v[0] == 137, v[1] == 137, v[2] == 137, v[3] == 137 + * \endcode + * + * \see http://www.sgi.com/tech/stl/fill_n.html + * \see \c fill + * \see \c uninitialized_fill_n + */ +template + OutputIterator fill_n(OutputIterator first, + Size n, + const T &value); + + +/*! \} // end filling + * \} // transformations + */ + +} // end namespace thrust + +#include + diff --git a/compat/thrust/find.h b/compat/thrust/find.h new file mode 100644 index 0000000..fa01ded --- /dev/null +++ b/compat/thrust/find.h @@ -0,0 +1,382 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file find.h + * \brief Locating values in (unsorted) ranges + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \addtogroup algorithms + */ + +/*! 
\addtogroup searching + * \ingroup algorithms + * \{ + */ + + +/*! \p find returns the first iterator \c i in the range + * [first, last) such that *i == value + * or \c last if no such iterator exists. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first Beginning of the sequence to search. + * \param last End of the sequence to search. + * \param value The value to find. + * \return The first iterator \c i such that *i == value or \c last. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator + * and \p InputIterator's \c value_type is equality comparable to type \c T. + * \tparam T is a model of EqualityComparable. + * + * \code + * #include + * #include + * #include + * ... + * thrust::device_vector input(4); + * + * input[0] = 0; + * input[1] = 5; + * input[2] = 3; + * input[3] = 7; + * + * thrust::device_vector::iterator iter; + * + * iter = thrust::find(thrust::device, input.begin(), input.end(), 3); // returns input.begin() + 2 + * iter = thrust::find(thrust::device, input.begin(), input.end(), 5); // returns input.begin() + 1 + * iter = thrust::find(thrust::device, input.begin(), input.end(), 9); // returns input.end() + * \endcode + * + * \see find_if + * \see mismatch + */ +template +InputIterator find(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + const T& value); + + +/*! \p find returns the first iterator \c i in the range + * [first, last) such that *i == value + * or \c last if no such iterator exists. + * + * \param first Beginning of the sequence to search. + * \param last End of the sequence to search. + * \param value The value to find. + * \return The first iterator \c i such that *i == value or \c last.
+ * + * \tparam InputIterator is a model of Input Iterator + * and \p InputIterator's \c value_type is equality comparable to type \c T. + * \tparam T is a model of EqualityComparable. + * + * \code + * #include + * #include + * ... + * thrust::device_vector input(4); + * + * input[0] = 0; + * input[1] = 5; + * input[2] = 3; + * input[3] = 7; + * + * thrust::device_vector::iterator iter; + * + * iter = thrust::find(input.begin(), input.end(), 3); // returns input.begin() + 2 + * iter = thrust::find(input.begin(), input.end(), 5); // returns input.begin() + 1 + * iter = thrust::find(input.begin(), input.end(), 9); // returns input.end() + * \endcode + * + * \see find_if + * \see mismatch + */ +template +InputIterator find(InputIterator first, + InputIterator last, + const T& value); + + +/*! \p find_if returns the first iterator \c i in the range + * [first, last) such that pred(*i) is \c true + * or \c last if no such iterator exists. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first Beginning of the sequence to search. + * \param last End of the sequence to search. + * \param pred A predicate used to test range elements. + * \return The first iterator \c i such that pred(*i) is \c true, or \c last. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator. + * \tparam Predicate is a model of Predicate. + * + * \code + * #include + * #include + * #include + * ... + * + * struct greater_than_four + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return x > 4; + * } + * }; + * + * struct greater_than_ten + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return x > 10; + * } + * }; + * + * ...
+ * thrust::device_vector input(4); + * + * input[0] = 0; + * input[1] = 5; + * input[2] = 3; + * input[3] = 7; + * + * thrust::device_vector::iterator iter; + * + * iter = thrust::find_if(thrust::device, input.begin(), input.end(), greater_than_four()); // returns input.begin() + 1 + * + * iter = thrust::find_if(thrust::device, input.begin(), input.end(), greater_than_ten()); // returns input.end() + * \endcode + * + * \see find + * \see find_if_not + * \see mismatch + */ +template +InputIterator find_if(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + Predicate pred); + + +/*! \p find_if returns the first iterator \c i in the range + * [first, last) such that pred(*i) is \c true + * or \c last if no such iterator exists. + * + * \param first Beginning of the sequence to search. + * \param last End of the sequence to search. + * \param pred A predicate used to test range elements. + * \return The first iterator \c i such that pred(*i) is \c true, or \c last. + * + * \tparam InputIterator is a model of Input Iterator. + * \tparam Predicate is a model of Predicate. + * + * \code + * #include + * #include + * + * struct greater_than_four + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return x > 4; + * } + * }; + * + * struct greater_than_ten + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return x > 10; + * } + * }; + * + * ... + * thrust::device_vector input(4); + * + * input[0] = 0; + * input[1] = 5; + * input[2] = 3; + * input[3] = 7; + * + * thrust::device_vector::iterator iter; + * + * iter = thrust::find_if(input.begin(), input.end(), greater_than_four()); // returns input.begin() + 1 + * + * iter = thrust::find_if(input.begin(), input.end(), greater_than_ten()); // returns input.end() + * \endcode + * + * \see find + * \see find_if_not + * \see mismatch + */ +template +InputIterator find_if(InputIterator first, + InputIterator last, + Predicate pred); + + +/*! 
\p find_if_not returns the first iterator \c i in the range + * [first, last) such that pred(*i) is \c false + * or \c last if no such iterator exists. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first Beginning of the sequence to search. + * \param last End of the sequence to search. + * \param pred A predicate used to test range elements. + * \return The first iterator \c i such that pred(*i) is \c false, or \c last. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator. + * \tparam Predicate is a model of Predicate. + * + * \code + * #include + * #include + * #include + * ... + * + * struct greater_than_four + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return x > 4; + * } + * }; + * + * struct greater_than_ten + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return x > 10; + * } + * }; + * + * ... + * thrust::device_vector input(4); + * + * input[0] = 0; + * input[1] = 5; + * input[2] = 3; + * input[3] = 7; + * + * thrust::device_vector::iterator iter; + * + * iter = thrust::find_if_not(thrust::device, input.begin(), input.end(), greater_than_four()); // returns input.begin() + * + * iter = thrust::find_if_not(thrust::device, input.begin(), input.end(), greater_than_ten()); // returns input.begin() + * \endcode + * + * \see find + * \see find_if + * \see mismatch + */ +template +InputIterator find_if_not(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + Predicate pred); + + +/*! \p find_if_not returns the first iterator \c i in the range + * [first, last) such that pred(*i) is \c false + * or \c last if no such iterator exists. + * + * \param first Beginning of the sequence to search. + * \param last End of the sequence to search. + * \param pred A predicate used to test range elements.
+ * \return The first iterator \c i such that pred(*i) is \c false, or \c last. + * + * \tparam InputIterator is a model of Input Iterator. + * \tparam Predicate is a model of Predicate. + * + * \code + * #include + * #include + * + * struct greater_than_four + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return x > 4; + * } + * }; + * + * struct greater_than_ten + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return x > 10; + * } + * }; + * + * ... + * thrust::device_vector input(4); + * + * input[0] = 0; + * input[1] = 5; + * input[2] = 3; + * input[3] = 7; + * + * thrust::device_vector::iterator iter; + * + * iter = thrust::find_if_not(input.begin(), input.end(), greater_than_four()); // returns input.begin() + * + * iter = thrust::find_if_not(input.begin(), input.end(), greater_than_ten()); // returns input.begin() + * \endcode + * + * \see find + * \see find_if + * \see mismatch + */ +template +InputIterator find_if_not(InputIterator first, + InputIterator last, + Predicate pred); + +/*! \} // end searching + */ + + +} // end namespace thrust + +#include + diff --git a/compat/thrust/for_each.h b/compat/thrust/for_each.h new file mode 100644 index 0000000..efab9d8 --- /dev/null +++ b/compat/thrust/for_each.h @@ -0,0 +1,278 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! 
\file for_each.h + * \brief Applies a function to each element in a range + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ + + +/*! \addtogroup modifying + * \ingroup transformations + * \{ + */ + + +/*! \p for_each applies the function object \p f to each element + * in the range [first, last); \p f's return value, if any, + * is ignored. Unlike the C++ Standard Template Library function + * std::for_each, this version offers no guarantee on + * order of execution. For this reason, this version of \p for_each + * does not return a copy of the function object. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param f The function object to apply to the range [first, last). + * \return last + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p UnaryFunction's \c argument_type. + * \tparam UnaryFunction is a model of Unary Function, + * and \p UnaryFunction does not apply any non-constant operation through its argument. + * + * The following code snippet demonstrates how to use \p for_each to print the elements + * of a \p thrust::device_vector using the \p thrust::device parallelization policy: + * + * \code + * #include + * #include + * #include + * #include + * ... + * + * struct printf_functor + * { + * __host__ __device__ + * void operator()(int x) + * { + * // note that using printf in a __device__ function requires + * // code compiled for a GPU with compute capability 2.0 or + * // higher (nvcc --arch=sm_20) + * printf("%d\n", x); + * } + * }; + * ...
+ * thrust::device_vector d_vec(3); + * d_vec[0] = 0; d_vec[1] = 1; d_vec[2] = 2; + * + * thrust::for_each(thrust::device, d_vec.begin(), d_vec.end(), printf_functor()); + * + * // 0 1 2 is printed to standard output in some unspecified order + * \endcode + * + * \see for_each_n + * \see http://www.sgi.com/tech/stl/for_each.html + */ +template +InputIterator for_each(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + UnaryFunction f); + + +/*! \p for_each_n applies the function object \p f to each element + * in the range [first, first + n); \p f's return value, if any, + * is ignored. Unlike the C++ Standard Template Library function + * std::for_each, this version offers no guarantee on + * order of execution. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param n The size of the input sequence. + * \param f The function object to apply to the range [first, first + n). + * \return first + n + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p UnaryFunction's \c argument_type. + * \tparam Size is an integral type. + * \tparam UnaryFunction is a model of Unary Function, + * and \p UnaryFunction does not apply any non-constant operation through its argument. + * + * The following code snippet demonstrates how to use \p for_each_n to print the elements + * of a \p device_vector using the \p thrust::device parallelization policy. 
+ * + * \code + * #include + * #include + * #include + * #include + * + * struct printf_functor + * { + * __host__ __device__ + * void operator()(int x) + * { + * // note that using printf in a __device__ function requires + * // code compiled for a GPU with compute capability 2.0 or + * // higher (nvcc --arch=sm_20) + * printf("%d\n", x); + * } + * }; + * ... + * thrust::device_vector d_vec(3); + * d_vec[0] = 0; d_vec[1] = 1; d_vec[2] = 2; + * + * thrust::for_each_n(thrust::device, d_vec.begin(), d_vec.size(), printf_functor()); + * + * // 0 1 2 is printed to standard output in some unspecified order + * \endcode + * + * \see for_each + * \see http://www.sgi.com/tech/stl/for_each.html + */ +template +InputIterator for_each_n(const thrust::detail::execution_policy_base &exec, + InputIterator first, + Size n, + UnaryFunction f); + +/*! \p for_each applies the function object \p f to each element + * in the range [first, last); \p f's return value, if any, + * is ignored. Unlike the C++ Standard Template Library function + * std::for_each, this version offers no guarantee on + * order of execution. For this reason, this version of \p for_each + * does not return a copy of the function object. + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param f The function object to apply to the range [first, last). + * \return last + * + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p UnaryFunction's \c argument_type. + * \tparam UnaryFunction is a model of Unary Function, + * and \p UnaryFunction does not apply any non-constant operation through its argument. + * + * The following code snippet demonstrates how to use \p for_each to print the elements + * of a \p device_vector.
+ * + * \code + * #include + * #include + * #include + * + * struct printf_functor + * { + * __host__ __device__ + * void operator()(int x) + * { + * // note that using printf in a __device__ function requires + * // code compiled for a GPU with compute capability 2.0 or + * // higher (nvcc --arch=sm_20) + * printf("%d\n", x); + * } + * }; + * ... + * thrust::device_vector d_vec(3); + * d_vec[0] = 0; d_vec[1] = 1; d_vec[2] = 2; + * + * thrust::for_each(d_vec.begin(), d_vec.end(), printf_functor()); + * + * // 0 1 2 is printed to standard output in some unspecified order + * \endcode + * + * \see for_each_n + * \see http://www.sgi.com/tech/stl/for_each.html + */ +template +InputIterator for_each(InputIterator first, + InputIterator last, + UnaryFunction f); + + +/*! \p for_each_n applies the function object \p f to each element + * in the range [first, first + n); \p f's return value, if any, + * is ignored. Unlike the C++ Standard Template Library function + * std::for_each, this version offers no guarantee on + * order of execution. + * + * \param first The beginning of the sequence. + * \param n The size of the input sequence. + * \param f The function object to apply to the range [first, first + n). + * \return first + n + * + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p UnaryFunction's \c argument_type. + * \tparam Size is an integral type. + * \tparam UnaryFunction is a model of Unary Function, + * and \p UnaryFunction does not apply any non-constant operation through its argument. + * + * The following code snippet demonstrates how to use \p for_each_n to print the elements + * of a \p device_vector.
+ * + * \code + * #include + * #include + * #include + * + * struct printf_functor + * { + * __host__ __device__ + * void operator()(int x) + * { + * // note that using printf in a __device__ function requires + * // code compiled for a GPU with compute capability 2.0 or + * // higher (nvcc --arch=sm_20) + * printf("%d\n", x); + * } + * }; + * ... + * thrust::device_vector d_vec(3); + * d_vec[0] = 0; d_vec[1] = 1; d_vec[2] = 2; + * + * thrust::for_each_n(d_vec.begin(), d_vec.size(), printf_functor()); + * + * // 0 1 2 is printed to standard output in some unspecified order + * \endcode + * + * \see for_each + * \see http://www.sgi.com/tech/stl/for_each.html + */ +template +InputIterator for_each_n(InputIterator first, + Size n, + UnaryFunction f); + +/*! \} // end modifying + */ + +} // end namespace thrust + +#include + diff --git a/compat/thrust/functional.h b/compat/thrust/functional.h new file mode 100644 index 0000000..b3d47f9 --- /dev/null +++ b/compat/thrust/functional.h @@ -0,0 +1,1079 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file functional.h + * \brief Function objects and tools for manipulating them + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ + +/*! \addtogroup function_objects Function Objects + */ + +template struct unary_traits; + +template struct binary_traits; + +/*!
\addtogroup function_object_adaptors Function Object Adaptors + * \ingroup function_objects + * \{ + */ + +/*! \p unary_function is an empty base class: it contains no member functions + * or member variables, but only type information. The only reason it exists + * is to make it more convenient to define types that are models of the + * concept Adaptable Unary Function. Specifically, any model of Adaptable + * Unary Function must define nested \c typedefs. Those \c typedefs are + * provided by the base class \p unary_function. + * + * The following code snippet demonstrates how to construct an + * Adaptable Unary Function using \p unary_function. + * + * \code + * struct sine : public thrust::unary_function + * { + * __host__ __device__ + * float operator()(float x) { return sinf(x); } + * }; + * \endcode + * + * \note unary_function is currently redundant with the C++ STL type + * \c std::unary_function. We reserve it here for potential additional + * functionality at a later date. + * + * \see http://www.sgi.com/tech/stl/unary_function.html + * \see binary_function + */ +template + struct unary_function + : public std::unary_function +{ +}; // end unary_function + +/*! \p binary_function is an empty base class: it contains no member functions + * or member variables, but only type information. The only reason it exists + * is to make it more convenient to define types that are models of the + * concept Adaptable Binary Function. Specifically, any model of Adaptable + * Binary Function must define nested \c typedefs. Those \c typedefs are + * provided by the base class \p binary_function. + * + * The following code snippet demonstrates how to construct an + * Adaptable Binary Function using \p binary_function. 
+ * + * \code + * struct exponentiate : public thrust::binary_function + * { + * __host__ __device__ + * float operator()(float x, float y) { return powf(x,y); } + * }; + * \endcode + * + * \note binary_function is currently redundant with the C++ STL type + * \c std::binary_function. We reserve it here for potential additional + * functionality at a later date. + * + * \see http://www.sgi.com/tech/stl/binary_function.html + * \see unary_function + */ +template + struct binary_function + : public std::binary_function +{ +}; // end binary_function + +/*! \} + */ + + +/*! \addtogroup predefined_function_objects Predefined Function Objects + * \ingroup function_objects + */ + +/*! \addtogroup arithmetic_operations Arithmetic Operations + * \ingroup predefined_function_objects + * \{ + */ + +/*! \p plus is a function object. Specifically, it is an Adaptable Binary Function. + * If \c f is an object of class plus, and \c x and \c y are objects + * of class \c T, then f(x,y) returns x+y. + * + * \tparam T is a model of Assignable, + * and if \c x and \c y are objects of type \p T, then x+y must be defined and must have a return type that is convertible to \c T. + * + * The following code snippet demonstrates how to use plus to sum two + * device_vectors of \c floats. + * + * \code + * #include + * #include + * #include + * #include + * #include + * ... + * const int N = 1000; + * thrust::device_vector V1(N); + * thrust::device_vector V2(N); + * thrust::device_vector V3(N); + * + * thrust::sequence(V1.begin(), V1.end(), 1); + * thrust::fill(V2.begin(), V2.end(), 75); + * + * thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(), + * thrust::plus()); + * // V3 is now {76, 77, 78, ..., 1075} + * \endcode + * + * \see http://www.sgi.com/tech/stl/plus.html + * \see binary_function + */ +template + struct plus : public binary_function +{ + /*! Function call operator. The return value is lhs + rhs. 
+ */ + __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs + rhs;} +}; // end plus + +/*! \p minus is a function object. Specifically, it is an Adaptable Binary Function. + * If \c f is an object of class minus, and \c x and \c y are objects + * of class \c T, then f(x,y) returns x-y. + * + * \tparam T is a model of Assignable, + * and if \c x and \c y are objects of type \p T, then x-y must be defined and must have a return type that is convertible to \c T. + * + * The following code snippet demonstrates how to use minus to subtract + * a device_vector of \c floats from another. + * + * \code + * #include + * #include + * #include + * #include + * #include + * ... + * const int N = 1000; + * thrust::device_vector V1(N); + * thrust::device_vector V2(N); + * thrust::device_vector V3(N); + * + * thrust::sequence(V1.begin(), V1.end(), 1); + * thrust::fill(V2.begin(), V2.end(), 75); + * + * thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(), + * thrust::minus()); + * // V3 is now {-74, -73, -72, ..., 925} + * \endcode + * + * \see http://www.sgi.com/tech/stl/minus.html + * \see binary_function + */ +template + struct minus : public binary_function +{ + /*! Function call operator. The return value is lhs - rhs. + */ + __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs - rhs;} +}; // end minus + +/*! \p multiplies is a function object. Specifically, it is an Adaptable Binary Function. + * If \c f is an object of class multiplies, and \c x and \c y are objects + * of class \c T, then f(x,y) returns x*y. + * + * \tparam T is a model of Assignable, + * and if \c x and \c y are objects of type \p T, then x*y must be defined and must have a return type that is convertible to \c T. + * + * The following code snippet demonstrates how to use multiplies to multiply + * two device_vectors of \c floats. + * + * \code + * #include + * #include + * #include + * #include + * #include + * ...
+ * const int N = 1000; + * thrust::device_vector V1(N); + * thrust::device_vector V2(N); + * thrust::device_vector V3(N); + * + * thrust::sequence(V1.begin(), V1.end(), 1); + * thrust::fill(V2.begin(), V2.end(), 75); + * + * thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(), + * thrust::multiplies()); + * // V3 is now {75, 150, 225, ..., 75000} + * \endcode + * + * \see http://www.sgi.com/tech/stl/multiplies.html + * \see binary_function + */ +template + struct multiplies : public binary_function +{ + /*! Function call operator. The return value is lhs * rhs. + */ + __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs * rhs;} +}; // end multiplies + +/*! \p divides is a function object. Specifically, it is an Adaptable Binary Function. + * If \c f is an object of class divides, and \c x and \c y are objects + * of class \c T, then f(x,y) returns x/y. + * + * \tparam T is a model of Assignable, + * and if \c x and \c y are objects of type \p T, then x/y must be defined and must have a return type that is convertible to \c T. + * + * The following code snippet demonstrates how to use divides to divide + * one device_vectors of \c floats by another. + * + * \code + * #include + * #include + * #include + * #include + * #include + * ... + * const int N = 1000; + * thrust::device_vector V1(N); + * thrust::device_vector V2(N); + * thrust::device_vector V3(N); + * + * thrust::sequence(V1.begin(), V1.end(), 1); + * thrust::fill(V2.begin(), V2.end(), 75); + * + * thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(), + * thrust::divides()); + * // V3 is now {1/75, 2/75, 3/75, ..., 1000/75} + * \endcode + * + * \see http://www.sgi.com/tech/stl/divides.html + * \see binary_function + */ +template + struct divides : public binary_function +{ + /*! Function call operator. The return value is lhs / rhs. + */ + __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs / rhs;} +}; // end divides + +/*! 
\p modulus is a function object. Specifically, it is an Adaptable Binary Function. + * If \c f is an object of class modulus, and \c x and \c y are objects + * of class \c T, then f(x,y) returns x%y. + * + * \tparam T is a model of Assignable, + * and if \c x and \c y are objects of type \p T, then x%y must be defined and must have a return type that is convertible to \c T. + * + * The following code snippet demonstrates how to use modulus to take + * the modulus of one device_vector of \c floats by another. + * + * \code + * #include + * #include + * #include + * #include + * #include + * ... + * const int N = 1000; + * thrust::device_vector V1(N); + * thrust::device_vector V2(N); + * thrust::device_vector V3(N); + * + * thrust::sequence(V1.begin(), V1.end(), 1); + * thrust::fill(V2.begin(), V2.end(), 75); + * + * thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(), + * thrust::modulus()); + * // V3 is now {1%75, 2%75, 3%75, ..., 1000%75} + * \endcode + * + * \see http://www.sgi.com/tech/stl/modulus.html + * \see binary_function + */ +template + struct modulus : public binary_function +{ + /*! Function call operator. The return value is lhs % rhs. + */ + __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs % rhs;} +}; // end modulus + +/*! \p negate is a function object. Specifically, it is an Adaptable Unary Function. + * If \c f is an object of class negate, and \c x is an object + * of class \c T, then f(x) returns -x. + * + * \tparam T is a model of Assignable, + * and if \c x is an object of type \p T, then -x must be defined and must have a return type that is convertible to \c T. + * + * The following code snippet demonstrates how to use negate to negate + * the elements of a device_vector of \c floats. + * + * \code + * #include + * #include + * #include + * #include + * ...
+ * const int N = 1000; + * thrust::device_vector V1(N); + * thrust::device_vector V2(N); + * + * thrust::sequence(V1.begin(), V1.end(), 1); + * + * thrust::transform(V1.begin(), V1.end(), V2.begin(), + * thrust::negate()); + * // V2 is now {-1, -2, -3, ..., -1000} + * \endcode + * + * \see http://www.sgi.com/tech/stl/negate.html + * \see unary_function + */ +template + struct negate : public unary_function +{ + /*! Function call operator. The return value is -x. + */ + __host__ __device__ T operator()(const T &x) const {return -x;} +}; // end negate + +/*! \} + */ + +/*! \addtogroup comparison_operations Comparison Operations + * \ingroup predefined_function_objects + * \{ + */ + +/*! \p equal_to is a function object. Specifically, it is an Adaptable Binary + * Predicate, which means it is a function object that tests the truth or falsehood + * of some condition. If \c f is an object of class equal_to and \c x + * and \c y are objects of class \c T, then f(x,y) returns \c true if + * x == y and \c false otherwise. + * + * \tparam T is a model of Equality Comparable. + * + * \see http://www.sgi.com/tech/stl/equal_to.html + * \see binary_function + */ +template + struct equal_to : public binary_function +{ + /*! Function call operator. The return value is lhs == rhs. + */ + __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs == rhs;} +}; // end equal_to + +/*! \p not_equal_to is a function object. Specifically, it is an Adaptable Binary + * Predicate, which means it is a function object that tests the truth or falsehood + * of some condition. If \c f is an object of class not_equal_to and \c x + * and \c y are objects of class \c T, then f(x,y) returns \c true if + * x != y and \c false otherwise. + * + * \tparam T is a model of Equality Comparable. + * + * \see http://www.sgi.com/tech/stl/not_equal_to.html + * \see binary_function + */ +template + struct not_equal_to : public binary_function +{ + /*! Function call operator. 
The return value is lhs != rhs. + */ + __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs != rhs;} +}; // end not_equal_to + +/*! \p greater is a function object. Specifically, it is an Adaptable Binary + * Predicate, which means it is a function object that tests the truth or falsehood + * of some condition. If \c f is an object of class greater and \c x + * and \c y are objects of class \c T, then f(x,y) returns \c true if + * x > y and \c false otherwise. + * + * \tparam T is a model of LessThan Comparable. + * + * \see http://www.sgi.com/tech/stl/greater.html + * \see binary_function + */ +template + struct greater : public binary_function +{ + /*! Function call operator. The return value is lhs > rhs. + */ + __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs > rhs;} +}; // end greater + +/*! \p less is a function object. Specifically, it is an Adaptable Binary + * Predicate, which means it is a function object that tests the truth or falsehood + * of some condition. If \c f is an object of class less and \c x + * and \c y are objects of class \c T, then f(x,y) returns \c true if + * x < y and \c false otherwise. + * + * \tparam T is a model of LessThan Comparable. + * + * \see http://www.sgi.com/tech/stl/less.html + * \see binary_function + */ +template + struct less : public binary_function +{ + /*! Function call operator. The return value is lhs < rhs. + */ + __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs < rhs;} +}; // end less + +/*! \p greater_equal is a function object. Specifically, it is an Adaptable Binary + * Predicate, which means it is a function object that tests the truth or falsehood + * of some condition. If \c f is an object of class greater_equal and \c x + * and \c y are objects of class \c T, then f(x,y) returns \c true if + * x >= y and \c false otherwise. + * + * \tparam T is a model of LessThan Comparable. 
+ * + * \see http://www.sgi.com/tech/stl/greater_equal.html + * \see binary_function + */ +template + struct greater_equal : public binary_function +{ + /*! Function call operator. The return value is lhs >= rhs. + */ + __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs >= rhs;} +}; // end greater_equal + +/*! \p less_equal is a function object. Specifically, it is an Adaptable Binary + * Predicate, which means it is a function object that tests the truth or falsehood + * of some condition. If \c f is an object of class less_equal and \c x + * and \c y are objects of class \c T, then f(x,y) returns \c true if + * x <= y and \c false otherwise. + * + * \tparam T is a model of LessThan Comparable. + * + * \see http://www.sgi.com/tech/stl/less_equal.html + * \see binary_function + */ +template + struct less_equal : public binary_function +{ + /*! Function call operator. The return value is lhs <= rhs. + */ + __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs <= rhs;} +}; // end less_equal + +/*! \} + */ + + +/*! \addtogroup logical_operations Logical Operations + * \ingroup predefined_function_objects + * \{ + */ + +/*! \p logical_and is a function object. Specifically, it is an Adaptable Binary Predicate, + * which means it is a function object that tests the truth or falsehood of some condition. + * If \c f is an object of class logical_and and \c x and \c y are objects of + * class \c T (where \c T is convertible to \c bool) then f(x,y) returns \c true + * if and only if both \c x and \c y are \c true. + * + * \tparam T must be convertible to \c bool. + * + * \see http://www.sgi.com/tech/stl/logical_and.html + * \see binary_function + */ +template + struct logical_and : public binary_function +{ + /*! Function call operator. The return value is lhs && rhs. + */ + __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs && rhs;} +}; // end logical_and + +/*! 
\p logical_or is a function object. Specifically, it is an Adaptable Binary Predicate, + * which means it is a function object that tests the truth or falsehood of some condition. + * If \c f is an object of class logical_or and \c x and \c y are objects of + * class \c T (where \c T is convertible to \c bool) then f(x,y) returns \c true + * if and only if either \c x or \c y are \c true. + * + * \tparam T must be convertible to \c bool. + * + * \see http://www.sgi.com/tech/stl/logical_or.html + * \see binary_function + */ +template + struct logical_or : public binary_function +{ + /*! Function call operator. The return value is lhs || rhs. + */ + __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs || rhs;} +}; // end logical_or + +/*! \p logical_not is a function object. Specifically, it is an Adaptable Predicate, + * which means it is a function object that tests the truth or falsehood of some condition. + * If \c f is an object of class logical_not and \c x is an object of + * class \c T (where \c T is convertible to \c bool) then f(x) returns \c true + * if and only if \c x is \c false. + * + * \tparam T must be convertible to \c bool. + * + * The following code snippet demonstrates how to use \p logical_not to transform + * a device_vector of \c bools into its logical complement. + * + * \code + * #include + * #include + * #include + * ... + * thrust::device_vector V; + * ... + * thrust::transform(V.begin(), V.end(), V.begin(), thrust::logical_not()); + * // The elements of V are now the logical complement of what they were prior + * \endcode + * + * \see http://www.sgi.com/tech/stl/logical_not.html + * \see unary_function + */ +template + struct logical_not : public unary_function +{ + /*! Function call operator. The return value is !x. + */ + __host__ __device__ bool operator()(const T &x) const {return !x;} +}; // end logical_not + +/*! \} + */ + +/*! 
\addtogroup bitwise_operations Bitwise Operations + * \ingroup predefined_function_objects + * \{ + */ + +/*! \p bit_and is a function object. Specifically, it is an Adaptable Binary Function. + * If \c f is an object of class bit_and, and \c x and \c y are objects + * of class \c T, then f(x,y) returns x&y. + * + * \tparam T is a model of Assignable, + * and if \c x and \c y are objects of type \p T, then x&y must be defined and must have a return type that is convertible to \c T. + * + * The following code snippet demonstrates how to use bit_and to take + * the bitwise AND of one device_vector of \c ints by another. + * + * \code + * #include + * #include + * #include + * #include + * #include + * ... + * const int N = 1000; + * thrust::device_vector V1(N); + * thrust::device_vector V2(N); + * thrust::device_vector V3(N); + * + * thrust::sequence(V1.begin(), V1.end(), 1); + * thrust::fill(V2.begin(), V2.end(), 13); + * + * thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(), + * thrust::bit_and()); + * // V3 is now {1&13, 2&13, 3&13, ..., 1000&13} + * \endcode + * + * \see binary_function + */ +template + struct bit_and : public binary_function +{ + /*! Function call operator. The return value is lhs & rhs. + */ + __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs & rhs;} +}; // end bit_and + +/*! \p bit_or is a function object. Specifically, it is an Adaptable Binary Function. + * If \c f is an object of class bit_or, and \c x and \c y are objects + * of class \c T, then f(x,y) returns x|y. + * + * \tparam T is a model of Assignable, + * and if \c x and \c y are objects of type \p T, then x|y must be defined and must have a return type that is convertible to \c T. + * + * The following code snippet demonstrates how to use bit_or to take + * the bitwise OR of one device_vector of \c ints by another. + * + * \code + * #include + * #include + * #include + * #include + * #include + * ...
+ * const int N = 1000; + * thrust::device_vector V1(N); + * thrust::device_vector V2(N); + * thrust::device_vector V3(N); + * + * thrust::sequence(V1.begin(), V1.end(), 1); + * thrust::fill(V2.begin(), V2.end(), 13); + * + * thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(), + * thrust::bit_or()); + * // V3 is now {1|13, 2|13, 3|13, ..., 1000|13} + * \endcode + * + * \see binary_function + */ +template + struct bit_or : public binary_function +{ + /*! Function call operator. The return value is lhs | rhs. + */ + __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs | rhs;} +}; // end bit_or + +/*! \p bit_xor is a function object. Specifically, it is an Adaptable Binary Function. + * If \c f is an object of class bit_xor, and \c x and \c y are objects + * of class \c T, then f(x,y) returns x^y. + * + * \tparam T is a model of Assignable, + * and if \c x and \c y are objects of type \p T, then x^y must be defined and must have a return type that is convertible to \c T. + * + * The following code snippet demonstrates how to use bit_xor to take + * the bitwise XOR of one device_vector of \c ints by another. + * + * \code + * #include + * #include + * #include + * #include + * #include + * ... + * const int N = 1000; + * thrust::device_vector V1(N); + * thrust::device_vector V2(N); + * thrust::device_vector V3(N); + * + * thrust::sequence(V1.begin(), V1.end(), 1); + * thrust::fill(V2.begin(), V2.end(), 13); + * + * thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(), + * thrust::bit_xor()); + * // V3 is now {1^13, 2^13, 3^13, ..., 1000^13} + * \endcode + * + * \see binary_function + */ +template + struct bit_xor : public binary_function +{ + /*! Function call operator. The return value is lhs ^ rhs. + */ + __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs ^ rhs;} +}; // end bit_xor + +/*! \} + */ + +/*!
\addtogroup generalized_identity_operations Generalized Identity Operations + * \ingroup predefined_function_objects + * \{ + */ + +/*! \p identity is a Unary Function that represents the identity function: it takes + * a single argument \c x, and returns \c x. + * + * \tparam T No requirements on \p T. + * + * The following code snippet demonstrates that \p identity returns its + * argument. + * + * \code + * #include + * #include + * ... + * int x = 137; + * thrust::identity id; + * assert(x == id(x)); + * \endcode + * + * \see http://www.sgi.com/tech/stl/identity.html + * \see unary_function + */ +template + struct identity : public unary_function +{ + /*! Function call operator. The return value is x. + */ + __host__ __device__ const T &operator()(const T &x) const {return x;} +}; // end identity + +/*! \p maximum is a function object that takes two arguments and returns the greater + * of the two. Specifically, it is an Adaptable Binary Function. If \c f is an + * object of class maximum and \c x and \c y are objects of class \c T + * f(x,y) returns \c x if x > y and \c y, otherwise. + * + * \tparam T is a model of LessThan Comparable. + * + * The following code snippet demonstrates that \p maximum returns its + * greater argument. + * + * \code + * #include + * #include + * ... + * int x = 137; + * int y = -137; + * thrust::maximum mx; + * assert(x == mx(x,y)); + * \endcode + * + * \see minimum + * \see min + * \see binary_function + */ +template + struct maximum : public binary_function +{ + /*! Function call operator. The return value is lhs < rhs ? rhs : lhs. + */ + __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs < rhs ? rhs : lhs;} +}; // end maximum + +/*! \p minimum is a function object that takes two arguments and returns the lesser + * of the two. Specifically, it is an Adaptable Binary Function.
If \c f is an + * object of class minimum and \c x and \c y are objects of class \c T + * f(x,y) returns \c x if x < y and \c y, otherwise. + * + * \tparam T is a model of LessThan Comparable. + * + * The following code snippet demonstrates that \p minimum returns its + * lesser argument. + * + * \code + * #include + * #include + * ... + * int x = 137; + * int y = -137; + * thrust::minimum mn; + * assert(y == mn(x,y)); + * \endcode + * + * \see maximum + * \see max + * \see binary_function + */ +template + struct minimum : public binary_function +{ + /*! Function call operator. The return value is lhs < rhs ? lhs : rhs. + */ + __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs < rhs ? lhs : rhs;} +}; // end minimum + +/*! \p project1st is a function object that takes two arguments and returns + * its first argument; the second argument is unused. It is essentially a + * generalization of identity to the case of a Binary Function. + * + * \code + * #include + * #include + * ... + * int x = 137; + * int y = -137; + * thrust::project1st pj1; + * assert(x == pj1(x,y)); + * \endcode + * + * \see identity + * \see project2nd + * \see binary_function + */ +template + struct project1st : public binary_function +{ + /*! Function call operator. The return value is lhs. + */ + __host__ __device__ const T1 &operator()(const T1 &lhs, const T2 &rhs) const {return lhs;} +}; // end project1st + +/*! \p project2nd is a function object that takes two arguments and returns + * its second argument; the first argument is unused. It is essentially a + * generalization of identity to the case of a Binary Function. + * + * \code + * #include + * #include + * ... + * int x = 137; + * int y = -137; + * thrust::project2nd pj2; + * assert(y == pj2(x,y)); + * \endcode + * + * \see identity + * \see project1st + * \see binary_function + */ +template + struct project2nd : public binary_function +{ + /*! Function call operator. The return value is rhs. 
+ */ + __host__ __device__ const T2 &operator()(const T1 &lhs, const T2 &rhs) const {return rhs;} +}; // end project2nd + +/*! \} + */ + + +// odds and ends + +/*! \addtogroup function_object_adaptors + * \{ + */ + +/*! \p unary_negate is a function object adaptor: it is an Adaptable Predicate + * that represents the logical negation of some other Adaptable Predicate. + * That is: if \c f is an object of class unary_negate, + * then there exists an object \c pred of class \c AdaptablePredicate such + * that f(x) always returns the same value as !pred(x). + * There is rarely any reason to construct a unary_negate directly; + * it is almost always easier to use the helper function not1. + * + * \see http://www.sgi.com/tech/stl/unary_negate.html + * \see not1 + */ +template +struct unary_negate + : public thrust::unary_function +{ + /*! Constructor takes a \p Predicate object to negate. + * \param p The \p Predicate object to negate. + */ + __host__ __device__ + explicit unary_negate(Predicate p) : pred(p){} + + /*! Function call operator. The return value is !pred(x). + */ + __host__ __device__ + bool operator()(const typename Predicate::argument_type& x) { return !pred(x); } + + /*! \cond */ + Predicate pred; + /*! \endcond */ +}; // end unary_negate + +/*! \p not1 is a helper function to simplify the creation of Adaptable Predicates: + * it takes an Adaptable Predicate \p pred as an argument and returns a new Adaptable + * Predicate that represents the negation of \p pred. That is: if \c pred is an object + * of a type which models Adaptable Predicate, then the type of the result + * \c npred of not1(pred) is also a model of Adaptable Predicate and + * npred(x) always returns the same value as !pred(x). + * + * \param pred The Adaptable Predicate to negate. + * \return A new object, npred such that npred(x) always returns + * the same value as !pred(x). + * + * \tparam Predicate is a model of Adaptable Predicate.
+ * + * \see unary_negate + * \see not2 + */ +template + __host__ __device__ + unary_negate not1(const Predicate &pred); + +/*! \p binary_negate is a function object adaptor: it is an Adaptable Binary + * Predicate that represents the logical negation of some other Adaptable + * Binary Predicate. That is: if \c f is an object of class binary_negate, + * then there exists an object \c pred of class \c AdaptableBinaryPredicate + * such that f(x,y) always returns the same value as !pred(x,y). + * There is rarely any reason to construct a binary_negate directly; + * it is almost always easier to use the helper function not2. + * + * \see http://www.sgi.com/tech/stl/binary_negate.html + */ +template +struct binary_negate + : public thrust::binary_function +{ + /*! Constructor takes a \p Predicate object to negate. + * \param p The \p Predicate object to negate. + */ + __host__ __device__ + explicit binary_negate(Predicate p) : pred(p){} + + /*! Function call operator. The return value is !pred(x,y). + */ + __host__ __device__ + bool operator()(const typename Predicate::first_argument_type& x, const typename Predicate::second_argument_type& y) + { + return !pred(x,y); + } + + /*! \cond */ + Predicate pred; + /*! \endcond */ +}; // end binary_negate + +/*! \p not2 is a helper function to simplify the creation of Adaptable Binary Predicates: + * it takes an Adaptable Binary Predicate \p pred as an argument and returns a new Adaptable + * Binary Predicate that represents the negation of \p pred. That is: if \c pred is an object + * of a type which models Adaptable Binary Predicate, then the type of the result + * \c npred of not2(pred) is also a model of Adaptable Binary Predicate and + * npred(x,y) always returns the same value as !pred(x,y). + * + * \param pred The Adaptable Binary Predicate to negate. + * \return A new object, npred such that npred(x,y) always returns + * the same value as !pred(x,y).
+ * + * \tparam BinaryPredicate is a model of Adaptable Binary Predicate. + * + * \see binary_negate + * \see not1 + */ +template + __host__ __device__ + binary_negate not2(const BinaryPredicate &pred); + +/*! \} + */ + + +/*! \addtogroup placeholder_objects Placeholder Objects + * \ingroup function_objects + * \{ + */ + + +/*! \namespace placeholders + * \brief Facilities for constructing simple functions inline. + * + * Objects in the \p thrust::placeholders namespace may be used to create simple arithmetic functions inline + * in an algorithm invocation. Combining placeholders such as \p _1 and \p _2 with arithmetic operations such as \c + + * creates an unnamed function object which applies the operation to their arguments. + * + * The type of placeholder objects is implementation-defined. + * + * The following code snippet demonstrates how to use the placeholders \p _1 and \p _2 with \p thrust::transform + * to implement the SAXPY computation: + * + * \code + * #include + * #include + * #include + * + * int main() + * { + * thrust::device_vector x(4), y(4); + * x[0] = 1; + * x[1] = 2; + * x[2] = 3; + * x[3] = 4; + * + * y[0] = 1; + * y[1] = 1; + * y[2] = 1; + * y[3] = 1; + * + * float a = 2.0f; + * + * using namespace thrust::placeholders; + * + * thrust::transform(x.begin(), x.end(), y.begin(), y.begin(), + * a * _1 + _2 + * ); + * + * // y is now {3, 5, 7, 9} + * } + * \endcode + */ +namespace placeholders +{ + + +/*! \p thrust::placeholders::_1 is the placeholder for the first function parameter. + */ +static const thrust::detail::functional::placeholder<0>::type _1; + + +/*! \p thrust::placeholders::_2 is the placeholder for the second function parameter. + */ +static const thrust::detail::functional::placeholder<1>::type _2; + + +/*! \p thrust::placeholders::_3 is the placeholder for the third function parameter. + */ +static const thrust::detail::functional::placeholder<2>::type _3; + + +/*!
\p thrust::placeholders::_4 is the placeholder for the fourth function parameter. + */ +static const thrust::detail::functional::placeholder<3>::type _4; + + +/*! \p thrust::placeholders::_5 is the placeholder for the fifth function parameter. + */ +static const thrust::detail::functional::placeholder<4>::type _5; + + +/*! \p thrust::placeholders::_6 is the placeholder for the sixth function parameter. + */ +static const thrust::detail::functional::placeholder<5>::type _6; + + +/*! \p thrust::placeholders::_7 is the placeholder for the seventh function parameter. + */ +static const thrust::detail::functional::placeholder<6>::type _7; + + +/*! \p thrust::placeholders::_8 is the placeholder for the eighth function parameter. + */ +static const thrust::detail::functional::placeholder<7>::type _8; + + +/*! \p thrust::placeholders::_9 is the placeholder for the ninth function parameter. + */ +static const thrust::detail::functional::placeholder<8>::type _9; + + +/*! \p thrust::placeholders::_10 is the placeholder for the tenth function parameter. + */ +static const thrust::detail::functional::placeholder<9>::type _10; + + +} // end placeholders + + +/*! \} // placeholder_objects + */ + + +} // end thrust + +#include +#include + diff --git a/compat/thrust/gather.h b/compat/thrust/gather.h new file mode 100644 index 0000000..f2b8233 --- /dev/null +++ b/compat/thrust/gather.h @@ -0,0 +1,438 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file gather.h + * \brief Irregular copying from a source range + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \addtogroup gathering + * \ingroup copying + * \{ + */ + + +/*! \p gather copies elements from a source array into a destination range according + * to a map. For each input iterator \c i in the range [map_first, map_last), the + * value input_first[\*i] is assigned to *(result + (i - map_first)). + * \p RandomAccessIterator must permit random access. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param map_first Beginning of the range of gather locations. + * \param map_last End of the range of gather locations. + * \param input_first Beginning of the source range. + * \param result Beginning of the destination range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. + * \tparam RandomAccessIterator must be a model of Random Access Iterator and \c RandomAccessIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. + * \tparam OutputIterator must be a model of Output Iterator. + * + * \pre The range [map_first, map_last) shall not overlap the range [result, result + (map_last - map_first)). + * + * \remark \p gather is the inverse of thrust::scatter. + * + * The following code snippet demonstrates how to use \p gather to reorder + * a range using the \p thrust::device execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... 
+ * // mark even indices with a 1; odd indices with a 0 + * int values[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; + * thrust::device_vector d_values(values, values + 10); + * + * // gather all even indices into the first half of the range + * // and odd indices to the last half of the range + * int map[10] = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9}; + * thrust::device_vector d_map(map, map + 10); + * + * thrust::device_vector d_output(10); + * thrust::gather(thrust::device, + * d_map.begin(), d_map.end(), + * d_values.begin(), + * d_output.begin()); + * // d_output is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0} + * \endcode + */ +template + OutputIterator gather(const thrust::detail::execution_policy_base &exec, + InputIterator map_first, + InputIterator map_last, + RandomAccessIterator input_first, + OutputIterator result); + + +/*! \p gather copies elements from a source array into a destination range according + * to a map. For each input iterator \c i in the range [map_first, map_last), the + * value input_first[\*i] is assigned to *(result + (i - map_first)). + * \p RandomAccessIterator must permit random access. + * + * \param map_first Beginning of the range of gather locations. + * \param map_last End of the range of gather locations. + * \param input_first Beginning of the source range. + * \param result Beginning of the destination range. + * + * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. + * \tparam RandomAccessIterator must be a model of Random Access Iterator and \c RandomAccessIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. + * \tparam OutputIterator must be a model of Output Iterator. + * + * \pre The range [map_first, map_last) shall not overlap the range [result, result + (map_last - map_first)). + * + * \remark \p gather is the inverse of thrust::scatter. 
+ * + * The following code snippet demonstrates how to use \p gather to reorder + * a range. + * + * \code + * #include + * #include + * ... + * // mark even indices with a 1; odd indices with a 0 + * int values[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; + * thrust::device_vector d_values(values, values + 10); + * + * // gather all even indices into the first half of the range + * // and odd indices to the last half of the range + * int map[10] = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9}; + * thrust::device_vector d_map(map, map + 10); + * + * thrust::device_vector d_output(10); + * thrust::gather(d_map.begin(), d_map.end(), + * d_values.begin(), + * d_output.begin()); + * // d_output is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0} + * \endcode + */ +template + OutputIterator gather(InputIterator map_first, + InputIterator map_last, + RandomAccessIterator input_first, + OutputIterator result); + + +/*! \p gather_if conditionally copies elements from a source array into a destination + * range according to a map. For each input iterator \c i in the range [map_first, map_last), + * such that the value of \*(stencil + (i - map_first)) is \c true, the value + * input_first[\*i] is assigned to *(result + (i - map_first)). + * \p RandomAccessIterator must permit random access. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param map_first Beginning of the range of gather locations. + * \param map_last End of the range of gather locations. + * \param stencil Beginning of the range of predicate values. + * \param input_first Beginning of the source range. + * \param result Beginning of the destination range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. 
+ * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c bool. + * \tparam RandomAccessIterator must be a model of Random Access iterator and \c RandomAccessIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. + * \tparam OutputIterator must be a model of Output Iterator. + * + * \pre The range [map_first, map_last) shall not overlap the range [result, result + (map_last - map_first)). + * \pre The range [stencil, stencil + (map_last - map_first)) shall not overlap the range [result, result + (map_last - map_first)). + * + * \remark \p gather_if is the inverse of \p scatter_if. + * + * The following code snippet demonstrates how to use \p gather_if to gather selected values from + * an input range using the \p thrust::device execution policy: + * + * \code + * #include + * #include + * #include + * ... + * + * int values[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + * thrust::device_vector d_values(values, values + 10); + * + * // select elements at even-indexed locations + * int stencil[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; + * thrust::device_vector d_stencil(stencil, stencil + 10); + * + * // map all even indices into the first half of the range + * // and odd indices to the last half of the range + * int map[10] = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9}; + * thrust::device_vector d_map(map, map + 10); + * + * thrust::device_vector d_output(10, 7); + * thrust::gather_if(thrust::device, + * d_map.begin(), d_map.end(), + * d_stencil.begin(), + * d_values.begin(), + * d_output.begin()); + * // d_output is now {0, 7, 4, 7, 8, 7, 3, 7, 7, 7} + * \endcode + */ +template + OutputIterator gather_if(const thrust::detail::execution_policy_base &exec, + InputIterator1 map_first, + InputIterator1 map_last, + InputIterator2 stencil, + RandomAccessIterator input_first, + OutputIterator result); + + +/*! 
\p gather_if conditionally copies elements from a source array into a destination + * range according to a map. For each input iterator \c i in the range [map_first, map_last), + * such that the value of \*(stencil + (i - map_first)) is \c true, the value + * input_first[\*i] is assigned to *(result + (i - map_first)). + * \p RandomAccessIterator must permit random access. + * + * \param map_first Beginning of the range of gather locations. + * \param map_last End of the range of gather locations. + * \param stencil Beginning of the range of predicate values. + * \param input_first Beginning of the source range. + * \param result Beginning of the destination range. + * + * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. + * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c bool. + * \tparam RandomAccessIterator must be a model of Random Access iterator and \c RandomAccessIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. + * \tparam OutputIterator must be a model of Output Iterator. + * + * \pre The range [map_first, map_last) shall not overlap the range [result, result + (map_last - map_first)). + * \pre The range [stencil, stencil + (map_last - map_first)) shall not overlap the range [result, result + (map_last - map_first)). + * + * \remark \p gather_if is the inverse of \p scatter_if. + * + * The following code snippet demonstrates how to use \p gather_if to gather selected values from + * an input range. + * + * \code + * #include + * #include + * ... 
+ * + * int values[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + * thrust::device_vector d_values(values, values + 10); + * + * // select elements at even-indexed locations + * int stencil[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; + * thrust::device_vector d_stencil(stencil, stencil + 10); + * + * // map all even indices into the first half of the range + * // and odd indices to the last half of the range + * int map[10] = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9}; + * thrust::device_vector d_map(map, map + 10); + * + * thrust::device_vector d_output(10, 7); + * thrust::gather_if(d_map.begin(), d_map.end(), + * d_stencil.begin(), + * d_values.begin(), + * d_output.begin()); + * // d_output is now {0, 7, 4, 7, 8, 7, 3, 7, 7, 7} + * \endcode + */ +template + OutputIterator gather_if(InputIterator1 map_first, + InputIterator1 map_last, + InputIterator2 stencil, + RandomAccessIterator input_first, + OutputIterator result); + + +/*! \p gather_if conditionally copies elements from a source array into a destination + * range according to a map. For each input iterator \c i in the range [map_first, map_last) + * such that the value of pred(\*(stencil + (i - map_first))) is \c true, + * the value input_first[\*i] is assigned to *(result + (i - map_first)). + * \p RandomAccessIterator must permit random access. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param map_first Beginning of the range of gather locations. + * \param map_last End of the range of gather locations. + * \param stencil Beginning of the range of predicate values. + * \param input_first Beginning of the source range. + * \param result Beginning of the destination range. + * \param pred Predicate to apply to the stencil values. + * + * \tparam DerivedPolicy The name of the derived execution policy. 
+ * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. + * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c Predicate's \c argument_type. + * \tparam RandomAccessIterator must be a model of Random Access iterator and \c RandomAccessIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. + * \tparam OutputIterator must be a model of Output Iterator. + * \tparam Predicate must be a model of Predicate. + * + * \pre The range [map_first, map_last) shall not overlap the range [result, result + (map_last - map_first)). + * \pre The range [stencil, stencil + (map_last - map_first)) shall not overlap the range [result, result + (map_last - map_first)). + * + * \remark \p gather_if is the inverse of \p scatter_if. + * + * The following code snippet demonstrates how to use \p gather_if to gather selected values from + * an input range based on an arbitrary selection function using the \p thrust::device execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int x) + * { + * return (x % 2) == 0; + * } + * }; + * ... 
+ * + * int values[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + * thrust::device_vector d_values(values, values + 10); + * + * // we will select an element when our stencil is even + * int stencil[10] = {0, 3, 4, 1, 4, 1, 2, 7, 8, 9}; + * thrust::device_vector d_stencil(stencil, stencil + 10); + * + * // map all even indices into the first half of the range + * // and odd indices to the last half of the range + * int map[10] = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9}; + * thrust::device_vector d_map(map, map + 10); + * + * thrust::device_vector d_output(10, 7); + * thrust::gather_if(thrust::device, + * d_map.begin(), d_map.end(), + * d_stencil.begin(), + * d_values.begin(), + * d_output.begin(), + * is_even()); + * // d_output is now {0, 7, 4, 7, 8, 7, 3, 7, 7, 7} + * \endcode + */ +template + OutputIterator gather_if(const thrust::detail::execution_policy_base &exec, + InputIterator1 map_first, + InputIterator1 map_last, + InputIterator2 stencil, + RandomAccessIterator input_first, + OutputIterator result, + Predicate pred); + + +/*! \p gather_if conditionally copies elements from a source array into a destination + * range according to a map. For each input iterator \c i in the range [map_first, map_last) + * such that the value of pred(\*(stencil + (i - map_first))) is \c true, + * the value input_first[\*i] is assigned to *(result + (i - map_first)). + * \p RandomAccessIterator must permit random access. + * + * \param map_first Beginning of the range of gather locations. + * \param map_last End of the range of gather locations. + * \param stencil Beginning of the range of predicate values. + * \param input_first Beginning of the source range. + * \param result Beginning of the destination range. + * \param pred Predicate to apply to the stencil values. + * + * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. 
+ * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c Predicate's \c argument_type. + * \tparam RandomAccessIterator must be a model of Random Access iterator and \c RandomAccessIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. + * \tparam OutputIterator must be a model of Output Iterator. + * \tparam Predicate must be a model of Predicate. + * + * \pre The range [map_first, map_last) shall not overlap the range [result, result + (map_last - map_first)). + * \pre The range [stencil, stencil + (map_last - map_first)) shall not overlap the range [result, result + (map_last - map_first)). + * + * \remark \p gather_if is the inverse of \p scatter_if. + * + * The following code snippet demonstrates how to use \p gather_if to gather selected values from + * an input range based on an arbitrary selection function. + * + * \code + * #include + * #include + * + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int x) + * { + * return (x % 2) == 0; + * } + * }; + * ... 
+ * + * int values[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + * thrust::device_vector d_values(values, values + 10); + * + * // we will select an element when our stencil is even + * int stencil[10] = {0, 3, 4, 1, 4, 1, 2, 7, 8, 9}; + * thrust::device_vector d_stencil(stencil, stencil + 10); + * + * // map all even indices into the first half of the range + * // and odd indices to the last half of the range + * int map[10] = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9}; + * thrust::device_vector d_map(map, map + 10); + * + * thrust::device_vector d_output(10, 7); + * thrust::gather_if(d_map.begin(), d_map.end(), + * d_stencil.begin(), + * d_values.begin(), + * d_output.begin(), + * is_even()); + * // d_output is now {0, 7, 4, 7, 8, 7, 3, 7, 7, 7} + * \endcode + */ +template + OutputIterator gather_if(InputIterator1 map_first, + InputIterator1 map_last, + InputIterator2 stencil, + RandomAccessIterator input_first, + OutputIterator result, + Predicate pred); + +/*! \} // gathering + */ + +} // end namespace thrust + +#include + diff --git a/compat/thrust/generate.h b/compat/thrust/generate.h new file mode 100644 index 0000000..1d52721 --- /dev/null +++ b/compat/thrust/generate.h @@ -0,0 +1,211 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file generate.h + * \brief Fills a range with values "generated" from a function of no arguments + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! 
\addtogroup transformations + * \{ + */ + + +/*! \p generate assigns the result of invoking \p gen, a function object that takes no arguments, + * to each element in the range [first,last). + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The first element in the range of interest. + * \param last The last element in the range of interest. + * \param gen A function argument, taking no parameters, used to generate values to assign to + * elements in the range [first,last). + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable. + * \tparam Generator is a model of Generator, + * and \p Generator's \c result_type is convertible to \p ForwardIterator's \c value_type. + * + * The following code snippet demonstrates how to fill a \c host_vector with random numbers, + * using the standard C library function \c rand using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * #include + * ... + * thrust::host_vector v(10); + * srand(13); + * thrust::generate(thrust::host, v.begin(), v.end(), rand); + * + * // the elements of v are now pseudo-random numbers + * \endcode + * + * \see generate_n + * \see http://www.sgi.com/tech/stl/generate.html + */ +template + void generate(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + Generator gen); + + +/*! \p generate assigns the result of invoking \p gen, a function object that takes no arguments, + * to each element in the range [first,last). + * + * \param first The first element in the range of interest. + * \param last The last element in the range of interest. + * \param gen A function argument, taking no parameters, used to generate values to assign to + * elements in the range [first,last). 
+ * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable. + * \tparam Generator is a model of Generator, + * and \p Generator's \c result_type is convertible to \p ForwardIterator's \c value_type. + * + * The following code snippet demonstrates how to fill a \c host_vector with random numbers, + * using the standard C library function \c rand. + * + * \code + * #include + * #include + * #include + * #include + * ... + * thrust::host_vector v(10); + * srand(13); + * thrust::generate(v.begin(), v.end(), rand); + * + * // the elements of v are now pseudo-random numbers + * \endcode + * + * \see generate_n + * \see http://www.sgi.com/tech/stl/generate.html + */ +template + void generate(ForwardIterator first, + ForwardIterator last, + Generator gen); + + +/*! \p generate_n assigns the result of invoking \p gen, a function object that takes no arguments, + * to each element in the range [first,first + n). The return value is first + n. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The first element in the range of interest. + * \param n The size of the range of interest. + * \param gen A function argument, taking no parameters, used to generate values to assign to + * elements in the range [first,first + n). + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam Size is an integral type (either signed or unsigned). + * \tparam Generator is a model of Generator, + * and \p Generator's \c result_type is convertible to a type in \p OutputIterator's set of \c value_types. 
+ * + * The following code snippet demonstrates how to fill a \c host_vector with random numbers, + * using the standard C library function \c rand using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * #include + * ... + * thrust::host_vector v(10); + * srand(13); + * thrust::generate_n(thrust::host, v.begin(), 10, rand); + * + * // the elements of v are now pseudo-random numbers + * \endcode + * + * \see generate + * \see http://www.sgi.com/tech/stl/generate.html + */ +template + OutputIterator generate_n(const thrust::detail::execution_policy_base &exec, + OutputIterator first, + Size n, + Generator gen); + + +/*! \p generate_n assigns the result of invoking \p gen, a function object that takes no arguments, + * to each element in the range [first,first + n). The return value is first + n. + * + * \param first The first element in the range of interest. + * \param n The size of the range of interest. + * \param gen A function argument, taking no parameters, used to generate values to assign to + * elements in the range [first,first + n). + * + * \tparam OutputIterator is a model of Output Iterator. + * \tparam Size is an integral type (either signed or unsigned). + * \tparam Generator is a model of Generator, + * and \p Generator's \c result_type is convertible to a type in \p OutputIterator's set of \c value_types. + * + * The following code snippet demonstrates how to fill a \c host_vector with random numbers, + * using the standard C library function \c rand. + * + * \code + * #include + * #include + * #include + * ... + * thrust::host_vector v(10); + * srand(13); + * thrust::generate_n(v.begin(), 10, rand); + * + * // the elements of v are now pseudo-random numbers + * \endcode + * + * \see generate + * \see http://www.sgi.com/tech/stl/generate.html + */ +template + OutputIterator generate_n(OutputIterator first, + Size n, + Generator gen); + + +/*! 
\} // end transformations + */ + +} // end namespace thrust + +#include + diff --git a/compat/thrust/host_vector.h b/compat/thrust/host_vector.h new file mode 100644 index 0000000..11b1ae0 --- /dev/null +++ b/compat/thrust/host_vector.h @@ -0,0 +1,424 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file host_vector.h + * \brief A dynamically-sizable array of elements which reside in the "host" memory space + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +// forward declaration of device_vector +template class device_vector; + +/*! \addtogroup container_classes Container Classes + * \addtogroup host_containers Host Containers + * \ingroup container_classes + * \{ + */ + +/*! A \p host_vector is a container that supports random access to elements, + * constant time removal of elements at the end, and linear time insertion + * and removal of elements at the beginning or in the middle. The number of + * elements in a \p host_vector may vary dynamically; memory management is + * automatic. The memory associated with a \p host_vector resides in the memory + * space of the host associated with a parallel device. + * + * \see http://www.sgi.com/tech/stl/Vector.html + * \see device_vector + */ +template > + class host_vector + : public detail::vector_base +{ + private: + typedef detail::vector_base Parent; + + public: + /*! 
\cond */ + typedef typename Parent::size_type size_type; + typedef typename Parent::value_type value_type; + /*! \endcond */ + + /*! This constructor creates an empty \p host_vector. + */ + __host__ + host_vector(void) + :Parent() {} + + /*! This constructor creates a \p host_vector with the given + * size. + * \param n The number of elements to initially create. + */ + __host__ + explicit host_vector(size_type n) + :Parent(n) {} + + /*! This constructor creates a \p host_vector with copies + * of an exemplar element. + * \param n The number of elements to initially create. + * \param value An element to copy. + */ + __host__ + explicit host_vector(size_type n, const value_type &value) + :Parent(n,value) {} + + /*! Copy constructor copies from an exemplar \p host_vector. + * \param v The \p host_vector to copy. + */ + __host__ + host_vector(const host_vector &v) + :Parent(v) {} + + /*! Assign operator copies from an exemplar \p host_vector. + * \param v The \p host_vector to copy. + */ + __host__ + host_vector &operator=(const host_vector &v) + { Parent::operator=(v); return *this; } + + /*! Copy constructor copies from an exemplar \p host_vector with different type. + * \param v The \p host_vector to copy. + */ + template + __host__ + host_vector(const host_vector &v) + :Parent(v) {} + + /*! Assign operator copies from an exemplar \p host_vector with different type. + * \param v The \p host_vector to copy. + */ + template + __host__ + host_vector &operator=(const host_vector &v) + { Parent::operator=(v); return *this; } + + /*! Copy constructor copies from an exemplar std::vector. + * \param v The std::vector to copy. + */ + template + __host__ + host_vector(const std::vector &v) + :Parent(v) {} + + /*! Assign operator copies from an exemplar std::vector. + * \param v The std::vector to copy. + */ + template + __host__ + host_vector &operator=(const std::vector &v) + { Parent::operator=(v); return *this;} + + /*! 
Copy constructor copies from an exemplar \p device_vector with possibly different type. + * \param v The \p device_vector to copy. + */ + template + __host__ + host_vector(const device_vector &v); + + /*! Assign operator copies from an exemplar \p device_vector. + * \param v The \p device_vector to copy. + */ + template + __host__ + host_vector &operator=(const device_vector &v) + { Parent::operator=(v); return *this; } + + /*! This constructor builds a \p host_vector from a range. + * \param first The beginning of the range. + * \param last The end of the range. + */ + template + __host__ + host_vector(InputIterator first, InputIterator last) + :Parent(first, last) {} + +// declare these members for the purpose of Doxygenating them +// they actually exist in a derived-from class +#if 0 + /*! \brief Resizes this vector to the specified number of elements. + * \param new_size Number of elements this vector should contain. + * \param x Data with which new elements should be populated. + * \throw std::length_error If n exceeds max_size(). + * + * This method will resize this vector to the specified number of + * elements. If the number is smaller than this vector's current + * size this vector is truncated, otherwise this vector is + * extended and new elements are populated with given data. + */ + void resize(size_type new_size, const value_type &x = value_type()); + + /*! Returns the number of elements in this vector. + */ + size_type size(void) const; + + /*! Returns the size() of the largest possible vector. + * \return The largest possible return value of size(). + */ + size_type max_size(void) const; + + /*! \brief If n is less than or equal to capacity(), this call has no effect. + * Otherwise, this method is a request for allocation of additional memory. If + * the request is successful, then capacity() is greater than or equal to + * n; otherwise, capacity() is unchanged. In either case, size() is unchanged. 
+ * \throw std::length_error If n exceeds max_size(). + */ + void reserve(size_type n); + + /*! Returns the number of elements which have been reserved in this + * vector. + */ + size_type capacity(void) const; + + /*! This method shrinks the capacity of this vector to exactly + * fit its elements. + */ + void shrink_to_fit(void); + + /*! \brief Subscript access to the data contained in this vector_dev. + * \param n The index of the element for which data should be accessed. + * \return Read/write reference to data. + * + * This operator allows for easy, array-style, data access. + * Note that data access with this operator is unchecked and + * out_of_range lookups are not defined. + */ + reference operator[](size_type n); + + /*! \brief Subscript read access to the data contained in this vector_dev. + * \param n The index of the element for which data should be accessed. + * \return Read reference to data. + * + * This operator allows for easy, array-style, data access. + * Note that data access with this operator is unchecked and + * out_of_range lookups are not defined. + */ + const_reference operator[](size_type n) const; + + /*! This method returns an iterator pointing to the beginning of + * this vector. + * \return mStart + */ + iterator begin(void); + + /*! This method returns a const_iterator pointing to the beginning + * of this vector. + * \return mStart + */ + const_iterator begin(void) const; + + /*! This method returns a const_iterator pointing to the beginning + * of this vector. + * \return mStart + */ + const_iterator cbegin(void) const; + + /*! This method returns a reverse_iterator pointing to the beginning of + * this vector's reversed sequence. + * \return A reverse_iterator pointing to the beginning of this + * vector's reversed sequence. + */ + reverse_iterator rbegin(void); + + /*! This method returns a const_reverse_iterator pointing to the beginning of + * this vector's reversed sequence. 
+ * \return A const_reverse_iterator pointing to the beginning of this + * vector's reversed sequence. + */ + const_reverse_iterator rbegin(void) const; + + /*! This method returns a const_reverse_iterator pointing to the beginning of + * this vector's reversed sequence. + * \return A const_reverse_iterator pointing to the beginning of this + * vector's reversed sequence. + */ + const_reverse_iterator crbegin(void) const; + + /*! This method returns an iterator pointing to one element past the + * last of this vector. + * \return begin() + size(). + */ + iterator end(void); + + /*! This method returns a const_iterator pointing to one element past the + * last of this vector. + * \return begin() + size(). + */ + const_iterator end(void) const; + + /*! This method returns a const_iterator pointing to one element past the + * last of this vector. + * \return begin() + size(). + */ + const_iterator cend(void) const; + + /*! This method returns a reverse_iterator pointing to one element past the + * last of this vector's reversed sequence. + * \return rbegin() + size(). + */ + reverse_iterator rend(void); + + /*! This method returns a const_reverse_iterator pointing to one element past the + * last of this vector's reversed sequence. + * \return rbegin() + size(). + */ + const_reverse_iterator rend(void) const; + + /*! This method returns a const_reverse_iterator pointing to one element past the + * last of this vector's reversed sequence. + * \return rbegin() + size(). + */ + const_reverse_iterator crend(void) const; + + /*! This method returns a const_reference referring to the first element of this + * vector. + * \return The first element of this vector. + */ + const_reference front(void) const; + + /*! This method returns a reference pointing to the first element of this + * vector. + * \return The first element of this vector. + */ + reference front(void); + + /*! This method returns a const reference pointing to the last element of + * this vector. 
+ * \return The last element of this vector. + */ + const_reference back(void) const; + + /*! This method returns a reference referring to the last element of + * this vector_dev. + * \return The last element of this vector. + */ + reference back(void); + + /*! This method returns a pointer to this vector's first element. + * \return A pointer to the first element of this vector. + */ + pointer data(void); + + /*! This method returns a const_pointer to this vector's first element. + * \return a const_pointer to the first element of this vector. + */ + const_pointer data(void) const; + + /*! This method resizes this vector to 0. + */ + void clear(void); + + /*! This method returns true iff size() == 0. + * \return true if size() == 0; false, otherwise. + */ + bool empty(void) const; + + /*! This method appends the given element to the end of this vector. + * \param x The element to append. + */ + void push_back(const value_type &x); + + /*! This method erases the last element of this vector, invalidating + * all iterators and references to it. + */ + void pop_back(void); + + /*! This method swaps the contents of this vector_base with another vector. + * \param v The vector with which to swap. + */ + void swap(host_vector &v); + + /*! This method removes the element at position pos. + * \param pos The position of the element of interest. + * \return An iterator pointing to the new location of the element that followed the element + * at position pos. + */ + iterator erase(iterator pos); + + /*! This method removes the range of elements [first,last) from this vector. + * \param first The beginning of the range of elements to remove. + * \param last The end of the range of elements to remove. + * \return An iterator pointing to the new location of the element that followed the last + * element in the sequence [first,last). + */ + iterator erase(iterator first, iterator last); + + /*! 
This method inserts a single copy of a given exemplar value at the + * specified position in this vector. + * \param position The insertion position. + * \param x The exemplar element to copy & insert. + * \return An iterator pointing to the newly inserted element. + */ + iterator insert(iterator position, const T &x); + + /*! This method inserts a copy of an exemplar value to a range at the + * specified position in this vector. + * \param position The insertion position + * \param n The number of insertions to perform. + * \param x The value to replicate and insert. + */ + void insert(iterator position, size_type n, const T &x); + + /*! This method inserts a copy of an input range at the specified position + * in this vector. + * \param position The insertion position. + * \param first The beginning of the range to copy. + * \param last The end of the range to copy. + * + * \tparam InputIterator is a model of Assignable. + */ + template + void insert(iterator position, InputIterator first, InputIterator last); + + /*! This version of \p assign replicates a given exemplar + * \p n times into this vector. + * \param n The number of times to copy \p x. + * \param x The exemplar element to replicate. + */ + void assign(size_type n, const T &x); + + /*! This version of \p assign makes this vector a copy of a given input range. + * \param first The beginning of the range to copy. + * \param last The end of the range to copy. + * + * \tparam InputIterator is a model of Input Iterator. + */ + template + void assign(InputIterator first, InputIterator last); + + /*! This method returns a copy of this vector's allocator. + * \return A copy of the allocator used by this vector. + */ + allocator_type get_allocator(void) const; +#endif // end doxygen-only members +}; // end host_vector + +/*! 
\} + */ + +} // end thrust + +#include + diff --git a/compat/thrust/inner_product.h b/compat/thrust/inner_product.h new file mode 100644 index 0000000..01f5541 --- /dev/null +++ b/compat/thrust/inner_product.h @@ -0,0 +1,262 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file inner_product.h + * \brief Mathematical inner product between ranges + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \addtogroup reductions + * \{ + * \addtogroup transformed_reductions Transformed Reductions + * \ingroup reductions + * \{ + */ + + +/*! \p inner_product calculates an inner product of the ranges + * [first1, last1) and [first2, first2 + (last1 - first1)). + * + * Specifically, this version of \p inner_product computes the sum + * init + (*first1 * *first2) + (*(first1+1) * *(first2+1)) + ... + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the first sequence. + * \param last1 The end of the first sequence. + * \param first2 The beginning of the second sequence. + * \param init Initial value of the result. + * \return The inner product of sequences [first1, last1) + * and [first2, last2) plus \p init. + * + * \tparam DerivedPolicy The name of the derived execution policy. 
+ * \tparam InputIterator1 is a model of Input Iterator, + * \tparam InputIterator2 is a model of Input Iterator, + * \tparam OutputType is a model of Assignable, + * and if \c x is an object of type \p OutputType, and \c y is an object of \p InputIterator1's \c value_type, + * and \c z is an object of \p InputIterator2's \c value_type, then x + y * z is defined + * and is convertible to \p OutputType. + * + * The following code demonstrates how to use \p inner_product to + * compute the dot product of two vectors using the \p thrust::host execution policy for parallelization. + * + * \code + * #include + * #include + * ... + * float vec1[3] = {1.0f, 2.0f, 5.0f}; + * float vec2[3] = {4.0f, 1.0f, 5.0f}; + * + * float result = thrust::inner_product(thrust::host, vec1, vec1 + 3, vec2, 0.0f); + * + * // result == 31.0f + * \endcode + * + * \see http://www.sgi.com/tech/stl/inner_product.html + */ +template +OutputType inner_product(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputType init); + + +/*! \p inner_product calculates an inner product of the ranges + * [first1, last1) and [first2, first2 + (last1 - first1)). + * + * Specifically, this version of \p inner_product computes the sum + * init + (*first1 * *first2) + (*(first1+1) * *(first2+1)) + ... + * + * Unlike the C++ Standard Template Library function std::inner_product, + * this version offers no guarantee on order of execution. + * + * \param first1 The beginning of the first sequence. + * \param last1 The end of the first sequence. + * \param first2 The beginning of the second sequence. + * \param init Initial value of the result. + * \return The inner product of sequences [first1, last1) + * and [first2, last2) plus \p init. 
+ * + * \tparam InputIterator1 is a model of Input Iterator, + * \tparam InputIterator2 is a model of Input Iterator, + * \tparam OutputType is a model of Assignable, + * and if \c x is an object of type \p OutputType, and \c y is an object of \p InputIterator1's \c value_type, + * and \c z is an object of \p InputIterator2's \c value_type, then x + y * z is defined + * and is convertible to \p OutputType. + * + * The following code demonstrates how to use \p inner_product to + * compute the dot product of two vectors. + * + * \code + * #include + * ... + * float vec1[3] = {1.0f, 2.0f, 5.0f}; + * float vec2[3] = {4.0f, 1.0f, 5.0f}; + * + * float result = thrust::inner_product(vec1, vec1 + 3, vec2, 0.0f); + * + * // result == 31.0f + * \endcode + * + * \see http://www.sgi.com/tech/stl/inner_product.html + */ +template +OutputType inner_product(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputType init); + + +/*! \p inner_product calculates an inner product of the ranges + * [first1, last1) and [first2, first2 + (last1 - first1)). + * + * This version of \p inner_product is identical to the first, except that it uses + * two user-supplied function objects instead of \c operator+ and \c operator*. + * + * Specifically, this version of \p inner_product computes the sum + * binary_op1( init, binary_op2(*first1, *first2) ), ... + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the first sequence. + * \param last1 The end of the first sequence. + * \param first2 The beginning of the second sequence. + * \param init Initial value of the result. + * \param binary_op1 Generalized addition operation. + * \param binary_op2 Generalized multiplication operation. + * \return The inner product of sequences [first1, last1) and [first2, first2 + (last1 - first1)). + * + * \tparam DerivedPolicy The name of the derived execution policy.
+ * \tparam InputIterator1 is a model of Input Iterator, + * and \p InputIterator1's \c value_type is convertible to \p BinaryFunction2's \c first_argument_type. + * \tparam InputIterator2 is a model of Input Iterator, + * and \p InputIterator2's \c value_type is convertible to \p BinaryFunction2's \c second_argument_type. + * \tparam OutputType is a model of Assignable, + * and \p OutputType is convertible to \p BinaryFunction1's \c first_argument_type. + * \tparam BinaryFunction1 is a model of Binary Function, + * and \p BinaryFunction1's \c return_type is convertible to \p OutputType. + * \tparam BinaryFunction2 is a model of Binary Function, + * and \p BinaryFunction2's \c return_type is convertible to \p BinaryFunction1's \c second_argument_type. + * + * \code + * #include + * #include + * ... + * float vec1[3] = {1.0f, 2.0f, 5.0f}; + * float vec2[3] = {4.0f, 1.0f, 5.0f}; + * + * float init = 0.0f; + * thrust::plus binary_op1; + * thrust::multiplies binary_op2; + * + * float result = thrust::inner_product(thrust::host, vec1, vec1 + 3, vec2, init, binary_op1, binary_op2); + * + * // result == 31.0f + * \endcode + * + * \see http://www.sgi.com/tech/stl/inner_product.html + */ +template +OutputType inner_product(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputType init, + BinaryFunction1 binary_op1, + BinaryFunction2 binary_op2); + + +/*! \p inner_product calculates an inner product of the ranges + * [first1, last1) and [first2, first2 + (last1 - first1)). + * + * This version of \p inner_product is identical to the first, except that it uses + * two user-supplied function objects instead of \c operator+ and \c operator*. + * + * Specifically, this version of \p inner_product computes the sum + * binary_op1( init, binary_op2(*first1, *first2) ), ...
+ * + * Unlike the C++ Standard Template Library function std::inner_product, + * this version offers no guarantee on order of execution. + * + * \param first1 The beginning of the first sequence. + * \param last1 The end of the first sequence. + * \param first2 The beginning of the second sequence. + * \param init Initial value of the result. + * \param binary_op1 Generalized addition operation. + * \param binary_op2 Generalized multiplication operation. + * \return The inner product of sequences [first1, last1) and [first2, last2). + * + * \tparam InputIterator1 is a model of Input Iterator, + * and \p InputIterator1's \c value_type is convertible to \p BinaryFunction2's \c first_argument_type. + * \tparam InputIterator2 is a model of Input Iterator. + * and \p InputIterator2's \c value_type is convertible to \p BinaryFunction2's \c second_argument_type. + * \tparam OutputType is a model of Assignable, + * and \p OutputType is convertible to \p BinaryFunction1's \c first_argument_type. + * \tparam BinaryFunction1 is a model of Binary Function, + * and \p BinaryFunction1's \c return_type is convertible to \p OutputType. + * \tparam BinaryFunction2 is a model of Binary Function, + * and \p BinaryFunction2's \c return_type is convertible to \p BinaryFunction1's \c second_argument_type. + * + * \code + * #include + * ... + * float vec1[3] = {1.0f, 2.0f, 5.0f}; + * float vec2[3] = {4.0f, 1.0f, 5.0f}; + * + * float init = 0.0f; + * thrust::plus binary_op1; + * thrust::multiplies binary_op2; + * + * float result = thrust::inner_product(vec1, vec1 + 3, vec2, init, binary_op1, binary_op2); + * + * // result == 31.0f + * \endcode + * + * \see http://www.sgi.com/tech/stl/inner_product.html + */ +template +OutputType inner_product(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputType init, + BinaryFunction1 binary_op1, BinaryFunction2 binary_op2); + + +/*! 
\} // end transformed_reductions + * \} // end reductions + */ + +} // end namespace thrust + +#include + diff --git a/compat/thrust/iterator/constant_iterator.h b/compat/thrust/iterator/constant_iterator.h new file mode 100644 index 0000000..e9e03c1 --- /dev/null +++ b/compat/thrust/iterator/constant_iterator.h @@ -0,0 +1,251 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file thrust/iterator/constant_iterator.h + * \brief An iterator which returns a constant value when + * dereferenced + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ + +/*! \addtogroup iterators + * \{ + */ + +/*! \addtogroup fancyiterator Fancy Iterators + * \ingroup iterators + * \{ + */ + +/*! \p constant_iterator is an iterator which represents a pointer into a range + * of constant values. This iterator is useful for creating a range filled with the same + * value without explicitly storing it in memory. Using \p constant_iterator saves both + * memory capacity and bandwidth. + * + * The following code snippet demonstrates how to create a \p constant_iterator whose + * \c value_type is \c int and whose value is \c 10. + * + * \code + * #include + * + * thrust::constant_iterator iter(10); + * + * *iter; // returns 10 + * iter[0]; // returns 10 + * iter[1]; // returns 10 + * iter[13]; // returns 10 + * + * // and so on... 
+ * \endcode + * + * This next example demonstrates how to use a \p constant_iterator with the + * \p thrust::transform function to increment all elements of a sequence by the + * same value. We will create a temporary \p constant_iterator with the + * \p make_constant_iterator function in order to avoid explicitly specifying + * its type: + * + * \code + * #include + * #include + * #include + * #include + * + * int main(void) + * { + * thrust::device_vector data(4); + * data[0] = 3; + * data[1] = 7; + * data[2] = 2; + * data[3] = 5; + * + * // add 10 to all values in data + * thrust::transform(data.begin(), data.end(), + * thrust::make_constant_iterator(10), + * data.begin(), + * thrust::plus()); + * + * // data is now [13, 17, 12, 15] + * + * return 0; + * } + * \endcode + * + * \see make_constant_iterator + */ +template + class constant_iterator + : public detail::constant_iterator_base::type +{ + /*! \cond + */ + friend class thrust::iterator_core_access; + typedef typename detail::constant_iterator_base::type super_t; + typedef typename detail::constant_iterator_base::incrementable incrementable; + typedef typename detail::constant_iterator_base::base_iterator base_iterator; + + public: + typedef typename super_t::reference reference; + typedef typename super_t::value_type value_type; + + /*! \endcond + */ + + /*! Null constructor initializes this \p constant_iterator's constant using its + * null constructor. + */ + __host__ __device__ + constant_iterator(void) + : super_t(), m_value(){}; + + /*! Copy constructor copies the value of another \p constant_iterator into this + * \p constant_iterator. + * + * \param rhs The \p constant_iterator to copy. + */ + __host__ __device__ + constant_iterator(constant_iterator const &rhs) + : super_t(rhs.base()), m_value(rhs.m_value) {} + + /*! Copy constructor copies the value of another \p constant_iterator with related + * System type. + * + * \param rhs The \p constant_iterator to copy.
+ */ + template + __host__ __device__ + constant_iterator(constant_iterator const &rhs, + typename thrust::detail::enable_if_convertible< + typename thrust::iterator_system >::type, + typename thrust::iterator_system::type + >::type * = 0) + : super_t(rhs.base()), m_value(rhs.value()) {} + + /*! This constructor receives a value to use as the constant value of this + * \p constant_iterator and an index specifying the location of this + * \p constant_iterator in a sequence. + * + * \param v The value of this \p constant_iterator's constant value. + * \param i The index of this \p constant_iterator in a sequence. Defaults to the + * value returned by \c Incrementable's null constructor. For example, + * when Incrementable == int, \c 0. + */ + __host__ __device__ + constant_iterator(value_type const& v, incrementable const &i = incrementable()) + : super_t(base_iterator(i)), m_value(v) {} + + /*! This constructor is templated to allow construction from a value type and + * incrementable type related to this \p constant_iterator's respective types. + * + * \param v The value of this \p constant_iterator's constant value. + * \param i The index of this \p constant_iterator in a sequence. Defaults to the + * value returned by \c Incrementable's null constructor. For example, + * when Incrementable == int, \c 0. + */ + template + __host__ __device__ + constant_iterator(OtherValue const& v, OtherIncrementable const& i = incrementable()) + : super_t(base_iterator(i)), m_value(v) {} + + /*! This method returns the value of this \p constant_iterator's constant value. + * \return A \c const reference to this \p constant_iterator's constant value. + */ + __host__ __device__ + Value const& value(void) const + { return m_value; } + + /*! 
\cond + */ + + protected: + __host__ __device__ + Value const& value_reference(void) const + { return m_value; } + + __host__ __device__ + Value & value_reference(void) + { return m_value; } + + private: // Core iterator interface + __host__ __device__ + reference dereference(void) const + { + return m_value; + } + + private: + Value m_value; + + /*! \endcond + */ +}; // end constant_iterator + + +/*! This version of \p make_constant_iterator creates a \p constant_iterator + * from values given for both value and index. The type of \p constant_iterator + * may be inferred by the compiler from the types of its parameters. + * + * \param x The value of the returned \p constant_iterator's constant value. + * \param i The index of the returned \p constant_iterator within a sequence. + * The type of this parameter defaults to \c int. In the default case, + * the value of this parameter is \c 0. + * + * \return A new \p constant_iterator with constant value & index as given + * by \p x & \p i. + * + * \see constant_iterator + */ +template +inline __host__ __device__ +constant_iterator make_constant_iterator(V x, I i = int()) +{ + return constant_iterator(x, i); +} // end make_constant_iterator() + + +/*! This version of \p make_constant_iterator creates a \p constant_iterator + * using only a parameter for the desired constant value. The value of the + * returned \p constant_iterator's index is set to \c 0. + * + * \param x The value of the returned \p constant_iterator's constant value. + * \return A new \p constant_iterator with constant value equal to \p x and + * index equal to \c 0. + * \see constant_iterator + */ +template +inline __host__ __device__ +constant_iterator make_constant_iterator(V x) +{ + return constant_iterator(x, 0); +} // end make_constant_iterator() + +/*! \} // end fancyiterators + */ + +/*! 
\} // end iterators + */ + +} // end namespace thrust + diff --git a/compat/thrust/iterator/counting_iterator.h b/compat/thrust/iterator/counting_iterator.h new file mode 100644 index 0000000..99812ca --- /dev/null +++ b/compat/thrust/iterator/counting_iterator.h @@ -0,0 +1,243 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file thrust/iterator/counting_iterator.h + * \brief An iterator which returns an increasing incrementable value + * when dereferenced + */ + +/* + * Copyright David Abrahams 2003. + * + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying NOTICE file for the complete license) + * + * For more information, see http://www.boost.org + */ + +#pragma once + +#include +#include +#include +#include + +// #include the details first +#include + +namespace thrust +{ + +/*! \addtogroup iterators + * \{ + */ + +/*! \addtogroup fancyiterator Fancy Iterators + * \ingroup iterators + * \{ + */ + +/*! \p counting_iterator is an iterator which represents a pointer into a range + * of sequentially changing values. This iterator is useful for creating a range + * filled with a sequence without explicitly storing it in memory. Using + * \p counting_iterator saves memory capacity and bandwidth. + * + * The following code snippet demonstrates how to create a \p counting_iterator whose + * \c value_type is \c int and which sequentially increments by \c 1. 
+ * + * \code + * #include + * ... + * // create iterators + * thrust::counting_iterator first(10); + * thrust::counting_iterator last = first + 3; + * + * first[0] // returns 10 + * first[1] // returns 11 + * first[100] // returns 110 + * + * // sum of [first, last) + * thrust::reduce(first, last); // returns 33 (i.e. 10 + 11 + 12) + * + * // initialize vector to [0,1,2,..] + * thrust::counting_iterator iter(0); + * thrust::device_vector vec(500); + * thrust::copy(iter, iter + vec.size(), vec.begin()); + * \endcode + * + * This next example demonstrates how to use a \p counting_iterator with the + * \p thrust::copy_if function to compute the indices of the non-zero elements + * of a \p device_vector. In this example, we use the \p make_counting_iterator + * function to avoid specifying the type of the \p counting_iterator. + * + * \code + * #include + * #include + * #include + * #include + * + * int main(void) + * { + * // this example computes indices for all the nonzero values in a sequence + * + * // sequence of zero and nonzero values + * thrust::device_vector stencil(8); + * stencil[0] = 0; + * stencil[1] = 1; + * stencil[2] = 1; + * stencil[3] = 0; + * stencil[4] = 0; + * stencil[5] = 1; + * stencil[6] = 0; + * stencil[7] = 1; + * + * // storage for the nonzero indices + * thrust::device_vector indices(8); + * + * // compute indices of nonzero elements + * typedef thrust::device_vector::iterator IndexIterator; + * + * // use make_counting_iterator to define the sequence [0, 8) + * IndexIterator indices_end = thrust::copy_if(thrust::make_counting_iterator(0), + * thrust::make_counting_iterator(8), + * stencil.begin(), + * indices.begin(), + * thrust::identity()); + * // indices now contains [1,2,5,7] + * + * return 0; + * } + * \endcode + * + * \see make_counting_iterator + */ +template + class counting_iterator + : public detail::counting_iterator_base::type +{ + /*! 
\cond + */ + typedef typename detail::counting_iterator_base::type super_t; + + friend class thrust::iterator_core_access; + + public: + typedef typename super_t::reference reference; + typedef typename super_t::difference_type difference_type; + + /*! \endcond + */ + + /*! Null constructor initializes this \p counting_iterator's \c Incrementable + * counter using its null constructor. + */ + __host__ __device__ + counting_iterator(void){}; + + /*! Copy constructor copies the value of another \p counting_iterator into a + * new \p counting_iterator. + * + * \p rhs The \p counting_iterator to copy. + */ + __host__ __device__ + counting_iterator(counting_iterator const &rhs):super_t(rhs.base()){} + + /*! Copy constructor copies the value of another counting_iterator + * with related System type. + * + * \param rhs The \p counting_iterator to copy. + */ + template + __host__ __device__ + counting_iterator(counting_iterator const &rhs, + typename thrust::detail::enable_if_convertible< + typename thrust::iterator_system >::type, + typename thrust::iterator_system::type + >::type * = 0) + : super_t(rhs.base()){} + + /*! This \c explicit constructor copies the value of an \c Incrementable + * into a new \p counting_iterator's \c Incrementable counter. + * + * \param x The initial value of the new \p counting_iterator's \c Incrementable + * counter. + */ + __host__ __device__ + explicit counting_iterator(Incrementable x):super_t(x){} + + /*! 
\cond + */ + private: + __host__ __device__ + reference dereference(void) const + { + return this->base_reference(); + } + + // note that we implement equal specially for floating point counting_iterator + template + __host__ __device__ + bool equal(counting_iterator const& y) const + { + typedef thrust::detail::counting_iterator_equal e; + return e::equal(this->base(), y.base()); + } + + template + __host__ __device__ + difference_type + distance_to(counting_iterator const& y) const + { + typedef typename + thrust::detail::eval_if< + thrust::detail::is_numeric::value, + thrust::detail::identity_ >, + thrust::detail::identity_ > + >::type d; + + return d::distance(this->base(), y.base()); + } + + /*! \endcond + */ +}; // end counting_iterator + + +/*! \p make_counting_iterator creates a \p counting_iterator + * using an initial value for its \c Incrementable counter. + * + * \param x The initial value of the new \p counting_iterator's counter. + * \return A new \p counting_iterator whose counter has been initialized to \p x. + */ +template +inline __host__ __device__ +counting_iterator make_counting_iterator(Incrementable x) +{ + return counting_iterator(x); +} + +/*! \} // end fancyiterators + */ + +/*! \} // end iterators + */ + +} // end thrust + diff --git a/compat/thrust/iterator/detail/any_assign.h b/compat/thrust/iterator/detail/any_assign.h new file mode 100644 index 0000000..e08a829 --- /dev/null +++ b/compat/thrust/iterator/detail/any_assign.h @@ -0,0 +1,55 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace thrust +{ +namespace detail +{ + + +// a type which may be assigned any other type +struct any_assign +{ + inline __host__ __device__ any_assign(void) + {} + + template + inline __host__ __device__ any_assign(T) + {} + + template + inline __host__ __device__ + any_assign &operator=(T) + { + if(0) + { + // trick the compiler into silencing "warning: this expression has no effect" + int *x = 0; + *x = 13; + } // end if + + return *this; + } +}; + + +} // end detail +} // end thrust + diff --git a/compat/thrust/iterator/detail/any_system_tag.h b/compat/thrust/iterator/detail/any_system_tag.h new file mode 100644 index 0000000..fc6417a --- /dev/null +++ b/compat/thrust/iterator/detail/any_system_tag.h @@ -0,0 +1,37 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ + +struct any_system_tag + : thrust::execution_policy +{ + // allow any_system_tag to convert to any type at all + // XXX make this safer using enable_if> upon c++11 + template operator T () const {return T();} +}; + +// TODO remove this in 1.7.0 +typedef THRUST_DEPRECATED any_system_tag any_space_tag; + +} // end thrust + diff --git a/compat/thrust/iterator/detail/constant_iterator_base.h b/compat/thrust/iterator/detail/constant_iterator_base.h new file mode 100644 index 0000000..276e5ff --- /dev/null +++ b/compat/thrust/iterator/detail/constant_iterator_base.h @@ -0,0 +1,70 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ + +// forward declaration of constant_iterator +template class constant_iterator; + +namespace detail +{ + +template + struct constant_iterator_base +{ + typedef Value value_type; + + // the reference type is the same as the value_type. + // we wish to avoid returning a reference to the internal state + // of the constant_iterator, which is prone to subtle bugs. 
+ // consider the temporary iterator created in the expression + // *(iter + i) + typedef value_type reference; + + // the incrementable type is int unless otherwise specified + typedef typename thrust::detail::ia_dflt_help< + Incrementable, + thrust::detail::identity_ + >::type incrementable; + + typedef typename thrust::counting_iterator< + incrementable, + System, + thrust::random_access_traversal_tag + > base_iterator; + + typedef typename thrust::iterator_adaptor< + constant_iterator, + base_iterator, + value_type, // XXX we may need to pass const value_type here as boost counting_iterator does + typename thrust::iterator_system::type, + typename thrust::iterator_traversal::type, + reference + > type; +}; // end constant_iterator_base + +} // end detail + +} // end thrust + diff --git a/compat/thrust/iterator/detail/counting_iterator.inl b/compat/thrust/iterator/detail/counting_iterator.inl new file mode 100644 index 0000000..ad4fcff --- /dev/null +++ b/compat/thrust/iterator/detail/counting_iterator.inl @@ -0,0 +1,141 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ + +// forward declaration of counting_iterator +template + class counting_iterator; + +namespace detail +{ + +template + struct counting_iterator_base +{ + typedef typename thrust::detail::eval_if< + // use any_system_tag if we are given use_default + thrust::detail::is_same::value, + thrust::detail::identity_, + thrust::detail::identity_ + >::type system; + + typedef typename thrust::detail::ia_dflt_help< + Traversal, + thrust::detail::eval_if< + thrust::detail::is_numeric::value, + thrust::detail::identity_, + thrust::iterator_traversal + > + >::type traversal; + + // unlike Boost, we explicitly use std::ptrdiff_t as the difference type + // for floating point counting_iterators + typedef typename thrust::detail::ia_dflt_help< + Difference, + thrust::detail::eval_if< + thrust::detail::is_numeric::value, + thrust::detail::eval_if< + thrust::detail::is_integral::value, + thrust::detail::numeric_difference, + thrust::detail::identity_ + >, + thrust::iterator_difference + > + >::type difference; + + // our implementation departs from Boost's in that counting_iterator::dereference + // returns a copy of its counter, rather than a reference to it. 
returning a reference + // to the internal state of an iterator causes subtle bugs (consider the temporary + // iterator created in the expression *(iter + i) ) and has no compelling use case + typedef thrust::iterator_adaptor< + counting_iterator, // self + Incrementable, // Base + Incrementable, // XXX we may need to pass const here as Boost does + system, + traversal, + Incrementable, + difference + > type; +}; // end counting_iterator_base + + +template + struct iterator_distance +{ + __host__ __device__ + static Difference distance(Incrementable1 x, Incrementable2 y) + { + return y - x; + } +}; + + +template + struct number_distance +{ + __host__ __device__ + static Difference distance(Incrementable1 x, Incrementable2 y) + { + return static_cast(numeric_distance(x,y)); + } +}; + + +template + struct counting_iterator_equal +{ + __host__ __device__ + static bool equal(Incrementable1 x, Incrementable2 y) + { + return x == y; + } +}; + + +// specialization for floating point equality +template + struct counting_iterator_equal< + Difference, + Incrementable1, + Incrementable2, + typename thrust::detail::enable_if< + thrust::detail::is_floating_point::value || + thrust::detail::is_floating_point::value + >::type + > +{ + __host__ __device__ + static bool equal(Incrementable1 x, Incrementable2 y) + { + typedef number_distance d; + return d::distance(x,y) == 0; + } +}; + + +} // end detail +} // end thrust + diff --git a/compat/thrust/iterator/detail/device_system_tag.h b/compat/thrust/iterator/detail/device_system_tag.h new file mode 100644 index 0000000..ab66fb4 --- /dev/null +++ b/compat/thrust/iterator/detail/device_system_tag.h @@ -0,0 +1,40 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// #include the device system's execution_policy header +#define __THRUST_DEVICE_SYSTEM_TAG_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/execution_policy.h> +#include __THRUST_DEVICE_SYSTEM_TAG_HEADER +#undef __THRUST_DEVICE_SYSTEM_TAG_HEADER + +namespace thrust +{ + +typedef thrust::system::__THRUST_DEVICE_SYSTEM_NAMESPACE::tag device_system_tag; + +} // end thrust + +// TODO remove this in 1.8.0 +namespace thrust +{ + +typedef THRUST_DEPRECATED device_system_tag device_space_tag; + +} // end thrust + diff --git a/compat/thrust/iterator/detail/discard_iterator_base.h b/compat/thrust/iterator/detail/discard_iterator_base.h new file mode 100644 index 0000000..1909ca8 --- /dev/null +++ b/compat/thrust/iterator/detail/discard_iterator_base.h @@ -0,0 +1,65 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include // for std::ptrdiff_t + +namespace thrust +{ + +// forward declaration of discard_iterator +template class discard_iterator; + +namespace detail +{ + + +template + struct discard_iterator_base +{ + // XXX value_type should actually be void + // but this interferes with zip_iterator + typedef any_assign value_type; + typedef any_assign& reference; + typedef std::ptrdiff_t incrementable; + + typedef typename thrust::counting_iterator< + incrementable, + System, + thrust::random_access_traversal_tag + > base_iterator; + + typedef typename thrust::iterator_adaptor< + discard_iterator, + base_iterator, + value_type, + typename thrust::iterator_system::type, + typename thrust::iterator_traversal::type, + reference + > type; +}; // end discard_iterator_base + + +} // end detail + +} // end thrust + + diff --git a/compat/thrust/iterator/detail/distance_from_result.h b/compat/thrust/iterator/detail/distance_from_result.h new file mode 100644 index 0000000..bf83e6c --- /dev/null +++ b/compat/thrust/iterator/detail/distance_from_result.h @@ -0,0 +1,42 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ + +namespace detail +{ + +// since both arguments are known to be specializations of iterator_facade, +// it's legal to access IteratorFacade2::difference_type +template + struct distance_from_result + : eval_if< + is_convertible::value, + identity_, + identity_ + > +{}; + +} // end detail + +} // end thrust + diff --git a/compat/thrust/iterator/detail/host_system_tag.h b/compat/thrust/iterator/detail/host_system_tag.h new file mode 100644 index 0000000..26d3f7d --- /dev/null +++ b/compat/thrust/iterator/detail/host_system_tag.h @@ -0,0 +1,40 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// #include the host system's execution_policy header +#define __THRUST_HOST_SYSTEM_TAG_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/execution_policy.h> +#include __THRUST_HOST_SYSTEM_TAG_HEADER +#undef __THRUST_HOST_SYSTEM_TAG_HEADER + +namespace thrust +{ + +typedef thrust::system::__THRUST_HOST_SYSTEM_NAMESPACE::tag host_system_tag; + +} // end thrust + +// TODO remove this in 1.8.0 +namespace thrust +{ + +typedef THRUST_DEPRECATED host_system_tag host_space_tag; + +} // end thrust + diff --git a/compat/thrust/iterator/detail/is_iterator_category.h b/compat/thrust/iterator/detail/is_iterator_category.h new file mode 100644 index 0000000..95f14d5 --- /dev/null +++ b/compat/thrust/iterator/detail/is_iterator_category.h @@ -0,0 +1,60 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ + +namespace detail +{ + +template + struct is_host_iterator_category + : thrust::detail::or_< + thrust::detail::is_convertible, + thrust::detail::is_convertible + > +{ +}; // end is_host_iterator_category + +template + struct is_device_iterator_category + : thrust::detail::or_< + thrust::detail::is_convertible, + thrust::detail::is_convertible + > +{ +}; // end is_device_iterator_category + + +template + struct is_iterator_category + : thrust::detail::or_< + is_host_iterator_category, + is_device_iterator_category + > +{ +}; // end is_iterator_category + +} // end detail + +} // end thrust + diff --git a/compat/thrust/iterator/detail/is_trivial_iterator.h b/compat/thrust/iterator/detail/is_trivial_iterator.h new file mode 100644 index 0000000..ca37e74 --- /dev/null +++ b/compat/thrust/iterator/detail/is_trivial_iterator.h @@ -0,0 +1,96 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#if __GNUC__ +// forward declaration of gnu's __normal_iterator +namespace __gnu_cxx +{ + +template class __normal_iterator; + +} // end __gnu_cxx +#endif // __GNUC__ + +#if _MSC_VER +// forward declaration of MSVC's "normal iterators" +namespace std +{ + +template struct _Ranit; + +} // end std +#endif // _MSC_VER + +namespace thrust +{ +namespace detail +{ + +#ifdef __GNUC__ +template + struct is_gnu_normal_iterator + : false_type +{}; + + +// catch gnu __normal_iterators +template + struct is_gnu_normal_iterator< __gnu_cxx::__normal_iterator > + : true_type +{}; +#endif // __GNUC__ + + +#ifdef _MSC_VER +// catch msvc _Ranit +template + struct is_convertible_to_msvc_Ranit : + is_convertible< + Iterator, + std::_Ranit< + typename iterator_value::type, + typename iterator_difference::type, + typename iterator_pointer::type, + typename iterator_reference::type + > + > +{}; +#endif // _MSC_VER + + +template + struct is_trivial_iterator : + integral_constant< + bool, + is_pointer::value + | thrust::detail::is_thrust_pointer::value +#if __GNUC__ + | is_gnu_normal_iterator::value +#endif // __GNUC__ +#ifdef _MSC_VER + | is_convertible_to_msvc_Ranit::value +#endif // _MSC_VER + > +{}; + +} // end detail +} // end thrust + diff --git a/compat/thrust/iterator/detail/iterator_adaptor_base.h b/compat/thrust/iterator/detail/iterator_adaptor_base.h new file mode 100644 index 0000000..8b77f05 --- /dev/null +++ b/compat/thrust/iterator/detail/iterator_adaptor_base.h @@ -0,0 +1,111 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + + +// forward declaration of iterator_adaptor for iterator_adaptor_base below +template +class iterator_adaptor; + + +namespace detail +{ + +// If T is use_default, return the result of invoking +// DefaultNullaryFn, otherwise return T. +// XXX rename to dflt_help +template +struct ia_dflt_help + : thrust::detail::eval_if< + thrust::detail::is_same::value + , DefaultNullaryFn + , thrust::detail::identity_ + > +{ +}; // end ia_dflt_help + + +// A metafunction which computes an iterator_adaptor's base class, +// a specialization of iterator_facade. 
+template + struct iterator_adaptor_base +{ + typedef typename ia_dflt_help< + Value, + iterator_value + >::type value; + + typedef typename ia_dflt_help< + System, + thrust::iterator_system + >::type system; + + typedef typename ia_dflt_help< + Traversal, + thrust::iterator_traversal + >::type traversal; + + typedef typename ia_dflt_help< + Reference, + thrust::detail::eval_if< + thrust::detail::is_same::value, + thrust::iterator_reference, + thrust::detail::add_reference + > + >::type reference; + + typedef typename ia_dflt_help< + Difference, + iterator_difference + >::type difference; + + typedef thrust::iterator_facade< + Derived, + value, + system, + traversal, + reference, + difference + > type; +}; // end iterator_adaptor_base + + +} // end detail +} // end thrust + diff --git a/compat/thrust/iterator/detail/iterator_category_to_system.h b/compat/thrust/iterator/detail/iterator_category_to_system.h new file mode 100644 index 0000000..17e7d78 --- /dev/null +++ b/compat/thrust/iterator/detail/iterator_category_to_system.h @@ -0,0 +1,95 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + +// XXX WAR circular #inclusion with forward declarations +struct random_access_universal_iterator_tag; +struct input_universal_iterator_tag; +struct output_universal_iterator_tag; + +namespace detail +{ + +// forward declaration +template struct is_iterator_system; + +template struct device_iterator_category_to_backend_system; + +// XXX this should work entirely differently +// we should just specialize this metafunction for iterator_category_with_system_and_traversal +template + struct iterator_category_to_system + // convertible to any iterator? + : eval_if< + or_< + is_convertible, + is_convertible + >::value, + + detail::identity_, + + // convertible to host iterator? + eval_if< + or_< + is_convertible, + is_convertible + >::value, + + detail::identity_, + + // convertible to device iterator? + eval_if< + or_< + is_convertible, + is_convertible + >::value, + + detail::identity_, + + // unknown system + detail::identity_ + > // if device + > // if host + > // if any +{ +}; // end iterator_category_to_system + + +template + struct iterator_category_or_traversal_to_system + : eval_if< + is_iterator_system::value, + detail::identity_, + iterator_category_to_system + > +{ +}; // end iterator_category_or_traversal_to_system + +} // end detail +} // end thrust + diff --git a/compat/thrust/iterator/detail/iterator_category_to_traversal.h b/compat/thrust/iterator/detail/iterator_category_to_traversal.h new file mode 100644 index 0000000..04ef60c --- /dev/null +++ b/compat/thrust/iterator/detail/iterator_category_to_traversal.h @@ -0,0 +1,178 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ + +// XXX WAR circular #inclusion with these forward declarations +struct bidirectional_universal_iterator_tag; +struct forward_universal_iterator_tag; + +namespace detail +{ + +// forward declarations +template struct is_iterator_system; +template struct is_iterator_traversal; + +// make type_traits easy to access +using namespace thrust::detail; + +template + struct host_system_category_to_traversal + : eval_if< + is_convertible::value, + detail::identity_, + eval_if< + is_convertible::value, + detail::identity_, + eval_if< + is_convertible::value, + detail::identity_, + eval_if< + is_convertible::value, + detail::identity_, + eval_if< + is_convertible::value, + detail::identity_, + void + > + > + > + > + > +{ +}; // end host_system_category_to_traversal + + + +template + struct device_system_category_to_traversal + : eval_if< + is_convertible::value, + detail::identity_, + eval_if< + is_convertible::value, + detail::identity_, + eval_if< + is_convertible::value, + detail::identity_, + eval_if< + is_convertible::value, + detail::identity_, + eval_if< + is_convertible::value, + detail::identity_, + void + > + > + > + > + > +{ +}; // end device_system_category_to_traversal + + + +template + struct any_system_category_to_traversal + : eval_if< + is_convertible::value, + identity_, + eval_if< + is_convertible::value, + identity_, + eval_if< + is_convertible::value, + identity_, + eval_if< + is_convertible::value, + identity_, + eval_if< + 
is_convertible::value, + identity_, + + // unknown traversal + void + > + > + > + > + > +{ +}; // end any_system_category_to_traversal + + +template + struct category_to_traversal + // check for any system + : eval_if< + or_< + is_convertible, + is_convertible + >::value, + + any_system_category_to_traversal, + + // check for host system + eval_if< + or_< + is_convertible, + is_convertible + >::value, + + host_system_category_to_traversal, + + // check for device system + eval_if< + or_< + is_convertible, + is_convertible + >::value, + + device_system_category_to_traversal, + + // unknown category + void + > + > + > +{}; + + +template + struct iterator_category_to_traversal + : eval_if< + is_iterator_traversal::value, + detail::identity_, + category_to_traversal + > +{ +}; // end iterator_category_to_traversal + + +} // end detail + +} // end thrust + diff --git a/compat/thrust/iterator/detail/iterator_facade_category.h b/compat/thrust/iterator/detail/iterator_facade_category.h new file mode 100644 index 0000000..fbb8bd6 --- /dev/null +++ b/compat/thrust/iterator/detail/iterator_facade_category.h @@ -0,0 +1,283 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + +namespace detail +{ + +template + struct iterator_category_with_system_and_traversal + : Category +{ +}; // end iterator_category_with_system_and_traversal + +// specialize iterator_category_to_system for iterator_category_with_system_and_traversal +template struct iterator_category_to_system; + +template + struct iterator_category_to_system > +{ + typedef System type; +}; // end iterator_category_with_system_and_traversal + + +// adapted from http://www.boost.org/doc/libs/1_37_0/libs/iterator/doc/iterator_facade.html#iterator-category +// +// in our implementation, R need not be a reference type to result in a category +// derived from forward_XXX_iterator_tag +// +// iterator-category(T,V,R) := +// if(T is convertible to input_host_iterator_tag +// || T is convertible to output_host_iterator_tag +// || T is convertible to input_device_iterator_tag +// || T is convertible to output_device_iterator_tag +// ) +// return T +// +// else if (T is not convertible to incrementable_traversal_tag) +// the program is ill-formed +// +// else return a type X satisfying the following two constraints: +// +// 1. X is convertible to X1, and not to any more-derived +// type, where X1 is defined by: +// +// if (T is convertible to forward_traversal_tag) +// { +// if (T is convertible to random_access_traversal_tag) +// X1 = random_access_host_iterator_tag +// else if (T is convertible to bidirectional_traversal_tag) +// X1 = bidirectional_host_iterator_tag +// else +// X1 = forward_host_iterator_tag +// } +// else +// { +// if (T is convertible to single_pass_traversal_tag +// && R is convertible to V) +// X1 = input_host_iterator_tag +// else +// X1 = T +// } +// +// 2. category-to-traversal(X) is convertible to the most +// derived traversal tag type to which X is also convertible, +// and not to any more-derived traversal tag type. 
+ + +template + struct iterator_facade_default_category; + + +// Thrust's implementation of iterator_facade_default_category is slightly +// different from Boost's equivalent. +// Thrust does not check is_convertible because Reference +// may not be a complete type at this point, and implementations of is_convertible +// typically require that both types be complete. +// Instead, it simply assumes that if is_convertible, +// then the category is input_iterator_tag + + +// this is the function for standard system iterators +template + struct iterator_facade_default_category_std : + thrust::detail::eval_if< + thrust::detail::is_convertible::value, + thrust::detail::eval_if< + thrust::detail::is_convertible::value, + thrust::detail::identity_, + thrust::detail::eval_if< + thrust::detail::is_convertible::value, + thrust::detail::identity_, + thrust::detail::identity_ + > + >, + thrust::detail::eval_if< // XXX note we differ from Boost here + thrust::detail::is_convertible::value, + thrust::detail::identity_, + thrust::detail::identity_ + > + > +{ +}; // end iterator_facade_default_category_std + + +// this is the function for host system iterators +template + struct iterator_facade_default_category_host : + thrust::detail::eval_if< + thrust::detail::is_convertible::value, + thrust::detail::eval_if< + thrust::detail::is_convertible::value, + thrust::detail::identity_, + thrust::detail::eval_if< + thrust::detail::is_convertible::value, + thrust::detail::identity_, + thrust::detail::identity_ + > + >, + thrust::detail::eval_if< // XXX note we differ from Boost here + thrust::detail::is_convertible::value, + thrust::detail::identity_, + thrust::detail::identity_ + > + > +{ +}; // end iterator_facade_default_category_host + + +// this is the function for device system iterators +template + struct iterator_facade_default_category_device : + thrust::detail::eval_if< + thrust::detail::is_convertible::value, + thrust::detail::eval_if< + thrust::detail::is_convertible::value, + 
thrust::detail::identity_, + thrust::detail::eval_if< + thrust::detail::is_convertible::value, + thrust::detail::identity_, + thrust::detail::identity_ + > + >, + thrust::detail::eval_if< + thrust::detail::is_convertible::value, // XXX note we differ from Boost here + thrust::detail::identity_, + thrust::detail::identity_ + > + > +{ +}; // end iterator_facade_default_category_device + + +// this is the function for any system iterators +template + struct iterator_facade_default_category_any : + thrust::detail::eval_if< + + thrust::detail::is_convertible::value, + + thrust::detail::eval_if< + thrust::detail::is_convertible::value, + thrust::detail::identity_, + + thrust::detail::eval_if< + thrust::detail::is_convertible::value, + thrust::detail::identity_, + thrust::detail::identity_ + > + >, + + thrust::detail::eval_if< + thrust::detail::is_convertible::value, // XXX note we differ from Boost here + thrust::detail::identity_, + thrust::detail::identity_ + > + > +{ +}; // end iterator_facade_default_category_any + + +template + struct iterator_facade_default_category + // check for any system + : thrust::detail::eval_if< + thrust::detail::is_convertible::value, + iterator_facade_default_category_any, + + // check for host system + thrust::detail::eval_if< + thrust::detail::is_convertible::value, + iterator_facade_default_category_host, + + // check for device system + thrust::detail::eval_if< + thrust::detail::is_convertible::value, + iterator_facade_default_category_device, + + // if we don't recognize the system, get a standard iterator category + // and combine it with System & Traversal + thrust::detail::identity_< + thrust::detail::iterator_category_with_system_and_traversal< + typename iterator_facade_default_category_std::type, + System, + Traversal + > + > + > + > + > +{}; + + +template + struct iterator_facade_category_impl +{ + typedef typename iterator_facade_default_category< + System,Traversal,ValueParam,Reference + >::type category; + + // we must be 
able to deduce both Traversal & System from category + // otherwise, munge them all together + typedef typename thrust::detail::eval_if< + thrust::detail::and_< + thrust::detail::is_same< + Traversal, + typename thrust::detail::iterator_category_to_traversal::type + >, + thrust::detail::is_same< + System, + typename thrust::detail::iterator_category_to_system::type + > + >::value, + thrust::detail::identity_, + thrust::detail::identity_ > + >::type type; +}; // end iterator_facade_category_impl + + +template + struct iterator_facade_category +{ + typedef typename + thrust::detail::eval_if< + thrust::detail::is_iterator_category::value, + thrust::detail::identity_, // categories are fine as-is + iterator_facade_category_impl + >::type type; +}; // end iterator_facade_category + + +} // end detail +} // end thrust + diff --git a/compat/thrust/iterator/detail/iterator_traits.inl b/compat/thrust/iterator/detail/iterator_traits.inl new file mode 100644 index 0000000..924eabb --- /dev/null +++ b/compat/thrust/iterator/detail/iterator_traits.inl @@ -0,0 +1,112 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file iterator_traits.inl + * \brief Inline file for iterator_traits.h. 
+ */ + +#include +#include +#include + +namespace thrust +{ + +template + struct iterator_value +{ + typedef typename thrust::iterator_traits::value_type type; +}; // end iterator_value + + +template + struct iterator_pointer +{ + typedef typename thrust::iterator_traits::pointer type; +}; // end iterator_pointer + + +template + struct iterator_reference +{ + typedef typename iterator_traits::reference type; +}; // end iterator_reference + + +template + struct iterator_difference +{ + typedef typename thrust::iterator_traits::difference_type type; +}; // end iterator_difference + + +template + struct iterator_system + : detail::iterator_category_to_system< + typename thrust::iterator_traits::iterator_category + > +{ +}; // end iterator_system + +// specialize iterator_system for void *, which has no category +template<> + struct iterator_system +{ + typedef thrust::iterator_system::type type; +}; // end iterator_system + +template<> + struct iterator_system +{ + typedef thrust::iterator_system::type type; +}; // end iterator_system + + +template + struct iterator_traversal + : detail::iterator_category_to_traversal< + typename thrust::iterator_traits::iterator_category + > +{ +}; // end iterator_traversal + +namespace detail +{ + +template + struct is_iterator_traversal + : thrust::detail::is_convertible +{ +}; // end is_iterator_traversal + + +template + struct is_iterator_system + : detail::or_< + detail::is_convertible, + detail::or_< + detail::is_convertible, + detail::is_convertible + > + > +{ +}; // end is_iterator_system + + +} // end namespace detail +} // end namespace thrust + diff --git a/compat/thrust/iterator/detail/iterator_traversal_tags.h b/compat/thrust/iterator/detail/iterator_traversal_tags.h new file mode 100644 index 0000000..dcbebf3 --- /dev/null +++ b/compat/thrust/iterator/detail/iterator_traversal_tags.h @@ -0,0 +1,41 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + 
* you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace thrust +{ + +// define Boost's traversal tags +struct no_traversal_tag {}; + +struct incrementable_traversal_tag + : no_traversal_tag {}; + +struct single_pass_traversal_tag + : incrementable_traversal_tag {}; + +struct forward_traversal_tag + : single_pass_traversal_tag {}; + +struct bidirectional_traversal_tag + : forward_traversal_tag {}; + +struct random_access_traversal_tag + : bidirectional_traversal_tag {}; + +} // end thrust + diff --git a/compat/thrust/iterator/detail/minimum_category.h b/compat/thrust/iterator/detail/minimum_category.h new file mode 100644 index 0000000..e07e096 --- /dev/null +++ b/compat/thrust/iterator/detail/minimum_category.h @@ -0,0 +1,52 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +namespace thrust +{ + +namespace detail +{ + +template + struct minimum_category + : minimum_type +{ +}; // end minimum_category + +} // end detail + +} // end thrust + + diff --git a/compat/thrust/iterator/detail/minimum_system.h b/compat/thrust/iterator/detail/minimum_system.h new file mode 100644 index 0000000..5448a0d --- /dev/null +++ b/compat/thrust/iterator/detail/minimum_system.h @@ -0,0 +1,49 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace thrust +{ +namespace detail +{ + +template + struct minimum_system + : minimum_type +{ +}; // end minimum_system + +} // end detail +} // end thrust + diff --git a/compat/thrust/iterator/detail/normal_iterator.h b/compat/thrust/iterator/detail/normal_iterator.h new file mode 100644 index 0000000..7fe61bf --- /dev/null +++ b/compat/thrust/iterator/detail/normal_iterator.h @@ -0,0 +1,76 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file normal_iterator.h + * \brief Defines the interface to an iterator class + * which adapts a pointer type. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + + +template + class normal_iterator + : public iterator_adaptor< + normal_iterator, + Pointer + > +{ + typedef iterator_adaptor, Pointer> super_t; + + public: + __host__ __device__ + normal_iterator() {} + + __host__ __device__ + normal_iterator(Pointer p) + : super_t(p) {} + + template + __host__ __device__ + normal_iterator(const normal_iterator &other, + typename thrust::detail::enable_if_convertible< + OtherPointer, + Pointer + >::type * = 0) + : super_t(other.base()) {} + +}; // end normal_iterator + + +template + inline __host__ __device__ normal_iterator make_normal_iterator(Pointer ptr) +{ + return normal_iterator(ptr); +} + + +template struct is_trivial_iterator< normal_iterator > : public true_type {}; + + +} // end detail +} // end thrust + diff --git a/compat/thrust/iterator/detail/permutation_iterator_base.h b/compat/thrust/iterator/detail/permutation_iterator_base.h new file mode 100644 index 0000000..a145b88 --- /dev/null +++ b/compat/thrust/iterator/detail/permutation_iterator_base.h @@ -0,0 +1,53 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +template class permutation_iterator; + + +namespace detail +{ + +template + struct permutation_iterator_base +{ + typedef typename thrust::iterator_system::type System1; + typedef typename thrust::iterator_system::type System2; + + typedef thrust::iterator_adaptor< + permutation_iterator, + IndexIterator, + typename thrust::iterator_value::type, + typename detail::minimum_system::type, + thrust::use_default, + typename thrust::iterator_reference::type + > type; +}; // end permutation_iterator_base + +} // end detail + +} // end thrust + diff --git a/compat/thrust/iterator/detail/retag.h b/compat/thrust/iterator/detail/retag.h new file mode 100644 index 0000000..4417fa5 --- /dev/null +++ b/compat/thrust/iterator/detail/retag.h @@ -0,0 +1,140 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + + +// we can retag an iterator if FromTag converts to ToTag +// or vice versa +template + struct is_retaggable + : integral_constant< + bool, + (is_convertible::value || is_convertible::value) + > +{}; + + +template + struct enable_if_retaggable + : enable_if< + is_retaggable::value, + Result + > +{}; // end enable_if_retaggable + + +} // end detail + + +template + thrust::detail::tagged_iterator + reinterpret_tag(Iterator iter) +{ + return thrust::detail::tagged_iterator(iter); +} // end reinterpret_tag() + + +// specialization for raw pointer +template + thrust::pointer + reinterpret_tag(T *ptr) +{ + return thrust::pointer(ptr); +} // end reinterpret_tag() + + +// specialization for thrust::pointer +template + thrust::pointer + reinterpret_tag(thrust::pointer ptr) +{ + return reinterpret_tag(ptr.get()); +} // end reinterpret_tag() + + +// avoid deeply-nested tagged_iterator +template + thrust::detail::tagged_iterator + reinterpret_tag(thrust::detail::tagged_iterator iter) +{ + return reinterpret_tag(iter.base()); +} // end reinterpret_tag() + + +template + typename thrust::detail::enable_if_retaggable< + typename thrust::iterator_system::type, + Tag, + thrust::detail::tagged_iterator + >::type + retag(Iterator iter) +{ + return reinterpret_tag(iter); +} // end retag() + + +// specialization for raw pointer +template + typename thrust::detail::enable_if_retaggable< + typename thrust::iterator_system::type, + Tag, + thrust::pointer + >::type + retag(T *ptr) +{ + return reinterpret_tag(ptr); +} // end retag() + + +// specialization for thrust::pointer +template + typename thrust::detail::enable_if_retaggable< + OtherTag, + Tag, + thrust::pointer + >::type + retag(thrust::pointer ptr) +{ + return reinterpret_tag(ptr); +} // end retag() + + +// avoid deeply-nested tagged_iterator +template + typename thrust::detail::enable_if_retaggable< + OtherTag, + Tag, + 
thrust::detail::tagged_iterator + >::type + retag(thrust::detail::tagged_iterator iter) +{ + return reinterpret_tag(iter); +} // end retag() + + +} // end thrust + diff --git a/compat/thrust/iterator/detail/reverse_iterator.inl b/compat/thrust/iterator/detail/reverse_iterator.inl new file mode 100644 index 0000000..03e9032 --- /dev/null +++ b/compat/thrust/iterator/detail/reverse_iterator.inl @@ -0,0 +1,108 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +namespace thrust +{ + +namespace detail +{ + +__thrust_hd_warning_disable__ +template +__host__ __device__ + Iterator prior(Iterator x) +{ + return --x; +} // end prior() + +} // end detail + +template + reverse_iterator + ::reverse_iterator(BidirectionalIterator x) + :super_t(x) +{ +} // end reverse_iterator::reverse_iterator() + +template + template + reverse_iterator + ::reverse_iterator(reverse_iterator const &r +// XXX msvc screws this up +#ifndef _MSC_VER + , typename thrust::detail::enable_if< + thrust::detail::is_convertible< + OtherBidirectionalIterator, + BidirectionalIterator + >::value + >::type * +#endif // _MSC_VER + ) + :super_t(r.base()) +{ +} // end reverse_iterator::reverse_iterator() + +template + typename reverse_iterator::super_t::reference + reverse_iterator + ::dereference(void) const +{ + return *thrust::detail::prior(this->base()); +} // end reverse_iterator::increment() + +template + void reverse_iterator + ::increment(void) +{ + --this->base_reference(); +} // end reverse_iterator::increment() + +template + void reverse_iterator + ::decrement(void) +{ + ++this->base_reference(); +} // end reverse_iterator::decrement() + +template + void reverse_iterator + ::advance(typename super_t::difference_type n) +{ + this->base_reference() += -n; +} // end reverse_iterator::advance() + +template + template + typename reverse_iterator::super_t::difference_type + reverse_iterator + ::distance_to(reverse_iterator const &y) const +{ + return this->base_reference() - y.base(); +} // end reverse_iterator::distance_to() + +template +__host__ __device__ +reverse_iterator make_reverse_iterator(BidirectionalIterator x) +{ + return reverse_iterator(x); +} // end make_reverse_iterator() + + +} // end thrust + diff --git a/compat/thrust/iterator/detail/reverse_iterator_base.h b/compat/thrust/iterator/detail/reverse_iterator_base.h new file mode 100644 index 0000000..c10c5b7 --- /dev/null +++ 
b/compat/thrust/iterator/detail/reverse_iterator_base.h @@ -0,0 +1,42 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ + +template class reverse_iterator; + +namespace detail +{ + +template + struct reverse_iterator_base +{ + typedef thrust::iterator_adaptor< + thrust::reverse_iterator, + BidirectionalIterator + > type; +}; // end reverse_iterator_base + +} // end detail + +} // end thrust + diff --git a/compat/thrust/iterator/detail/tagged_iterator.h b/compat/thrust/iterator/detail/tagged_iterator.h new file mode 100644 index 0000000..69e6445 --- /dev/null +++ b/compat/thrust/iterator/detail/tagged_iterator.h @@ -0,0 +1,74 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + +template class tagged_iterator; + +template + struct tagged_iterator_base +{ + typedef thrust::iterator_adaptor< + tagged_iterator, + Iterator, + typename thrust::iterator_value::type, + Tag, + typename thrust::iterator_traversal::type, + typename thrust::iterator_reference::type, + typename thrust::iterator_difference::type + > type; +}; // end tagged_iterator_base + +template + class tagged_iterator + : public tagged_iterator_base::type +{ + private: + typedef typename tagged_iterator_base::type super_t; + + public: + __host__ __device__ + tagged_iterator(void) {} + + __host__ __device__ + explicit tagged_iterator(Iterator x) + : super_t(x) {} +}; // end tagged_iterator + + +// specialize is_trivial_iterator for tagged_iterator +template struct is_trivial_iterator; + +// tagged_iterator is trivial if its base iterator is +template + struct is_trivial_iterator > + : is_trivial_iterator +{}; + + +} // end detail +} // end thrust + diff --git a/compat/thrust/iterator/detail/transform_iterator.inl b/compat/thrust/iterator/detail/transform_iterator.inl new file mode 100644 index 0000000..a5a36a7 --- /dev/null +++ b/compat/thrust/iterator/detail/transform_iterator.inl @@ -0,0 +1,72 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ + +template + class transform_iterator; + +namespace detail +{ + +// Compute the iterator_adaptor instantiation to be used for transform_iterator +template +struct transform_iterator_base +{ + private: + // By default, dereferencing the iterator yields the same as the function. + typedef typename thrust::detail::ia_dflt_help< + Reference, + thrust::detail::result_of::type)> + >::type reference; + + // To get the default for Value: remove any reference on the + // result type, but retain any constness to signal + // non-writability. Note that if we adopt Thomas' suggestion + // to key non-writability *only* on the Reference argument, + // we'd need to strip constness here as well. + typedef typename thrust::detail::ia_dflt_help< + Value, + thrust::detail::remove_reference + >::type cv_value_type; + + public: + typedef thrust::iterator_adaptor + < + transform_iterator + , Iterator + , cv_value_type + , thrust::use_default // Leave the system alone + //, thrust::use_default // Leave the traversal alone + // use the Iterator's category to let any system iterators remain random access even though + // transform_iterator's reference type may not be a reference + // XXX figure out why only iterators whose reference types are true references are random access + , typename thrust::iterator_traits::iterator_category + , reference + > type; +}; + + +} // end detail +} // end thrust + diff --git a/compat/thrust/iterator/detail/tuple_of_iterator_references.h b/compat/thrust/iterator/detail/tuple_of_iterator_references.h new file mode 100644 index 0000000..fdbf6b8 --- /dev/null +++ b/compat/thrust/iterator/detail/tuple_of_iterator_references.h @@ -0,0 +1,246 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + + +template< + typename T0, typename T1, typename T2, + typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, + typename T9 +> + class tuple_of_iterator_references + : public thrust::tuple +{ + private: + typedef thrust::tuple super_t; + + public: + // allow implicit construction from tuple + inline __host__ __device__ + tuple_of_iterator_references(const super_t &other) + : super_t(other) + {} + + // allow assignment from tuples + // XXX might be worthwhile to guard this with an enable_if is_assignable + template + inline __host__ __device__ + tuple_of_iterator_references &operator=(const detail::cons &other) + { + super_t::operator=(other); + return *this; + } + + // allow assignment from pairs + // XXX might be worthwhile to guard this with an enable_if is_assignable + template + inline __host__ __device__ + tuple_of_iterator_references &operator=(const thrust::pair &other) + { + super_t::operator=(other); + return *this; + } + + // allow assignment from reference + // XXX perhaps we should generalize to reference + // we could captures reference this way + template + inline __host__ __device__ +// XXX gcc-4.2 crashes on is_assignable +// typename thrust::detail::enable_if< +// thrust::detail::is_assignable< +// super_t, +// const thrust::tuple +// >::value, +// tuple_of_iterator_references & +// >::type + tuple_of_iterator_references & + operator=(const thrust::reference, Pointer, Derived> &other) + { + typedef 
thrust::tuple tuple_type; + + // XXX perhaps this could be accelerated + tuple_type other_tuple = other; + super_t::operator=(other_tuple); + return *this; + } + + + // duplicate thrust::tuple's constructors + inline __host__ __device__ + tuple_of_iterator_references() {} + + inline __host__ __device__ + tuple_of_iterator_references(typename access_traits::parameter_type t0) + : super_t(t0, + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type())) + {} + + inline __host__ __device__ + tuple_of_iterator_references(typename access_traits::parameter_type t0, + typename access_traits::parameter_type t1) + : super_t(t0, t1, + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type())) + {} + + inline __host__ __device__ + tuple_of_iterator_references(typename access_traits::parameter_type t0, + typename access_traits::parameter_type t1, + typename access_traits::parameter_type t2) + : super_t(t0, t1, t2, + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type())) + {} + + inline __host__ __device__ + tuple_of_iterator_references(typename access_traits::parameter_type t0, + typename access_traits::parameter_type t1, + typename access_traits::parameter_type t2, + typename access_traits::parameter_type t3) + : super_t(t0, t1, t2, t3, + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type())) + {} + + inline __host__ __device__ + tuple_of_iterator_references(typename 
access_traits::parameter_type t0, + typename access_traits::parameter_type t1, + typename access_traits::parameter_type t2, + typename access_traits::parameter_type t3, + typename access_traits::parameter_type t4) + : super_t(t0, t1, t2, t3, t4, + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type())) + {} + + inline __host__ __device__ + tuple_of_iterator_references(typename access_traits::parameter_type t0, + typename access_traits::parameter_type t1, + typename access_traits::parameter_type t2, + typename access_traits::parameter_type t3, + typename access_traits::parameter_type t4, + typename access_traits::parameter_type t5) + : super_t(t0, t1, t2, t3, t4, t5, + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type())) + {} + + inline __host__ __device__ + tuple_of_iterator_references(typename access_traits::parameter_type t0, + typename access_traits::parameter_type t1, + typename access_traits::parameter_type t2, + typename access_traits::parameter_type t3, + typename access_traits::parameter_type t4, + typename access_traits::parameter_type t5, + typename access_traits::parameter_type t6) + : super_t(t0, t1, t2, t3, t4, t5, t6, + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type())) + {} + + inline __host__ __device__ + tuple_of_iterator_references(typename access_traits::parameter_type t0, + typename access_traits::parameter_type t1, + typename access_traits::parameter_type t2, + typename access_traits::parameter_type t3, + typename access_traits::parameter_type t4, + typename access_traits::parameter_type t5, + typename access_traits::parameter_type t6, + typename access_traits::parameter_type t7) + : super_t(t0, t1, t2, t3, t4, t5, t6, t7, + static_cast(null_type()), + static_cast(null_type())) + {} + + inline __host__ __device__ + tuple_of_iterator_references(typename 
access_traits::parameter_type t0, + typename access_traits::parameter_type t1, + typename access_traits::parameter_type t2, + typename access_traits::parameter_type t3, + typename access_traits::parameter_type t4, + typename access_traits::parameter_type t5, + typename access_traits::parameter_type t6, + typename access_traits::parameter_type t7, + typename access_traits::parameter_type t8) + : super_t(t0, t1, t2, t3, t4, t5, t6, t7, t8, + static_cast(null_type())) + {} + + inline __host__ __device__ + tuple_of_iterator_references(typename access_traits::parameter_type t0, + typename access_traits::parameter_type t1, + typename access_traits::parameter_type t2, + typename access_traits::parameter_type t3, + typename access_traits::parameter_type t4, + typename access_traits::parameter_type t5, + typename access_traits::parameter_type t6, + typename access_traits::parameter_type t7, + typename access_traits::parameter_type t8, + typename access_traits::parameter_type t9) + : super_t(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9) + {} +}; + + +} // end detail +} // end thrust + diff --git a/compat/thrust/iterator/detail/universal_categories.h b/compat/thrust/iterator/detail/universal_categories.h new file mode 100644 index 0000000..7c39222 --- /dev/null +++ b/compat/thrust/iterator/detail/universal_categories.h @@ -0,0 +1,85 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ + +// define these types without inheritance to avoid ambiguous conversion to base classes + +struct input_universal_iterator_tag +{ + operator input_host_iterator_tag () {return input_host_iterator_tag();} + + operator input_device_iterator_tag () {return input_device_iterator_tag();} +}; + +struct output_universal_iterator_tag +{ + operator output_host_iterator_tag () {return output_host_iterator_tag();} + + operator output_device_iterator_tag () {return output_device_iterator_tag();} +}; + +struct forward_universal_iterator_tag + : input_universal_iterator_tag +{ + operator forward_host_iterator_tag () {return forward_host_iterator_tag();}; + + operator forward_device_iterator_tag () {return forward_device_iterator_tag();}; +}; + +struct bidirectional_universal_iterator_tag + : forward_universal_iterator_tag +{ + operator bidirectional_host_iterator_tag () {return bidirectional_host_iterator_tag();}; + + operator bidirectional_device_iterator_tag () {return bidirectional_device_iterator_tag();}; +}; + + +namespace detail +{ + +// create this struct to control conversion precedence in random_access_universal_iterator_tag +template +struct one_degree_of_separation + : T +{ +}; + +} // end detail + + +struct random_access_universal_iterator_tag +{ + // these conversions are all P0 + operator random_access_host_iterator_tag () {return random_access_host_iterator_tag();}; + + operator random_access_device_iterator_tag () {return random_access_device_iterator_tag();}; + + // bidirectional_universal_iterator_tag is P1 + operator detail::one_degree_of_separation () {return detail::one_degree_of_separation();} + +}; + + +} // end thrust + diff --git a/compat/thrust/iterator/detail/zip_iterator.inl b/compat/thrust/iterator/detail/zip_iterator.inl new file mode 100644 index 0000000..fddd0ad --- /dev/null +++ b/compat/thrust/iterator/detail/zip_iterator.inl @@ -0,0 +1,151 @@ +/* + * Copyright 2008-2012 NVIDIA 
Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +template + zip_iterator + ::zip_iterator(void) +{ +} // end zip_iterator::zip_iterator() + + +template + zip_iterator + ::zip_iterator(IteratorTuple iterator_tuple) + :m_iterator_tuple(iterator_tuple) +{ +} // end zip_iterator::zip_iterator() + + +template + template + zip_iterator + ::zip_iterator(const zip_iterator &other, + typename thrust::detail::enable_if_convertible< + OtherIteratorTuple, + IteratorTuple + >::type *) + :m_iterator_tuple(other.get_iterator_tuple()) +{ +} // end zip_iterator::zip_iterator() + + +template +const IteratorTuple &zip_iterator + ::get_iterator_tuple(void) const +{ + return m_iterator_tuple; +} // end zip_iterator::get_iterator_tuple() + + +template + typename zip_iterator::super_t::reference + zip_iterator + ::dereference(void) const +{ + using namespace detail::tuple_impl_specific; + + return thrust::detail::tuple_host_device_transform(get_iterator_tuple(), detail::dereference_iterator()); +} // end zip_iterator::dereference() + + +__thrust_hd_warning_disable__ +template + template + bool zip_iterator + ::equal(const zip_iterator &other) const +{ + return get<0>(get_iterator_tuple()) == get<0>(other.get_iterator_tuple()); +} // end zip_iterator::equal() + + +template + void zip_iterator + ::advance(typename super_t::difference_type n) +{ + using namespace detail::tuple_impl_specific; + + // XXX 
note that we use a pointer to System to dispatch to avoid + // default construction of a System + typename thrust::iterator_system::type *use_me_to_dispatch = 0; + + // dispatch on system + tuple_for_each(m_iterator_tuple, + detail::advance_iterator(n), + use_me_to_dispatch); +} // end zip_iterator::advance() + + +template + void zip_iterator + ::increment(void) +{ + using namespace detail::tuple_impl_specific; + + // XXX note that we use a pointer to System to dispatch to avoid + // default construction of a System + typename thrust::iterator_system::type *use_me_to_dispatch = 0; + + // dispatch on system + tuple_for_each(m_iterator_tuple, detail::increment_iterator(), + use_me_to_dispatch); +} // end zip_iterator::increment() + + +template + void zip_iterator + ::decrement(void) +{ + using namespace detail::tuple_impl_specific; + + // XXX note that we use a pointer to System to dispatch to avoid + // default construction of a System + typename thrust::iterator_system::type *use_me_to_dispatch = 0; + + // dispatch on system + tuple_for_each(m_iterator_tuple, detail::decrement_iterator(), + use_me_to_dispatch); +} // end zip_iterator::decrement() + + +__thrust_hd_warning_disable__ +template + template + typename zip_iterator::super_t::difference_type + zip_iterator + ::distance_to(const zip_iterator &other) const +{ + return get<0>(other.get_iterator_tuple()) - get<0>(get_iterator_tuple()); +} // end zip_iterator::distance_to() + + +template + zip_iterator make_zip_iterator(IteratorTuple t) +{ + return zip_iterator(t); +} // end make_zip_iterator() + + +} // end thrust + diff --git a/compat/thrust/iterator/detail/zip_iterator_base.h b/compat/thrust/iterator/detail/zip_iterator_base.h new file mode 100644 index 0000000..9dd7789 --- /dev/null +++ b/compat/thrust/iterator/detail/zip_iterator_base.h @@ -0,0 +1,418 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file 
except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + +// forward declare zip_iterator for zip_iterator_base +template class zip_iterator; + +namespace detail +{ + + +// Functors to be used with tuple algorithms +// +template +class advance_iterator +{ +public: + inline __host__ __device__ + advance_iterator(DiffType step) : m_step(step) {} + + template + inline __host__ __device__ + void operator()(Iterator& it) const + { it += m_step; } + +private: + DiffType m_step; +}; // end advance_iterator + + +struct increment_iterator +{ + template + inline __host__ __device__ + void operator()(Iterator& it) + { ++it; } +}; // end increment_iterator + + +struct decrement_iterator +{ + template + inline __host__ __device__ + void operator()(Iterator& it) + { --it; } +}; // end decrement_iterator + + +struct dereference_iterator +{ + template + struct apply + { + typedef typename + iterator_traits::reference + type; + }; // end apply + + // XXX silence warnings of the form "calling a __host__ function from a __host__ __device__ function is not allowed + __thrust_hd_warning_disable__ + template + __host__ __device__ + typename apply::type operator()(Iterator const& it) + { + return *it; + } +}; // end dereference_iterator + + +// The namespace tuple_impl_specific provides two meta- +// algorithms and two algorithms for tuples. 
+namespace tuple_impl_specific +{ + +// define apply1 for tuple_meta_transform_impl +template + struct apply1 + : UnaryMetaFunctionClass::template apply +{ +}; // end apply1 + + +// define apply2 for tuple_meta_accumulate_impl +template + struct apply2 + : UnaryMetaFunctionClass::template apply +{ +}; // end apply2 + + +// Meta-accumulate algorithm for tuples. Note: The template +// parameter StartType corresponds to the initial value in +// ordinary accumulation. +// +template + struct tuple_meta_accumulate; + +template< + typename Tuple + , class BinaryMetaFun + , typename StartType +> + struct tuple_meta_accumulate_impl +{ + typedef typename apply2< + BinaryMetaFun + , typename Tuple::head_type + , typename tuple_meta_accumulate< + typename Tuple::tail_type + , BinaryMetaFun + , StartType + >::type + >::type type; +}; + + +template< + typename Tuple + , class BinaryMetaFun + , typename StartType +> +struct tuple_meta_accumulate + : thrust::detail::eval_if< + thrust::detail::is_same::value + , thrust::detail::identity_ + , tuple_meta_accumulate_impl< + Tuple + , BinaryMetaFun + , StartType + > + > // end eval_if +{ +}; // end tuple_meta_accumulate + + +// transform algorithm for tuples. The template parameter Fun +// must be a unary functor which is also a unary metafunction +// class that computes its return type based on its argument +// type. For example: +// +// struct to_ptr +// { +// template +// struct apply +// { +// typedef Arg* type; +// } +// +// template +// Arg* operator()(Arg x); +// }; + + + +// for_each algorithm for tuples. 
+// +template +inline __host__ __device__ +Fun tuple_for_each(thrust::null_type, Fun f, System *) +{ + return f; +} // end tuple_for_each() + + +template +inline __host__ __device__ +Fun tuple_for_each(Tuple& t, Fun f, System *dispatch_tag) +{ + f( t.get_head() ); + return tuple_for_each(t.get_tail(), f, dispatch_tag); +} // end tuple_for_each() + + +template +inline __host__ __device__ +Fun tuple_for_each(Tuple& t, Fun f, thrust::host_system_tag *dispatch_tag) +{ +// XXX this path is required in order to accommodate pure host iterators +// (such as std::vector::iterator) in a zip_iterator +#ifndef __CUDA_ARCH__ + f( t.get_head() ); + return tuple_for_each(t.get_tail(), f, dispatch_tag); +#else + // this code will never be called + return f; +#endif +} // end tuple_for_each() + + +// Equality of tuples. NOTE: "==" for tuples currently (7/2003) +// has problems under some compilers, so I just do my own. +// No point in bringing in a bunch of #ifdefs here. This is +// going to go away with the next tuple implementation anyway. +// +__host__ __device__ +inline bool tuple_equal(thrust::null_type, thrust::null_type) +{ return true; } + + +template +__host__ __device__ +bool tuple_equal(Tuple1 const& t1, Tuple2 const& t2) +{ + return t1.get_head() == t2.get_head() && + tuple_equal(t1.get_tail(), t2.get_tail()); +} // end tuple_equal() + +} // end tuple_impl_specific + + +// Metafunction to obtain the type of the tuple whose element types +// are the value_types of an iterator tuple. +// +template + struct tuple_of_value_types + : tuple_meta_transform< + IteratorTuple, + iterator_value + > +{ +}; // end tuple_of_value_types + + +struct minimum_category_lambda +{ + template + struct apply : minimum_category + {}; +}; + + + +// Metafunction to obtain the minimal traversal tag in a tuple +// of iterators.
+// +template +struct minimum_traversal_category_in_iterator_tuple +{ + typedef typename tuple_meta_transform< + IteratorTuple + , thrust::iterator_traversal + >::type tuple_of_traversal_tags; + + typedef typename tuple_impl_specific::tuple_meta_accumulate< + tuple_of_traversal_tags + , minimum_category_lambda + , thrust::random_access_traversal_tag + >::type type; +}; + + +struct minimum_system_lambda +{ + template + struct apply : minimum_system + {}; +}; + + + +// Metafunction to obtain the minimal system tag in a tuple +// of iterators. +template +struct minimum_system_in_iterator_tuple +{ + typedef typename thrust::detail::tuple_meta_transform< + IteratorTuple, + thrust::iterator_system + >::type tuple_of_system_tags; + + typedef typename tuple_impl_specific::tuple_meta_accumulate< + tuple_of_system_tags, + minimum_system_lambda, + thrust::any_system_tag + >::type type; +}; + +namespace zip_iterator_base_ns +{ + + +template + struct tuple_elements_helper + : eval_if< + (i < tuple_size::value), + tuple_element, + identity_ + > +{}; + + +template + struct tuple_elements +{ + typedef typename tuple_elements_helper<0,Tuple>::type T0; + typedef typename tuple_elements_helper<1,Tuple>::type T1; + typedef typename tuple_elements_helper<2,Tuple>::type T2; + typedef typename tuple_elements_helper<3,Tuple>::type T3; + typedef typename tuple_elements_helper<4,Tuple>::type T4; + typedef typename tuple_elements_helper<5,Tuple>::type T5; + typedef typename tuple_elements_helper<6,Tuple>::type T6; + typedef typename tuple_elements_helper<7,Tuple>::type T7; + typedef typename tuple_elements_helper<8,Tuple>::type T8; + typedef typename tuple_elements_helper<9,Tuple>::type T9; +}; + + +template + struct tuple_of_iterator_references +{ + // get a thrust::tuple of the iterators' references + typedef typename tuple_meta_transform< + IteratorTuple, + iterator_reference + >::type tuple_of_references; + + // get at the individual tuple element types by name + typedef tuple_elements 
elements; + + // map thrust::tuple to tuple_of_iterator_references + typedef thrust::detail::tuple_of_iterator_references< + typename elements::T0, + typename elements::T1, + typename elements::T2, + typename elements::T3, + typename elements::T4, + typename elements::T5, + typename elements::T6, + typename elements::T7, + typename elements::T8, + typename elements::T9 + > type; +}; + + +} // end zip_iterator_base_ns + +/////////////////////////////////////////////////////////////////// +// +// Class zip_iterator_base +// +// Builds and exposes the iterator facade type from which the zip +// iterator will be derived. +// +template + struct zip_iterator_base +{ + //private: + // reference type is the type of the tuple obtained from the + // iterators' reference types. + typedef typename zip_iterator_base_ns::tuple_of_iterator_references::type reference; + + // Boost's Value type is the same as reference type. + //typedef reference value_type; + typedef typename tuple_of_value_types::type value_type; + + // Difference type is the first iterator's difference type + typedef typename thrust::iterator_traits< + typename thrust::tuple_element<0, IteratorTuple>::type + >::difference_type difference_type; + + // Iterator system is the minimum system tag in the + // iterator tuple + typedef typename + minimum_system_in_iterator_tuple::type system; + + // Traversal category is the minimum traversal category in the + // iterator tuple + typedef typename + minimum_traversal_category_in_iterator_tuple::type traversal_category; + + public: + + // The iterator facade type from which the zip iterator will + // be derived. 
+ typedef thrust::iterator_facade< + zip_iterator, + value_type, + system, + traversal_category, + reference, + difference_type + > type; +}; // end zip_iterator_base + +} // end detail + +} // end thrust + + diff --git a/compat/thrust/iterator/discard_iterator.h b/compat/thrust/iterator/discard_iterator.h new file mode 100644 index 0000000..6e089b5 --- /dev/null +++ b/compat/thrust/iterator/discard_iterator.h @@ -0,0 +1,171 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file thrust/iterator/discard_iterator.h + * \brief An iterator which "discards" (ignores) values assigned to it upon dereference + */ + +#pragma once + +#include +#include +#include + +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN + +namespace thrust +{ + +/*! \addtogroup iterators + * \{ + */ + +/*! \addtogroup fancyiterator Fancy Iterators + * \ingroup iterators + * \{ + */ + +/*! \p discard_iterator is an iterator which represents a special kind of pointer that + * ignores values written to it upon dereference. This iterator is useful for ignoring + * the output of certain algorithms without wasting memory capacity or bandwidth. + * \p discard_iterator may also be used to count the size of an algorithm's output which + * may not be known a priori. 
+ * + * The following code snippet demonstrates how to use \p discard_iterator to + * ignore one of the output ranges of reduce_by_key + * + * \code + * #include + * #include + * #include + * + * int main(void) + * { + * thrust::device_vector keys(7), values(7); + * + * keys[0] = 1; + * keys[1] = 3; + * keys[2] = 3; + * keys[3] = 3; + * keys[4] = 2; + * keys[5] = 2; + * keys[6] = 1; + * + * values[0] = 9; + * values[1] = 8; + * values[2] = 7; + * values[3] = 6; + * values[4] = 5; + * values[5] = 4; + * values[6] = 3; + * + * thrust::device_vector result(4); + * + * // we are only interested in the reduced values + * // use discard_iterator to ignore the output keys + * thrust::reduce_by_key(keys.begin(), keys.end(), + * values.begin(), + * thrust::make_discard_iterator(), + * result.begin()); + * + * // result is now [9, 21, 9, 3] + * + * return 0; + * } + * \endcode + * + * \see make_discard_iterator + */ +template + class discard_iterator + : public detail::discard_iterator_base::type +{ + /*! \cond + */ + friend class thrust::iterator_core_access; + typedef typename detail::discard_iterator_base::type super_t; + typedef typename detail::discard_iterator_base::incrementable incrementable; + typedef typename detail::discard_iterator_base::base_iterator base_iterator; + + public: + typedef typename super_t::reference reference; + typedef typename super_t::value_type value_type; + + /*! \endcond + */ + + /*! Copy constructor copies from a source discard_iterator. + * + * \param rhs The discard_iterator to copy. + */ + __host__ __device__ + discard_iterator(discard_iterator const &rhs) + : super_t(rhs.base()) {} + + /*! This constructor receives an optional index specifying the position of this + * \p discard_iterator in a range. + * + * \param i The index of this \p discard_iterator in a range. Defaults to the + * value returned by \c Incrementable's null constructor. For example, + * when Incrementable == int, \c 0.
+ */ + __host__ __device__ + discard_iterator(incrementable const &i = incrementable()) + : super_t(base_iterator(i)) {} + + /*! \cond + */ + + private: // Core iterator interface + __host__ __device__ + reference dereference(void) const + { + return m_element; + } + + mutable value_type m_element; + + /*! \endcond + */ +}; // end discard_iterator + + +/*! \p make_discard_iterator creates a \p discard_iterator from an optional index parameter. + * + * \param i The index of the returned \p discard_iterator within a range. + * In the default case, the value of this parameter is \c 0. + * + * \return A new \p discard_iterator with index as given by \p i. + * + * \see discard_iterator + */ +inline __host__ __device__ +discard_iterator<> make_discard_iterator(discard_iterator<>::difference_type i = discard_iterator<>::difference_type(0)) +{ + return discard_iterator<>(i); +} // end make_discard_iterator() + +/*! \} // end fancyiterators + */ + +/*! \} // end iterators + */ + +} // end namespace thrust + +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END + diff --git a/compat/thrust/iterator/iterator_adaptor.h b/compat/thrust/iterator/iterator_adaptor.h new file mode 100644 index 0000000..7b9cca3 --- /dev/null +++ b/compat/thrust/iterator/iterator_adaptor.h @@ -0,0 +1,239 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*!
\file thrust/iterator/iterator_adaptor.h + * \brief An iterator which adapts a base iterator + */ + +/* + * (C) Copyright David Abrahams 2002. + * (C) Copyright Jeremy Siek 2002. + * (C) Copyright Thomas Witt 2002. + * + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying NOTICE file for the complete license) + * + * For more information, see http://www.boost.org + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +/*! \addtogroup iterators + * \{ + */ + +/*! \addtogroup fancyiterator Fancy Iterators + * \ingroup iterators + * \{ + */ + +/*! \p iterator_adaptor is an iterator which adapts an existing type of iterator to create a new type of + * iterator. Most of Thrust's fancy iterators are defined via inheritance from \p iterator_adaptor. + * While composition of these existing Thrust iterators is often sufficient for expressing the desired + * functionality, it is occasionally more straightforward to derive from \p iterator_adaptor directly. 
+ * + * To see how to use \p iterator_adaptor to create a novel iterator type, let's examine how to use it to + * define \p repeat_iterator, a fancy iterator which repeats elements from another range a given number of times: + * + * \code + * #include + * + * // derive repeat_iterator from iterator_adaptor + * template + * class repeat_iterator + * : public thrust::iterator_adaptor< + * repeat_iterator, // the first template parameter is the name of the iterator we're creating + * Iterator // the second template parameter is the name of the iterator we're adapting + * // we can use the default for the additional template parameters + * > + * { + * public: + * // shorthand for the name of the iterator_adaptor we're deriving from + * typedef thrust::iterator_adaptor< + * repeat_iterator, + * Iterator + * > super_t; + * + * __host__ __device__ + * repeat_iterator(const Iterator &x, int n) : super_t(x), begin(x), n(n) {} + * + * // befriend thrust::iterator_core_access to allow it access to the private interface below + * friend class thrust::iterator_core_access; + * + * private: + * // repeat each element of the adapted range n times + * unsigned int n; + * + * // used to keep track of where we began + * const Iterator begin; + * + * // it is private because only thrust::iterator_core_access needs access to it + * __host__ __device__ + * typename super_t::reference dereference() const + * { + * return *(begin + (this->base() - begin) / n); + * } + * }; + * \endcode + * + * Except for the first two, \p iterator_adaptor's template parameters are optional. When omitted, or when the + * user specifies \p thrust::use_default in its place, \p iterator_adaptor will use a default type inferred from \p Base. + * + * \p iterator_adaptor's functionality is derived from and generally equivalent to \p boost::iterator_adaptor.
+ * The exception is Thrust's addition of the template parameter \p System, which is necessary to allow Thrust + * to dispatch an algorithm to one of several parallel backend systems. + * + * \p iterator_adaptor is a powerful tool for creating custom iterators directly. However, the large set of iterator semantics which must be satisfied + * for algorithm compatibility can make \p iterator_adaptor difficult to use correctly. Unless you require the full expressivity of \p iterator_adaptor, + * consider building a custom iterator through composition of existing higher-level fancy iterators instead. + * + * Interested users may refer to boost::iterator_adaptor's documentation for further usage examples. + */ +template + class iterator_adaptor: + public detail::iterator_adaptor_base< + Derived, Base, Value, System, Traversal, Reference, Difference + >::type +{ + /*! \cond + */ + + friend class thrust::iterator_core_access; + + protected: + typedef typename detail::iterator_adaptor_base< + Derived, Base, Value, System, Traversal, Reference, Difference + >::type super_t; + + /*! \endcond + */ + + public: + /*! \p iterator_adaptor's default constructor does nothing. + */ + __host__ __device__ + iterator_adaptor(){} + + /*! This constructor copies from a given instance of the \p Base iterator. + */ + __host__ __device__ + explicit iterator_adaptor(Base const& iter) + : m_iterator(iter) + {} + + /*! The type of iterator this \p iterator_adaptor adapts. + */ + typedef Base base_type; + + /*! \cond + */ + typedef typename super_t::reference reference; + + typedef typename super_t::difference_type difference_type; + /*! \endcond + */ + + /*! \return A \p const reference to the \p Base iterator this \p iterator_adaptor adapts. + */ + __host__ __device__ + Base const& base() const + { return m_iterator; } + + protected: + /*! \return A \p const reference to the \p Base iterator this \p iterator_adaptor adapts.
+ */ + __host__ __device__ + Base const& base_reference() const + { return m_iterator; } + + /*! \return A mutable reference to the \p Base iterator this \p iterator_adaptor adapts. + */ + __host__ __device__ + Base& base_reference() + { return m_iterator; } + + /*! \cond + */ + private: // Core iterator interface for iterator_facade + + __thrust_hd_warning_disable__ + __host__ __device__ + typename iterator_adaptor::reference dereference() const + { return *m_iterator; } + + __thrust_hd_warning_disable__ + template + __host__ __device__ + bool equal(iterator_adaptor const& x) const + { return m_iterator == x.base(); } + + __thrust_hd_warning_disable__ + __host__ __device__ + void advance(typename iterator_adaptor::difference_type n) + { + // XXX statically assert on random_access_traversal_tag + m_iterator += n; + } + + __thrust_hd_warning_disable__ + __host__ __device__ + void increment() + { ++m_iterator; } + + __thrust_hd_warning_disable__ + __host__ __device__ + void decrement() + { + // XXX statically assert on bidirectional_traversal_tag + --m_iterator; + } + + __thrust_hd_warning_disable__ + template + __host__ __device__ + typename iterator_adaptor::difference_type distance_to(iterator_adaptor const& y) const + { return y.base() - m_iterator; } + + private: + Base m_iterator; + + /*! \endcond + */ +}; // end iterator_adaptor + +/*! \} // end fancyiterators + */ + +/*! \} // end iterators + */ + +} // end thrust + diff --git a/compat/thrust/iterator/iterator_categories.h b/compat/thrust/iterator/iterator_categories.h new file mode 100644 index 0000000..81601b4 --- /dev/null +++ b/compat/thrust/iterator/iterator_categories.h @@ -0,0 +1,191 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file thrust/iterator/iterator_categories.h + * \brief Types for reasoning about the categories of iterators + */ + +/* + * (C) Copyright Jeremy Siek 2002. + * + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying NOTICE file for the complete license) + * + * For more information, see http://www.boost.org + */ + + +#pragma once + +#include + +// #include this for stl's iterator tags +#include + +namespace thrust +{ + +/*! \addtogroup iterators + * \addtogroup iterator_tags Iterator Tags + * \ingroup iterators + * \addtogroup iterator_tag_classes Iterator Tag Classes + * \ingroup iterator_tags + * \{ + */ + +/*! \p input_device_iterator_tag is an empty class: it has no member functions, + * member variables, or nested types. It is used solely as a "tag": a + * representation of the Input Device Iterator concept within the C++ type + * system. + * + * \see http://www.sgi.com/tech/sgi/input_iterator_tag.html, iterator_traits, + * output_device_iterator_tag, forward_device_iterator_tag, + * bidirectional_device_iterator_tag, random_access_device_iterator_tag, + * input_host_iterator_tag, output_host_iterator_tag, forward_host_iterator_tag, + * bidirectional_host_iterator_tag, random_access_host_iterator_tag + */ +struct input_device_iterator_tag {}; + +/*! \p output_device_iterator_tag is an empty class: it has no member functions, + * member variables, or nested types. It is used solely as a "tag": a + * representation of the Output Device Iterator concept within the C++ type + * system. 
+ * + * \see http://www.sgi.com/tech/sgi/output_iterator_tag.html, iterator_traits, + * input_device_iterator_tag, forward_device_iterator_tag, + * bidirectional_device_iterator_tag, random_access_device_iterator_tag, + * input_host_iterator_tag, output_host_iterator_tag, forward_host_iterator_tag, + * bidirectional_host_iterator_tag, random_access_host_iterator_tag + */ +struct output_device_iterator_tag {}; + +/*! \p forward_device_iterator_tag is an empty class: it has no member functions, + * member variables, or nested types. It is used solely as a "tag": a + * representation of the Forward Device Iterator concept within the C++ type + * system. + * + * \see http://www.sgi.com/tech/sgi/forward_iterator_tag.html, iterator_traits, + * input_device_iterator_tag, output_device_iterator_tag, + * bidirectional_device_iterator_tag, random_access_device_iterator_tag, + * input_host_iterator_tag, output_host_iterator_tag, forward_host_iterator_tag, + * bidirectional_host_iterator_tag, random_access_host_iterator_tag + */ +struct forward_device_iterator_tag : public input_device_iterator_tag {}; + +/*! \p bidirectional_device_iterator_tag is an empty class: it has no member + * functions, member variables, or nested types. It is used solely as a "tag": a + * representation of the Bidirectional Device Iterator concept within the C++ + * type system. + * + * \see http://www.sgi.com/tech/sgi/bidirectional_iterator_tag.html, + * iterator_traits, input_device_iterator_tag, output_device_iterator_tag, + * forward_device_iterator_tag, random_access_device_iterator_tag, + * input_host_iterator_tag, output_host_iterator_tag, forward_host_iterator_tag, + * bidirectional_host_iterator_tag, random_access_host_iterator_tag + */ +struct bidirectional_device_iterator_tag : public forward_device_iterator_tag {}; + +/*! \p random_access_device_iterator_tag is an empty class: it has no member + * functions, member variables, or nested types. 
It is used solely as a "tag": a + * representation of the Random Access Device Iterator concept within the C++ + * type system. + * + * \see http://www.sgi.com/tech/sgi/random_access_iterator_tag.html, + * iterator_traits, input_device_iterator_tag, output_device_iterator_tag, + * forward_device_iterator_tag, bidirectional_device_iterator_tag, + * input_host_iterator_tag, output_host_iterator_tag, forward_host_iterator_tag, + * bidirectional_host_iterator_tag, random_access_host_iterator_tag + */ +struct random_access_device_iterator_tag : public bidirectional_device_iterator_tag {}; + +/*! \p input_host_iterator_tag is an empty class: it has no member + * functions, member variables, or nested types. It is used solely as a "tag": a + * representation of the Input Host Iterator concept within the C++ + * type system. + * + * \see http://www.sgi.com/tech/sgi/input_iterator_tag.html, + * iterator_traits, input_device_iterator_tag, output_device_iterator_tag, + * forward_device_iterator_tag, bidirectional_device_iterator_tag, + * random_access_device_iterator_tag, + * output_host_iterator_tag, forward_host_iterator_tag, + * bidirectional_host_iterator_tag, random_access_host_iterator_tag + */ +typedef std::input_iterator_tag input_host_iterator_tag; + +/*! \p output_host_iterator_tag is an empty class: it has no member + * functions, member variables, or nested types. It is used solely as a "tag": a + * representation of the Output Host Iterator concept within the C++ + * type system. + * + * \see http://www.sgi.com/tech/sgi/output_iterator_tag.html, + * iterator_traits, input_device_iterator_tag, output_device_iterator_tag, + * forward_device_iterator_tag, bidirectional_device_iterator_tag, + * random_access_device_iterator_tag, + * input_host_iterator_tag, forward_host_iterator_tag, + * bidirectional_host_iterator_tag, random_access_host_iterator_tag + */ +typedef std::output_iterator_tag output_host_iterator_tag; + +/*! 
\p forward_host_iterator_tag is an empty class: it has no member + * functions, member variables, or nested types. It is used solely as a "tag": a + * representation of the Forward Host Iterator concept within the C++ + * type system. + * + * \see http://www.sgi.com/tech/sgi/forward_iterator_tag.html, + * iterator_traits, input_device_iterator_tag, output_device_iterator_tag, + * forward_device_iterator_tag, bidirectional_device_iterator_tag, + * random_access_device_iterator_tag, + * input_host_iterator_tag, output_host_iterator_tag, + * bidirectional_host_iterator_tag, random_access_host_iterator_tag + */ +typedef std::forward_iterator_tag forward_host_iterator_tag; + +/*! \p bidirectional_host_iterator_tag is an empty class: it has no member + * functions, member variables, or nested types. It is used solely as a "tag": a + * representation of the Bidirectional Host Iterator concept within the C++ + * type system. + * + * \see http://www.sgi.com/tech/sgi/bidirectional_iterator_tag.html, + * iterator_traits, input_device_iterator_tag, output_device_iterator_tag, + * forward_device_iterator_tag, bidirectional_device_iterator_tag, + * random_access_device_iterator_tag, + * input_host_iterator_tag, output_host_iterator_tag, + * forward_host_iterator_tag, random_access_host_iterator_tag + */ +typedef std::bidirectional_iterator_tag bidirectional_host_iterator_tag; + +/*! \p random_access_host_iterator_tag is an empty class: it has no member + * functions, member variables, or nested types. It is used solely as a "tag": a + * representation of the Random Access Host Iterator concept within the C++ + * type system.
+ * + * \see http://www.sgi.com/tech/sgi/random_access_iterator_tag.html, + * iterator_traits, input_device_iterator_tag, output_device_iterator_tag, + * forward_device_iterator_tag, bidirectional_device_iterator_tag, + * random_access_device_iterator_tag, + * input_host_iterator_tag, output_host_iterator_tag, + * forward_host_iterator_tag, bidirectional_host_iterator_tag + */ +typedef std::random_access_iterator_tag random_access_host_iterator_tag; + +/*! \} // end iterator_tag_classes + */ + +} // end namespace thrust + +#include + diff --git a/compat/thrust/iterator/iterator_facade.h b/compat/thrust/iterator/iterator_facade.h new file mode 100644 index 0000000..232c150 --- /dev/null +++ b/compat/thrust/iterator/iterator_facade.h @@ -0,0 +1,538 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/iterator/iterator_facade.h + * \brief A class which exposes a public interface for iterators + */ + +/* + * (C) Copyright David Abrahams 2002. + * (C) Copyright Jeremy Siek 2002. + * (C) Copyright Thomas Witt 2002. + * + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying NOTICE file for the complete license) + * + * For more information, see http://www.boost.org + */ + + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +/*! \addtogroup iterators + * \{ + */ + +/*! 
\addtogroup fancyiterator Fancy Iterators + * \ingroup iterators + * \{ + */ + + +// This forward declaration is required for the friend declaration +// in iterator_core_access +template class iterator_facade; + + +/*! \p iterator_core_access is the class which user iterator types derived from \p thrust::iterator_adaptor + * or \p thrust::iterator_facade must befriend to allow it to access their private interface. + */ +class iterator_core_access +{ + /*! \cond + */ + + // declare our friends + template friend class iterator_facade; + + // iterator comparisons are our friends + template + inline __host__ __device__ + friend bool + operator ==(iterator_facade const& lhs, + iterator_facade const& rhs); + + template + inline __host__ __device__ + friend bool + operator !=(iterator_facade const& lhs, + iterator_facade const& rhs); + + template + inline __host__ __device__ + friend bool + operator <(iterator_facade const& lhs, + iterator_facade const& rhs); + + template + inline __host__ __device__ + friend bool + operator >(iterator_facade const& lhs, + iterator_facade const& rhs); + + template + inline __host__ __device__ + friend bool + operator <=(iterator_facade const& lhs, + iterator_facade const& rhs); + + template + inline __host__ __device__ + friend bool + operator >=(iterator_facade const& lhs, + iterator_facade const& rhs); + + // iterator difference is our friend + template + inline __host__ __device__ + friend + typename thrust::detail::distance_from_result< + iterator_facade, + iterator_facade + >::type + operator-(iterator_facade const& lhs, + iterator_facade const& rhs); + + template + __host__ __device__ + static typename Facade::reference dereference(Facade const& f) + { + return f.dereference(); + } + + template + __host__ __device__ + static void increment(Facade& f) + { + f.increment(); + } + + template + __host__ __device__ + static void decrement(Facade& f) + { + f.decrement(); + } + + template + __host__ __device__ + static bool equal(Facade1 
const& f1, Facade2 const& f2) + { + return f1.equal(f2); + } + + // XXX TODO: Investigate whether we need both of these cases + //template + //__host__ __device__ + //static bool equal(Facade1 const& f1, Facade2 const& f2, mpl::true_) + //{ + // return f1.equal(f2); + //} + + //template + //__host__ __device__ + //static bool equal(Facade1 const& f1, Facade2 const& f2, mpl::false_) + //{ + // return f2.equal(f1); + //} + + template + __host__ __device__ + static void advance(Facade& f, typename Facade::difference_type n) + { + f.advance(n); + } + + // Facade2 is convertible to Facade1, + // so return Facade1's difference_type + template + __host__ __device__ + static typename Facade1::difference_type + distance_from(Facade1 const& f1, Facade2 const& f2, thrust::detail::true_type) + { + return -f1.distance_to(f2); + } + + // Facade2 is not convertible to Facade1, + // so return Facade2's difference_type + template + __host__ __device__ + static typename Facade2::difference_type + distance_from(Facade1 const& f1, Facade2 const& f2, thrust::detail::false_type) + { + return f2.distance_to(f1); + } + + template + __host__ __device__ + static typename thrust::detail::distance_from_result::type + distance_from(Facade1 const& f1, Facade2 const& f2) + { + // dispatch the implementation of this method upon whether or not + // Facade2 is convertible to Facade1 + return distance_from(f1, f2, + typename thrust::detail::is_convertible::type()); + } + + // + // Curiously Recurring Template interface. + // + template + __host__ __device__ + static Derived& derived(iterator_facade& facade) + { + return *static_cast(&facade); + } + + template + __host__ __device__ + static Derived const& derived(iterator_facade const& facade) + { + return *static_cast(&facade); + } + + /*! \endcond + */ +}; // end iterator_core_access + + +/*! 
\p iterator_facade is a template which allows the programmer to define a novel iterator with a standards-conforming interface + * which Thrust can use to reason about algorithm acceleration opportunities. + * + * Because most of a standard iterator's interface is defined in terms of a small set of core primitives, \p iterator_facade + * defines the non-primitive portion mechanically. In principle a novel iterator could explicitly provide the entire interface in + * an ad hoc fashion but doing so might be tedious and prone to subtle errors. + * + * Often \p iterator_facade is too primitive a tool to use for defining novel iterators. In these cases, \p iterator_adaptor + * or a specific fancy iterator should be used instead. + * + * \p iterator_facade's functionality is derived from and generally equivalent to \p boost::iterator_facade. + * The exception is Thrust's addition of the template parameter \p System, which is necessary to allow Thrust + * to dispatch an algorithm to one of several parallel backend systems. An additional exception is Thrust's omission + * of the \c operator-> member function. + * + * Interested users may refer to boost::iterator_facade's documentation for usage examples. + * + * \note \p iterator_facade's arithmetic operator free functions exist with the usual meanings but are omitted here for brevity. + */ +template + class iterator_facade +{ + private: + /*! \cond + */ + + // + // Curiously Recurring Template interface. + // + __host__ __device__ + Derived& derived() + { + return *static_cast(this); + } + + __host__ __device__ + Derived const& derived() const + { + return *static_cast(this); + } + /*! \endcond + */ + + public: + /*! The type of element pointed to by \p iterator_facade. + */ + typedef typename thrust::detail::remove_const::type value_type; + + /*! The return type of \p iterator_facade::operator*(). + */ + typedef Reference reference; + + /*! 
The return type of \p iterator_facade's non-existent \c operator->() + * member function. Unlike \c boost::iterator_facade, \p iterator_facade + * disallows access to the \p value_type's members through expressions of the + * form iter->member. \p pointer is defined to \c void to indicate + * that these expressions are not allowed. This limitation may be relaxed in a + * future version of Thrust. + */ + typedef void pointer; + + /*! The type of expressions of the form x - y where x and y + * are of type \p iterator_facade. + */ + typedef Difference difference_type; + + /*! The type of iterator category of \p iterator_facade. + */ + typedef typename thrust::detail::iterator_facade_category< + System, Traversal, Value, Reference + >::type iterator_category; + + /*! \p operator*() dereferences this \p iterator_facade. + * \return A reference to the element pointed to by this \p iterator_facade. + */ + __host__ __device__ + reference operator*() const + { + return iterator_core_access::dereference(this->derived()); + } + + // XXX unimplemented for now, consider implementing it later + //pointer operator->() const + //{ + // return; + //} + + // XXX investigate whether or not we need to go to the lengths + // boost does to determine the return type + + /*! \p operator[] performs indexed dereference. + * \return A reference to the element \p n distance away from this \p iterator_facade. + */ + __host__ __device__ + reference operator[](difference_type n) const + { + return *(this->derived() + n); + } + + /*! \p operator++ pre-increments this \p iterator_facade to refer to the element in the next position. + * \return *this + */ + __host__ __device__ + Derived& operator++() + { + iterator_core_access::increment(this->derived()); + return this->derived(); + } + + /*! \p operator++ post-increments this \p iterator_facade and returns a new \p iterator_facade referring to the element in the next position. + * \return A copy of *this before increment. 
+ */ + __host__ __device__ + Derived operator++(int) + { + Derived tmp(this->derived()); + ++*this; + return tmp; + } + + /*! \p operator-- pre-decrements this \p iterator_facade to refer to the element in the previous position. + * \return *this + */ + __host__ __device__ + Derived& operator--() + { + iterator_core_access::decrement(this->derived()); + return this->derived(); + } + + /*! \p operator-- post-decrements this \p iterator_facade and returns a new \p iterator_facade referring to the element in the previous position. + * \return A copy of *this before decrement. + */ + __host__ __device__ + Derived operator--(int) + { + Derived tmp(this->derived()); + --*this; + return tmp; + } + + /*! \p operator+= increments this \p iterator_facade to refer to an element a given distance after its current position. + * \param n The quantity to increment. + * \return *this + */ + __host__ __device__ + Derived& operator+=(difference_type n) + { + iterator_core_access::advance(this->derived(), n); + return this->derived(); + } + + /*! \p operator-= decrements this \p iterator_facade to refer to an element a given distance before its current position. + * \param n The quantity to decrement. + * \return *this + */ + __host__ __device__ + Derived& operator-=(difference_type n) + { + iterator_core_access::advance(this->derived(), -n); + return this->derived(); + } + + /*! \p operator- subtracts a given quantity from this \p iterator_facade and returns a new \p iterator_facade referring to the element at the given position before this \p iterator_facade. + * \param n The quantity to decrement + * \return An \p iterator_facade pointing \p n elements before this \p iterator_facade. + */ + __host__ __device__ + Derived operator-(difference_type n) const + { + Derived result(this->derived()); + return result -= n; + } +}; // end iterator_facade + +/*! 
\cond + */ + +// Comparison operators +template +inline __host__ __device__ +// XXX it might be nice to implement this at some point +//typename enable_if_interoperable::type // exposition +bool +operator ==(iterator_facade const& lhs, + iterator_facade const& rhs) +{ + return iterator_core_access + ::equal(*static_cast(&lhs), + *static_cast(&rhs)); +} + +template +inline __host__ __device__ +// XXX it might be nice to implement this at some point +//typename enable_if_interoperable::type // exposition +bool +operator !=(iterator_facade const& lhs, + iterator_facade const& rhs) +{ + return !iterator_core_access + ::equal(*static_cast(&lhs), + *static_cast(&rhs)); +} + +template +inline __host__ __device__ +// XXX it might be nice to implement this at some point +//typename enable_if_interoperable::type // exposition +bool +operator <(iterator_facade const& lhs, + iterator_facade const& rhs) +{ + return 0 > iterator_core_access + ::distance_from(*static_cast(&lhs), + *static_cast(&rhs)); +} + +template +inline __host__ __device__ +// XXX it might be nice to implement this at some point +//typename enable_if_interoperable::type // exposition +bool +operator >(iterator_facade const& lhs, + iterator_facade const& rhs) +{ + return 0 < iterator_core_access + ::distance_from(*static_cast(&lhs), + *static_cast(&rhs)); +} + +template +inline __host__ __device__ +// XXX it might be nice to implement this at some point +//typename enable_if_interoperable::type // exposition +bool +operator <=(iterator_facade const& lhs, + iterator_facade const& rhs) +{ + return 0 >= iterator_core_access + ::distance_from(*static_cast(&lhs), + *static_cast(&rhs)); +} + +template +inline __host__ __device__ +// XXX it might be nice to implement this at some point +//typename enable_if_interoperable::type // exposition +bool +operator >=(iterator_facade const& lhs, + iterator_facade const& rhs) +{ + return 0 <= iterator_core_access + ::distance_from(*static_cast(&lhs), + *static_cast(&rhs)); +} 
+ +// Iterator difference +template +inline __host__ __device__ + +// divine the type this operator returns +typename thrust::detail::distance_from_result< + iterator_facade, + iterator_facade +>::type + +operator-(iterator_facade const& lhs, + iterator_facade const& rhs) +{ + return iterator_core_access + ::distance_from(*static_cast(&lhs), + *static_cast(&rhs)); +} + +// Iterator addition +template +inline __host__ __device__ +Derived operator+ (iterator_facade const& i, + typename Derived::difference_type n) +{ + Derived tmp(static_cast(i)); + return tmp += n; +} + +template +inline __host__ __device__ +Derived operator+ (typename Derived::difference_type n, + iterator_facade const& i) +{ + Derived tmp(static_cast(i)); + return tmp += n; +} + +/*! \endcond + */ + +/*! \} // end fancyiterators + */ + +/*! \} // end iterators + */ + +} // end thrust + diff --git a/compat/thrust/iterator/iterator_traits.h b/compat/thrust/iterator/iterator_traits.h new file mode 100644 index 0000000..a16f219 --- /dev/null +++ b/compat/thrust/iterator/iterator_traits.h @@ -0,0 +1,76 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file thrust/iterator/iterator_traits.h + * \brief Traits and metafunctions for reasoning about the traits of iterators + */ + +/* + * (C) Copyright David Abrahams 2003. + * + * Distributed under the Boost Software License, Version 1.0. 
+ * (See accompanying NOTICE file for the complete license) + * + * For more information, see http://www.boost.org + */ + +#pragma once + +#include +#include + +namespace thrust +{ + +/*! \p iterator_traits is a type trait class that provides a uniform + * interface for querying the properties of iterators at compile-time. + */ +template + struct iterator_traits + : public std::iterator_traits +{ +}; // end iterator_traits + + +template struct iterator_value; + +template struct iterator_pointer; + +template struct iterator_reference; + +template struct iterator_difference; + +template struct iterator_traversal; + +template struct iterator_system; + +// TODO remove this in Thrust v1.7.0 +template + struct THRUST_DEPRECATED iterator_space +{ + typedef THRUST_DEPRECATED typename iterator_system::type type; +}; + + +} // end thrust + +#include +#include +#include +#include +#include + diff --git a/compat/thrust/iterator/permutation_iterator.h b/compat/thrust/iterator/permutation_iterator.h new file mode 100644 index 0000000..509097b --- /dev/null +++ b/compat/thrust/iterator/permutation_iterator.h @@ -0,0 +1,210 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/iterator/permutation_iterator.h + * \brief An iterator which performs a gather or scatter operation when dereferenced + */ + +/* + * (C) Copyright Toon Knapen 2001. + * (C) Copyright David Abrahams 2003. 
+ * (C) Copyright Roland Richter 2003. + * + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying NOTICE file for the complete license) + * + * For more information, see http://www.boost.org + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ + + +/*! \addtogroup iterators + * \{ + */ + +/*! \addtogroup fancyiterator Fancy Iterators + * \ingroup iterators + * \{ + */ + +/*! \p permutation_iterator is an iterator which represents a pointer into a + * reordered view of a given range. \p permutation_iterator is an imprecise name; + * the reordered view need not be a strict permutation. This iterator is useful + * for fusing a scatter or gather operation with other algorithms. + * + * This iterator takes two arguments: + * + * - an iterator to the range \c V on which the "permutation" will be applied + * - the reindexing scheme that defines how the elements of \c V will be permuted. + * + * Note that \p permutation_iterator is not limited to strict permutations of the + * given range \c V. The distance between begin and end of the reindexing iterators + * is allowed to be smaller compared to the size of the range \c V, in which case + * the \p permutation_iterator only provides a "permutation" of a subrange of \c V. + * The indices neither need to be unique. In this same context, it must be noted + * that the past-the-end \p permutation_iterator is completely defined by means of + * the past-the-end iterator to the indices. + * + * The following code snippet demonstrates how to create a \p permutation_iterator + * which represents a reordering of the contents of a \p device_vector. + * + * \code + * #include + * #include + * ... 
+ * thrust::device_vector values(8); + * values[0] = 10.0f; + * values[1] = 20.0f; + * values[2] = 30.0f; + * values[3] = 40.0f; + * values[4] = 50.0f; + * values[5] = 60.0f; + * values[6] = 70.0f; + * values[7] = 80.0f; + * + * thrust::device_vector indices(4); + * indices[0] = 2; + * indices[1] = 6; + * indices[2] = 1; + * indices[3] = 3; + * + * typedef thrust::device_vector::iterator ElementIterator; + * typedef thrust::device_vector::iterator IndexIterator; + * + * thrust::permutation_iterator iter(values.begin(), indices.begin()); + * + * *iter; // returns 30.0f; + * iter[0]; // returns 30.0f; + * iter[1]; // returns 70.0f; + * iter[2]; // returns 20.0f; + * iter[3]; // returns 40.0f; + * + * // iter[4] is an out-of-bounds error + * + * *iter = -1.0f; // sets values[2] to -1.0f; + * iter[0] = -1.0f; // sets values[2] to -1.0f; + * iter[1] = -1.0f; // sets values[6] to -1.0f; + * iter[2] = -1.0f; // sets values[1] to -1.0f; + * iter[3] = -1.0f; // sets values[3] to -1.0f; + * + * // values is now {10, -1, -1, -1, 50, 60, -1, 80} + * \endcode + * + * \see make_permutation_iterator + */ +template + class permutation_iterator + : public thrust::detail::permutation_iterator_base< + ElementIterator, + IndexIterator + >::type +{ + /*! \cond + */ + private: + typedef typename detail::permutation_iterator_base::type super_t; + + friend class thrust::iterator_core_access; + /*! \endcond + */ + + public: + /*! Null constructor calls the null constructor of this \p permutation_iterator's + * element iterator. + */ + __host__ __device__ + permutation_iterator() + : m_element_iterator() {} + + /*! Constructor accepts an \c ElementIterator into a range of values and an + * \c IndexIterator into a range of indices defining the indexing scheme on the + * values. + * + * \param x An \c ElementIterator pointing to this \p permutation_iterator's range of values. + * \param y An \c IndexIterator pointing to an indexing scheme to use on \p x. 
+ */ + __host__ __device__ + explicit permutation_iterator(ElementIterator x, IndexIterator y) + : super_t(y), m_element_iterator(x) {} + + /*! Copy constructor accepts a related \p permutation_iterator. + * \param r A compatible \p permutation_iterator to copy from. + */ + template + __host__ __device__ + permutation_iterator(permutation_iterator const &r + // XXX remove these guards when we have static_assert + , typename detail::enable_if_convertible::type* = 0 + , typename detail::enable_if_convertible::type* = 0 + ) + : super_t(r.base()), m_element_iterator(r.m_element_iterator) + {} + + /*! \cond + */ + private: + __thrust_hd_warning_disable__ + __host__ __device__ + typename super_t::reference dereference() const + { + return *(m_element_iterator + *this->base()); + } + + // make friends for the copy constructor + template friend class permutation_iterator; + + ElementIterator m_element_iterator; + /*! \endcond + */ +}; // end permutation_iterator + + +/*! \p make_permutation_iterator creates a \p permutation_iterator + * from an \c ElementIterator pointing to a range of elements to "permute" + * and an \c IndexIterator pointing to a range of indices defining an indexing + * scheme on the values. + * + * \param e An \c ElementIterator pointing to a range of values. + * \param i An \c IndexIterator pointing to an indexing scheme to use on \p e. + * \return A new \p permutation_iterator which permutes the range \p e by \p i. + * \see permutation_iterator + */ +template +__host__ __device__ +permutation_iterator make_permutation_iterator(ElementIterator e, IndexIterator i) +{ + return permutation_iterator(e,i); +} + +/*! \} // end fancyiterators + */ + +/*! 
\} // end iterators + */ + +} // end thrust + diff --git a/compat/thrust/iterator/retag.h b/compat/thrust/iterator/retag.h new file mode 100644 index 0000000..660da8f --- /dev/null +++ b/compat/thrust/iterator/retag.h @@ -0,0 +1,68 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/iterator/retag.h + * \brief Functionality for altering an iterator's associated system. + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \ingroup iterator_tags + * \{ + */ + +#if 0 +/*! \p reinterpret_tag returns a copy of an iterator and changes the type of the result's system tag. + * \tparam Tag Any system tag. + * \tparam Iterator Any iterator type. + * \param iter The iterator of interest. + * \return An iterator of unspecified type whose system tag is \p Tag and whose behavior is otherwise + * equivalent to \p iter. + * \note Unlike \p retag, \p reinterpret_tag does not enforce that the converted-to system tag be + * related to the converted-from system tag. + * \see retag + */ +template +unspecified_iterator_type reinterpret_tag(Iterator iter); + +/*! \p retag returns a copy of an iterator and changes the type of the result's system tag. + * \tparam Tag \p Tag shall be convertible to thrust::iterator_system::type, + * or thrust::iterator_system::type is a base type of \p Tag. + * \tparam Iterator Any iterator type. + * \param iter The iterator of interest. 
+ * \return An iterator of unspecified type whose system tag is \p Tag and whose behavior is + * otherwise equivalent to \p iter. + * \note Unlike \p reinterpret_tag, \p retag enforces that the converted-to system tag be + * related to the converted-from system tag. + * \see reinterpret_tag + */ +template +unspecified_iterator_type retag(Iterator iter); +#endif + +/*! \} // iterator_tags + */ + + +} // end thrust + diff --git a/compat/thrust/iterator/reverse_iterator.h b/compat/thrust/iterator/reverse_iterator.h new file mode 100644 index 0000000..03f0339 --- /dev/null +++ b/compat/thrust/iterator/reverse_iterator.h @@ -0,0 +1,238 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file thrust/iterator/reverse_iterator.h + * \brief An iterator adaptor which adapts another iterator to traverse backwards + */ + +/* + * (C) Copyright David Abrahams 2002. + * (C) Copyright Jeremy Siek 2002. + * (C) Copyright Thomas Witt 2002. + * + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying NOTICE file for the complete license) + * + * For more information, see http://www.boost.org + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +/*! \addtogroup iterators + * \{ + */ + +/*! \addtogroup fancyiterator Fancy Iterators + * \ingroup iterators + * \{ + */ + +/*! 
\p reverse_iterator is an iterator which represents a pointer into a + * reversed view of a given range. In this way, \p reverse_iterator allows + * backwards iteration through a bidirectional input range. + * + * It is important to note that although \p reverse_iterator is constructed + * from a given iterator, it points to the element preceding it. In this way, + * the past-the-end \p reverse_iterator of a given range points to the element + * preceding the first element of the input range. By the same token, the first + * \p reverse_iterator of a given range is constructed from a past-the-end iterator + * of the original range yet points to the last element of the input. + * + * The following code snippet demonstrates how to create a \p reverse_iterator + * which represents a reversed view of the contents of a \p device_vector. + * + * \code + * #include + * #include + * ... + * thrust::device_vector v(4); + * v[0] = 0.0f; + * v[1] = 1.0f; + * v[2] = 2.0f; + * v[3] = 3.0f; + * + * typedef thrust::device_vector::iterator Iterator; + * + * // note that we point the iterator to the *end* of the device_vector + * thrust::reverse_iterator iter(v.end()); + * + * *iter; // returns 3.0f; + * iter[0]; // returns 3.0f; + * iter[1]; // returns 2.0f; + * iter[2]; // returns 1.0f; + * iter[3]; // returns 0.0f; + * + * // iter[4] is an out-of-bounds error + * \endcode + * + * Since reversing a range is a common operation, containers like \p device_vector + * have nested typedefs for declaration shorthand and methods for constructing + * reverse_iterators. The following code snippet is equivalent to the previous: + * + * \code + * #include + * ... 
+ * thrust::device_vector v(4); + * v[0] = 0.0f; + * v[1] = 1.0f; + * v[2] = 2.0f; + * v[3] = 3.0f; + * + * // we use the nested type reverse_iterator to refer to a reversed view of + * // a device_vector and the method rbegin() to create a reverse_iterator pointing + * // to the beginning of the reversed device_vector + * thrust::device_vector::reverse_iterator iter = v.rbegin(); + * + * *iter; // returns 3.0f; + * iter[0]; // returns 3.0f; + * iter[1]; // returns 2.0f; + * iter[2]; // returns 1.0f; + * iter[3]; // returns 0.0f; + * + * // iter[4] is an out-of-bounds error + * + * // similarly, rend() points to the end of the reversed sequence: + * assert(v.rend() == (iter + 4)); + * \endcode + * + * Finally, the following code snippet demonstrates how to use reverse_iterator to + * perform a reversed prefix sum operation on the contents of a device_vector: + * + * \code + * #include + * #include + * ... + * thrust::device_vector v(5); + * v[0] = 0; + * v[1] = 1; + * v[2] = 2; + * v[3] = 3; + * v[4] = 4; + * + * thrust::device_vector result(5); + * + * // exclusive scan v into result in reverse + * thrust::exclusive_scan(v.rbegin(), v.rend(), result.begin()); + * + * // result is now {0, 4, 7, 9, 10} + * \endcode + * + * \see make_reverse_iterator + */ +template + class reverse_iterator + : public detail::reverse_iterator_base::type +{ + /*! \cond + */ + private: + typedef typename thrust::detail::reverse_iterator_base< + BidirectionalIterator + >::type super_t; + + friend class thrust::iterator_core_access; + /*! \endcond + */ + + public: + /*! Default constructor does nothing. + */ + __host__ __device__ + reverse_iterator(void) {} + + /*! \p Constructor accepts a \c BidirectionalIterator pointing to a range + * for this \p reverse_iterator to reverse. + * + * \param x A \c BidirectionalIterator pointing to a range to reverse. + */ + __host__ __device__ + explicit reverse_iterator(BidirectionalIterator x); + + /*! 
\p Copy constructor allows construction from a related compatible + * \p reverse_iterator. + * + * \param r A \p reverse_iterator to copy from. + */ + template + __host__ __device__ + reverse_iterator(reverse_iterator const &r +// XXX msvc screws this up +// XXX remove these guards when we have static_assert +#ifndef _MSC_VER + , typename thrust::detail::enable_if< + thrust::detail::is_convertible< + OtherBidirectionalIterator, + BidirectionalIterator + >::value + >::type * = 0 +#endif // _MSC_VER + ); + + /*! \cond + */ + private: + __thrust_hd_warning_disable__ + __host__ __device__ + typename super_t::reference dereference(void) const; + + __host__ __device__ + void increment(void); + + __host__ __device__ + void decrement(void); + + __host__ __device__ + void advance(typename super_t::difference_type n); + + template + __host__ __device__ + typename super_t::difference_type + distance_to(reverse_iterator const &y) const; + /*! \endcond + */ +}; // end reverse_iterator + + +/*! \p make_reverse_iterator creates a \p reverse_iterator + * from a \c BidirectionalIterator pointing to a range of elements to reverse. + * + * \param x A \c BidirectionalIterator pointing to a range to reverse. + * \return A new \p reverse_iterator which reverses the range \p x. + */ +template +__host__ __device__ +reverse_iterator make_reverse_iterator(BidirectionalIterator x); + + +/*! \} // end fancyiterators + */ + +/*! \} // end iterators + */ + +} // end thrust + +#include + diff --git a/compat/thrust/iterator/transform_iterator.h b/compat/thrust/iterator/transform_iterator.h new file mode 100644 index 0000000..985b61b --- /dev/null +++ b/compat/thrust/iterator/transform_iterator.h @@ -0,0 +1,344 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file thrust/iterator/transform_iterator.h + * \brief An iterator which adapts another iterator by applying a function to the result of its dereference + */ + +/* + * (C) Copyright David Abrahams 2002. + * (C) Copyright Jeremy Siek 2002. + * (C) Copyright Thomas Witt 2002. + * + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying NOTICE file for the complete license) + * + * For more information, see http://www.boost.org + */ + +#pragma once + +#include + +// #include the details first +#include +#include +#include +#include + +namespace thrust +{ + +/*! \addtogroup iterators + * \{ + */ + +/*! \addtogroup fancyiterator Fancy Iterators + * \ingroup iterators + * \{ + */ + +/*! \p transform_iterator is an iterator which represents a pointer into a range + * of values after transformation by a function. This iterator is useful for + * creating a range filled with the result of applying an operation to another range + * without either explicitly storing it in memory, or explicitly executing the transformation. + * Using \p transform_iterator facilitates kernel fusion by deferring the execution + * of a transformation until the value is needed while saving both memory capacity + * and bandwidth. + * + * The following code snippet demonstrates how to create a \p transform_iterator + * which represents the result of \c sqrtf applied to the contents of a \p device_vector. 
+ * + * \code + * #include + * #include + * + * // note: functor inherits from unary_function + * struct square_root : public thrust::unary_function + * { + * __host__ __device__ + * float operator()(float x) const + * { + * return sqrtf(x); + * } + * }; + * + * int main(void) + * { + * thrust::device_vector v(4); + * v[0] = 1.0f; + * v[1] = 4.0f; + * v[2] = 9.0f; + * v[3] = 16.0f; + * + * typedef thrust::device_vector::iterator FloatIterator; + * + * thrust::transform_iterator iter(v.begin(), square_root()); + * + * *iter; // returns 1.0f + * iter[0]; // returns 1.0f; + * iter[1]; // returns 2.0f; + * iter[2]; // returns 3.0f; + * iter[3]; // returns 4.0f; + * + * // iter[4] is an out-of-bounds error + * } + * \endcode + * + * This next example demonstrates how to use a \p transform_iterator with the + * \p thrust::reduce function to compute the sum of squares of a sequence. + * We will create temporary \p transform_iterators with the + * \p make_transform_iterator function in order to avoid explicitly specifying their type: + * + * \code + * #include + * #include + * #include + * #include + * + * // note: functor inherits from unary_function + * struct square : public thrust::unary_function + * { + * __host__ __device__ + * float operator()(float x) const + * { + * return x * x; + * } + * }; + * + * int main(void) + * { + * // initialize a device array + * thrust::device_vector v(4); + * v[0] = 1.0f; + * v[1] = 2.0f; + * v[2] = 3.0f; + * v[3] = 4.0f; + * + * float sum_of_squares = + * thrust::reduce(thrust::make_transform_iterator(v.begin(), square()), + * thrust::make_transform_iterator(v.end(), square())); + * + * std::cout << "sum of squares: " << sum_of_squares << std::endl; + * return 0; + * } + * \endcode + * + * Note that in the previous two examples the transform functor (namely \c square_root + * and \c square) inherits from \c thrust::unary_function. 
Inheriting from + * \c thrust::unary_function ensures that a functor is a valid \c AdaptableUnaryFunction + * and provides all the necessary \c typedef declarations. The \p transform_iterator + * can also be applied to a \c UnaryFunction that does not inherit from + * \c thrust::unary_function using an optional template argument. The following example + * illustrates how to use the third template argument to specify the \c result_type of + * the function. + * + * \code + * #include + * #include + * + * // note: functor *does not* inherit from unary_function + * struct square_root + * { + * __host__ __device__ + * float operator()(float x) const + * { + * return sqrtf(x); + * } + * }; + * + * int main(void) + * { + * thrust::device_vector v(4); + * v[0] = 1.0f; + * v[1] = 4.0f; + * v[2] = 9.0f; + * v[3] = 16.0f; + * + * typedef thrust::device_vector::iterator FloatIterator; + * + * // note: float result_type is specified explicitly + * thrust::transform_iterator iter(v.begin(), square_root()); + * + * *iter; // returns 1.0f + * iter[0]; // returns 1.0f; + * iter[1]; // returns 2.0f; + * iter[2]; // returns 3.0f; + * iter[3]; // returns 4.0f; + * + * // iter[4] is an out-of-bounds error + * } + * \endcode + * + * \see make_transform_iterator + */ +template + class transform_iterator + : public detail::transform_iterator_base::type +{ + /*! \cond + */ + public: + typedef typename + detail::transform_iterator_base::type + super_t; + + friend class thrust::iterator_core_access; + /*! \endcond + */ + + public: + /*! Null constructor does nothing. + */ + __host__ __device__ + transform_iterator() {} + + /*! This constructor takes as arguments an \c Iterator and an \c AdaptableUnaryFunction + * and copies them to a new \p transform_iterator. + * + * \param x An \c Iterator pointing to the input to this \p transform_iterator's \c AdaptableUnaryFunction. + * \param f An \c AdaptableUnaryFunction used to transform the objects pointed to by \p x. 
+ */ + __host__ __device__ + transform_iterator(Iterator const& x, AdaptableUnaryFunction f) + : super_t(x), m_f(f) { + } + + /*! This explicit constructor copies the value of a given \c Iterator and creates + * this \p transform_iterator's \c AdaptableUnaryFunction using its null constructor. + * + * \param x An \c Iterator to copy. + */ + __host__ __device__ + explicit transform_iterator(Iterator const& x) + : super_t(x) { } + + /*! This copy constructor creates a new \p transform_iterator from another + * \p transform_iterator. + * + * \param other The \p transform_iterator to copy. + */ + template + __host__ __device__ + transform_iterator(const transform_iterator &other, + typename thrust::detail::enable_if_convertible::type* = 0, + typename thrust::detail::enable_if_convertible::type* = 0) + : super_t(other.base()), m_f(other.functor()) {} + + /*! Copy assignment operator copies from another \p transform_iterator. + * \p other The other \p transform_iterator to copy + * \return *this + * + * \note If the type of this \p transform_iterator's functor is not copy assignable + * (for example, if it is a lambda) it is not an error to call this function. + * In this case, however, the functor will not be modified. + * + * In any case, this \p transform_iterator's underlying iterator will be copy assigned. + */ + __host__ __device__ + transform_iterator &operator=(const transform_iterator &other) + { + return do_assign(other, + // XXX gcc 4.2.1 crashes on is_copy_assignable; just assume the functor is assignable as a WAR +#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC) && (THRUST_GCC_VERSION <= 40201) + thrust::detail::true_type() +#else + typename thrust::detail::is_copy_assignable::type() +#endif // THRUST_HOST_COMPILER + ); + } + + /*! This method returns a copy of this \p transform_iterator's \c AdaptableUnaryFunction. + * \return A copy of this \p transform_iterator's \c AdaptableUnaryFunction. 
+ */ + __host__ __device__ + AdaptableUnaryFunction functor() const + { return m_f; } + + /*! \cond + */ + private: + __host__ __device__ + transform_iterator &do_assign(const transform_iterator &other, thrust::detail::true_type) + { + super_t::operator=(other); + + // do assign to m_f + m_f = other.functor(); + + return *this; + } + + __host__ __device__ + transform_iterator &do_assign(const transform_iterator &other, thrust::detail::false_type) + { + super_t::operator=(other); + + // don't assign to m_f + + return *this; + } + + __thrust_hd_warning_disable__ + __host__ __device__ + typename super_t::reference dereference() const + { + // XXX consider making this a member instead of a temporary created inside dereference + thrust::detail::host_device_function wrapped_f(m_f); + + return wrapped_f(*this->base()); + } + + // tag this as mutable per Dave Abrahams in this thread: + // http://lists.boost.org/Archives/boost/2004/05/65332.php + mutable AdaptableUnaryFunction m_f; + + /*! \endcond + */ +}; // end transform_iterator + + +/*! \p make_transform_iterator creates a \p transform_iterator + * from an \c Iterator and \c AdaptableUnaryFunction. + * + * \param it The \c Iterator pointing to the input range of the + * newly created \p transform_iterator. + * \param fun The \c AdaptableUnaryFunction used to transform the range pointed + * to by \p it in the newly created \p transform_iterator. + * \return A new \p transform_iterator which transforms the range at + * \p it by \p fun. + * \see transform_iterator + */ +template +inline __host__ __device__ +transform_iterator +make_transform_iterator(Iterator it, AdaptableUnaryFunction fun) +{ + return transform_iterator(it, fun); +} // end make_transform_iterator + +/*! \} // end fancyiterators + */ + +/*! 
\} // end iterators + */ + +} // end thrust + diff --git a/compat/thrust/iterator/zip_iterator.h b/compat/thrust/iterator/zip_iterator.h new file mode 100644 index 0000000..8e7299c --- /dev/null +++ b/compat/thrust/iterator/zip_iterator.h @@ -0,0 +1,245 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file thrust/iterator/zip_iterator.h + * \brief An iterator which returns a tuple of the result of dereferencing + * a tuple of iterators when dereferenced + */ + +/* + * Copyright David Abrahams and Thomas Becker 2000-2006. + * + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying NOTICE file for the complete license) + * + * For more information, see http://www.boost.org + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +/*! \addtogroup iterators + * \{ + */ + +/*! \addtogroup fancyiterator Fancy Iterators + * \ingroup iterators + * \{ + */ + +/*! \p zip_iterator is an iterator which represents a pointer into a range + * of \p tuples whose elements are themselves taken from a \p tuple of input + * iterators. This iterator is useful for creating a virtual array of structures + * while achieving the same performance and bandwidth as the structure of arrays + * idiom. \p zip_iterator also facilitates kernel fusion by providing a convenient + * means of amortizing the execution of the same operation over multiple ranges. 
+ * + * The following code snippet demonstrates how to create a \p zip_iterator + * which represents the result of "zipping" multiple ranges together. + * + * \code + * #include + * #include + * #include + * ... + * thrust::device_vector int_v(3); + * int_v[0] = 0; int_v[1] = 1; int_v[2] = 2; + * + * thrust::device_vector float_v(3); + * float_v[0] = 0.0f; float_v[1] = 1.0f; float_v[2] = 2.0f; + * + * thrust::device_vector char_v(3); + * char_v[0] = 'a'; char_v[1] = 'b'; char_v[2] = 'c'; + * + * // typedef these iterators for shorthand + * typedef thrust::device_vector::iterator IntIterator; + * typedef thrust::device_vector::iterator FloatIterator; + * typedef thrust::device_vector::iterator CharIterator; + * + * // typedef a tuple of these iterators + * typedef thrust::tuple IteratorTuple; + * + * // typedef the zip_iterator of this tuple + * typedef thrust::zip_iterator ZipIterator; + * + * // finally, create the zip_iterator + * ZipIterator iter(thrust::make_tuple(int_v.begin(), float_v.begin(), char_v.begin())); + * + * *iter; // returns (0, 0.0f, 'a') + * iter[0]; // returns (0, 0.0f, 'a') + * iter[1]; // returns (1, 1.0f, 'b') + * iter[2]; // returns (2, 2.0f, 'c') + * + * thrust::get<0>(iter[2]); // returns 2 + * thrust::get<1>(iter[0]); // returns 0.0f + * thrust::get<2>(iter[1]); // returns 'b' + * + * // iter[3] is an out-of-bounds error + * \endcode + * + * Defining the type of a \p zip_iterator can be complex. The next code example demonstrates + * how to use the \p make_zip_iterator function with the \p make_tuple function to avoid + * explicitly specifying the type of the \p zip_iterator. This example shows how to use + * \p zip_iterator to copy multiple ranges with a single call to \p thrust::copy.
+ * + * \code + * #include + * #include + * #include + * + * int main(void) + * { + * thrust::device_vector int_in(3), int_out(3); + * int_in[0] = 0; + * int_in[1] = 1; + * int_in[2] = 2; + * + * thrust::device_vector float_in(3), float_out(3); + * float_in[0] = 0.0f; + * float_in[1] = 10.0f; + * float_in[2] = 20.0f; + * + * thrust::copy(thrust::make_zip_iterator(thrust::make_tuple(int_in.begin(), float_in.begin())), + * thrust::make_zip_iterator(thrust::make_tuple(int_in.end(), float_in.end())), + * thrust::make_zip_iterator(thrust::make_tuple(int_out.begin(),float_out.begin()))); + * + * // int_out is now [0, 1, 2] + * // float_out is now [0.0f, 10.0f, 20.0f] + * + * return 0; + * } + * \endcode + * + * \see make_zip_iterator + * \see make_tuple + * \see tuple + * \see get + */ +template + class zip_iterator + : public detail::zip_iterator_base::type +{ + public: + /*! Null constructor does nothing. + */ + inline __host__ __device__ + zip_iterator(void); + + /*! This constructor creates a new \p zip_iterator from a + * \p tuple of iterators. + * + * \param iterator_tuple The \p tuple of iterators to copy from. + */ + inline __host__ __device__ + zip_iterator(IteratorTuple iterator_tuple); + + /*! This copy constructor creates a new \p zip_iterator from another + * \p zip_iterator. + * + * \param other The \p zip_iterator to copy. + */ + template + inline __host__ __device__ + zip_iterator(const zip_iterator &other, + typename thrust::detail::enable_if_convertible< + OtherIteratorTuple, + IteratorTuple + >::type * = 0); + + /*! This method returns a \c const reference to this \p zip_iterator's + * \p tuple of iterators. + * + * \return A \c const reference to this \p zip_iterator's \p tuple + * of iterators. + */ + inline __host__ __device__ + const IteratorTuple &get_iterator_tuple() const; + + /*! 
\cond + */ + private: + typedef typename + detail::zip_iterator_base::type super_t; + + friend class thrust::iterator_core_access; + + // Dereferencing returns a tuple built from the dereferenced + // iterators in the iterator tuple. + __host__ __device__ + typename super_t::reference dereference() const; + + // Two zip_iterators are equal if the two first iterators of the + // tuple are equal. Note this differs from Boost's implementation, which + // considers the entire tuple. + template + inline __host__ __device__ + bool equal(const zip_iterator &other) const; + + // Advancing a zip_iterator means to advance all iterators in the tuple + inline __host__ __device__ + void advance(typename super_t::difference_type n); + + // Incrementing a zip iterator means to increment all iterators in the tuple + inline __host__ __device__ + void increment(); + + // Decrementing a zip iterator means to decrement all iterators in the tuple + inline __host__ __device__ + void decrement(); + + // Distance is calculated using the first iterator in the tuple. + template + inline __host__ __device__ + typename super_t::difference_type + distance_to(const zip_iterator &other) const; + + // The iterator tuple. + IteratorTuple m_iterator_tuple; + + /*! \endcond + */ +}; // end zip_iterator + +/*! \p make_zip_iterator creates a \p zip_iterator from a \p tuple + * of iterators. + * + * \param t The \p tuple of iterators to copy. + * \return A newly created \p zip_iterator which zips the iterators encapsulated in \p t. + * + * \see zip_iterator + */ +template +inline __host__ __device__ +zip_iterator make_zip_iterator(IteratorTuple t); + +/*! \} // end fancyiterators + */ + +/*! 
\} // end iterators + */ + +} // end thrust + +#include + diff --git a/compat/thrust/logical.h b/compat/thrust/logical.h new file mode 100644 index 0000000..21510f3 --- /dev/null +++ b/compat/thrust/logical.h @@ -0,0 +1,276 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file logical.h + * \brief Logical operations on ranges + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \addtogroup reductions + * \{ + * \addtogroup logical + * \ingroup reductions + * \{ + */ + + +/*! \p all_of determines whether all elements in a range satify a predicate. + * Specifically, \p all_of returns \c true if pred(*i) is \c true + * for every iterator \c i in the range [first, last) and + * \c false otherwise. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param pred A predicate used to test range elements. + * \return \c true, if all elements satisfy the predicate; \c false, otherwise. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator, + * \tparam Predicate must be a model of Predicate. + * + * \code + * #include + * #include + * #include + * ... 
+ * bool A[3] = {true, true, false}; + * + * thrust::all_of(thrust::host, A, A + 2, thrust::identity()); // returns true + * thrust::all_of(thrust::host, A, A + 3, thrust::identity()); // returns false + * + * // empty range + * thrust::all_of(thrust::host, A, A, thrust::identity()); // returns true + * + * \endcode + * + * \see any_of + * \see none_of + * \see transform_reduce + */ +template +bool all_of(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred); + + +/*! \p all_of determines whether all elements in a range satisfy a predicate. + * Specifically, \p all_of returns \c true if pred(*i) is \c true + * for every iterator \c i in the range [first, last) and + * \c false otherwise. + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param pred A predicate used to test range elements. + * \return \c true, if all elements satisfy the predicate; \c false, otherwise. + * + * \tparam InputIterator is a model of Input Iterator, + * \tparam Predicate must be a model of Predicate. + * + * \code + * #include + * #include + * ... + * bool A[3] = {true, true, false}; + * + * thrust::all_of(A, A + 2, thrust::identity()); // returns true + * thrust::all_of(A, A + 3, thrust::identity()); // returns false + * + * // empty range + * thrust::all_of(A, A, thrust::identity()); // returns true + * + * \endcode + * + * \see any_of + * \see none_of + * \see transform_reduce + */ +template +bool all_of(InputIterator first, InputIterator last, Predicate pred); + + +/*! \p any_of determines whether any element in a range satisfies a predicate. + * Specifically, \p any_of returns \c true if pred(*i) is \c true + * for any iterator \c i in the range [first, last) and + * \c false otherwise. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence.
+ * \param last The end of the sequence. + * \param pred A predicate used to test range elements. + * \return \c true, if any element satisfies the predicate; \c false, otherwise. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator, + * \tparam Predicate must be a model of Predicate. + * + * \code + * #include + * #include + * #include + * ... + * bool A[3] = {true, true, false}; + * + * thrust::any_of(thrust::host, A, A + 2, thrust::identity()); // returns true + * thrust::any_of(thrust::host, A, A + 3, thrust::identity()); // returns true + * + * thrust::any_of(thrust::host, A + 2, A + 3, thrust::identity()); // returns false + * + * // empty range + * thrust::any_of(thrust::host, A, A, thrust::identity()); // returns false + * \endcode + * + * \see all_of + * \see none_of + * \see transform_reduce + */ +template +bool any_of(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred); + + +/*! \p any_of determines whether any element in a range satifies a predicate. + * Specifically, \p any_of returns \c true if pred(*i) is \c true + * for any iterator \c i in the range [first, last) and + * \c false otherwise. + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param pred A predicate used to test range elements. + * \return \c true, if any element satisfies the predicate; \c false, otherwise. + * + * \tparam InputIterator is a model of Input Iterator, + * \tparam Predicate must be a model of Predicate. + * + * \code + * #include + * #include + * ... 
+ * bool A[3] = {true, true, false}; + * + * thrust::any_of(A, A + 2, thrust::identity()); // returns true + * thrust::any_of(A, A + 3, thrust::identity()); // returns true + * + * thrust::any_of(A + 2, A + 3, thrust::identity()); // returns false + * + * // empty range + * thrust::any_of(A, A, thrust::identity()); // returns false + * \endcode + * + * \see all_of + * \see none_of + * \see transform_reduce + */ +template +bool any_of(InputIterator first, InputIterator last, Predicate pred); + + +/*! \p none_of determines whether no element in a range satifies a predicate. + * Specifically, \p none_of returns \c true if there is no iterator \c i in + * the range [first, last) such that pred(*i) is \c true, + * and \c false otherwise. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param pred A predicate used to test range elements. + * \return \c true, if no element satisfies the predicate; \c false, otherwise. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator, + * \tparam Predicate must be a model of Predicate. + * + * \code + * #include + * #include + * #include + * ... + * bool A[3] = {true, true, false}; + * + * thrust::none_of(thrust::host, A, A + 2, thrust::identity()); // returns false + * thrust::none_of(thrust::host, A, A + 3, thrust::identity()); // returns false + * + * thrust::none_of(thrust::host, A + 2, A + 3, thrust::identity()); // returns true + * + * // empty range + * thrust::none_of(thrust::host, A, A, thrust::identity()); // returns true + * \endcode + * + * \see all_of + * \see any_of + * \see transform_reduce + */ +template +bool none_of(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred); + + +/*! 
\p none_of determines whether no element in a range satifies a predicate. + * Specifically, \p none_of returns \c true if there is no iterator \c i in + * the range [first, last) such that pred(*i) is \c true, + * and \c false otherwise. + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param pred A predicate used to test range elements. + * \return \c true, if no element satisfies the predicate; \c false, otherwise. + * + * \tparam InputIterator is a model of Input Iterator, + * \tparam Predicate must be a model of Predicate. + * + * \code + * #include + * #include + * ... + * bool A[3] = {true, true, false}; + * + * thrust::none_of(A, A + 2, thrust::identity()); // returns false + * thrust::none_of(A, A + 3, thrust::identity()); // returns false + * + * thrust::none_of(A + 2, A + 3, thrust::identity()); // returns true + * + * // empty range + * thrust::none_of(A, A, thrust::identity()); // returns true + * \endcode + * + * \see all_of + * \see any_of + * \see transform_reduce + */ +template +bool none_of(InputIterator first, InputIterator last, Predicate pred); + + +/*! \} // end logical + * \} // end reductions + */ + + +} // end namespace thrust + +#include + diff --git a/compat/thrust/memory.h b/compat/thrust/memory.h new file mode 100644 index 0000000..6362de4 --- /dev/null +++ b/compat/thrust/memory.h @@ -0,0 +1,538 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/memory.h + * \brief Abstractions for Thrust's memory model. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + + +/*! \addtogroup memory_management Memory Management + * \addtogroup memory_management_classes Memory Management Classes + * \ingroup memory_management + * \{ + */ + +/*! \p pointer stores a pointer to an object allocated in memory. Like \p device_ptr, this + * type ensures type safety when dispatching standard algorithms on ranges resident in memory. + * + * \p pointer generalizes \p device_ptr by relaxing the backend system associated with the \p pointer. + * Instead of the backend system specified by \p THRUST_DEFAULT_DEVICE_BACKEND, \p pointer's + * system is given by its second template parameter, \p Tag. For the purpose of Thrust dispatch, + * device_ptr and pointer are considered equivalent. + * + * The raw pointer encapsulated by a \p pointer may be obtained through its get member function + * or the \p raw_pointer_cast free function. + * + * \tparam Element specifies the type of the pointed-to object. + * + * \tparam Tag specifies the system with which this \p pointer is associated. This may be any Thrust + * backend system, or a user-defined tag. + * + * \tparam Reference allows the client to specify the reference type returned upon dereference. + * By default, this type is reference. + * + * \tparam Derived allows the client to specify the name of the derived type when \p pointer is used as + * a base class. This is useful to ensure that arithmetic on values of the derived type return + * values of the derived type as a result. By default, this type is pointer. + * + * \note \p pointer is not a smart pointer; it is the client's responsibility to deallocate memory + * pointed to by \p pointer.
+ * + * \see device_ptr + * \see reference + * \see raw_pointer_cast + */ +// define pointer for the purpose of Doxygenating it +// it is actually defined elsewhere +#if 0 +template + class pointer +{ + public: + /*! The type of the raw pointer + */ + typedef typename super_t::base_type raw_pointer; + + /*! \p pointer's default constructor initializes its encapsulated pointer to \c 0 + */ + __host__ __device__ + pointer(); + + /*! This constructor allows construction of a pointer from a T*. + * + * \param ptr A raw pointer to copy from, presumed to point to a location in \p Tag's memory. + * \tparam OtherElement \p OtherElement shall be convertible to \p Element. + */ + template + __host__ __device__ + explicit pointer(OtherElement *ptr); + + /*! This contructor allows initialization from another pointer-like object. + * + * \param other The \p OtherPointer to copy. + * + * \tparam OtherPointer The tag associated with \p OtherPointer shall be convertible to \p Tag, + * and its element type shall be convertible to \p Element. + */ + template + __host__ __device__ + pointer(const OtherPointer &other, + typename thrust::detail::enable_if_pointer_is_convertible< + OtherPointer, + pointer + >::type * = 0); + + /*! Assignment operator allows assigning from another pointer-like object with related type. + * + * \param other The other pointer-like object to assign from. + * \return *this + * + * \tparam OtherPointer The tag associated with \p OtherPointer shall be convertible to \p Tag, + * and its element type shall be convertible to \p Element. + */ + template + __host__ __device__ + typename thrust::detail::enable_if_pointer_is_convertible< + OtherPointer, + pointer, + derived_type & + >::type + operator=(const OtherPointer &other); + + /*! \p get returns this \p pointer's encapsulated raw pointer. + * \return This \p pointer's raw pointer. + */ + __host__ __device__ + Element *get() const; +}; +#endif + +/*! 
\p reference is a wrapped reference to an object stored in memory. \p reference generalizes + * \p device_reference by relaxing the type of pointer associated with the object. \p reference + * is the type of the result of dereferencing a tagged pointer-like object such as \p pointer, and + * mediates operations on objects existing in a remote memory. + * + * \tparam Element specifies the type of the referent object. + * \tparam Pointer specifies the type of the result of taking the address of \p reference. + * \tparam Derived allows the client to specify the name of the derived type when \p reference is used as + * a base class. This is useful to ensure that assignment to objects of the derived type return + * values of the derived type as a result. By default, this type is reference. + */ +// define reference for the purpose of Doxygenating it +// it is actually defined elsewhere +#if 0 +template + class reference +{ + public: + /*! The type of this \p reference's wrapped pointers. + */ + typedef Pointer pointer; + + /*! The \p value_type of this \p reference. + */ + typedef typename thrust::detail::remove_const::type value_type; + + /*! This constructor initializes this \p reference + * to refer to an object pointed to by the given \p pointer. After + * this \p reference is constructed, it shall refer to the + * object pointed to by \p ptr. + * + * \param ptr A \p pointer to copy from. + */ + __host__ __device__ + explicit reference(const pointer &ptr); + + /*! This copy constructor accepts a const reference to another + * \p reference of related type. After this \p reference is constructed, + * it shall refer to the same object as \p other. + * + * \param other A \p reference to copy from. + * \tparam OtherElement the element type of the other \p reference. + * \tparam OtherPointer the pointer type of the other \p reference. + * \tparam OtherDerived the derived type of the other \p reference.
+ * + * \note This constructor is templated primarily to allow initialization of + * reference from reference. + */ + template + __host__ __device__ + reference(const reference &other, + typename thrust::detail::enable_if_convertible< + typename reference::pointer, + pointer + >::type * = 0); + + /*! Copy assignment operator copy assigns from another \p reference. + * + * \param other The other \p reference to assign from. + * \return static_cast(*this) + */ + __host__ __device__ + derived_type &operator=(const reference &other); + + /*! Assignment operator copy assigns from another \p reference of related type. + * + * \param other The other \p reference to assign from. + * \return static_cast(*this) + * + * \tparam OtherElement the element type of the other \p reference. + * \tparam OtherPointer the pointer type of the other \p reference. + * \tparam OtherDerived the derived type of the other \p reference. + */ + template + __host__ __device__ + derived_type &operator=(const reference &other); + + /*! Assignment operator assigns from a \p value_type. + * + * \param x The \p value_type to assign from. + * \return static_cast(*this). + */ + __host__ __device__ + derived_type &operator=(const value_type &x); + + /*! Address-of operator returns a \p pointer pointing to the object + * referenced by this \p reference. It does not return the address of this + * \p reference. + * + * \return A \p pointer pointing to the referenct object. + */ + __host__ __device__ + pointer operator&() const; + + /*! Conversion operator converts this \p reference to \p value_type by + * returning a copy of the referent object. + * + * \return A copy of the referent object. + */ + __host__ __device__ + operator value_type () const; + + /*! Swaps the value of the referent object with another. + * + * \param other The other \p reference with which to swap. + * \note The argument is of type \p derived_type rather than \p reference. 
+ */ + __host__ __device__ + void swap(derived_type &other); + + /*! Prefix increment operator increments the referent object. + * + * \return static_cast(*this). + * + * \note Documentation for other arithmetic operators omitted for brevity. + */ + derived_type &operator++(); +}; +#endif + +/*! \} + */ + +/*! + * \addtogroup memory_management_functions Memory Management Functions + * \ingroup memory_management + * \{ + */ + + +/*! \addtogroup allocation_functions + * \{ + */ + + +/*! This version of \p malloc allocates untyped uninitialized storage associated with a given system. + * + * \param system The Thrust system with which to associate the storage. + * \param n The number of bytes of storage to allocate. + * \return If allocation succeeds, a pointer to the allocated storage; a null pointer otherwise. + * The pointer must be deallocated with \p thrust::free. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * + * \pre \p DerivedPolicy must be publicly derived from thrust::execution_policy. + * + * The following code snippet demonstrates how to use \p malloc to allocate a range of memory + * associated with Thrust's device system. + * + * \code + * #include + * ... + * // allocate some memory with thrust::malloc + * const int N = 100; + * thrust::device_system_tag device_sys; + * thrust::pointer void_ptr = thrust::malloc(device_sys, N); + * + * // manipulate memory + * ... + * + * // deallocate void_ptr with thrust::free + * thrust::free(device_sys, void_ptr); + * \endcode + * + * \see free + * \see device_malloc + */ +template +pointer malloc(const thrust::detail::execution_policy_base &system, std::size_t n); + + +/*! This version of \p malloc allocates typed uninitialized storage associated with a given system. + * + * \param system The Thrust system with which to associate the storage. + * \param n The number of elements of type \c T which the storage should accommodate.
+ * \return If allocation succeeds, a pointer to an allocation large enough to accommodate \c n + * elements of type \c T; a null pointer otherwise. + * The pointer must be deallocated with \p thrust::free. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * + * \pre \p DerivedPolicy must be publicly derived from thrust::execution_policy. + * + * The following code snippet demonstrates how to use \p malloc to allocate a range of memory + * to accommodate integers associated with Thrust's device system. + * + * \code + * #include + * ... + * // allocate storage for 100 ints with thrust::malloc + * const int N = 100; + * thrust::device_system_tag device_sys; + * thrust::pointer ptr = thrust::malloc(device_sys, N); + * + * // manipulate memory + * ... + * + * // deallocate ptr with thrust::free + * thrust::free(device_sys, ptr); + * \endcode + * + * \see free + * \see device_malloc + */ +template +pointer malloc(const thrust::detail::execution_policy_base &system, std::size_t n); + + +/*! \p get_temporary_buffer returns a pointer to storage associated with a given Thrust system sufficient to store up to + * \p n objects of type \c T. If not enough storage is available to accommodate \p n objects, an implementation may return + * a smaller buffer. The number of objects the returned buffer can accommodate is also returned. + * + * Thrust uses \p get_temporary_buffer internally when allocating temporary storage required by algorithm implementations. + * + * The storage allocated with \p get_temporary_buffer must be returned to the system with \p return_temporary_buffer. + * + * \param system The Thrust system with which to associate the storage. + * \param n The requested number of objects of type \c T the storage should accommodate. + * \return A pair \c p such that p.first is a pointer to the allocated storage and p.second is the number of + * contiguous objects of type \c T that the storage can accommodate.
If no storage can be allocated, p.first is + * a null pointer. The storage must be returned to the system using \p return_temporary_buffer. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * + * \pre \p DerivedPolicy must be publicly derived from thrust::execution_policy. + * + * The following code snippet demonstrates how to use \p get_temporary_buffer to allocate a range of memory + * to accommodate integers associated with Thrust's device system. + * + * \code + * #include + * ... + * // allocate storage for 100 ints with thrust::get_temporary_buffer + * const int N = 100; + * + * typedef thrust::pair< + * thrust::pointer, + * std::ptrdiff_t + * > ptr_and_size_t; + * + * thrust::device_system_tag device_sys; + * ptr_and_size_t ptr_and_size = thrust::get_temporary_buffer(device_sys, N); + * + * // manipulate up to 100 ints + * for(int i = 0; i < ptr_and_size.second; ++i) + * { + * *ptr_and_size.first = i; + * } + * + * // deallocate storage with thrust::return_temporary_buffer + * thrust::return_temporary_buffer(device_sys, ptr_and_size.first); + * \endcode + * + * \see malloc + * \see return_temporary_buffer + */ +template +thrust::pair, typename thrust::pointer::difference_type> +get_temporary_buffer(const thrust::detail::execution_policy_base &system, typename thrust::pointer::difference_type n); + + +/*! \} allocation_functions + */ + + +/*! \addtogroup deallocation_functions + * \{ + */ + + +/*! \p free deallocates the storage previously allocated by \p thrust::malloc. + * + * \param system The Thrust system with which the storage is associated. + * \param ptr A pointer previously returned by \p thrust::malloc. If \p ptr is null, \p free + * does nothing. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * + * \pre \p ptr shall have been returned by a previous call to thrust::malloc(system, n) or thrust::malloc(system, n) for some type \c T.
+ * + * The following code snippet demonstrates how to use \p free to deallocate a range of memory + * previously allocated with \p thrust::malloc. + * + * \code + * #include + * ... + * // allocate storage for 100 ints with thrust::malloc + * const int N = 100; + * thrust::device_system_tag device_sys; + * thrust::pointer ptr = thrust::malloc(device_sys, N); + * + * // manipulate memory + * ... + * + * // deallocate ptr with thrust::free + * thrust::free(device_sys, ptr); + * \endcode + */ +template +void free(const thrust::detail::execution_policy_base &system, Pointer ptr); + + +/*! \p return_temporary_buffer deallocates storage associated with a given Thrust system previously allocated by \p get_temporary_buffer. + * + * Thrust uses \p return_temporary_buffer internally when deallocating temporary storage required by algorithm implementations. + * + * \param system The Thrust system with which the storage is associated. + * \param p A pointer previously returned by \p thrust::get_temporary_buffer. If \p p is null, \p return_temporary_buffer does nothing. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * + * \pre \p p shall have been previously allocated by \p thrust::get_temporary_buffer. + * + * The following code snippet demonstrates how to use \p return_temporary_buffer to deallocate a range of memory + * previously allocated by \p get_temporary_buffer. + * + * \code + * #include + * ...
+ * // allocate storage for 100 ints with thrust::get_temporary_buffer + * const int N = 100; + * + * typedef thrust::pair< + * thrust::pointer, + * std::ptrdiff_t + * > ptr_and_size_t; + * + * thrust::device_system_tag device_sys; + * ptr_and_size_t ptr_and_size = thrust::get_temporary_buffer(device_sys, N); + * + * // manipulate up to 100 ints + * for(int i = 0; i < ptr_and_size.second; ++i) + * { + * *ptr_and_size.first = i; + * } + * + * // deallocate storage with thrust::return_temporary_buffer + * thrust::return_temporary_buffer(device_sys, ptr_and_size.first); + * \endcode + * + * \see free + * \see get_temporary_buffer + */ +template +void return_temporary_buffer(const thrust::detail::execution_policy_base &system, Pointer p); + + +/*! \} deallocation_functions + */ + + +/*! \p raw_pointer_cast creates a "raw" pointer from a pointer-like type, + * simply returning the wrapped pointer, should it exist. + * + * \param ptr The pointer of interest. + * \return ptr.get(), if the expression is well formed; ptr, otherwise. + * \see raw_reference_cast + */ +template +__host__ __device__ +inline typename thrust::detail::pointer_traits::raw_pointer + raw_pointer_cast(const Pointer &ptr); + + +/*! \p raw_reference_cast creates a "raw" reference from a wrapped reference type, + * simply returning the underlying reference, should it exist. + * + * If the argument is not a reference wrapper, the result is a reference to the argument. + * + * \param ref The reference of interest. + * \return *thrust::raw_pointer_cast(&ref). + * \note There are two versions of \p raw_reference_cast. One for const references, + * and one for non-const. + * \see raw_pointer_cast + */ +template +__host__ __device__ +inline typename detail::raw_reference::type + raw_reference_cast(T &ref); + + +/*! \p raw_reference_cast creates a "raw" reference from a wrapped reference type, + * simply returning the underlying reference, should it exist. 
+ * + * If the argument is not a reference wrapper, the result is a reference to the argument. + * + * \param ref The reference of interest. + * \return *thrust::raw_pointer_cast(&ref). + * \note There are two versions of \p raw_reference_cast. One for const references, + * and one for non-const. + * \see raw_pointer_cast + */ +template +__host__ __device__ +inline typename detail::raw_reference::type + raw_reference_cast(const T &ref); + + +/*! \} + */ + +} // end thrust + diff --git a/compat/thrust/merge.h b/compat/thrust/merge.h new file mode 100644 index 0000000..e5fa7b4 --- /dev/null +++ b/compat/thrust/merge.h @@ -0,0 +1,676 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file merge.h + * \brief Merging sorted ranges + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ + + +/*! \addtogroup merging Merging + * \ingroup algorithms + * \{ + */ + + +/*! \p merge combines two sorted ranges [first1, last1) and [first2, last2) + * into a single sorted range. That is, it copies from [first1, last1) and + * [first2, last2) into [result, result + (last1 - first1) + (last2 - first2)) + * such that the resulting range is in ascending order. \p merge is stable, meaning both that the + * relative order of elements within each input range is preserved, and that for equivalent elements + * in both input ranges the element from the first range precedes the element from the second. 
The + * return value is result + (last1 - first1) + (last2 - first2). + * + * This version of \p merge compares elements using \c operator<. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the first input range. + * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. + * \param last2 The end of the second input range. + * \param result The beginning of the merged output. + * \return The end of the output range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. + * \pre The resulting range shall not overlap with either input range. 
+ * + * The following code snippet demonstrates how to use + * \p merge to compute the merger of two sorted sets of integers using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * int A1[6] = {1, 3, 5, 7, 9, 11}; + * int A2[7] = {1, 1, 2, 3, 5, 8, 13}; + * + * int result[13]; + * + * int *result_end = + * thrust::merge(thrust::host, + * A1, A1 + 6, + * A2, A2 + 7, + * result); + * // result = {1, 1, 1, 2, 3, 3, 5, 5, 7, 8, 9, 11, 13} + * \endcode + * + * \see http://www.sgi.com/tech/stl/merge.html + * \see \p set_union + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator merge(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result); + + +/*! \p merge combines two sorted ranges [first1, last1) and [first2, last2) + * into a single sorted range. That is, it copies from [first1, last1) and + * [first2, last2) into [result, result + (last1 - first1) + (last2 - first2)) + * such that the resulting range is in ascending order. \p merge is stable, meaning both that the + * relative order of elements within each input range is preserved, and that for equivalent elements + * in both input ranges the element from the first range precedes the element from the second. The + * return value is result + (last1 - first1) + (last2 - first2). + * + * This version of \p merge compares elements using \c operator<. + * + * \param first1 The beginning of the first input range. + * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. + * \param last2 The end of the second input range. + * \param result The beginning of the merged output. + * \return The end of the output range. 
+ * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. + * \pre The resulting range shall not overlap with either input range. + * + * The following code snippet demonstrates how to use + * \p merge to compute the merger of two sorted sets of integers. + * + * \code + * #include + * ... + * int A1[6] = {1, 3, 5, 7, 9, 11}; + * int A2[7] = {1, 1, 2, 3, 5, 8, 13}; + * + * int result[13]; + * + * int *result_end = thrust::merge(A1, A1 + 6, A2, A2 + 7, result); + * // result = {1, 1, 1, 2, 3, 3, 5, 5, 7, 8, 9, 11, 13} + * \endcode + * + * \see http://www.sgi.com/tech/stl/merge.html + * \see \p set_union + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator merge(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result); + + +/*! \p merge combines two sorted ranges [first1, last1) and [first2, last2) + * into a single sorted range. 
That is, it copies from [first1, last1) and + * [first2, last2) into [result, result + (last1 - first1) + (last2 - first2)) + * such that the resulting range is in ascending order. \p merge is stable, meaning both that the + * relative order of elements within each input range is preserved, and that for equivalent elements + * in both input ranges the element from the first range precedes the element from the second. The + * return value is result + (last1 - first1) + (last2 - first2). + * + * This version of \p merge compares elements using a function object \p comp. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the first input range. + * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. + * \param last2 The end of the second input range. + * \param result The beginning of the merged output. + * \param comp Comparison operator. + * \return The end of the output range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1's \c value_type is convertable to \p StrictWeakCompare's \c first_argument_type. + * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2's \c value_type is convertable to \p StrictWeakCompare's \c second_argument_type. + * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering. + * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. + * \pre The resulting range shall not overlap with either input range. 
+ * + * The following code snippet demonstrates how to use + * \p merge to compute the merger of two sets of integers sorted in + * descending order using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... + * int A1[6] = {11, 9, 7, 5, 3, 1}; + * int A2[7] = {13, 8, 5, 3, 2, 1, 1}; + * + * int result[13]; + * + * int *result_end = thrust::merge(thrust::host, + * A1, A1 + 6, + * A2, A2 + 7, + * result, + * thrust::greater()); + * // result = {13, 11, 9, 8, 7, 5, 5, 3, 3, 2, 1, 1, 1} + * \endcode + * + * \see http://www.sgi.com/tech/stl/merge.html + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator merge(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakCompare comp); + + +/*! \p merge combines two sorted ranges [first1, last1) and [first2, last2) + * into a single sorted range. That is, it copies from [first1, last1) and + * [first2, last2) into [result, result + (last1 - first1) + (last2 - first2)) + * such that the resulting range is in ascending order. \p merge is stable, meaning both that the + * relative order of elements within each input range is preserved, and that for equivalent elements + * in both input ranges the element from the first range precedes the element from the second. The + * return value is result + (last1 - first1) + (last2 - first2). + * + * This version of \p merge compares elements using a function object \p comp. + * + * \param first1 The beginning of the first input range. + * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. + * \param last2 The end of the second input range. + * \param result The beginning of the merged output. + * \param comp Comparison operator. + * \return The end of the output range. 
+ * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1's \c value_type is convertable to \p StrictWeakCompare's \c first_argument_type. + * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2's \c value_type is convertable to \p StrictWeakCompare's \c second_argument_type. + * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering. + * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. + * \pre The resulting range shall not overlap with either input range. + * + * The following code snippet demonstrates how to use + * \p merge to compute the merger of two sets of integers sorted in + * descending order. + * + * \code + * #include + * #include + * ... + * int A1[6] = {11, 9, 7, 5, 3, 1}; + * int A2[7] = {13, 8, 5, 3, 2, 1, 1}; + * + * int result[13]; + * + * int *result_end = thrust::merge(A1, A1 + 6, A2, A2 + 7, result, thrust::greater()); + * // result = {13, 11, 9, 8, 7, 5, 5, 3, 3, 2, 1, 1, 1} + * \endcode + * + * \see http://www.sgi.com/tech/stl/merge.html + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator merge(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakCompare comp); + + +/*! \p merge_by_key performs a key-value merge. That is, \p merge_by_key copies elements from + * [keys_first1, keys_last1) and [keys_first2, keys_last2) into a single range, + * [keys_result, keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) such that + * the resulting range is in ascending key order. 
+ * + * At the same time, \p merge_by_key copies elements from the two associated ranges [values_first1, values_first1 + (keys_last1 - keys_first1)) + * and [values_first2, values_first2 + (keys_last2 - keys_first2)) into a single range, + * [values_result, values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) such that + * the resulting range is in ascending order implied by each input element's associated key. + * + * \p merge_by_key is stable, meaning both that the relative order of elements within each input range is + * preserved, and that for equivalent elements in all input key ranges the element from the first range + * precedes the element from the second. + * + * The return value is the pair (keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) + * and (values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)). + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys. + * \param values_first1 The beginning of the first input range of values. + * \param values_first2 The beginning of the second input range of values. + * \param keys_result The beginning of the merged output range of keys. + * \param values_result The beginning of the merged output range of values. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values. + * + * \tparam DerivedPolicy The name of the derived execution policy.
+ * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam InputIterator4 is a model of Input Iterator, + * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use + * \p merge_by_key to compute the merger of two sets of integers sorted in + * ascending order using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... 
+ * int A_keys[6] = {1, 3, 5, 7, 9, 11}; + * int A_vals[6] = {0, 0, 0, 0, 0, 0}; + * + * int B_keys[7] = {1, 1, 2, 3, 5, 8, 13}; + * int B_vals[7] = {1, 1, 1, 1, 1, 1, 1}; + * + * int keys_result[13]; + * int vals_result[13]; + * + * thrust::pair end = + * thrust::merge_by_key(thrust::host, + * A_keys, A_keys + 6, + * B_keys, B_keys + 7, + * A_vals, B_vals, + * keys_result, vals_result); + * + * // keys_result = {1, 1, 1, 2, 3, 3, 5, 5, 7, 8, 9, 11, 13} + * // vals_result = {0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1} + * \endcode + * + * \see merge + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + merge_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first1, InputIterator1 keys_last1, + InputIterator2 keys_first2, InputIterator2 keys_last2, + InputIterator3 values_first1, InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result); + + +/*! \p merge_by_key performs a key-value merge. That is, \p merge_by_key copies elements from + * [keys_first1, keys_last1) and [keys_first2, keys_last2) into a single range, + * [keys_result, keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) such that + * the resulting range is in ascending key order. + * + * At the same time, \p merge_by_key copies elements from the two associated ranges [values_first1, values_first1 + (keys_last1 - keys_first1)) + * and [values_first2, values_first2 + (keys_last2 - keys_first2)) into a single range, + * [values_result, values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) such that + * the resulting range is in ascending order implied by each input element's associated key. + * + * \p merge_by_key is stable, meaning both that the relative order of elements within each input range is + * preserved, and that for equivalent elements in all input key ranges the element from the first range + * precedes the element from the second.
+ * + * The return value is the pair (keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) + * and (values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)). + * + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys. + * \param values_first1 The beginning of the first input range of values. + * \param values_first2 The beginning of the second input range of values. + * \param keys_result The beginning of the merged output range of keys. + * \param values_result The beginning of the merged output range of values. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values. + * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
+ * \tparam InputIterator4 is a model of Input Iterator, + * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use + * \p merge_by_key to compute the merger of two sets of integers sorted in + * ascending order. + * + * \code + * #include + * #include + * ... + * int A_keys[6] = {1, 3, 5, 7, 9, 11}; + * int A_vals[6] = {0, 0, 0, 0, 0, 0}; + * + * int B_keys[7] = {1, 1, 2, 3, 5, 8, 13}; + * int B_vals[7] = {1, 1, 1, 1, 1, 1, 1}; + * + * int keys_result[13]; + * int vals_result[13]; + * + * thrust::pair end = thrust::merge_by_key(A_keys, A_keys + 6, B_keys, B_keys + 7, A_vals, B_vals, keys_result, vals_result); + * + * // keys_result = {1, 1, 1, 2, 3, 3, 5, 5, 7, 8, 9, 11, 13} + * // vals_result = {0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1} + * \endcode + * + * \see merge + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + merge_by_key(InputIterator1 keys_first1, InputIterator1 keys_last1, + InputIterator2 keys_first2, InputIterator2 keys_last2, + InputIterator3 values_first1, InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result); + + +/*! \p merge_by_key performs a key-value merge. That is, \p merge_by_key copies elements from + * [keys_first1, keys_last1) and [keys_first2, keys_last2) into a single range, + * [keys_result, keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) such that + * the resulting range is in ascending key order. 
+ * + * At the same time, \p merge_by_key copies elements from the two associated ranges [values_first1, values_first1 + (keys_last1 - keys_first1)) + * and [values_first2, values_first2 + (keys_last2 - keys_first2)) into a single range, + * [values_result, values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) such that + * the resulting range is in ascending order implied by each input element's associated key. + * + * \p merge_by_key is stable, meaning both that the relative order of elements within each input range is + * preserved, and that for equivalent elements in all input key ranges the element from the first range + * precedes the element from the second. + * + * The return value is the pair (keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) + * and (values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)). + * + * This version of \p merge_by_key compares key elements using a function object \p comp. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys. + * \param values_first1 The beginning of the first input range of values. + * \param values_first2 The beginning of the second input range of values. + * \param keys_result The beginning of the merged output range of keys. + * \param values_result The beginning of the merged output range of values. + * \param comp Comparison operator. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values. + * + * \tparam DerivedPolicy The name of the derived execution policy.
+ * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1's \c value_type is convertable to \p StrictWeakCompare's \c first_argument_type. + * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2's \c value_type is convertable to \p StrictWeakCompare's \c second_argument_type. + * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam InputIterator4 is a model of Input Iterator, + * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering. + * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use + * \p merge_by_key to compute the merger of two sets of integers sorted in + * descending order using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... 
+ * int A_keys[6] = {11, 9, 7, 5, 3, 1}; + * int A_vals[6] = { 0, 0, 0, 0, 0, 0}; + * + * int B_keys[7] = {13, 8, 5, 3, 2, 1, 1}; + * int B_vals[7] = { 1, 1, 1, 1, 1, 1, 1}; + * + * int keys_result[13]; + * int vals_result[13]; + * + * thrust::pair end = + * thrust::merge_by_key(thrust::host, + * A_keys, A_keys + 6, + * B_keys, B_keys + 7, + * A_vals, B_vals, + * keys_result, vals_result, + * thrust::greater()); + * + * // keys_result = {13, 11, 9, 8, 7, 5, 5, 3, 3, 2, 1, 1, 1} + * // vals_result = { 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1} + * \endcode + * + * \see merge + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + merge_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first1, InputIterator1 keys_last1, + InputIterator2 keys_first2, InputIterator2 keys_last2, + InputIterator3 values_first1, InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + Compare comp); + + +/*! \p merge_by_key performs a key-value merge. That is, \p merge_by_key copies elements from + * [keys_first1, keys_last1) and [keys_first2, keys_last2) into a single range, + * [keys_result, keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) such that + * the resulting range is in ascending key order. + * + * At the same time, \p merge_by_key copies elements from the two associated ranges [values_first1, values_first1 + (keys_last1 - keys_first1)) + * and [values_first2, values_first2 + (keys_last2 - keys_first2)) into a single range, + * [values_result, values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) such that + * the resulting range is in ascending order implied by each input element's associated key. + * + * \p merge_by_key is stable, meaning both that the relative order of elements within each input range is + * preserved, and that for equivalent elements in all input key ranges the element from the first range + * precedes the element from the second.
+ * + * The return value is (keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) + * and (values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)). + * + * This version of \p merge_by_key compares key elements using a function object \p comp. + * + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys. + * \param values_first1 The beginning of the first input range of values. + * \param values_first2 The beginning of the second input range of values. + * \param keys_result The beginning of the merged output range of keys. + * \param values_result The beginning of the merged output range of values. + * \param comp Comparison operator. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values. + * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1's \c value_type is convertible to \p StrictWeakCompare's \c first_argument_type, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2's \c value_type is convertible to \p StrictWeakCompare's \c second_argument_type, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam InputIterator4 is a model of Input Iterator, + * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
+ * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering. + * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use + * \p merge_by_key to compute the merger of two sets of integers sorted in + * descending order. + * + * \code + * #include + * #include + * ... + * int A_keys[6] = {11, 9, 7, 5, 3, 1}; + * int A_vals[6] = { 0, 0, 0, 0, 0, 0}; + * + * int B_keys[7] = {13, 8, 5, 3, 2, 1, 1}; + * int B_vals[7] = { 1, 1, 1, 1, 1, 1, 1}; + * + * int keys_result[13]; + * int vals_result[13]; + * + * thrust::pair end = thrust::merge_by_key(A_keys, A_keys + 6, B_keys, B_keys + 7, A_vals, B_vals, keys_result, vals_result, thrust::greater()); + * + * // keys_result = {13, 11, 9, 8, 7, 5, 5, 3, 3, 2, 1, 1, 1} + * // vals_result = { 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1} + * \endcode + * + * \see merge + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + merge_by_key(InputIterator1 keys_first1, InputIterator1 keys_last1, + InputIterator2 keys_first2, InputIterator2 keys_last2, + InputIterator3 values_first1, InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakCompare comp); + + +/*! \} // merging + */ + +} // end thrust + +#include + diff --git a/compat/thrust/mismatch.h b/compat/thrust/mismatch.h new file mode 100644 index 0000000..898157a --- /dev/null +++ b/compat/thrust/mismatch.h @@ -0,0 +1,258 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file mismatch.h + * \brief Search for differences between ranges + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ + + +/*! \addtogroup algorithms + */ + +/*! \addtogroup searching + * \ingroup algorithms + * \{ + */ + + +/*! \p mismatch finds the first position where the two ranges [first1, last1) + * and [first2, first2 + (last1 - first1)) differ. The two versions of + * \p mismatch use different tests for whether elements differ. + * + * This version of \p mismatch finds the first iterator \c i in [first1, last1) + * such that *i == *(first2 + (i - first1)) is \c false. The return value is a + * \c pair whose first element is \c i and whose second element is *(first2 + (i - first1)). + * If no such iterator \c i exists, the return value is a \c pair whose first element + * is \c last1 and whose second element is *(first2 + (last1 - first1)). + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the first sequence. + * \param last1 The end of the first sequence. + * \param first2 The beginning of the second sequence. + * \return The first position where the sequences differ. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator + * and \p InputIterator1's \c value_type is equality comparable to \p InputIterator2's \c value_type. + * \tparam InputIterator2 is a model of Input Iterator. 
+ * + * \code + * #include + * #include + * #include + * ... + * thrust::device_vector vec1(4); + * thrust::device_vector vec2(4); + * + * vec1[0] = 0; vec2[0] = 0; + * vec1[1] = 5; vec2[1] = 5; + * vec1[2] = 3; vec2[2] = 8; + * vec1[3] = 7; vec2[3] = 7; + * + * typedef thrust::device_vector::iterator Iterator; + * thrust::pair result; + * + * result = thrust::mismatch(thrust::device, vec1.begin(), vec1.end(), vec2.begin()); + * + * // result.first is vec1.begin() + 2 + * // result.second is vec2.begin() + 2 + * \endcode + * + * \see find + * \see find_if + */ +template +thrust::pair mismatch(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2); + + +/*! \p mismatch finds the first position where the two ranges [first1, last1) + * and [first2, first2 + (last1 - first1)) differ. The two versions of + * \p mismatch use different tests for whether elements differ. + * + * This version of \p mismatch finds the first iterator \c i in [first1, last1) + * such that *i == *(first2 + (i - first1)) is \c false. The return value is a + * \c pair whose first element is \c i and whose second element is *(first2 + (i - first1)). + * If no such iterator \c i exists, the return value is a \c pair whose first element + * is \c last1 and whose second element is *(first2 + (last1 - first1)). + * + * \param first1 The beginning of the first sequence. + * \param last1 The end of the first sequence. + * \param first2 The beginning of the second sequence. + * \return The first position where the sequences differ. + * + * \tparam InputIterator1 is a model of Input Iterator + * and \p InputIterator1's \c value_type is equality comparable to \p InputIterator2's \c value_type. + * \tparam InputIterator2 is a model of Input Iterator. + * + * \code + * #include + * #include + * ... 
+ * thrust::device_vector vec1(4); + * thrust::device_vector vec2(4); + * + * vec1[0] = 0; vec2[0] = 0; + * vec1[1] = 5; vec2[1] = 5; + * vec1[2] = 3; vec2[2] = 8; + * vec1[3] = 7; vec2[3] = 7; + * + * typedef thrust::device_vector::iterator Iterator; + * thrust::pair result; + * + * result = thrust::mismatch(vec1.begin(), vec1.end(), vec2.begin()); + * + * // result.first is vec1.begin() + 2 + * // result.second is vec2.begin() + 2 + * \endcode + * + * \see find + * \see find_if + */ +template +thrust::pair mismatch(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2); + + +/*! \p mismatch finds the first position where the two ranges [first1, last1) + * and [first2, first2 + (last1 - first1)) differ. The two versions of + * \p mismatch use different tests for whether elements differ. + * + * This version of \p mismatch finds the first iterator \c i in [first1, last1) + * such that pred(\*i, \*(first2 + (i - first1))) is \c false. The return value is a + * \c pair whose first element is \c i and whose second element is *(first2 + (i - first1)). + * If no such iterator \c i exists, the return value is a \c pair whose first element is + * \c last1 and whose second element is *(first2 + (last1 - first1)). + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the first sequence. + * \param last1 The end of the first sequence. + * \param first2 The beginning of the second sequence. + * \param pred The binary predicate to compare elements. + * \return The first position where the sequences differ. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator. + * \tparam InputIterator2 is a model of Input Iterator. + * \tparam BinaryPredicate is a model of Binary Predicate. + * + * \code + * #include + * #include + * #include + * ...
+ * thrust::device_vector vec1(4); + * thrust::device_vector vec2(4); + * + * vec1[0] = 0; vec2[0] = 0; + * vec1[1] = 5; vec2[1] = 5; + * vec1[2] = 3; vec2[2] = 8; + * vec1[3] = 7; vec2[3] = 7; + * + * typedef thrust::device_vector::iterator Iterator; + * thrust::pair result; + * + * result = thrust::mismatch(thrust::device, vec1.begin(), vec1.end(), vec2.begin(), thrust::equal_to()); + * + * // result.first is vec1.begin() + 2 + * // result.second is vec2.begin() + 2 + * \endcode + * + * \see find + * \see find_if + */ +template +thrust::pair mismatch(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + BinaryPredicate pred); + + +/*! \p mismatch finds the first position where the two ranges [first1, last1) + * and [first2, first2 + (last1 - first1)) differ. The two versions of + * \p mismatch use different tests for whether elements differ. + * + * This version of \p mismatch finds the first iterator \c i in [first1, last1) + * such that pred(\*i, \*(first2 + (i - first1))) is \c false. The return value is a + * \c pair whose first element is \c i and whose second element is *(first2 + (i - first1)). + * If no such iterator \c i exists, the return value is a \c pair whose first element is + * \c last1 and whose second element is *(first2 + (last1 - first1)). + * + * \param first1 The beginning of the first sequence. + * \param last1 The end of the first sequence. + * \param first2 The beginning of the second sequence. + * \param pred The binary predicate to compare elements. + * \return The first position where the sequences differ. + * + * \tparam InputIterator1 is a model of Input Iterator. + * \tparam InputIterator2 is a model of Input Iterator. + * \tparam BinaryPredicate is a model of Binary Predicate. + * + * \code + * #include + * #include + * ...
+ * thrust::device_vector vec1(4); + * thrust::device_vector vec2(4); + * + * vec1[0] = 0; vec2[0] = 0; + * vec1[1] = 5; vec2[1] = 5; + * vec1[2] = 3; vec2[2] = 8; + * vec1[3] = 7; vec2[3] = 7; + * + * typedef thrust::device_vector::iterator Iterator; + * thrust::pair result; + * + * result = thrust::mismatch(vec1.begin(), vec1.end(), vec2.begin(), thrust::equal_to()); + * + * // result.first is vec1.begin() + 2 + * // result.second is vec2.begin() + 2 + * \endcode + * + * \see find + * \see find_if + */ +template +thrust::pair mismatch(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + BinaryPredicate pred); + +/*! \} // end searching + */ + +} // end namespace thrust + +#include + diff --git a/compat/thrust/pair.h b/compat/thrust/pair.h new file mode 100644 index 0000000..897cc07 --- /dev/null +++ b/compat/thrust/pair.h @@ -0,0 +1,283 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file pair.h + * \brief A type encapsulating a heterogeneous pair of elements + */ + +#pragma once + +#include +#include + +namespace thrust +{ + +/*! \addtogroup utility + * \{ + */ + +/*! \addtogroup pair + * \{ + */ + +/*! \p pair is a generic data structure encapsulating a heterogeneous + * pair of values. + * + * \tparam T1 The type of \p pair's first object type. There are no + * requirements on the type of \p T1. T1's type is + * provided by pair::first_type. 
+ * + * \tparam T2 The type of \p pair's second object type. There are no + * requirements on the type of \p T2. T2's type is + * provided by pair::second_type. + */ +template + struct pair +{ + /*! \p first_type is the type of \p pair's first object type. + */ + typedef T1 first_type; + + /*! \p second_type is the type of \p pair's second object type. + */ + typedef T2 second_type; + + /*! The \p pair's first object. + */ + first_type first; + + /*! The \p pair's second object. + */ + second_type second; + + /*! \p pair's default constructor constructs \p first + * and \p second using \c first_type & \c second_type's + * default constructors, respectively. + */ + __host__ __device__ pair(void); + + /*! This constructor accepts two objects to copy into this \p pair. + * + * \param x The object to copy into \p first. + * \param y The object to copy into \p second. + */ + inline __host__ __device__ + pair(const T1 &x, const T2 &y); + + /*! This copy constructor copies from a \p pair whose types are + * convertible to this \p pair's \c first_type and \c second_type, + * respectively. + * + * \param p The \p pair to copy from. + * + * \tparam U1 is convertible to \c first_type. + * \tparam U2 is convertible to \c second_type. + */ + template + inline __host__ __device__ + pair(const pair &p); + + /*! This copy constructor copies from a std::pair whose types are + * convertible to this \p pair's \c first_type and \c second_type, + * respectively. + * + * \param p The std::pair to copy from. + * + * \tparam U1 is convertible to \c first_type. + * \tparam U2 is convertible to \c second_type. + */ + template + inline __host__ __device__ + pair(const std::pair &p); + + /*! \p swap swaps the elements of two pairs. + * + * \param p The other pair with which to swap. + */ + inline __host__ __device__ + void swap(pair &p); +}; // end pair + + +/*! This operator tests two \p pairs for equality. + * + * \param x The first \p pair to compare. 
+ * \param y The second \p pair to compare. + * \return \c true if and only if x.first == y.first && x.second == y.second. + * + * \tparam T1 is a model of Equality Comparable. + * \tparam T2 is a model of Equality Comparable. + */ +template + inline __host__ __device__ + bool operator==(const pair &x, const pair &y); + + +/*! This operator tests two pairs for ascending ordering. + * + * \param x The first \p pair to compare. + * \param y The second \p pair to compare. + * \return \c true if and only if x.first < y.first || (!(y.first < x.first) && x.second < y.second). + * + * \tparam T1 is a model of LessThan Comparable. + * \tparam T2 is a model of LessThan Comparable. + */ +template + inline __host__ __device__ + bool operator<(const pair &x, const pair &y); + + +/*! This operator tests two pairs for inequality. + * + * \param x The first \p pair to compare. + * \param y The second \p pair to compare. + * \return \c true if and only if !(x == y). + * + * \tparam T1 is a model of Equality Comparable. + * \tparam T2 is a model of Equality Comparable. + */ +template + inline __host__ __device__ + bool operator!=(const pair &x, const pair &y); + + +/*! This operator tests two pairs for descending ordering. + * + * \param x The first \p pair to compare. + * \param y The second \p pair to compare. + * \return \c true if and only if y < x. + * + * \tparam T1 is a model of LessThan Comparable. + * \tparam T2 is a model of LessThan Comparable. + */ +template + inline __host__ __device__ + bool operator>(const pair &x, const pair &y); + + +/*! This operator tests two pairs for ascending ordering or equivalence. + * + * \param x The first \p pair to compare. + * \param y The second \p pair to compare. + * \return \c true if and only if !(y < x). + * + * \tparam T1 is a model of LessThan Comparable. + * \tparam T2 is a model of LessThan Comparable. + */ +template + inline __host__ __device__ + bool operator<=(const pair &x, const pair &y); + + +/*! 
This operator tests two pairs for descending ordering or equivalence. + * + * \param x The first \p pair to compare. + * \param y The second \p pair to compare. + * \return \c true if and only if !(x < y). + * + * \tparam T1 is a model of LessThan Comparable. + * \tparam T2 is a model of LessThan Comparable. + */ +template + inline __host__ __device__ + bool operator>=(const pair &x, const pair &y); + + +/*! \p swap swaps the contents of two pairs. + * + * \param x The first \p pair to swap. + * \param y The second \p pair to swap. + */ +template + inline __host__ __device__ + void swap(pair &x, pair &y); + + +/*! This convenience function creates a \p pair from two objects. + * + * \param x The first object to copy from. + * \param y The second object to copy from. + * \return A newly-constructed \p pair copied from \p x and \p y. + * + * \tparam T1 There are no requirements on the type of \p T1. + * \tparam T2 There are no requirements on the type of \p T2. + */ +template + inline __host__ __device__ + pair make_pair(T1 x, T2 y); + + +/*! This convenience metafunction is included for compatibility with + * \p tuple. It returns either the type of a \p pair's + * \c first_type or \c second_type in its nested type, \c type. + * + * \tparam N This parameter selects the member of interest. + * \tparam T A \c pair type of interest. + */ +template struct tuple_element; + + +/*! This convenience metafunction is included for compatibility with + * \p tuple. It returns \c 2, the number of elements of a \p pair, + * in its nested data member, \c value. + * + * \tparam Pair A \c pair type of interest. + */ +template struct tuple_size; + + +/*! This convenience function returns a reference to either the first or + * second member of a \p pair. + * + * \param p The \p pair of interest. + * \return \c p.first or \c p.second, depending on the template + * parameter. + * + * \tparam N This parameter selects the member of interest.
+ */ +// XXX comment out these prototypes as a WAR to a problem on MSVC 2005 +//template +// inline __host__ __device__ +// typename tuple_element >::type & +// get(pair &p); + + +/*! This convenience function returns a const reference to either the + * first or second member of a \p pair. + * + * \param p The \p pair of interest. + * \return \c p.first or \c p.second, depending on the template + * parameter. + * + * \tparam i This parameter selects the member of interest. + */ +// XXX comment out these prototypes as a WAR to a problem on MSVC 2005 +//template +// inline __host__ __device__ +// const typename tuple_element >::type & +// get(const pair &p); + +/*! \} // pair + */ + +/*! \} // utility + */ + +} // end thrust + +#include + diff --git a/compat/thrust/partition.h b/compat/thrust/partition.h new file mode 100644 index 0000000..61a6278 --- /dev/null +++ b/compat/thrust/partition.h @@ -0,0 +1,1429 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file partition.h + * \brief Reorganizes a range based on a predicate + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ + + +/*! \addtogroup reordering + * \ingroup algorithms + * + * \addtogroup partitioning + * \ingroup reordering + * \{ + */ + + +/*! 
\p partition reorders the elements [first, last) based on the function + * object \p pred, such that all of the elements that satisfy \p pred precede the + * elements that fail to satisfy it. The postcondition is that, for some iterator + * \c middle in the range [first, last), pred(*i) is \c true for every + * iterator \c i in the range [first,middle) and \c false for every iterator + * \c i in the range [middle, last). The return value of \p partition is + * \c middle. + * + * Note that the relative order of elements in the two reordered sequences is not + * necessarily the same as it was in the original sequence. A different algorithm, + * \ref stable_partition, does guarantee to preserve the relative order. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence to reorder. + * \param last The end of the sequence to reorder. + * \param pred A function object which decides to which partition each element of the + * sequence [first, last) belongs. + * \return An iterator referring to the first element of the second partition, that is, + * the sequence of the elements which do not satisfy \p pred. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type, + * and \p ForwardIterator is mutable. + * \tparam Predicate is a model of Predicate. + * + * The following code snippet demonstrates how to use \p partition to reorder a + * sequence so that even numbers precede odd numbers using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int &x) + * { + * return (x % 2) == 0; + * } + * }; + * ... 
+ * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + * const int N = sizeof(A)/sizeof(int); + * thrust::partition(thrust::host, + * A, A + N, + * is_even()); + * // A is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} + * \endcode + * + * \see http://www.sgi.com/tech/stl/partition.html + * \see \p stable_partition + * \see \p partition_copy + */ +template + ForwardIterator partition(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred); + + +/*! \p partition reorders the elements [first, last) based on the function + * object \p pred, such that all of the elements that satisfy \p pred precede the + * elements that fail to satisfy it. The postcondition is that, for some iterator + * \c middle in the range [first, last), pred(*i) is \c true for every + * iterator \c i in the range [first,middle) and \c false for every iterator + * \c i in the range [middle, last). The return value of \p partition is + * \c middle. + * + * Note that the relative order of elements in the two reordered sequences is not + * necessarily the same as it was in the original sequence. A different algorithm, + * \ref stable_partition, does guarantee to preserve the relative order. + * + * \param first The beginning of the sequence to reorder. + * \param last The end of the sequence to reorder. + * \param pred A function object which decides to which partition each element of the + * sequence [first, last) belongs. + * \return An iterator referring to the first element of the second partition, that is, + * the sequence of the elements which do not satisfy \p pred. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type, + * and \p ForwardIterator is mutable. + * \tparam Predicate is a model of Predicate. + * + * The following code snippet demonstrates how to use \p partition to reorder a + * sequence so that even numbers precede odd numbers. 
+ * + * \code + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int &x) + * { + * return (x % 2) == 0; + * } + * }; + * ... + * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + * const int N = sizeof(A)/sizeof(int); + * thrust::partition(A, A + N, + * is_even()); + * // A is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} + * \endcode + * + * \see http://www.sgi.com/tech/stl/partition.html + * \see \p stable_partition + * \see \p partition_copy + */ +template + ForwardIterator partition(ForwardIterator first, + ForwardIterator last, + Predicate pred); + + +/*! \p partition reorders the elements [first, last) based on the function + * object \p pred applied to a stencil range [stencil, stencil + (last - first)), + * such that all of the elements whose corresponding stencil element satisfies \p pred precede all of the elements whose + * corresponding stencil element fails to satisfy it. The postcondition is that, for some iterator + * \c middle in the range [first, last), pred(*stencil_i) is \c true for every iterator + * \c stencil_i in the range [stencil,stencil + (middle - first)) and \c false for every iterator \c stencil_i + * in the range [stencil + (middle - first), stencil + (last - first)). + * The return value of \p partition is \c middle. + * + * Note that the relative order of elements in the two reordered sequences is not + * necessarily the same as it was in the original sequence. A different algorithm, + * \ref stable_partition, does guarantee to preserve the relative order. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence to reorder. + * \param last The end of the sequence to reorder. + * \param stencil The beginning of the stencil sequence. + * \param pred A function object which decides to which partition each element of the + * sequence [first, last) belongs.
+ * \return An iterator referring to the first element of the second partition, that is, + * the sequence of the elements whose stencil elements do not satisfy \p pred. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable. + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam Predicate is a model of Predicate. + * + * \pre The ranges [first,last) and [stencil, stencil + (last - first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p partition to reorder a + * sequence so that even numbers precede odd numbers using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int &x) + * { + * return (x % 2) == 0; + * } + * }; + * ... + * int A[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + * int S[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + * const int N = sizeof(A)/sizeof(int); + * thrust::partition(thrust::host, A, A + N, S, is_even()); + * // A is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0} + * // S is unmodified + * \endcode + * + * \see http://www.sgi.com/tech/stl/partition.html + * \see \p stable_partition + * \see \p partition_copy + */ +template + ForwardIterator partition(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred); + + +/*! \p partition reorders the elements [first, last) based on the function + * object \p pred applied to a stencil range [stencil, stencil + (last - first)), + * such that all of the elements whose corresponding stencil element satisfies \p pred precede all of the elements whose + * corresponding stencil element fails to satisfy it. 
The postcondition is that, for some iterator + * \c middle in the range [first, last), pred(*stencil_i) is \c true for every iterator + * \c stencil_i in the range [stencil,stencil + (middle - first)) and \c false for every iterator \c stencil_i + * in the range [stencil + (middle - first), stencil + (last - first)). + * The return value of \p partition is \c middle. + * + * Note that the relative order of elements in the two reordered sequences is not + * necessarily the same as it was in the original sequence. A different algorithm, + * \ref stable_partition, does guarantee to preserve the relative order. + * + * \param first The beginning of the sequence to reorder. + * \param last The end of the sequence to reorder. + * \param stencil The beginning of the stencil sequence. + * \param pred A function object which decides to which partition each element of the + * sequence [first, last) belongs. + * \return An iterator referring to the first element of the second partition, that is, + * the sequence of the elements whose stencil elements do not satisfy \p pred. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable. + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam Predicate is a model of Predicate. + * + * \pre The ranges [first,last) and [stencil, stencil + (last - first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p partition to reorder a + * sequence so that even numbers precede odd numbers. + * + * \code + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int &x) + * { + * return (x % 2) == 0; + * } + * }; + * ...
+ * int A[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + * int S[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + * const int N = sizeof(A)/sizeof(int); + * thrust::partition(A, A + N, S, is_even()); + * // A is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0} + * // S is unmodified + * \endcode + * + * \see http://www.sgi.com/tech/stl/partition.html + * \see \p stable_partition + * \see \p partition_copy + */ +template + ForwardIterator partition(ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred); + + +/*! \p partition_copy differs from \ref partition only in that the reordered + * sequence is written to different output sequences, rather than in place. + * + * \p partition_copy copies the elements [first, last) based on the + * function object \p pred. All of the elements that satisfy \p pred are copied + * to the range beginning at \p out_true and all the elements that fail to satisfy it + * are copied to the range beginning at \p out_false. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence to reorder. + * \param last The end of the sequence to reorder. + * \param out_true The destination of the resulting sequence of elements which satisfy \p pred. + * \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred. + * \param pred A function object which decides to which partition each element of the + * sequence [first, last) belongs. + * \return A \p pair p such that p.first is the end of the output range beginning + * at \p out_true and p.second is the end of the output range beginning at + * \p out_false. + * + * \tparam DerivedPolicy The name of the derived execution policy.
+ * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type and \p InputIterator's \c value_type + * is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * \tparam Predicate is a model of Predicate. + * + * \pre The input range shall not overlap with either output range. + * + * The following code snippet demonstrates how to use \p partition_copy to separate a + * sequence into two output sequences of even and odd numbers using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int &x) + * { + * return (x % 2) == 0; + * } + * }; + * ... + * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + * int result[10]; + * const int N = sizeof(A)/sizeof(int); + * int *evens = result; + * int *odds = result + 5; + * thrust::partition_copy(thrust::host, A, A + N, evens, odds, is_even()); + * // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} + * // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} + * // evens points to {2, 4, 6, 8, 10} + * // odds points to {1, 3, 5, 7, 9} + * \endcode + * + * \note The relative order of elements in the two reordered sequences is not + * necessarily the same as it was in the original sequence. A different algorithm, + * \ref stable_partition_copy, does guarantee to preserve the relative order. + * + * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf + * \see \p stable_partition_copy + * \see \p partition + */ +template + thrust::pair + partition_copy(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred); + + +/*! 
\p partition_copy differs from \ref partition only in that the reordered + * sequence is written to different output sequences, rather than in place. + * + * \p partition_copy copies the elements [first, last) based on the + * function object \p pred. All of the elements that satisfy \p pred are copied + * to the range beginning at \p out_true and all the elements that fail to satisfy it + * are copied to the range beginning at \p out_false. + * + * \param first The beginning of the sequence to reorder. + * \param last The end of the sequence to reorder. + * \param out_true The destination of the resulting sequence of elements which satisfy \p pred. + * \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred. + * \param pred A function object which decides to which partition each element of the + * sequence [first, last) belongs. + * \return A \p pair p such that p.first is the end of the output range beginning + * at \p out_true and p.second is the end of the output range beginning at + * \p out_false. + * + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type and \p InputIterator's \c value_type + * is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * \tparam Predicate is a model of Predicate. + * + * \pre The input range shall not overlap with either output range. + * + * The following code snippet demonstrates how to use \p partition_copy to separate a + * sequence into two output sequences of even and odd numbers. + * + * \code + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int &x) + * { + * return (x % 2) == 0; + * } + * }; + * ...
+ * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + * int result[10]; + * const int N = sizeof(A)/sizeof(int); + * int *evens = result; + * int *odds = result + 5; + * thrust::partition_copy(A, A + N, evens, odds, is_even()); + * // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} + * // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} + * // evens points to {2, 4, 6, 8, 10} + * // odds points to {1, 3, 5, 7, 9} + * \endcode + * + * \note The relative order of elements in the two reordered sequences is not + * necessarily the same as it was in the original sequence. A different algorithm, + * \ref stable_partition_copy, does guarantee to preserve the relative order. + * + * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf + * \see \p stable_partition_copy + * \see \p partition + */ +template + thrust::pair + partition_copy(InputIterator first, + InputIterator last, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred); + + +/*! \p partition_copy differs from \ref partition only in that the reordered + * sequence is written to different output sequences, rather than in place. + * + * \p partition_copy copies the elements [first, last) based on the + * function object \p pred which is applied to a range of stencil elements. All of the elements + * whose corresponding stencil element satisfies \p pred are copied to the range beginning at \p out_true + * and all the elements whose stencil element fails to satisfy it are copied to the range beginning + * at \p out_false. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence to reorder. + * \param last The end of the sequence to reorder. + * \param stencil The beginning of the stencil sequence. + * \param out_true The destination of the resulting sequence of elements which satisfy \p pred.
+ * \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred. + * \param pred A function object which decides to which partition each element of the + * sequence [first, last) belongs. + * \return A \p pair p such that p.first is the end of the output range beginning + * at \p out_true and p.second is the end of the output range beginning at + * \p out_false. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * and \p InputIterator1's \c value_type is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * \tparam Predicate is a model of Predicate. + * + * \pre The input ranges shall not overlap with either output range. + * + * The following code snippet demonstrates how to use \p partition_copy to separate a + * sequence into two output sequences of even and odd numbers using the \p thrust::host execution + * policy for parallelization. + * + * \code + * #include + * #include + * #include + * ...
+ * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + * int S[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + * int result[10]; + * const int N = sizeof(A)/sizeof(int); + * int *evens = result; + * int *odds = result + 5; + * thrust::partition_copy(thrust::host, A, A + N, S, evens, odds, thrust::identity()); + * // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} + * // S remains {0, 1, 0, 1, 0, 1, 0, 1, 0, 1} + * // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} + * // evens points to {2, 4, 6, 8, 10} + * // odds points to {1, 3, 5, 7, 9} + * \endcode + * + * \note The relative order of elements in the two reordered sequences is not + * necessarily the same as it was in the original sequence. A different algorithm, + * \ref stable_partition_copy, does guarantee to preserve the relative order. + * + * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf + * \see \p stable_partition_copy + * \see \p partition + */ +template + thrust::pair + partition_copy(const thrust::detail::execution_policy_base &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred); + + +/*! \p partition_copy differs from \ref partition only in that the reordered + * sequence is written to different output sequences, rather than in place. + * + * \p partition_copy copies the elements [first, last) based on the + * function object \p pred which is applied to a range of stencil elements. All of the elements + * whose corresponding stencil element satisfies \p pred are copied to the range beginning at \p out_true + * and all the elements whose stencil element fails to satisfy it are copied to the range beginning + * at \p out_false. + * + * \param first The beginning of the sequence to reorder. + * \param last The end of the sequence to reorder. + * \param stencil The beginning of the stencil sequence. + * \param out_true The destination of the resulting sequence of elements which satisfy \p pred.
+ * \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred. + * \param pred A function object which decides to which partition each element of the + * sequence [first, last) belongs. + * \return A \p pair p such that p.first is the end of the output range beginning + * at \p out_true and p.second is the end of the output range beginning at + * \p out_false. + * + * \tparam InputIterator1 is a model of Input Iterator, + * and \p InputIterator1's \c value_type is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * \tparam Predicate is a model of Predicate. + * + * \pre The input ranges shall not overlap with either output range. + * + * The following code snippet demonstrates how to use \p partition_copy to separate a + * sequence into two output sequences of even and odd numbers. + * + * \code + * #include + * #include + * ... + * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + * int S[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + * int result[10]; + * const int N = sizeof(A)/sizeof(int); + * int *evens = result; + * int *odds = result + 5; + * thrust::partition_copy(A, A + N, S, evens, odds, thrust::identity()); + * // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} + * // S remains {0, 1, 0, 1, 0, 1, 0, 1, 0, 1} + * // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} + * // evens points to {2, 4, 6, 8, 10} + * // odds points to {1, 3, 5, 7, 9} + * \endcode + * + * \note The relative order of elements in the two reordered sequences is not + * necessarily the same as it was in the original sequence. A different algorithm, + * \ref stable_partition_copy, does guarantee to preserve the relative order.
+ * + * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf + * \see \p stable_partition_copy + * \see \p partition + */ +template + thrust::pair + partition_copy(InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred); + + +/*! \p stable_partition is much like \ref partition : it reorders the elements in the + * range [first, last) based on the function object \p pred, such that all of + * the elements that satisfy \p pred precede all of the elements that fail to satisfy + * it. The postcondition is that, for some iterator \p middle in the range + * [first, last), pred(*i) is \c true for every iterator \c i in the + * range [first,middle) and \c false for every iterator \c i in the range + * [middle, last). The return value of \p stable_partition is \c middle. + * + * \p stable_partition differs from \ref partition in that \p stable_partition is + * guaranteed to preserve relative order. That is, if \c x and \c y are elements in + * [first, last) to which \p pred is applied directly (this overload + * takes no stencil sequence), + * such that pred(x) == pred(y), and if \c x precedes + * \c y, then it will still be true after \p stable_partition that \c x precedes \c y. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The first element of the sequence to reorder. + * \param last One position past the last element of the sequence to reorder. + * \param pred A function object which decides to which partition each element of the + * sequence [first, last) belongs. + * \return An iterator referring to the first element of the second partition, that is, + * the sequence of the elements which do not satisfy pred. + * + * \tparam DerivedPolicy The name of the derived execution policy.
+ * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type, + * and \p ForwardIterator is mutable. + * \tparam Predicate is a model of Predicate. + * + * The following code snippet demonstrates how to use \p stable_partition to reorder a + * sequence so that even numbers precede odd numbers using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int &x) + * { + * return (x % 2) == 0; + * } + * }; + * ... + * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + * const int N = sizeof(A)/sizeof(int); + * thrust::stable_partition(thrust::host, + * A, A + N, + * is_even()); + * // A is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} + * \endcode + * + * \see http://www.sgi.com/tech/stl/stable_partition.html + * \see \p partition + * \see \p stable_partition_copy + */ +template + ForwardIterator stable_partition(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred); + + +/*! \p stable_partition is much like \ref partition : it reorders the elements in the + * range [first, last) based on the function object \p pred, such that all of + * the elements that satisfy \p pred precede all of the elements that fail to satisfy + * it. The postcondition is that, for some iterator \p middle in the range + * [first, last), pred(*i) is \c true for every iterator \c i in the + * range [first,middle) and \c false for every iterator \c i in the range + * [middle, last). The return value of \p stable_partition is \c middle. + * + * \p stable_partition differs from \ref partition in that \p stable_partition is + * guaranteed to preserve relative order. 
That is, if \c x and \c y are elements in + * [first, last) to which \p pred is applied directly (this overload + * takes no stencil sequence), + * such that pred(x) == pred(y), and if \c x precedes + * \c y, then it will still be true after \p stable_partition that \c x precedes \c y. + * + * \param first The first element of the sequence to reorder. + * \param last One position past the last element of the sequence to reorder. + * \param pred A function object which decides to which partition each element of the + * sequence [first, last) belongs. + * \return An iterator referring to the first element of the second partition, that is, + * the sequence of the elements which do not satisfy pred. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type, + * and \p ForwardIterator is mutable. + * \tparam Predicate is a model of Predicate. + * + * The following code snippet demonstrates how to use \p stable_partition to reorder a + * sequence so that even numbers precede odd numbers. + * + * \code + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int &x) + * { + * return (x % 2) == 0; + * } + * }; + * ... + * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + * const int N = sizeof(A)/sizeof(int); + * thrust::stable_partition(A, A + N, + * is_even()); + * // A is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} + * \endcode + * + * \see http://www.sgi.com/tech/stl/stable_partition.html + * \see \p partition + * \see \p stable_partition_copy + */ +template + ForwardIterator stable_partition(ForwardIterator first, + ForwardIterator last, + Predicate pred); + + +/*!
\p stable_partition is much like \p partition: it reorders the elements in the + * range [first, last) based on the function object \p pred applied to a stencil + * range [stencil, stencil + (last - first)), such that all of + * the elements whose corresponding stencil element satisfies \p pred precede all of the elements whose + * corresponding stencil element fails to satisfy it. The postcondition is that, for some iterator + * \c middle in the range [first, last), pred(*stencil_i) is \c true for every iterator + * \c stencil_i in the range [stencil,stencil + (middle - first)) and \c false for every iterator \c stencil_i + * in the range [stencil + (middle - first), stencil + (last - first)). + * The return value of \p stable_partition is \c middle. + * + * \p stable_partition differs from \ref partition in that \p stable_partition is + * guaranteed to preserve relative order. That is, if \c x and \c y are elements in + * [first, last) whose corresponding stencil elements \c stencil_x and \c stencil_y satisfy pred(stencil_x) == pred(stencil_y), and if \c x precedes + * \c y, then it will still be true after \p stable_partition that \c x precedes \c y. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The first element of the sequence to reorder. + * \param last One position past the last element of the sequence to reorder. + * \param stencil The beginning of the stencil sequence. + * \param pred A function object which decides to which partition each element of the + * sequence [first, last) belongs. + * \return An iterator referring to the first element of the second partition, that is, + * the sequence of the elements whose stencil elements do not satisfy \p pred. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable.
+ * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam Predicate is a model of Predicate. + * + * \pre The range [first, last) shall not overlap with the range [stencil, stencil + (last - first)). + * + * The following code snippet demonstrates how to use \p stable_partition to reorder a + * sequence so that even numbers precede odd numbers using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int &x) + * { + * return (x % 2) == 0; + * } + * }; + * ... + * int A[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + * int S[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + * const int N = sizeof(A)/sizeof(int); + * thrust::stable_partition(thrust::host, A, A + N, S, is_even()); + * // A is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0} + * // S is unmodified + * \endcode + * + * \see http://www.sgi.com/tech/stl/stable_partition.html + * \see \p partition + * \see \p stable_partition_copy + */ +template + ForwardIterator stable_partition(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred); + + +/*! \p stable_partition is much like \p partition: it reorders the elements in the + * range [first, last) based on the function object \p pred applied to a stencil + * range [stencil, stencil + (last - first)), such that all of + * the elements whose corresponding stencil element satisfies \p pred precede all of the elements whose + * corresponding stencil element fails to satisfy it. 
The postcondition is that, for some iterator + * \c middle in the range [first, last), pred(*stencil_i) is \c true for every iterator + * \c stencil_i in the range [stencil,stencil + (middle - first)) and \c false for every iterator \c stencil_i + * in the range [stencil + (middle - first), stencil + (last - first)). + * The return value of \p stable_partition is \c middle. + * + * \p stable_partition differs from \ref partition in that \p stable_partition is + * guaranteed to preserve relative order. That is, if \c x and \c y are elements in + * [first, last) whose corresponding stencil elements \c stencil_x and \c stencil_y satisfy pred(stencil_x) == pred(stencil_y), and if \c x precedes + * \c y, then it will still be true after \p stable_partition that \c x precedes \c y. + * + * \param first The first element of the sequence to reorder. + * \param last One position past the last element of the sequence to reorder. + * \param stencil The beginning of the stencil sequence. + * \param pred A function object which decides to which partition each element of the + * sequence [first, last) belongs. + * \return An iterator referring to the first element of the second partition, that is, + * the sequence of the elements whose stencil elements do not satisfy \p pred. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable. + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam Predicate is a model of Predicate. + * + * \pre The range [first, last) shall not overlap with the range [stencil, stencil + (last - first)). + * + * The following code snippet demonstrates how to use \p stable_partition to reorder a + * sequence so that even numbers precede odd numbers. + * + * \code + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int &x) + * { + * return (x % 2) == 0; + * } + * }; + * ...
+ * int A[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + * int S[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + * const int N = sizeof(A)/sizeof(int); + * thrust::stable_partition(A, A + N, S, is_even()); + * // A is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0} + * // S is unmodified + * \endcode + * + * \see http://www.sgi.com/tech/stl/stable_partition.html + * \see \p partition + * \see \p stable_partition_copy + */ +template + ForwardIterator stable_partition(ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred); + + +/*! \p stable_partition_copy differs from \ref stable_partition only in that the reordered + * sequence is written to different output sequences, rather than in place. + * + * \p stable_partition_copy copies the elements [first, last) based on the + * function object \p pred. All of the elements that satisfy \p pred are copied + * to the range beginning at \p out_true and all the elements that fail to satisfy it + * are copied to the range beginning at \p out_false. + * + * \p stable_partition_copy differs from \ref partition_copy in that + * \p stable_partition_copy is guaranteed to preserve relative order. That is, if + * \c x and \c y are elements in [first, last), such that + * pred(x) == pred(y), and if \c x precedes \c y, then it will still be true + * after \p stable_partition_copy that \c x precedes \c y in the output. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The first element of the sequence to reorder. + * \param last One position past the last element of the sequence to reorder. + * \param out_true The destination of the resulting sequence of elements which satisfy \p pred. + * \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred. + * \param pred A function object which decides to which partition each element of the + * sequence [first, last) belongs. 
+ * \return A \p pair p such that p.first is the end of the output range beginning + * at \p out_true and p.second is the end of the output range beginning at + * \p out_false. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type and \p InputIterator's \c value_type + * is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * \tparam Predicate is a model of Predicate. + * + * \pre The input ranges shall not overlap with either output range. + * + * The following code snippet demonstrates how to use \p stable_partition_copy to + * reorder a sequence so that even numbers precede odd numbers using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int &x) + * { + * return (x % 2) == 0; + * } + * }; + * ... + * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + * int result[10]; + * const int N = sizeof(A)/sizeof(int); + * int *evens = result; + * int *odds = result + 5; + * thrust::stable_partition_copy(thrust::host, A, A + N, evens, odds, is_even()); + * // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} + * // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} + * // evens points to {2, 4, 6, 8, 10} + * // odds points to {1, 3, 5, 7, 9} + * \endcode + * + * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf + * \see \p partition_copy + * \see \p stable_partition + */ +template + thrust::pair + stable_partition_copy(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred); + + +/*! 
\p stable_partition_copy differs from \ref stable_partition only in that the reordered + * sequence is written to different output sequences, rather than in place. + * + * \p stable_partition_copy copies the elements [first, last) based on the + * function object \p pred. All of the elements that satisfy \p pred are copied + * to the range beginning at \p out_true and all the elements that fail to satisfy it + * are copied to the range beginning at \p out_false. + * + * \p stable_partition_copy differs from \ref partition_copy in that + * \p stable_partition_copy is guaranteed to preserve relative order. That is, if + * \c x and \c y are elements in [first, last), such that + * pred(x) == pred(y), and if \c x precedes \c y, then it will still be true + * after \p stable_partition_copy that \c x precedes \c y in the output. + * + * \param first The first element of the sequence to reorder. + * \param last One position past the last element of the sequence to reorder. + * \param out_true The destination of the resulting sequence of elements which satisfy \p pred. + * \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred. + * \param pred A function object which decides to which partition each element of the + * sequence [first, last) belongs. + * \return A \p pair p such that p.first is the end of the output range beginning + * at \p out_true and p.second is the end of the output range beginning at + * \p out_false. + * + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type and \p InputIterator's \c value_type + * is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * \tparam Predicate is a model of Predicate. + * + * \pre The input ranges shall not overlap with either output range. 
+ * + * The following code snippet demonstrates how to use \p stable_partition_copy to + * reorder a sequence so that even numbers precede odd numbers. + * + * \code + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int &x) + * { + * return (x % 2) == 0; + * } + * }; + * ... + * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + * int result[10]; + * const int N = sizeof(A)/sizeof(int); + * int *evens = result; + * int *odds = result + 5; + * thrust::stable_partition_copy(A, A + N, evens, odds, is_even()); + * // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} + * // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} + * // evens points to {2, 4, 6, 8, 10} + * // odds points to {1, 3, 5, 7, 9} + * \endcode + * + * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf + * \see \p partition_copy + * \see \p stable_partition + */ +template + thrust::pair + stable_partition_copy(InputIterator first, + InputIterator last, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred); + + +/*! \p stable_partition_copy differs from \ref stable_partition only in that the reordered + * sequence is written to different output sequences, rather than in place. + * + * \p stable_partition_copy copies the elements [first, last) based on the + * function object \p pred which is applied to a range of stencil elements. All of the elements + * whose corresponding stencil element satisfies \p pred are copied to the range beginning at \p out_true + * and all the elements whose stencil element fails to satisfy it are copied to the range beginning + * at \p out_false. + * + * \p stable_partition_copy differs from \ref partition_copy in that + * \p stable_partition_copy is guaranteed to preserve relative order. That is, if + * \c x and \c y are elements in [first, last), such that + * pred(x) == pred(y), and if \c x precedes \c y, then it will still be true + * after \p stable_partition_copy that \c x precedes \c y in the output. 
+ * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The first element of the sequence to reorder. + * \param last One position past the last element of the sequence to reorder. + * \param stencil The beginning of the stencil sequence. + * \param out_true The destination of the resulting sequence of elements which satisfy \p pred. + * \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred. + * \param pred A function object which decides to which partition each element of the + * sequence [first, last) belongs. + * \return A \p pair p such that p.first is the end of the output range beginning + * at \p out_true and p.second is the end of the output range beginning at + * \p out_false. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * and \p InputIterator1's \c value_type is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * \tparam Predicate is a model of Predicate. + * + * \pre The input ranges shall not overlap with either output range. + * + * The following code snippet demonstrates how to use \p stable_partition_copy to + * reorder a sequence so that even numbers precede odd numbers using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ...
+ * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + * int S[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + * int result[10]; + * const int N = sizeof(A)/sizeof(int); + * int *evens = result; + * int *odds = result + 5; + * thrust::stable_partition_copy(thrust::host, A, A + N, S, evens, odds, thrust::identity()); + * // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} + * // S remains {0, 1, 0, 1, 0, 1, 0, 1, 0, 1} + * // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} + * // evens points to {2, 4, 6, 8, 10} + * // odds points to {1, 3, 5, 7, 9} + * \endcode + * + * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf + * \see \p partition_copy + * \see \p stable_partition + */ +template + thrust::pair + stable_partition_copy(const thrust::detail::execution_policy_base &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred); + + +/*! \p stable_partition_copy differs from \ref stable_partition only in that the reordered + * sequence is written to different output sequences, rather than in place. + * + * \p stable_partition_copy copies the elements [first, last) based on the + * function object \p pred which is applied to a range of stencil elements. All of the elements + * whose corresponding stencil element satisfies \p pred are copied to the range beginning at \p out_true + * and all the elements whose stencil element fails to satisfy it are copied to the range beginning + * at \p out_false. + * + * \p stable_partition_copy differs from \ref partition_copy in that + * \p stable_partition_copy is guaranteed to preserve relative order. That is, if + * \c x and \c y are elements in [first, last), such that + * pred(x) == pred(y), and if \c x precedes \c y, then it will still be true + * after \p stable_partition_copy that \c x precedes \c y in the output. + * + * \param first The first element of the sequence to reorder. 
+ * \param last One position past the last element of the sequence to reorder. + * \param stencil The beginning of the stencil sequence. + * \param out_true The destination of the resulting sequence of elements which satisfy \p pred. + * \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred. + * \param pred A function object which decides to which partition each element of the + * sequence [first, last) belongs. + * \return A \p pair p such that p.first is the end of the output range beginning + * at \p out_true and p.second is the end of the output range beginning at + * \p out_false. + * + * \tparam InputIterator1 is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * \tparam Predicate is a model of Predicate. + * + * \pre The input ranges shall not overlap with either output range. + * + * The following code snippet demonstrates how to use \p stable_partition_copy to + * reorder a sequence so that even numbers precede odd numbers. + * + * \code + * #include + * #include + * ... 
+ * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + * int S[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + * int result[10]; + * const int N = sizeof(A)/sizeof(int); + * int *evens = result; + * int *odds = result + 5; + * thrust::stable_partition_copy(A, A + N, S, evens, odds, thrust::identity()); + * // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} + * // S remains {0, 1, 0, 1, 0, 1, 0, 1, 0, 1} + * // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} + * // evens points to {2, 4, 6, 8, 10} + * // odds points to {1, 3, 5, 7, 9} + * \endcode + * + * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf + * \see \p partition_copy + * \see \p stable_partition + */ +template + thrust::pair + stable_partition_copy(InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred); + + +/*! \} // end stream_compaction + */ + +/*! \} // end reordering + */ + +/*! \addtogroup searching + * \{ + */ + + +/*! \p partition_point returns an iterator pointing to the end of the true + * partition of a partitioned range. \p partition_point requires the input range + * [first,last) to be a partition; that is, all elements which satisfy + * pred shall appear before those that do not. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the range to consider. + * \param last The end of the range to consider. + * \param pred A function object which decides to which partition each element of the + * range [first, last) belongs. + * \return An iterator \c mid such that all_of(first, mid, pred) + * and none_of(mid, last, pred) are both true. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type. 
+ * \tparam Predicate is a model of Predicate. + * + * \pre The range [first, last) shall be partitioned by \p pred. + * + * \note Though similar, \p partition_point is not redundant with \p find_if_not. + * \p partition_point's precondition provides an opportunity for a + * faster implementation. + * + * \code + * #include + * #include + * + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int &x) + * { + * return (x % 2) == 0; + * } + * }; + * + * ... + * + * int A[] = {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}; + * int * B = thrust::partition_point(thrust::host, A, A + 10, is_even()); + * // B - A is 5 + * // [A, B) contains only even values + * \endcode + * + * \see \p partition + * \see \p find_if_not + */ +template + ForwardIterator partition_point(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred); + + +/*! \p partition_point returns an iterator pointing to the end of the true + * partition of a partitioned range. \p partition_point requires the input range + * [first,last) to be a partition; that is, all elements which satisfy + * pred shall appear before those that do not. + * \param first The beginning of the range to consider. + * \param last The end of the range to consider. + * \param pred A function object which decides to which partition each element of the + * range [first, last) belongs. + * \return An iterator \c mid such that all_of(first, mid, pred) + * and none_of(mid, last, pred) are both true. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam Predicate is a model of Predicate. + * + * \pre The range [first, last) shall be partitioned by \p pred. + * + * \note Though similar, \p partition_point is not redundant with \p find_if_not. + * \p partition_point's precondition provides an opportunity for a + * faster implementation.
+ * + * \code + * #include + * + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int &x) + * { + * return (x % 2) == 0; + * } + * }; + * + * ... + * + * int A[] = {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}; + * int * B = thrust::partition_point(A, A + 10, is_even()); + * // B - A is 5 + * // [A, B) contains only even values + * \endcode + * + * \see \p partition + * \see \p find_if_not + */ +template + ForwardIterator partition_point(ForwardIterator first, + ForwardIterator last, + Predicate pred); + +/*! \} // searching + */ + +/*! \addtogroup reductions + * \{ + * \addtogroup predicates + * \{ + */ + + +/*! \p is_partitioned returns \c true if the given range + * is partitioned with respect to a predicate, and \c false otherwise. + * + * Specifically, \p is_partitioned returns \c true if [first, last) + * is empty or if [first, last) is partitioned by \p pred, i.e. if + * all elements that satisfy \p pred appear before those that do not. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the range to consider. + * \param last The end of the range to consider. + * \param pred A function object which decides to which partition each element of the + * range [first, last) belongs. + * \return \c true if the range [first, last) is partitioned with respect + * to \p pred, or if [first, last) is empty. \c false, otherwise. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam Predicate is a model of Predicate. + * + * \code + * #include + * #include + * + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int &x) + * { + * return (x % 2) == 0; + * } + * }; + * + * ...
+ * + * int A[] = {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}; + * int B[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + * + * thrust::is_partitioned(thrust::host, A, A + 10, is_even()); // returns true + * thrust::is_partitioned(thrust::host, B, B + 10, is_even()); // returns false + * \endcode + * + * \see \p partition + */ +template + bool is_partitioned(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + Predicate pred); + + +/*! \p is_partitioned returns \c true if the given range + * is partitioned with respect to a predicate, and \c false otherwise. + * + * Specifically, \p is_partitioned returns \c true if [first, last) + * is empty or if [first, last) is partitioned by \p pred, i.e. if + * all elements that satisfy \p pred appear before those that do not. + * + * \param first The beginning of the range to consider. + * \param last The end of the range to consider. + * \param pred A function object which decides to which partition each element of the + * range [first, last) belongs. + * \return \c true if the range [first, last) is partitioned with respect + * to \p pred, or if [first, last) is empty. \c false, otherwise. + * + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam Predicate is a model of Predicate. + * + * \code + * #include + * + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int &x) + * { + * return (x % 2) == 0; + * } + * }; + * + * ... + * + * int A[] = {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}; + * int B[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + * + * thrust::is_partitioned(A, A + 10, is_even()); // returns true + * thrust::is_partitioned(B, B + 10, is_even()); // returns false + * \endcode + * + * \see \p partition + */ +template + bool is_partitioned(InputIterator first, + InputIterator last, + Predicate pred); + + +/*!
\} // end predicates + * \} // end reductions + */ + + +} // end thrust + +#include + diff --git a/compat/thrust/random.h b/compat/thrust/random.h new file mode 100644 index 0000000..5a2c00d --- /dev/null +++ b/compat/thrust/random.h @@ -0,0 +1,120 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file random.h + * \brief Pseudo-random number generators. + */ + +#pragma once + +#include +#include + +// RNGs +#include +#include +#include +#include +#include + +// distributions +#include +#include +#include + +namespace thrust +{ + + +/*! \addtogroup random Random Number Generation + * \{ + */ + + +/*! \namespace thrust::random + * \brief \p thrust::random is the namespace which contains random number engine class templates, + * random number engine adaptor class templates, engines with predefined parameters, + * and random number distribution class templates. They are provided in a separate namespace + * for import convenience but are also aliased in the top-level \p thrust namespace for + * easy access. + */ +namespace random +{ + +/*! \addtogroup predefined_random Random Number Engines with Predefined Parameters + * \ingroup random + * \{ + */ + +/*! \typedef ranlux24 + * \brief A random number engine with predefined parameters which implements the + * RANLUX level-3 random number generation algorithm. 
+ * \note The 10000th consecutive invocation of a default-constructed object of type \p ranlux24 + * shall produce the value \c 9901578 . + */ +typedef discard_block_engine ranlux24; + + +/*! \typedef ranlux48 + * \brief A random number engine with predefined parameters which implements the + * RANLUX level-4 random number generation algorithm. + * \note The 10000th consecutive invocation of a default-constructed object of type \p ranlux48 + * shall produce the value \c 88229545517833 . + */ +typedef discard_block_engine ranlux48; + + +/*! \typedef taus88 + * \brief A random number engine with predefined parameters which implements + * L'Ecuyer's 1996 three-component Tausworthe random number generator. + * + * \note The 10000th consecutive invocation of a default-constructed object of type \p taus88 + * shall produce the value \c 3535848941 . + */ +typedef xor_combine_engine< + linear_feedback_shift_engine, + 0, + xor_combine_engine< + linear_feedback_shift_engine, 0, + linear_feedback_shift_engine, 0 + >, + 0 +> taus88; + +/*! \typedef default_random_engine + * \brief An implementation-defined "default" random number engine. + * \note \p default_random_engine is currently an alias for \p minstd_rand, and may change + * in a future version. + */ +typedef minstd_rand default_random_engine; + +/*! \} // end predefined_random + */ + +} // end random + + +/*! 
\} // end random + */ + +// import names into thrust:: +using random::ranlux24; +using random::ranlux48; +using random::taus88; +using random::default_random_engine; + +} // end thrust + diff --git a/compat/thrust/random/detail/discard_block_engine.inl b/compat/thrust/random/detail/discard_block_engine.inl new file mode 100644 index 0000000..5f01bd1 --- /dev/null +++ b/compat/thrust/random/detail/discard_block_engine.inl @@ -0,0 +1,201 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +namespace thrust +{ + +namespace random +{ + + +template + discard_block_engine + ::discard_block_engine() + : m_e(), m_n(0) +{} + + +template + discard_block_engine + ::discard_block_engine(result_type s) + : m_e(s), m_n(0) +{} + + +template + discard_block_engine + ::discard_block_engine(const base_type &urng) + : m_e(urng), m_n(0) +{} + + +template + void discard_block_engine + ::seed(void) +{ + m_e.seed(); + m_n = 0; +} + + +template + void discard_block_engine + ::seed(result_type s) +{ + m_e.seed(s); + m_n = 0; +} + + +template + typename discard_block_engine::result_type + discard_block_engine + ::operator()(void) +{ + if(m_n >= used_block) + { + m_e.discard(block_size - m_n); +// for(; m_n < block_size; ++m_n) +// m_e(); + m_n = 0; + } + + ++m_n; + + return m_e(); +} + + +template + void discard_block_engine + ::discard(unsigned long long z) +{ + // XXX this should be accelerated + for(; z > 0; --z) + { + this->operator()(); + } // end for +} + + +template + const typename discard_block_engine::base_type & + discard_block_engine + ::base(void) const +{ + return m_e; +} + + +template + template + std::basic_ostream& discard_block_engine + ::stream_out(std::basic_ostream &os) const +{ + typedef std::basic_ostream ostream_type; + typedef typename ostream_type::ios_base ios_base; + + // save old flags & fill character + const typename ios_base::fmtflags flags = os.flags(); + const CharT fill = os.fill(); + + const CharT space = os.widen(' '); + os.flags(ios_base::dec | ios_base::fixed | ios_base::left); + os.fill(space); + + // output the base engine followed by n + os << m_e << space << m_n; + + // restore flags & fill character + os.flags(flags); + os.fill(fill); + + return os; +} + + +template + template + std::basic_istream& discard_block_engine + ::stream_in(std::basic_istream &is) +{ + typedef std::basic_istream istream_type; + typedef typename istream_type::ios_base ios_base; + + // save old flags + const typename ios_base::fmtflags 
flags = is.flags(); + + is.flags(ios_base::skipws); + + // input the base engine and then n + is >> m_e >> m_n; + + // restore old flags + is.flags(flags); + return is; +} + + +template + bool discard_block_engine + ::equal(const discard_block_engine &rhs) const +{ + return (m_e == rhs.m_e) && (m_n == rhs.m_n); +} + + +template +std::basic_ostream& +operator<<(std::basic_ostream &os, + const discard_block_engine &e) +{ + return thrust::random::detail::random_core_access::stream_out(os,e); +} + + +template +std::basic_istream& +operator>>(std::basic_istream &is, + discard_block_engine &e) +{ + return thrust::random::detail::random_core_access::stream_in(is,e); +} + + +template +bool operator==(const discard_block_engine &lhs, + const discard_block_engine &rhs) +{ + return thrust::random::detail::random_core_access::equal(lhs,rhs); +} + + +template +bool operator!=(const discard_block_engine &lhs, + const discard_block_engine &rhs) +{ + return !(lhs == rhs); +} + + +} // end random + +} // end thrust + diff --git a/compat/thrust/random/detail/linear_congruential_engine.inl b/compat/thrust/random/detail/linear_congruential_engine.inl new file mode 100644 index 0000000..f040563 --- /dev/null +++ b/compat/thrust/random/detail/linear_congruential_engine.inl @@ -0,0 +1,163 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +namespace thrust +{ + +namespace random +{ + + +template + linear_congruential_engine + ::linear_congruential_engine(result_type s) +{ + seed(s); +} // end linear_congruential_engine::linear_congruential_engine() + + +template + void linear_congruential_engine + ::seed(result_type s) +{ + if((detail::mod(c) == 0) && + (detail::mod(s) == 0)) + m_x = detail::mod(1); + else + m_x = detail::mod(s); +} // end linear_congruential_engine::seed() + + +template + typename linear_congruential_engine::result_type + linear_congruential_engine + ::operator()(void) +{ + m_x = detail::mod(m_x); + return m_x; +} // end linear_congruential_engine::operator()() + + +template + void linear_congruential_engine + ::discard(unsigned long long z) +{ + thrust::random::detail::linear_congruential_engine_discard::discard(*this,z); +} // end linear_congruential_engine::discard() + + +template + template + std::basic_ostream& linear_congruential_engine + ::stream_out(std::basic_ostream &os) const +{ + typedef std::basic_ostream ostream_type; + typedef typename ostream_type::ios_base ios_base; + + // save old flags & fill character + const typename ios_base::fmtflags flags = os.flags(); + const CharT fill = os.fill(); + + os.flags(ios_base::dec | ios_base::fixed | ios_base::left); + os.fill(os.widen(' ')); + + // output one word of state + os << m_x; + + // restore flags & fill character + os.flags(flags); + os.fill(fill); + + return os; +} + + +template + template + std::basic_istream& linear_congruential_engine + ::stream_in(std::basic_istream &is) +{ + typedef std::basic_istream istream_type; + typedef typename istream_type::ios_base ios_base; + + // save old flags + const typename ios_base::fmtflags flags = is.flags(); + + is.flags(ios_base::dec); + + // input one word of state + is >> m_x; + + // restore flags + is.flags(flags); + + return is; +} + + +template +bool linear_congruential_engine + ::equal(const linear_congruential_engine &rhs) const +{ 
+ return m_x == rhs.m_x; +} + + +template +__host__ __device__ +bool operator==(const linear_congruential_engine &lhs, + const linear_congruential_engine &rhs) +{ + return detail::random_core_access::equal(lhs,rhs); +} + + +template +bool operator!=(const linear_congruential_engine &lhs, + const linear_congruential_engine &rhs) +{ + return !(lhs == rhs); +} + + +template +std::basic_ostream& +operator<<(std::basic_ostream &os, + const linear_congruential_engine &e) +{ + return detail::random_core_access::stream_out(os,e); +} + + +template +std::basic_istream& +operator>>(std::basic_istream &is, + linear_congruential_engine &e) +{ + return detail::random_core_access::stream_in(is,e); +} + + +} // end random + +} // end thrust + diff --git a/compat/thrust/random/detail/linear_congruential_engine_discard.h b/compat/thrust/random/detail/linear_congruential_engine_discard.h new file mode 100644 index 0000000..f4ec233 --- /dev/null +++ b/compat/thrust/random/detail/linear_congruential_engine_discard.h @@ -0,0 +1,107 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ + +namespace random +{ + +namespace detail +{ + + +template + struct linear_congruential_engine_discard_implementation +{ + __host__ __device__ + static void discard(UIntType &state, unsigned long long z) + { + for(; z > 0; --z) + { + state = detail::mod(state); + } + } +}; // end linear_congruential_engine_discard + + +// specialize for small integers and c == 0 +// XXX figure out a robust implemenation of this for any unsigned integer type later +template + struct linear_congruential_engine_discard_implementation +{ + __host__ __device__ + static void discard(thrust::detail::uint32_t &state, unsigned long long z) + { + const thrust::detail::uint32_t modulus = m; + + // XXX we need to use unsigned long long here or we will encounter overflow in the + // multiplies below + // figure out a robust implementation of this later + unsigned long long multiplier = a; + unsigned long long multiplier_to_z = 1; + + // see http://en.wikipedia.org/wiki/Modular_exponentiation + while(z > 0) + { + if(z & 1) + { + // multiply in this bit's contribution while using modulus to keep result small + multiplier_to_z = (multiplier_to_z * multiplier) % modulus; + } + + // move to the next bit of the exponent, square (and mod) the base accordingly + z >>= 1; + multiplier = (multiplier * multiplier) % modulus; + } + + state = static_cast((multiplier_to_z * state) % modulus); + } +}; // end linear_congruential_engine_discard + + +struct linear_congruential_engine_discard +{ + template + __host__ __device__ + static void discard(LinearCongruentialEngine &lcg, unsigned long long z) + { + typedef typename LinearCongruentialEngine::result_type result_type; + const result_type c = LinearCongruentialEngine::increment; + const result_type a = LinearCongruentialEngine::multiplier; + const result_type m = LinearCongruentialEngine::modulus; + + // XXX WAR unused variable warnings + (void) c; + (void) a; + (void) m; + + 
linear_congruential_engine_discard_implementation::discard(lcg.m_x, z); + } +}; // end linear_congruential_engine_discard + + +} // end detail + +} // end random + +} // end thrust + diff --git a/compat/thrust/random/detail/linear_feedback_shift_engine.inl b/compat/thrust/random/detail/linear_feedback_shift_engine.inl new file mode 100644 index 0000000..4e8dad5 --- /dev/null +++ b/compat/thrust/random/detail/linear_feedback_shift_engine.inl @@ -0,0 +1,158 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +namespace thrust +{ + +namespace random +{ + +template + linear_feedback_shift_engine + ::linear_feedback_shift_engine(result_type value) +{ + seed(value); +} // end linear_feedback_shift_engine::linear_feedback_shift_engine() + +template + void linear_feedback_shift_engine + ::seed(result_type value) +{ + m_value = value; +} // end linear_feedback_shift_engine::seed() + +template + typename linear_feedback_shift_engine::result_type + linear_feedback_shift_engine + ::operator()(void) +{ + const UIntType b = (((m_value << q) ^ m_value) & wordmask) >> (k-s); + const UIntType mask = ( (~static_cast(0)) << (w-k) ) & wordmask; + m_value = ((m_value & mask) << s) ^ b; + return m_value; +} // end linear_feedback_shift_engine::operator()() + + +template + void linear_feedback_shift_engine + ::discard(unsigned long long z) +{ + for(; z > 0; --z) + { + this->operator()(); + } // end for +} // end linear_feedback_shift_engine::discard() + + +template + template + std::basic_ostream& linear_feedback_shift_engine + ::stream_out(std::basic_ostream &os) const +{ + typedef std::basic_ostream ostream_type; + typedef typename ostream_type::ios_base ios_base; + + // save old flags & fill character + const typename ios_base::fmtflags flags = os.flags(); + const CharT fill = os.fill(); + + os.flags(ios_base::dec | ios_base::fixed | ios_base::left); + os.fill(os.widen(' ')); + + // output one word of state + os << m_value; + + // restore flags & fill character + os.flags(flags); + os.fill(fill); + + return os; +} + + +template + template + std::basic_istream& linear_feedback_shift_engine + ::stream_in(std::basic_istream &is) +{ + typedef std::basic_istream istream_type; + typedef typename istream_type::ios_base ios_base; + + // save old flags + const typename ios_base::fmtflags flags = is.flags(); + + is.flags(ios_base::skipws); + + // input one word of state + is >> m_value; + + // restore flags + is.flags(flags); + + return is; +} + + +template + bool 
linear_feedback_shift_engine + ::equal(const linear_feedback_shift_engine &rhs) const +{ + return m_value == rhs.m_value; +} + + +template +bool operator==(const linear_feedback_shift_engine &lhs, + const linear_feedback_shift_engine &rhs) +{ + return thrust::random::detail::random_core_access::equal(lhs,rhs); +} + + +template +bool operator!=(const linear_feedback_shift_engine &lhs, + const linear_feedback_shift_engine &rhs) +{ + return !(lhs == rhs); +} + + +template +std::basic_ostream& +operator<<(std::basic_ostream &os, + const linear_feedback_shift_engine &e) +{ + return thrust::random::detail::random_core_access::stream_out(os,e); +} + + +template +std::basic_istream& +operator>>(std::basic_istream &is, + linear_feedback_shift_engine &e) +{ + return thrust::random::detail::random_core_access::stream_in(is,e); +} + + +} // end random + +} // end thrust + diff --git a/compat/thrust/random/detail/linear_feedback_shift_engine_wordmask.h b/compat/thrust/random/detail/linear_feedback_shift_engine_wordmask.h new file mode 100644 index 0000000..ed9e51e --- /dev/null +++ b/compat/thrust/random/detail/linear_feedback_shift_engine_wordmask.h @@ -0,0 +1,47 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +namespace thrust +{ + +namespace random +{ + +namespace detail +{ + +template + struct linear_feedback_shift_engine_wordmask +{ + static const T value = + (T(1u) << i) | + linear_feedback_shift_engine_wordmask::value; +}; // end linear_feedback_shift_engine_wordmask + +template + struct linear_feedback_shift_engine_wordmask +{ + static const T value = 0; +}; // end linear_feedback_shift_engine_wordmask + +} // end detail + +} // end random + +} // end thrust + diff --git a/compat/thrust/random/detail/mod.h b/compat/thrust/random/detail/mod.h new file mode 100644 index 0000000..ceb2191 --- /dev/null +++ b/compat/thrust/random/detail/mod.h @@ -0,0 +1,97 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +namespace thrust +{ + +namespace random +{ + +namespace detail +{ + +template + struct static_mod +{ + static const T q = m / a; + static const T r = m % a; + + __host__ __device__ + T operator()(T x) const + { + if(a == 1) + { + x %= m; + } + else + { + T t1 = a * (x % q); + T t2 = r * (x / q); + if(t1 >= t2) + { + x = t1 - t2; + } + else + { + x = m - t2 + t1; + } + } + + if(c != 0) + { + const T d = m - x; + if(d > c) + { + x += c; + } + else + { + x = c - d; + } + } + + return x; + } +}; // end static_mod + + +// Rely on machine overflow handling +template + struct static_mod +{ + __host__ __device__ + T operator()(T x) const + { + return a * x + c; + } +}; // end static_mod + +template +__host__ __device__ + T mod(T x) +{ + static_mod f; + return f(x); +} // end static_mod + +} // end detail + +} // end random + +} // end thrust + diff --git a/compat/thrust/random/detail/normal_distribution.inl b/compat/thrust/random/detail/normal_distribution.inl new file mode 100644 index 0000000..1bb55d7 --- /dev/null +++ b/compat/thrust/random/detail/normal_distribution.inl @@ -0,0 +1,241 @@ +/* + * + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +// for floating point infinity +#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC +#include +#else +#include +#endif + +namespace thrust +{ + +namespace random +{ + + +template + normal_distribution + ::normal_distribution(RealType a, RealType b) + :super_t(),m_param(a,b) +{ +} // end normal_distribution::normal_distribution() + + +template + normal_distribution + ::normal_distribution(const param_type &parm) + :super_t(),m_param(parm) +{ +} // end normal_distribution::normal_distribution() + + +template + void normal_distribution + ::reset(void) +{ + super_t::reset(); +} // end normal_distribution::reset() + + +template + template + typename normal_distribution::result_type + normal_distribution + ::operator()(UniformRandomNumberGenerator &urng) +{ + return operator()(urng, m_param); +} // end normal_distribution::operator()() + + +template + template + typename normal_distribution::result_type + normal_distribution + ::operator()(UniformRandomNumberGenerator &urng, + const param_type &parm) +{ + return super_t::sample(urng, parm.first, parm.second); /* parm.first is the mean and parm.second the standard deviation, matching mean() and stddev() */ +} // end normal_distribution::operator()() + + +template + typename normal_distribution::param_type + normal_distribution + ::param(void) const +{ + return m_param; +} // end normal_distribution::param() + + +template + void normal_distribution + ::param(const param_type &parm) +{ + m_param = parm; +} // end normal_distribution::param() + + +template + typename normal_distribution::result_type + normal_distribution + ::min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const +{ + return -this->max(); /* max() yields +infinity, so this is -infinity */ +} // end normal_distribution::min() + + +template + typename normal_distribution::result_type + normal_distribution + ::max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const +{ + // XXX this solution is pretty terrible + // we can't use numeric_traits::max because nvcc will + // complain that it is a __host__ function + union + { + thrust::detail::uint32_t inf_as_int; + 
float result; + } hack; + + hack.inf_as_int = 0x7f800000u; /* IEEE-754 single-precision bit pattern of +infinity */ + + return hack.result; +} // end normal_distribution::max() + + +template + typename normal_distribution::result_type + normal_distribution + ::mean(void) const +{ + return m_param.first; +} // end normal_distribution::mean() + + +template + typename normal_distribution::result_type + normal_distribution + ::stddev(void) const +{ + return m_param.second; +} // end normal_distribution::stddev() + + +template + bool normal_distribution + ::equal(const normal_distribution &rhs) const +{ + return m_param == rhs.param(); +} + + +template + template + std::basic_ostream& + normal_distribution + ::stream_out(std::basic_ostream &os) const +{ + typedef std::basic_ostream ostream_type; + typedef typename ostream_type::ios_base ios_base; + + // save old flags and fill character + const typename ios_base::fmtflags flags = os.flags(); + const CharT fill = os.fill(); + + const CharT space = os.widen(' '); + os.flags(ios_base::dec | ios_base::fixed | ios_base::left); + os.fill(space); + + os << mean() << space << stddev(); + + // restore old flags and fill character + os.flags(flags); + os.fill(fill); + return os; +} + + +template + template + std::basic_istream& + normal_distribution + ::stream_in(std::basic_istream &is) +{ + typedef std::basic_istream istream_type; + typedef typename istream_type::ios_base ios_base; + + // save old flags + const typename ios_base::fmtflags flags = is.flags(); + + is.flags(ios_base::skipws); + + is >> m_param.first >> m_param.second; /* same order stream_out writes them: mean, then stddev */ + + // restore old flags + is.flags(flags); + return is; +} + + +template +bool operator==(const normal_distribution &lhs, + const normal_distribution &rhs) +{ + return thrust::random::detail::random_core_access::equal(lhs,rhs); +} + + +template +bool operator!=(const normal_distribution &lhs, + const normal_distribution &rhs) +{ + return !(lhs == rhs); +} + + +template +std::basic_ostream& +operator<<(std::basic_ostream &os, + const normal_distribution &d) 
+{ + return thrust::random::detail::random_core_access::stream_out(os,d); +} + + +template +std::basic_istream& +operator>>(std::basic_istream &is, + normal_distribution &d) +{ + return thrust::random::detail::random_core_access::stream_in(is,d); +} + + +} // end random + +} // end thrust + diff --git a/compat/thrust/random/detail/normal_distribution_base.h b/compat/thrust/random/detail/normal_distribution_base.h new file mode 100644 index 0000000..d916611 --- /dev/null +++ b/compat/thrust/random/detail/normal_distribution_base.h @@ -0,0 +1,149 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright Jens Maurer 2000-2001 + * Distributed under the Boost Software License, Version 1.0. 
(See + * accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace random +{ +namespace detail +{ + +// this version samples the normal distribution directly +// and uses the non-standard math function erfcinv +template + class normal_distribution_nvcc +{ + protected: + template + __host__ __device__ + RealType sample(UniformRandomNumberGenerator &urng, const RealType mean, const RealType stddev) + { + typedef typename UniformRandomNumberGenerator::result_type uint_type; + const uint_type urng_range = UniformRandomNumberGenerator::max - UniformRandomNumberGenerator::min; + + // Constants for conversion + const RealType S1 = static_cast(1) / urng_range; + const RealType S2 = S1 / 2; + + RealType S3 = static_cast(-1.4142135623730950488016887242097); // -sqrt(2) + + // Get the integer value + uint_type u = urng() - UniformRandomNumberGenerator::min; + + // Ensure the conversion to float will give a value in the range [0,0.5) + if(u > (urng_range / 2)) + { + u = urng_range - u; + S3 = -S3; + } + + // Convert to floating point in [0,0.5) + RealType p = u*S1 + S2; + + // Apply inverse error function + return mean + stddev * S3 * erfcinv(2 * p); + } + + // no-op + __host__ __device__ + void reset() {} +}; + +// this version samples the normal distribution using +// the Box-Muller transform (one cosine/sine pair per two uniform draws) +template + class normal_distribution_portable +{ + protected: + normal_distribution_portable() + : m_r1(), m_r2(), m_cached_rho(), m_valid(false) + {} + + normal_distribution_portable(const normal_distribution_portable &other) + : m_r1(other.m_r1), m_r2(other.m_r2), m_cached_rho(other.m_cached_rho), m_valid(other.m_valid) + {} /* the cached pair is copied too: with m_valid set, the next sample() reads m_r1 and m_cached_rho */ + + void reset() + { + m_valid = false; + } + + // note that we promise to call this member function with the same mean and stddev + template + __host__ __device__ + RealType sample(UniformRandomNumberGenerator &urng, const RealType mean, const RealType stddev) + { + // implementation from Boost + // allow for Koenig lookup + 
using std::sqrt; using std::log; using std::sin; using std::cos; + + if(!m_valid) + { + uniform_real_distribution u01; + m_r1 = u01(urng); + m_r2 = u01(urng); + m_cached_rho = sqrt(-RealType(2) * log(RealType(1)-m_r2)); + + m_valid = true; + } + else + { + m_valid = false; /* second call of the pair: reuse m_r1 and m_cached_rho, then force fresh draws next time */ + } + + const RealType pi = RealType(3.14159265358979323846); + + RealType result = m_cached_rho * (m_valid ? + cos(RealType(2)*pi*m_r1) : + sin(RealType(2)*pi*m_r1)); + + return mean + stddev * result; /* result is a standard-normal variate; scale by stddev and shift by mean */ + } + + private: + RealType m_r1, m_r2, m_cached_rho; + bool m_valid; +}; + +template + struct normal_distribution_base +{ +#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC + typedef normal_distribution_nvcc type; +#else + typedef normal_distribution_portable type; +#endif +}; + +} // end detail +} // end random +} // end thrust + diff --git a/compat/thrust/random/detail/random_core_access.h b/compat/thrust/random/detail/random_core_access.h new file mode 100644 index 0000000..81f58e2 --- /dev/null +++ b/compat/thrust/random/detail/random_core_access.h @@ -0,0 +1,57 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +namespace thrust +{ + +namespace random +{ + +namespace detail +{ + +struct random_core_access +{ /* forwards to the stream_out / stream_in / equal member functions of the engine or distribution it is given */ + +template +static OStream &stream_out(OStream &os, const EngineOrDistribution &x) +{ + return x.stream_out(os); +} + +template +static IStream &stream_in(IStream &is, EngineOrDistribution &x) +{ + return x.stream_in(is); +} + +template +__host__ __device__ +static bool equal(const EngineOrDistribution &lhs, const EngineOrDistribution &rhs) +{ + return lhs.equal(rhs); +} + +}; // end random_core_access + +} // end detail + +} // end random + +} // end thrust + diff --git a/compat/thrust/random/detail/subtract_with_carry_engine.inl b/compat/thrust/random/detail/subtract_with_carry_engine.inl new file mode 100644 index 0000000..a58b266 --- /dev/null +++ b/compat/thrust/random/detail/subtract_with_carry_engine.inl @@ -0,0 +1,203 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +namespace thrust +{ + +namespace random +{ + + +template + subtract_with_carry_engine + ::subtract_with_carry_engine(result_type value) +{ + seed(value); +} // end subtract_with_carry_engine::subtract_with_carry_engine() + + +template + void subtract_with_carry_engine + ::seed(result_type value) +{ + thrust::random::linear_congruential_engine e(value == 0u ? 
default_seed : value); + + // initialize state + for(size_t i = 0; i < long_lag; ++i) + { + m_x[i] = detail::mod(e()); + } // end for i + + m_carry = (m_x[long_lag-1] == 0); /* the initial carry is 1 only when the last seeded word is zero */ + m_k = 0; +} // end subtract_with_carry_engine::seed() + + +template + typename subtract_with_carry_engine::result_type + subtract_with_carry_engine + ::operator()(void) +{ + // XXX we probably need to cache these m_x[m_k] in a register + // maybe we need to cache the use of all member variables + int short_index = m_k - short_lag; + if(short_index < 0) + short_index += long_lag; + result_type xi; + if (m_x[short_index] >= m_x[m_k] + m_carry) + { + // x(n) >= 0 + xi = m_x[short_index] - m_x[m_k] - m_carry; + m_carry = 0; + } + else + { + // x(n) < 0 + xi = modulus - m_x[m_k] - m_carry + m_x[short_index]; + m_carry = 1; + } + m_x[m_k] = xi; + ++m_k; + if(m_k >= long_lag) + m_k = 0; + return xi; +} // end subtract_with_carry_engine::operator()() + + +template + void subtract_with_carry_engine + ::discard(unsigned long long z) +{ + for(; z > 0; --z) + { + this->operator()(); + } // end for +} // end subtract_with_carry_engine::discard() + + +template + template + std::basic_ostream& subtract_with_carry_engine + ::stream_out(std::basic_ostream &os) const +{ + typedef std::basic_ostream ostream_type; + typedef typename ostream_type::ios_base ios_base; + + const typename ios_base::fmtflags flags = os.flags(); + const CharT fill = os.fill(); + const CharT space = os.widen(' '); + os.flags(ios_base::dec | ios_base::fixed | ios_base::left); + os.fill(space); + + const UIntType long_lag = r; + + for(size_t i = 0; i < r; ++i) + os << m_x[(i + m_k) % long_lag] << space; /* the state is written rotated so that the word at m_k comes first */ + os << m_carry; + + os.flags(flags); + os.fill(fill); + return os; +} + + +template + template + std::basic_istream& subtract_with_carry_engine + ::stream_in(std::basic_istream &is) +{ + typedef std::basic_istream istream_type; + typedef typename istream_type::ios_base ios_base; + + const typename ios_base::fmtflags flags = is.flags(); + 
is.flags(ios_base::dec | ios_base::skipws); + + for(size_t i = 0; i < r; ++i) + is >> m_x[i]; + is >> m_carry; + + m_k = 0; /* stream_out emits the words starting at m_k, so the restored buffer begins at index 0 */ + + is.flags(flags); + return is; +} + + +template + bool subtract_with_carry_engine + ::equal(const subtract_with_carry_engine &rhs) const +{ + const UIntType long_lag = r; + + bool result = true; + for(size_t i = 0; i < r; ++i) + { + result &= (m_x[(i + m_k) % long_lag] == rhs.m_x[(i + rhs.m_k) % long_lag]); /* compare relative to each engine's own m_k */ + } + + // XXX not sure if this last check is necessary + result &= (m_carry == rhs.m_carry); + + return result; +} + + +template + std::basic_ostream& + operator<<(std::basic_ostream &os, + const subtract_with_carry_engine &e) +{ + return thrust::random::detail::random_core_access::stream_out(os,e); +} + + +template + std::basic_istream& + operator>>(std::basic_istream &is, + subtract_with_carry_engine &e) +{ + return thrust::random::detail::random_core_access::stream_in(is,e); +} + + +template + bool operator==(const subtract_with_carry_engine &lhs, + const subtract_with_carry_engine &rhs) +{ + return thrust::random::detail::random_core_access::equal(lhs,rhs); +} + + +template + bool operator!=(const subtract_with_carry_engine &lhs, + const subtract_with_carry_engine &rhs) +{ + return !(lhs == rhs); +} + + +} // end random + +} // end thrust + diff --git a/compat/thrust/random/detail/uniform_int_distribution.inl b/compat/thrust/random/detail/uniform_int_distribution.inl new file mode 100644 index 0000000..e92754c --- /dev/null +++ b/compat/thrust/random/detail/uniform_int_distribution.inl @@ -0,0 +1,232 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +namespace thrust +{ + +namespace random +{ + + +template + uniform_int_distribution + ::uniform_int_distribution(IntType a, IntType b) + :m_param(a,b) +{ +} // end uniform_int_distribution::uniform_int_distribution() + + +template + uniform_int_distribution + ::uniform_int_distribution(const param_type &parm) + :m_param(parm) +{ +} // end uniform_int_distribution::uniform_int_distribution() + + +template + void uniform_int_distribution + ::reset(void) +{ +} // end uniform_int_distribution::reset() + + +template + template + typename uniform_int_distribution::result_type + uniform_int_distribution + ::operator()(UniformRandomNumberGenerator &urng) +{ + return operator()(urng, m_param); +} // end uniform_int_distribution::operator()() + + +template + template + typename uniform_int_distribution::result_type + uniform_int_distribution + ::operator()(UniformRandomNumberGenerator &urng, const param_type &parm) +{ + // XXX this implementation is somewhat hacky and will skip + // values if the range of the RNG is smaller than the range of the distribution + // we should improve this implementation in a later version + + typedef typename thrust::detail::largest_available_float::type float_type; + + const float_type real_min(parm.first); + const float_type real_max(parm.second); + + // add one to the right end of the interval because it is half-open + // XXX adding 1.0 to a potentially large floating point number seems like a bad idea + uniform_real_distribution real_dist(real_min, real_max + float_type(1)); + + 
return static_cast(real_dist(urng)); /* converts the real sample drawn from [parm.first, parm.second + 1) to the integer result type */ +} // end uniform_int_distribution::operator()() + + +template + typename uniform_int_distribution::result_type + uniform_int_distribution + ::a(void) const +{ + return m_param.first; +} // end uniform_int_distribution::a() + + +template + typename uniform_int_distribution::result_type + uniform_int_distribution + ::b(void) const +{ + return m_param.second; +} // end uniform_int_distribution::b() + + +template + typename uniform_int_distribution::param_type + uniform_int_distribution + ::param(void) const +{ + return m_param; +} // end uniform_int_distribution::param() + + +template + void uniform_int_distribution + ::param(const param_type &parm) +{ + m_param = parm; +} // end uniform_int_distribution::param() + + +template + typename uniform_int_distribution::result_type + uniform_int_distribution + ::min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const +{ + return a(); +} // end uniform_int_distribution::min() + + +template + typename uniform_int_distribution::result_type + uniform_int_distribution + ::max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const +{ + return b(); +} // end uniform_int_distribution::max() + + +template + bool uniform_int_distribution + ::equal(const uniform_int_distribution &rhs) const +{ + return param() == rhs.param(); +} + + +template + template + std::basic_ostream& + uniform_int_distribution + ::stream_out(std::basic_ostream &os) const +{ + typedef std::basic_ostream ostream_type; + typedef typename ostream_type::ios_base ios_base; + + // save old flags and fill character + const typename ios_base::fmtflags flags = os.flags(); + const CharT fill = os.fill(); + + const CharT space = os.widen(' '); + os.flags(ios_base::dec | ios_base::fixed | ios_base::left); + os.fill(space); + + os << a() << space << b(); + + // restore old flags and fill character + os.flags(flags); + os.fill(fill); + return os; +} + + +template + template + std::basic_istream& + uniform_int_distribution + 
::stream_in(std::basic_istream &is) +{ + typedef std::basic_istream istream_type; + typedef typename istream_type::ios_base ios_base; + + // save old flags + const typename ios_base::fmtflags flags = is.flags(); + + is.flags(ios_base::skipws); + + is >> m_param.first >> m_param.second; /* same order stream_out writes them: a, then b */ + + // restore old flags + is.flags(flags); + return is; +} + + +template +bool operator==(const uniform_int_distribution &lhs, + const uniform_int_distribution &rhs) +{ + return thrust::random::detail::random_core_access::equal(lhs,rhs); +} + + +template +bool operator!=(const uniform_int_distribution &lhs, + const uniform_int_distribution &rhs) +{ + return !(lhs == rhs); +} + + +template +std::basic_ostream& +operator<<(std::basic_ostream &os, + const uniform_int_distribution &d) +{ + return thrust::random::detail::random_core_access::stream_out(os,d); +} + + +template +std::basic_istream& +operator>>(std::basic_istream &is, + uniform_int_distribution &d) +{ + return thrust::random::detail::random_core_access::stream_in(is,d); +} + + +} // end random + +} // end thrust + diff --git a/compat/thrust/random/detail/uniform_real_distribution.inl b/compat/thrust/random/detail/uniform_real_distribution.inl new file mode 100644 index 0000000..6f6d6b5 --- /dev/null +++ b/compat/thrust/random/detail/uniform_real_distribution.inl @@ -0,0 +1,217 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +namespace thrust +{ + +namespace random +{ + + +template + uniform_real_distribution + ::uniform_real_distribution(RealType a, RealType b) + :m_param(a,b) +{ +} // end uniform_real_distribution::uniform_real_distribution() + +template + uniform_real_distribution + ::uniform_real_distribution(const param_type &parm) + :m_param(parm) +{ +} // end uniform_real_distribution::uniform_real_distribution() + +template + void uniform_real_distribution + ::reset(void) +{ +} // end uniform_real_distribution::reset() + +template + template + typename uniform_real_distribution::result_type + uniform_real_distribution + ::operator()(UniformRandomNumberGenerator &urng) +{ + return operator()(urng, m_param); +} // end uniform_real_distribution::operator()() + +template + template + typename uniform_real_distribution::result_type + uniform_real_distribution + ::operator()(UniformRandomNumberGenerator &urng, + const param_type &parm) +{ + // call the urng & map its result to [0,1) + result_type result = static_cast(urng() - UniformRandomNumberGenerator::min); + + // adding one to the denominator ensures that the interval is half-open at 1.0 + // XXX adding 1.0 to a potentially large floating point number seems like a bad idea + // XXX OTOH adding 1 to what is potentially UINT_MAX also seems like a bad idea + // XXX we could statically check if 1u + (max - min) is representable and do that, otherwise use the current implementation + result /= (result_type(1) + static_cast(UniformRandomNumberGenerator::max - UniformRandomNumberGenerator::min)); + + return (result * (parm.second - parm.first)) + parm.first; /* rescale the unit-interval sample: width parm.second - parm.first, offset parm.first */ +} // end uniform_real_distribution::operator()() + +template + typename uniform_real_distribution::result_type + uniform_real_distribution + ::a(void) const +{ + return m_param.first; +} // end uniform_real_distribution::a() + +template + typename uniform_real_distribution::result_type + uniform_real_distribution + ::b(void) const +{ + return m_param.second; +} // end uniform_real_distribution::b() + 
+template + typename uniform_real_distribution::param_type + uniform_real_distribution + ::param(void) const +{ + return m_param; +} // end uniform_real_distribution::param() + +template + void uniform_real_distribution + ::param(const param_type &parm) +{ + m_param = parm; +} // end uniform_real_distribution::param() + +template + typename uniform_real_distribution::result_type + uniform_real_distribution + ::min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const +{ + return a(); +} // end uniform_real_distribution::min() + +template + typename uniform_real_distribution::result_type + uniform_real_distribution + ::max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const +{ + return b(); +} // end uniform_real_distribution::max() + + +template + bool uniform_real_distribution + ::equal(const uniform_real_distribution &rhs) const +{ + return m_param == rhs.param(); +} + + +template + template + std::basic_ostream& + uniform_real_distribution + ::stream_out(std::basic_ostream &os) const +{ + typedef std::basic_ostream ostream_type; + typedef typename ostream_type::ios_base ios_base; + + // save old flags and fill character + const typename ios_base::fmtflags flags = os.flags(); + const CharT fill = os.fill(); + + const CharT space = os.widen(' '); + os.flags(ios_base::dec | ios_base::fixed | ios_base::left); + os.fill(space); + + os << a() << space << b(); + + // restore old flags and fill character + os.flags(flags); + os.fill(fill); + return os; +} + + +template + template + std::basic_istream& + uniform_real_distribution + ::stream_in(std::basic_istream &is) +{ + typedef std::basic_istream istream_type; + typedef typename istream_type::ios_base ios_base; + + // save old flags + const typename ios_base::fmtflags flags = is.flags(); + + is.flags(ios_base::skipws); + + is >> m_param.first >> m_param.second; /* same order stream_out writes them: a, then b */ + + // restore old flags + is.flags(flags); + return is; +} + + +template +bool operator==(const uniform_real_distribution &lhs, + const uniform_real_distribution &rhs) +{ 
+ return thrust::random::detail::random_core_access::equal(lhs,rhs); +} + + +template +bool operator!=(const uniform_real_distribution &lhs, + const uniform_real_distribution &rhs) +{ + return !(lhs == rhs); +} + + +template +std::basic_ostream& +operator<<(std::basic_ostream &os, + const uniform_real_distribution &d) +{ + return thrust::random::detail::random_core_access::stream_out(os,d); +} + + +template +std::basic_istream& +operator>>(std::basic_istream &is, + uniform_real_distribution &d) +{ + return thrust::random::detail::random_core_access::stream_in(is,d); +} + + +} // end random + +} // end thrust + diff --git a/compat/thrust/random/detail/xor_combine_engine.inl b/compat/thrust/random/detail/xor_combine_engine.inl new file mode 100644 index 0000000..b138722 --- /dev/null +++ b/compat/thrust/random/detail/xor_combine_engine.inl @@ -0,0 +1,203 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +namespace thrust +{ + +namespace random +{ + +template + xor_combine_engine + ::xor_combine_engine(void) + :m_b1(),m_b2() +{ +} // end xor_combine_engine::xor_combine_engine() + +template + xor_combine_engine + ::xor_combine_engine(const base1_type &urng1, const base2_type &urng2) + :m_b1(urng1),m_b2(urng2) +{ +} // end xor_combine_engine::xor_combine_engine() + +template + xor_combine_engine + ::xor_combine_engine(result_type s) + :m_b1(s),m_b2(s) +{ +} // end xor_combine_engine::xor_combine_engine() + +template + void xor_combine_engine + ::seed(void) +{ + m_b1.seed(); + m_b2.seed(); +} // end xor_combine_engine::seed() + +template + void xor_combine_engine + ::seed(result_type s) +{ + m_b1.seed(s); + m_b2.seed(s); /* both base engines are seeded with the same value */ +} // end xor_combine_engine::seed() + +template + const typename xor_combine_engine::base1_type & + xor_combine_engine + ::base1(void) const +{ + return m_b1; +} // end xor_combine_engine::base1() + +template + const typename xor_combine_engine::base2_type & + xor_combine_engine + ::base2(void) const +{ + return m_b2; +} // end xor_combine_engine::base2() + +template + typename xor_combine_engine::result_type + xor_combine_engine + ::operator()(void) +{ + return (result_type(m_b1() - base1_type::min) << shift1) ^ + (result_type(m_b2() - base2_type::min) << shift2); /* each base output is offset by its engine's min before being shifted and XORed */ +} // end xor_combine_engine::operator()() + +template + void xor_combine_engine + ::discard(unsigned long long z) +{ + for(; z > 0; --z) + { + this->operator()(); + } // end for +} // end xor_combine_engine::discard() + + +template + template + std::basic_ostream& xor_combine_engine + ::stream_out(std::basic_ostream &os) const +{ + typedef std::basic_ostream ostream_type; + typedef typename ostream_type::ios_base ios_base; + + // save old flags and fill character + const typename ios_base::fmtflags flags = os.flags(); + const CharT fill = os.fill(); + + const CharT space = os.widen(' '); + os.flags(ios_base::dec | ios_base::fixed | ios_base::left); + 
os.fill(space); + + // output each base engine in turn + os << base1() << space << base2(); + + // restore old flags and fill character + os.flags(flags); + os.fill(fill); + return os; +} + + +template + template + std::basic_istream& xor_combine_engine + ::stream_in(std::basic_istream &is) +{ + typedef std::basic_istream istream_type; + typedef typename istream_type::ios_base ios_base; + + // save old flags + const typename ios_base::fmtflags flags = is.flags(); + + is.flags(ios_base::skipws); + + // input each base engine in turn + is >> m_b1 >> m_b2; + + // restore old flags + is.flags(flags); + return is; +} + + +template + bool xor_combine_engine + ::equal(const xor_combine_engine &rhs) const +{ + return (m_b1 == rhs.m_b1) && (m_b2 == rhs.m_b2); +} + + +template +std::basic_ostream& +operator<<(std::basic_ostream &os, + const xor_combine_engine &e) +{ + return thrust::random::detail::random_core_access::stream_out(os,e); +} + + +template +std::basic_istream& +operator>>(std::basic_istream &is, + xor_combine_engine &e) +{ + return thrust::random::detail::random_core_access::stream_in(is,e); +} + + +template +bool operator==(const xor_combine_engine &lhs, + const xor_combine_engine &rhs) +{ + return thrust::random::detail::random_core_access::equal(lhs,rhs); +} + + +template +bool operator!=(const xor_combine_engine &lhs, + const xor_combine_engine &rhs) +{ + return !(lhs == rhs); +} + + +} // end random + +} // end thrust + diff --git a/compat/thrust/random/detail/xor_combine_engine_max.h b/compat/thrust/random/detail/xor_combine_engine_max.h new file mode 100644 index 0000000..8bad9a4 --- /dev/null +++ b/compat/thrust/random/detail/xor_combine_engine_max.h @@ -0,0 +1,324 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +namespace random +{ + +namespace detail +{ + + +namespace math = thrust::detail::mpl::math; + + +namespace detail +{ + +// having two cases for this function avoids compile-time warnings about overflow +template + struct lshift_w +{ + static const UIntType value = 0; +}; + + +template + struct lshift_w +{ + static const UIntType value = lhs << rhs; +}; + +} // end detail + + +template + struct lshift_w +{ + static const bool shift_will_overflow = rhs >= w; + + static const UIntType value = detail::lshift_w::value; +}; + + +template + struct lshift + : lshift_w::digits, lhs, rhs> +{}; + + +template + struct two_to_the_power + : lshift +{}; + + +template + class xor_combine_engine_max_aux_constants +{ + public: + static const result_type two_to_the_d = two_to_the_power::value; + static const result_type c = lshift::value; + + static const result_type t = + math::max< + result_type, + c, + b + >::value; + + static const result_type u = + math::min< + result_type, + c, + b + >::value; + + static const result_type p = math::log2::value; + static const result_type two_to_the_p = two_to_the_power::value; + + static const result_type k = math::div::value; +}; + + +template struct xor_combine_engine_max_aux; + + +template + struct xor_combine_engine_max_aux_case4 +{ + typedef xor_combine_engine_max_aux_constants constants; + + static const result_type k_plus_1_times_two_to_the_p = + lshift< + result_type, + math::plus::value, + constants::p + >::value; + + static const 
result_type M = + xor_combine_engine_max_aux< + result_type, + math::div< + result_type, + math::mod< + result_type, + constants::u, + constants::two_to_the_p + >::value, + constants::two_to_the_p + >::value, + math::mod< + result_type, + constants::t, + constants::two_to_the_p + >::value, + d + >::value; + + static const result_type value = math::plus::value; +}; + + +template + struct xor_combine_engine_max_aux_case3 +{ + typedef xor_combine_engine_max_aux_constants constants; + + static const result_type k_plus_1_times_two_to_the_p = + lshift< + result_type, + math::plus::value, + constants::p + >::value; + + static const result_type M = + xor_combine_engine_max_aux< + result_type, + math::div< + result_type, + math::mod< + result_type, + constants::t, + constants::two_to_the_p + >::value, + constants::two_to_the_p + >::value, + math::mod< + result_type, + constants::u, + constants::two_to_the_p + >::value, + d + >::value; + + static const result_type value = math::plus::value; +}; + + + +template + struct xor_combine_engine_max_aux_case2 +{ + typedef xor_combine_engine_max_aux_constants constants; + + static const result_type k_plus_1_times_two_to_the_p = + lshift< + result_type, + math::plus::value, + constants::p + >::value; + + static const result_type value = + math::minus< + result_type, + k_plus_1_times_two_to_the_p, + 1 + >::value; +}; + + +template + struct xor_combine_engine_max_aux_case1 +{ + static const result_type c = lshift::value; + + static const result_type value = math::plus::value; +}; + + +template + struct xor_combine_engine_max_aux_2 +{ + typedef xor_combine_engine_max_aux_constants constants; + + static const result_type value = + thrust::detail::eval_if< + // if k is odd, then case 2 
+ math::is_odd::value, + thrust::detail::identity_< + thrust::detail::integral_constant< + result_type, + xor_combine_engine_max_aux_case2::value + > + >, + thrust::detail::eval_if< + // otherwise if a * 2^d >= b, then case 3 + a * constants::two_to_the_d >= b, + thrust::detail::identity_< + thrust::detail::integral_constant< + result_type, + xor_combine_engine_max_aux_case3::value + > + >, + // otherwise, case 4 + thrust::detail::identity_< + thrust::detail::integral_constant< + result_type, + xor_combine_engine_max_aux_case4::value + > + > + > + >::type::value; +}; + + +template::value)> + struct xor_combine_engine_max_aux_1 + : xor_combine_engine_max_aux_case1 +{}; + + +template + struct xor_combine_engine_max_aux_1 + : xor_combine_engine_max_aux_2 +{}; + + +template + struct xor_combine_engine_max_aux + : xor_combine_engine_max_aux_1 +{}; + + +template + struct xor_combine_engine_max +{ + static const size_t w = std::numeric_limits::digits; + + static const result_type m1 = + math::min< + result_type, + result_type(Engine1::max - Engine1::min), + two_to_the_power::value - 1 + >::value; + + static const result_type m2 = + math::min< + result_type, + result_type(Engine2::max - Engine2::min), + two_to_the_power::value - 1 + >::value; + + static const result_type s = s1 - s2; + + static const result_type M = + xor_combine_engine_max_aux< + result_type, + m1, + m2, + s + >::value; + + // the value is M(m1,m2,s) lshift_w s2 + static const result_type value = + lshift_w< + result_type, + w, + M, + s2 + >::value; +}; // end xor_combine_engine_max + +} // end detail + +} // end random + +} // end thrust + diff --git a/compat/thrust/random/discard_block_engine.h b/compat/thrust/random/discard_block_engine.h new file mode 100644 index 0000000..c902c58 --- /dev/null +++ b/compat/thrust/random/discard_block_engine.h @@ -0,0 +1,252 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use 
this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file discard_block_engine.h + * \brief A random number engine which adapts a base engine and produces + * numbers by discarding all but a contiguous block of its values. + */ + +#pragma once + +#include + +#include +#include +#include +#include + +namespace thrust +{ + +namespace random +{ + +/*! \addtogroup random_number_engine_adaptors Random Number Engine Adaptor Class Templates + * \ingroup random + * \{ + */ + +/*! \class discard_block_engine + * \brief A \p discard_block_engine adapts an existing base random number engine and produces + * random values by discarding some of the values returned by its base engine. + * Each cycle of the compound engine begins by returning \c r values successively produced + * by the base engine and ends by discarding p-r such values. The engine's state + * is the state of its base engine followed by the number of calls to operator() + * that have occurred since the beginning of the current cycle. + * + * \tparam Engine The type of the base random number engine to adapt. + * \tparam p The discard cycle length. + * \tparam r The number of values to return from the base engine. Because p-r will be + * discarded, r <= p.
+ * + * The following code snippet shows an example of using a \p discard_block_engine instance: + * + * \code + * #include + * #include + * #include + * + * int main(void) + * { + * // create a discard_block_engine from minstd_rand, with a cycle length of 13 + * // keep the first 10 values of every cycle, and discard the next 3 + * thrust::discard_block_engine rng; + * + * // print a random number to standard output + * std::cout << rng() << std::endl; + * + * return 0; + * } + * \endcode + */ +template + class discard_block_engine +{ + public: + // types + + /*! \typedef base_type + * \brief The type of the adapted base random number engine. + */ + typedef Engine base_type; + + /*! \typedef result_type + * \brief The type of the unsigned integer produced by this \p discard_block_engine. + */ + typedef typename base_type::result_type result_type; + + // engine characteristics + + /*! The length of the production cycle. + */ + static const size_t block_size = p; + + /*! The number of used numbers per production cycle. + */ + static const size_t used_block = r; + + /*! The smallest value this \p discard_block_engine may potentially produce. + */ + static const result_type min = base_type::min; + + /*! The largest value this \p discard_block_engine may potentially produce. + */ + static const result_type max = base_type::max; + + // constructors and seeding functions + + /*! This constructor constructs a new \p discard_block_engine and constructs + * its \p base_type engine using its null constructor. + */ + __host__ __device__ + discard_block_engine(); + + /*! This constructor constructs a new \p discard_block_engine using + * a given \p base_type engine to initialize its adapted base engine. + * + * \param urng A \p base_type to use to initialize this \p discard_block_engine's + * adapted base engine. + */ + __host__ __device__ + explicit discard_block_engine(const base_type &urng); + + /*! This constructor initializes a new \p discard_block_engine with a given seed.
+ * + * \param s The seed used to initialize this \p discard_block_engine's adapted base engine. + */ + __host__ __device__ + explicit discard_block_engine(result_type s); + + /*! This method initializes the state of this \p discard_block_engine's adapted base engine + * by using its \p default_seed value. + */ + __host__ __device__ + void seed(void); + + /*! This method initializes the state of this \p discard_block_engine's adapted base engine + * by using the given seed. + * + * \param s The seed with which to initialize this \p discard_block_engine's adapted base engine. + */ + __host__ __device__ + void seed(result_type s); + + // generating functions + + /*! This member function produces a new random value and updates this \p discard_block_engine's state. + * \return A new random number. + */ + __host__ __device__ + result_type operator()(void); + + /*! This member function advances this \p discard_block_engine's state a given number of times + * and discards the results. + * + * \param z The number of random values to discard. + * \note This function is provided because an implementation may be able to accelerate it. + */ + __host__ __device__ + void discard(unsigned long long z); + + // property functions + + /*! This member function returns a const reference to this \p discard_block_engine's + * adapted base engine. + * + * \return A const reference to the base engine this \p discard_block_engine adapts. + */ + __host__ __device__ + const base_type &base(void) const; + + /*! \cond + */ + private: + base_type m_e; + unsigned int m_n; + + friend struct thrust::random::detail::random_core_access; + + __host__ __device__ + bool equal(const discard_block_engine &rhs) const; + + template + std::basic_ostream& stream_out(std::basic_ostream &os) const; + + template + std::basic_istream& stream_in(std::basic_istream &is); + /*! \endcond + */ +}; // end discard_block_engine + + +/*! This function checks two \p discard_block_engines for equality.
+ * \param lhs The first \p discard_block_engine to test. + * \param rhs The second \p discard_block_engine to test. + * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. + */ +template +__host__ __device__ +bool operator==(const discard_block_engine &lhs, + const discard_block_engine &rhs); + + +/*! This function checks two \p discard_block_engines for inequality. + * \param lhs The first \p discard_block_engine to test. + * \param rhs The second \p discard_block_engine to test. + * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. + */ +template +__host__ __device__ +bool operator!=(const discard_block_engine &lhs, + const discard_block_engine &rhs); + + +/*! This function streams a discard_block_engine to a \p std::basic_ostream. + * \param os The \p basic_ostream to stream out to. + * \param e The \p discard_block_engine to stream out. + * \return \p os + */ +template +std::basic_ostream& +operator<<(std::basic_ostream &os, + const discard_block_engine &e); + + +/*! This function streams a discard_block_engine in from a std::basic_istream. + * \param is The \p basic_istream to stream from. + * \param e The \p discard_block_engine to stream in. + * \return \p is + */ +template +std::basic_istream& +operator>>(std::basic_istream &is, + discard_block_engine &e); + +/*! \} // end random_number_engine_adaptors + */ + +} // end random + +// import names into thrust:: +using random::discard_block_engine; + +} // end thrust + +#include + diff --git a/compat/thrust/random/linear_congruential_engine.h b/compat/thrust/random/linear_congruential_engine.h new file mode 100644 index 0000000..0added0 --- /dev/null +++ b/compat/thrust/random/linear_congruential_engine.h @@ -0,0 +1,295 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file linear_congruential_engine.h + * \brief A linear congruential pseudorandom number engine. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ + +namespace random +{ + +/*! \addtogroup random_number_engine_templates Random Number Engine Class Templates + * \ingroup random + * \{ + */ + +/*! \class linear_congruential_engine + * \brief A \p linear_congruential_engine random number engine produces unsigned integer + * random numbers using a linear congruential random number generation algorithm. + * + * The generation algorithm has the form x_i = (a * x_{i-1} + c) mod m. + * + * \tparam UIntType The type of unsigned integer to produce. + * \tparam a The multiplier used in the generation algorithm. + * \tparam c The increment used in the generation algorithm. + * \tparam m The modulus used in the generation algorithm. + * + * \note Inexperienced users should not use this class template directly. Instead, use + * \p minstd_rand or \p minstd_rand0. 
+ * + * The following code snippet shows examples of use of a \p linear_congruential_engine instance: + * + * \code + * #include + * #include + * + * int main(void) + * { + * // create a minstd_rand object, which is an instance of linear_congruential_engine + * thrust::minstd_rand rng1; + * + * // output some random values to cout + * std::cout << rng1() << std::endl; + * + * // a random value is printed + * + * // create a new minstd_rand from a seed + * thrust::minstd_rand rng2(13); + * + * // discard some random values + * rng2.discard(13); + * + * // stream the object to an iostream + * std::cout << rng2 << std::endl; + * + * // rng2's current state is printed + * + * // print the minimum and maximum values that minstd_rand can produce + * std::cout << thrust::minstd_rand::min << std::endl; + * std::cout << thrust::minstd_rand::max << std::endl; + * + * // the range of minstd_rand is printed + * + * // save the state of rng2 to a different object + * thrust::minstd_rand rng3 = rng2; + * + * // compare rng2 and rng3 + * std::cout << (rng2 == rng3) << std::endl; + * + * // 1 is printed + * + * // re-seed rng2 with a different seed + * rng2.seed(7); + * + * // compare rng2 and rng3 + * std::cout << (rng2 == rng3) << std::endl; + * + * // 0 is printed + * + * return 0; + * } + * + * \endcode + * + * \see thrust::random::minstd_rand + * \see thrust::random::minstd_rand0 + */ +template + class linear_congruential_engine +{ + public: + // types + + /*! \typedef result_type + * \brief The type of the unsigned integer produced by this \p linear_congruential_engine. + */ + typedef UIntType result_type; + + // engine characteristics + + /*! The multiplier used in the generation algorithm. + */ + static const result_type multiplier = a; + + /*! The increment used in the generation algorithm. + */ + static const result_type increment = c; + + /*! The modulus used in the generation algorithm. + */ + static const result_type modulus = m; + + /*! 
The smallest value this \p linear_congruential_engine may potentially produce. + */ + static const result_type min = c == 0u ? 1u : 0u; + + /*! The largest value this \p linear_congruential_engine may potentially produce. + */ + static const result_type max = m - 1u; + + /*! The default seed of this \p linear_congruential_engine. + */ + static const result_type default_seed = 1u; + + // constructors and seeding functions + + /*! This constructor, which optionally accepts a seed, initializes a new + * \p linear_congruential_engine. + * + * \param s The seed used to initialize this \p linear_congruential_engine's state. + */ + __host__ __device__ + explicit linear_congruential_engine(result_type s = default_seed); + + /*! This method initializes this \p linear_congruential_engine's state, and optionally accepts + * a seed value. + * + * \param s The seed used to initialize this \p linear_congruential_engine's state. + */ + __host__ __device__ + void seed(result_type s = default_seed); + + // generating functions + + /*! This member function produces a new random value and updates this \p linear_congruential_engine's state. + * \return A new random number. + */ + __host__ __device__ + result_type operator()(void); + + /*! This member function advances this \p linear_congruential_engine's state a given number of times + * and discards the results. + * + * \param z The number of random values to discard. + * \note This function is provided because an implementation may be able to accelerate it. + */ + __host__ __device__ + void discard(unsigned long long z); + + /*!
\cond + */ + private: + result_type m_x; + + static void transition(result_type &state); + + friend struct thrust::random::detail::random_core_access; + + friend struct thrust::random::detail::linear_congruential_engine_discard; + + __host__ __device__ + bool equal(const linear_congruential_engine &rhs) const; + + template + std::basic_ostream& stream_out(std::basic_ostream &os) const; + + template + std::basic_istream& stream_in(std::basic_istream &is); + + /*! \endcond + */ +}; // end linear_congruential_engine + + +/*! This function checks two \p linear_congruential_engines for equality. + * \param lhs The first \p linear_congruential_engine to test. + * \param rhs The second \p linear_congruential_engine to test. + * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. + */ +template +__host__ __device__ +bool operator==(const linear_congruential_engine &lhs, + const linear_congruential_engine &rhs); + + +/*! This function checks two \p linear_congruential_engines for inequality. + * \param lhs The first \p linear_congruential_engine to test. + * \param rhs The second \p linear_congruential_engine to test. + * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. + */ +template +__host__ __device__ +bool operator!=(const linear_congruential_engine &lhs, + const linear_congruential_engine &rhs); + + +/*! This function streams a linear_congruential_engine to a \p std::basic_ostream. + * \param os The \p basic_ostream to stream out to. + * \param e The \p linear_congruential_engine to stream out. + * \return \p os + */ +template +std::basic_ostream& +operator<<(std::basic_ostream &os, + const linear_congruential_engine &e); + + +/*! This function streams a linear_congruential_engine in from a std::basic_istream. + * \param is The \p basic_istream to stream from. + * \param e The \p linear_congruential_engine to stream in. 
+ * \return \p is + */ +template +std::basic_istream& +operator>>(std::basic_istream &is, + linear_congruential_engine &e); + + +/*! \} // random_number_engine_templates + */ + + +/*! \addtogroup predefined_random + * \{ + */ + +// XXX the type N2111 used here was uint_fast32_t + +/*! \typedef minstd_rand0 + * \brief A random number engine with predefined parameters which implements a version of + * the Minimal Standard random number generation algorithm. + * \note The 10000th consecutive invocation of a default-constructed object of type \p minstd_rand0 + * shall produce the value \c 1043618065 . + */ +typedef linear_congruential_engine minstd_rand0; + + +/*! \typedef minstd_rand + * \brief A random number engine with predefined parameters which implements a version of + * the Minimal Standard random number generation algorithm. + * \note The 10000th consecutive invocation of a default-constructed object of type \p minstd_rand + * shall produce the value \c 399268537 . + */ +typedef linear_congruential_engine minstd_rand; + +/*! \} // predefined_random + */ + +} // end random + +// import names into thrust:: +using random::linear_congruential_engine; +using random::minstd_rand; +using random::minstd_rand0; + +} // end thrust + +#include + diff --git a/compat/thrust/random/linear_feedback_shift_engine.h b/compat/thrust/random/linear_feedback_shift_engine.h new file mode 100644 index 0000000..f5646c9 --- /dev/null +++ b/compat/thrust/random/linear_feedback_shift_engine.h @@ -0,0 +1,230 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file linear_feedback_shift_engine.h + * \brief A linear feedback shift pseudorandom number generator. + */ + +/* + * Copyright Jens Maurer 2002 + * + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying NOTICE file for the complete license) + * + * For more information, see http://www.boost.org + */ + +#pragma once + +#include +#include +#include +#include // for size_t +#include + +namespace thrust +{ + + +namespace random +{ + +/*! \addtogroup random_number_engine_templates + * \{ + */ + +/*! \class linear_feedback_shift_engine + * \brief A \p linear_feedback_shift_engine random number engine produces + * unsigned integer random values using a linear feedback shift random number + * generation algorithm. + * + * \tparam UIntType The type of unsigned integer to produce. + * \tparam w The word size of the produced values (w <= sizeof(UIntType)). + * \tparam k The k parameter of Tausworthe's 1965 algorithm. + * \tparam q The q exponent of Tausworthe's 1965 algorithm. + * \tparam s The step size of Tausworthe's 1965 algorithm. + * + * \note linear_feedback_shift_engine is based on the Boost Template Library's linear_feedback_shift. + */ +template + class linear_feedback_shift_engine +{ + public: + // types + + /*! \typedef result_type + * \brief The type of the unsigned integer produced by this \p linear_feedback_shift_engine. + */ + typedef UIntType result_type; + + // engine characteristics + + /*! The word size of the produced values. + */ + static const size_t word_size = w; + + /*! 
A constant used in the generation algorithm. + */ + static const size_t exponent1 = k; + + /*! A constant used in the generation algorithm. + */ + static const size_t exponent2 = q; + + /*! The step size used in the generation algorithm. + */ + static const size_t step_size = s; + + /*! \cond + */ + private: + static const result_type wordmask = + detail::linear_feedback_shift_engine_wordmask< + result_type, + w + >::value; + /*! \endcond + */ + + public: + + /*! The smallest value this \p linear_feedback_shift_engine may potentially produce. + */ + static const result_type min = 0; + + /*! The largest value this \p linear_feedback_shift_engine may potentially produce. + */ + static const result_type max = wordmask; + + /*! The default seed of this \p linear_feedback_shift_engine. + */ + static const result_type default_seed = 341u; + + // constructors and seeding functions + + /*! This constructor, which optionally accepts a seed, initializes a new + * \p linear_feedback_shift_engine. + * + * \param value The seed used to initialize this \p linear_feedback_shift_engine's state. + */ + __host__ __device__ + explicit linear_feedback_shift_engine(result_type value = default_seed); + + /*! This method initializes this \p linear_feedback_shift_engine's state, and optionally accepts + * a seed value. + * + * \param value The seed used to initialize this \p linear_feedback_shift_engine's state. + */ + __host__ __device__ + void seed(result_type value = default_seed); + + // generating functions + + /*! This member function produces a new random value and updates this \p linear_feedback_shift_engine's state. + * \return A new random number. + */ + __host__ __device__ + result_type operator()(void); + + /*! This member function advances this \p linear_feedback_shift_engine's state a given number of times + * and discards the results. + * + * \param z The number of random values to discard.
+ * \note This function is provided because an implementation may be able to accelerate it. + */ + __host__ __device__ + void discard(unsigned long long z); + + /*! \cond + */ + private: + result_type m_value; + + friend struct thrust::random::detail::random_core_access; + + __host__ __device__ + bool equal(const linear_feedback_shift_engine &rhs) const; + + template + std::basic_ostream& stream_out(std::basic_ostream &os) const; + + template + std::basic_istream& stream_in(std::basic_istream &is); + + /*! \endcond + */ +}; // end linear_feedback_shift_engine + + +/*! This function checks two \p linear_feedback_shift_engines for equality. + * \param lhs The first \p linear_feedback_shift_engine to test. + * \param rhs The second \p linear_feedback_shift_engine to test. + * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. + */ +template +__host__ __device__ +bool operator==(const linear_feedback_shift_engine &lhs, + const linear_feedback_shift_engine &rhs); + + +/*! This function checks two \p linear_feedback_shift_engines for inequality. + * \param lhs The first \p linear_feedback_shift_engine to test. + * \param rhs The second \p linear_feedback_shift_engine to test. + * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. + */ +template +__host__ __device__ +bool operator!=(const linear_feedback_shift_engine &lhs, + const linear_feedback_shift_engine &rhs); + + +/*! This function streams a linear_feedback_shift_engine to a \p std::basic_ostream. + * \param os The \p basic_ostream to stream out to. + * \param e The \p linear_feedback_shift_engine to stream out. + * \return \p os + */ +template +std::basic_ostream& +operator<<(std::basic_ostream &os, + const linear_feedback_shift_engine &e); + + +/*! This function streams a linear_feedback_shift_engine in from a std::basic_istream. + * \param is The \p basic_istream to stream from. + * \param e The \p linear_feedback_shift_engine to stream in. 
+ * \return \p is + */ +template +std::basic_istream& +operator>>(std::basic_istream &is, + linear_feedback_shift_engine &e); + + +/*! \} // end random_number_engine_templates + */ + + +} // end random + +// import names into thrust:: +using random::linear_feedback_shift_engine; + +} // end thrust + +#include + diff --git a/compat/thrust/random/normal_distribution.h b/compat/thrust/random/normal_distribution.h new file mode 100644 index 0000000..5543f30 --- /dev/null +++ b/compat/thrust/random/normal_distribution.h @@ -0,0 +1,275 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file normal_distribution.h + * \brief A normal (Gaussian) distribution of real-valued numbers. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ + +namespace random +{ + + +/*! \addtogroup random_number_distributions + * \{ + */ + +/*! \class normal_distribution + * \brief A \p normal_distribution random number distribution produces floating point + * Normally distributed random numbers. + * + * \tparam RealType The type of floating point number to produce. 
+ * + * The following code snippet demonstrates examples of using a \p normal_distribution with a + * random number engine to produce random values drawn from the Normal distribution with a given + * mean and variance: + * + * \code + * #include + * #include + * + * int main(void) + * { + * // create a minstd_rand object to act as our source of randomness + * thrust::minstd_rand rng; + * + * // create a normal_distribution to produce floats from the Normal distribution + * // with mean 2.0 and standard deviation 3.5 + * thrust::random::normal_distribution dist(2.0f, 3.5f); + * + * // write a random number to standard output + * std::cout << dist(rng) << std::endl; + * + * // write the mean of the distribution, just in case we forgot + * std::cout << dist.mean() << std::endl; + * + * // 2.0 is printed + * + * // and the standard deviation + * std::cout << dist.stddev() << std::endl; + * + * // 3.5 is printed + * + * return 0; + * } + * \endcode + */ +template + class normal_distribution + : public detail::normal_distribution_base::type +{ + private: + typedef typename detail::normal_distribution_base::type super_t; + + public: + // types + + /*! \typedef result_type + * \brief The type of the floating point number produced by this \p normal_distribution. + */ + typedef RealType result_type; + + /*! \typedef param_type + * \brief The type of the object encapsulating this \p normal_distribution's parameters. + */ + typedef thrust::pair param_type; + + // constructors and reset functions + + /*! This constructor creates a new \p normal_distribution from two values defining the + * half-open interval of the distribution. + * + * \param mean The mean (expected value) of the distribution. Defaults to \c 0.0. + * \param stddev The standard deviation of the distribution. Defaults to \c 1.0. + */ + __host__ __device__ + explicit normal_distribution(RealType mean = 0.0, RealType stddev = 1.0); + + /*! 
This constructor creates a new \p normal_distribution from a \p param_type object + * encapsulating the parameters of the distribution. + * + * \param parm A \p param_type object encapsulating the parameters (i.e., the mean and standard deviation) of the distribution. + */ + __host__ __device__ + explicit normal_distribution(const param_type &parm); + + /*! Calling this member function guarantees that subsequent uses of this + * \p normal_distribution do not depend on values produced by any random + * number generator prior to invoking this function. + */ + __host__ __device__ + void reset(void); + + // generating functions + + /*! This method produces a new Normally distributed random value drawn from this \p normal_distribution's + * range using a \p UniformRandomNumberGenerator as a source of randomness. + * + * \param urng The \p UniformRandomNumberGenerator to use as a source of randomness. + */ + template + __host__ __device__ + result_type operator()(UniformRandomNumberGenerator &urng); + + /*! This method produces a new Normally distributed random value as if by creating a new \p normal_distribution + * from the given \p param_type object, and calling its operator() method with the given + * \p UniformRandomNumberGenerator as a source of randomness. + * + * \param urng The \p UniformRandomNumberGenerator to use as a source of randomness. + * \param parm A \p param_type object encapsulating the parameters of the \p normal_distribution + * to draw from. + */ + template + __host__ __device__ + result_type operator()(UniformRandomNumberGenerator &urng, const param_type &parm); + + // property functions + + /*! This method returns the value of the parameter with which this \p normal_distribution + * was constructed. + * + * \return The mean (expected value) of this \p normal_distribution's output. + */ + __host__ __device__ + result_type mean(void) const; + + /*! This method returns the value of the parameter with which this \p normal_distribution + * was constructed.
+ * + * \return The standard deviation of this \p normal_distribution's output. + */ + __host__ __device__ + result_type stddev(void) const; + + /*! This method returns a \p param_type object encapsulating the parameters with which this + * \p normal_distribution was constructed. + * + * \return A \p param_type object encapsulating the parameters (i.e., the mean and standard deviation) of this \p normal_distribution. + */ + __host__ __device__ + param_type param(void) const; + + /*! This method changes the parameters of this \p normal_distribution using the values encapsulated + * in a given \p param_type object. + * + * \param parm A \p param_type object encapsulating the new parameters (i.e., the mean and standard deviation) of this \p normal_distribution. + */ + __host__ __device__ + void param(const param_type &parm); + + /*! This method returns the smallest floating point number this \p normal_distribution can potentially produce. + * + * \return The lower bound of this \p normal_distribution's half-open interval. + */ + __host__ __device__ + result_type min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const; + + /*! This method returns the smallest number larger than the largest floating point number this \p normal_distribution can potentially produce. + * + * \return The upper bound of this \p normal_distribution's half-open interval. + */ + __host__ __device__ + result_type max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const; + + /*! \cond + */ + private: + param_type m_param; + + friend struct thrust::random::detail::random_core_access; + + __host__ __device__ + bool equal(const normal_distribution &rhs) const; + + template + std::basic_ostream& stream_out(std::basic_ostream &os) const; + + template + std::basic_istream& stream_in(std::basic_istream &is); + /*! \endcond + */ +}; // end normal_distribution + + +/*! This function checks two \p normal_distributions for equality. + * \param lhs The first \p normal_distribution to test.
+ * \param rhs The second \p normal_distribution to test. + * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. + */ +template +__host__ __device__ +bool operator==(const normal_distribution &lhs, + const normal_distribution &rhs); + + +/*! This function checks two \p normal_distributions for inequality. + * \param lhs The first \p normal_distribution to test. + * \param rhs The second \p normal_distribution to test. + * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. + */ +template +__host__ __device__ +bool operator!=(const normal_distribution &lhs, + const normal_distribution &rhs); + + +/*! This function streams a normal_distribution to a \p std::basic_ostream. + * \param os The \p basic_ostream to stream out to. + * \param d The \p normal_distribution to stream out. + * \return \p os + */ +template +std::basic_ostream& +operator<<(std::basic_ostream &os, + const normal_distribution &d); + + +/*! This function streams a normal_distribution in from a std::basic_istream. + * \param is The \p basic_istream to stream from. + * \param d The \p normal_distribution to stream in. + * \return \p is + */ +template +std::basic_istream& +operator>>(std::basic_istream &is, + normal_distribution &d); + + +/*! \} // end random_number_distributions + */ + + +} // end random + +using random::normal_distribution; + +} // end thrust + +#include + diff --git a/compat/thrust/random/subtract_with_carry_engine.h b/compat/thrust/random/subtract_with_carry_engine.h new file mode 100644 index 0000000..b888100 --- /dev/null +++ b/compat/thrust/random/subtract_with_carry_engine.h @@ -0,0 +1,256 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file subtract_with_carry_engine.h + * \brief A subtract-with-carry pseudorandom number generator + * based on Marsaglia & Zaman. + */ + +#pragma once + +#include +#include + +#include +#include // for size_t +#include + +namespace thrust +{ + +namespace random +{ + + +/*! \addtogroup random_number_engine_templates + * \{ + */ + +/*! \class subtract_with_carry_engine + * \brief A \p subtract_with_carry_engine random number engine produces unsigned + * integer random numbers using the subtract with carry algorithm of Marsaglia & Zaman. + * + * The generation algorithm is performed as follows: + * -# Let Y = X_{i-s} - X_{i-r} - c. + * -# Set X_i to y = Y mod m. Set \c c to \c 1 if Y < 0, otherwise set \c c to \c 0. + * + * This algorithm corresponds to a modular linear function of the form + * + * TA(x_i) = (a * x_i) mod b, where \c b is of the form m^r - m^s + 1 and + * a = b - (b-1)/m. + * + * \tparam UIntType The type of unsigned integer to produce. + * \tparam w The word size, in bits, of the produced values (w must be smaller than the number of bits in UIntType, because the modulus is computed as UIntType(1) << w). + * \tparam s The short lag of the generation algorithm. + * \tparam r The long lag of the generation algorithm. + * + * \note Inexperienced users should not use this class template directly. Instead, use + * \p ranlux24_base or \p ranlux48_base, which are instances of \p subtract_with_carry_engine. + * + * \see thrust::random::ranlux24_base + * \see thrust::random::ranlux48_base + */ +template + class subtract_with_carry_engine +{ + /*! \cond + */ + private: + static const UIntType modulus = UIntType(1) << w; + /*!
\endcond + */ + + public: + // types + + /*! \typedef result_type + * \brief The type of the unsigned integer produced by this \p subtract_with_carry_engine. + */ + typedef UIntType result_type; + + // engine characteristics + + /*! The word size of the produced values. + */ + static const size_t word_size = w; + + /*! The size of the short lag used in the generation algorithm. + */ + static const size_t short_lag = s; + + /*! The size of the long lag used in the generation algorithm. + */ + static const size_t long_lag = r; + + /*! The smallest value this \p subtract_with_carry_engine may potentially produce. + */ + static const result_type min = 0; + + /*! The largest value this \p subtract_with_carry_engine may potentially produce. + */ + static const result_type max = modulus - 1; + + /*! The default seed of this \p subtract_with_carry_engine. + */ + static const result_type default_seed = 19780503u; + + // constructors and seeding functions + + /*! This constructor, which optionally accepts a seed, initializes a new + * \p subtract_with_carry_engine. + * + * \param value The seed used to initialize this \p subtract_with_carry_engine's state. + */ + __host__ __device__ + explicit subtract_with_carry_engine(result_type value = default_seed); + + /*! This method initializes this \p subtract_with_carry_engine's state, and optionally accepts + * a seed value. + * + * \param value The seed used to initialize this \p subtract_with_carry_engine's state. + */ + __host__ __device__ + void seed(result_type value = default_seed); + + // generating functions + + /*! This member function produces a new random value and updates this \p subtract_with_carry_engine's state. + * \return A new random number. + */ + __host__ __device__ + result_type operator()(void); + + /*! This member function advances this \p subtract_with_carry_engine's state a given number of times + * and discards the results. + * + * \param z The number of random values to discard. 
+ * \note This function is provided because an implementation may be able to accelerate it. + */ + __host__ __device__ + void discard(unsigned long long z); + + /*! \cond + */ + private: + result_type m_x[long_lag]; + unsigned int m_k; + int m_carry; + + friend struct thrust::random::detail::random_core_access; + + __host__ __device__ + bool equal(const subtract_with_carry_engine &rhs) const; + + template + std::basic_ostream& stream_out(std::basic_ostream &os) const; + + template + std::basic_istream& stream_in(std::basic_istream &is); + + /*! \endcond + */ +}; // end subtract_with_carry_engine + + +/*! This function checks two \p subtract_with_carry_engines for equality. + * \param lhs The first \p subtract_with_carry_engine to test. + * \param rhs The second \p subtract_with_carry_engine to test. + * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. + */ +template +__host__ __device__ +bool operator==(const subtract_with_carry_engine &lhs, + const subtract_with_carry_engine &rhs); + + +/*! This function checks two \p subtract_with_carry_engines for inequality. + * \param lhs The first \p subtract_with_carry_engine to test. + * \param rhs The second \p subtract_with_carry_engine to test. + * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. + */ +template +__host__ __device__ +bool operator!=(const subtract_with_carry_engine&lhs, + const subtract_with_carry_engine&rhs); + + +/*! This function streams a subtract_with_carry_engine to a \p std::basic_ostream. + * \param os The \p basic_ostream to stream out to. + * \param e The \p subtract_with_carry_engine to stream out. + * \return \p os + */ +template +std::basic_ostream& +operator<<(std::basic_ostream &os, + const subtract_with_carry_engine &e); + + +/*! This function streams a subtract_with_carry_engine in from a std::basic_istream. + * \param is The \p basic_istream to stream from. + * \param e The \p subtract_with_carry_engine to stream in. 
+ * \return \p is + */ +template +std::basic_istream& +operator>>(std::basic_istream &is, + subtract_with_carry_engine &e); + + +/*! \} // end random_number_engine_templates + */ + + +/*! \addtogroup predefined_random + * \{ + */ + +// XXX N2111 uses uint_fast32_t here + +/*! \typedef ranlux24_base + * \brief A random number engine with predefined parameters which implements the + * base engine of the \p ranlux24 random number engine. + * \note The 10000th consecutive invocation of a default-constructed object of type \p ranlux24_base + * shall produce the value \c 7937952 . + */ +typedef subtract_with_carry_engine ranlux24_base; + + +// XXX N2111 uses uint_fast64_t here + +/*! \typedef ranlux48_base + * \brief A random number engine with predefined parameters which implements the + * base engine of the \p ranlux48 random number engine. + * \note The 10000th consecutive invocation of a default-constructed object of type \p ranlux48_base + * shall produce the value \c 192113843633948 . + */ +typedef subtract_with_carry_engine ranlux48_base; + +/*! \} // end predefined_random + */ + +} // end random + +// import names into thrust:: +using random::subtract_with_carry_engine; +using random::ranlux24_base; +using random::ranlux48_base; + +} // end thrust + +#include + diff --git a/compat/thrust/random/uniform_int_distribution.h b/compat/thrust/random/uniform_int_distribution.h new file mode 100644 index 0000000..d05f7fa --- /dev/null +++ b/compat/thrust/random/uniform_int_distribution.h @@ -0,0 +1,276 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file uniform_int_distribution.h + * \brief A uniform distribution of integer-valued numbers + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ + +namespace random +{ + +/*! \addtogroup random_number_distributions Random Number Distributions Class Templates + * \ingroup random + * \{ + */ + +/*! \class uniform_int_distribution + * \brief A \p uniform_int_distribution random number distribution produces signed or unsigned integer + * uniform random numbers from a given range. + * + * \tparam IntType The type of integer to produce. 
+ * + * The following code snippet demonstrates examples of using a \p uniform_int_distribution with a + * random number engine to produce random integers drawn from a given range: + * + * \code + * #include + * #include + * + * int main(void) + * { + * // create a minstd_rand object to act as our source of randomness + * thrust::minstd_rand rng; + * + * // create a uniform_int_distribution to produce ints from [-7,13] + * thrust::uniform_int_distribution dist(-7,13); + * + * // write a random number from the range [-7,13] to standard output + * std::cout << dist(rng) << std::endl; + * + * // write the range of the distribution, just in case we forgot + * std::cout << dist.min() << std::endl; + * + * // -7 is printed + * + * std::cout << dist.max() << std::endl; + * + * // 13 is printed + * + * // write the parameters of the distribution (which happen to be the bounds) to standard output + * std::cout << dist.a() << std::endl; + * + * // -7 is printed + * + * std::cout << dist.b() << std::endl; + * + * // 13 is printed + * + * return 0; + * } + * \endcode + */ +template + class uniform_int_distribution +{ + public: + // types + + /*! \typedef result_type + * \brief The type of the integer produced by this \p uniform_int_distribution. + */ + typedef IntType result_type; + + /*! \typedef param_type + * \brief The type of the object encapsulating this \p uniform_int_distribution's parameters. + */ + typedef thrust::pair param_type; + + // constructors and reset functions + + /*! This constructor creates a new \p uniform_int_distribution from two values defining the + * range of the distribution. + * + * \param a The smallest integer to potentially produce. Defaults to \c 0. + * \param b The largest integer to potentially produce. Defaults to the largest representable integer in + * the platform. + */ + __host__ __device__ + explicit uniform_int_distribution(IntType a = 0, IntType b = thrust::detail::integer_traits::const_max); + + /*! 
This constructor creates a new \p uniform_int_distribution from a \p param_type object + * encapsulating the range of the distribution. + * + * \param parm A \p param_type object encapsulating the parameters (i.e., the range) of the distribution. + */ + __host__ __device__ + explicit uniform_int_distribution(const param_type &parm); + + /*! This does nothing. It is included to conform to the requirements of the RandomDistribution concept. + */ + __host__ __device__ + void reset(void); + + // generating functions + + /*! This method produces a new uniform random integer drawn from this \p uniform_int_distribution's + * range using a \p UniformRandomNumberGenerator as a source of randomness. + * + * \param urng The \p UniformRandomNumberGenerator to use as a source of randomness. + */ + template + __host__ __device__ + result_type operator()(UniformRandomNumberGenerator &urng); + + /*! This method produces a new uniform random integer as if by creating a new \p uniform_int_distribution + * from the given \p param_type object, and calling its operator() method with the given + * \p UniformRandomNumberGenerator as a source of randomness. + * + * \param urng The \p UniformRandomNumberGenerator to use as a source of randomness. + * \param parm A \p param_type object encapsulating the parameters of the \p uniform_int_distribution + * to draw from. + */ + template + __host__ __device__ + result_type operator()(UniformRandomNumberGenerator &urng, const param_type &parm); + + // property functions + + /*! This method returns the value of the parameter with which this \p uniform_int_distribution + * was constructed. + * + * \return The lower bound of this \p uniform_int_distribution's range. + */ + __host__ __device__ + result_type a(void) const; + + /*! This method returns the value of the parameter with which this \p uniform_int_distribution + * was constructed. + * + * \return The upper bound of this \p uniform_int_distribution's range. 
+ */ + + __host__ __device__ + result_type b(void) const; + + /*! This method returns a \p param_type object encapsulating the parameters with which this + * \p uniform_int_distribution was constructed. + * + * \return A \p param_type object encapsulating the range of this \p uniform_int_distribution. + */ + __host__ __device__ + param_type param(void) const; + + /*! This method changes the parameters of this \p uniform_int_distribution using the values encapsulated + * in a given \p param_type object. + * + * \param parm A \p param_type object encapsulating the new range of this \p uniform_int_distribution. + */ + __host__ __device__ + void param(const param_type &parm); + + /*! This method returns the smallest integer this \p uniform_int_distribution can potentially produce. + * + * \return The lower bound of this \p uniform_int_distribution's range. + */ + __host__ __device__ + result_type min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const; + + /*! This method returns the largest integer this \p uniform_int_distribution can potentially produce. + * + * \return The upper bound of this \p uniform_int_distribution's range. + */ + __host__ __device__ + result_type max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const; + + /*! \cond + */ + private: + param_type m_param; + + friend struct thrust::random::detail::random_core_access; + + __host__ __device__ + bool equal(const uniform_int_distribution &rhs) const; + + template + std::basic_ostream& stream_out(std::basic_ostream &os) const; + + template + std::basic_istream& stream_in(std::basic_istream &is); + /*! \endcond + */ +}; // end uniform_int_distribution + + +/*! This function checks two \p uniform_int_distributions for equality. + * \param lhs The first \p uniform_int_distribution to test. + * \param rhs The second \p uniform_int_distribution to test. + * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. 
+ */ +template +__host__ __device__ +bool operator==(const uniform_int_distribution &lhs, + const uniform_int_distribution &rhs); + + +/*! This function checks two \p uniform_int_distributions for inequality. + * \param lhs The first \p uniform_int_distribution to test. + * \param rhs The second \p uniform_int_distribution to test. + * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. + */ +template +__host__ __device__ +bool operator!=(const uniform_int_distribution &lhs, + const uniform_int_distribution &rhs); + + +/*! This function streams a uniform_int_distribution to a \p std::basic_ostream. + * \param os The \p basic_ostream to stream out to. + * \param d The \p uniform_int_distribution to stream out. + * \return \p os + */ +template +std::basic_ostream& +operator<<(std::basic_ostream &os, + const uniform_int_distribution &d); + + +/*! This function streams a uniform_int_distribution in from a std::basic_istream. + * \param is The \p basic_istream to stream from. + * \param d The \p uniform_int_distribution to stream in. + * \return \p is + */ +template +std::basic_istream& +operator>>(std::basic_istream &is, + uniform_int_distribution &d); + + +/*! \} // end random_number_distributions + */ + + +} // end random + +using random::uniform_int_distribution; + +} // end thrust + +#include + diff --git a/compat/thrust/random/uniform_real_distribution.h b/compat/thrust/random/uniform_real_distribution.h new file mode 100644 index 0000000..ab85ab3 --- /dev/null +++ b/compat/thrust/random/uniform_real_distribution.h @@ -0,0 +1,274 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file uniform_real_distribution.h + * \brief A uniform distribution of real-valued numbers + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +namespace random +{ + + +/*! \addtogroup random_number_distributions + * \{ + */ + +/*! \class uniform_real_distribution + * \brief A \p uniform_real_distribution random number distribution produces floating point + * uniform random numbers from a half-open interval. + * + * \tparam RealType The type of floating point number to produce. + * + * The following code snippet demonstrates examples of using a \p uniform_real_distribution with a + * random number engine to produce random floating point numbers drawn from a given range: + * + * \code + * #include + * #include + * + * int main(void) + * { + * // create a minstd_rand object to act as our source of randomness + * thrust::minstd_rand rng; + * + * // create a uniform_real_distribution to produce floats from [-7,13) + * thrust::uniform_real_distribution dist(-7,13); + * + * // write a random number from the range [-7,13) to standard output + * std::cout << dist(rng) << std::endl; + * + * // write the range of the distribution, just in case we forgot + * std::cout << dist.min() << std::endl; + * + * // -7.0 is printed + * + * std::cout << dist.max() << std::endl; + * + * // 13.0 is printed + * + * // write the parameters of the distribution (which happen to be the bounds) to standard output + * std::cout << dist.a() << std::endl; + * + * // -7.0 is printed + * + * std::cout << dist.b() << std::endl; + * + * // 13.0 is 
printed + * + * return 0; + * } + * \endcode + */ +template + class uniform_real_distribution +{ + public: + // types + + /*! \typedef result_type + * \brief The type of the floating point number produced by this \p uniform_real_distribution. + */ + typedef RealType result_type; + + /*! \typedef param_type + * \brief The type of the object encapsulating this \p uniform_real_distribution's parameters. + */ + typedef thrust::pair param_type; + + // constructors and reset functions + + /*! This constructor creates a new \p uniform_real_distribution from two values defining the + * half-open interval of the distribution. + * + * \param a The smallest floating point number to potentially produce. Defaults to \c 0.0. + * \param b The smallest number larger than the largest floating point number to potentially produce. Defaults to \c 1.0. + */ + __host__ __device__ + explicit uniform_real_distribution(RealType a = 0.0, RealType b = 1.0); + + /*! This constructor creates a new \p uniform_real_distribution from a \p param_type object + * encapsulating the range of the distribution. + * + * \param parm A \p param_type object encapsulating the parameters (i.e., the range) of the distribution. + */ + __host__ __device__ + explicit uniform_real_distribution(const param_type &parm); + + /*! This does nothing. It is included to conform to the requirements of the RandomDistribution concept. + */ + __host__ __device__ + void reset(void); + + // generating functions + + /*! This method produces a new uniform random floating point number drawn from this \p uniform_real_distribution's + * range using a \p UniformRandomNumberGenerator as a source of randomness. + * + * \param urng The \p UniformRandomNumberGenerator to use as a source of randomness. + */ + template + __host__ __device__ + result_type operator()(UniformRandomNumberGenerator &urng); + + /*! 
This method produces a new uniform random floating point number as if by creating a new \p uniform_real_distribution + * from the given \p param_type object, and calling its operator() method with the given + * \p UniformRandomNumberGenerator as a source of randomness. + * + * \param urng The \p UniformRandomNumberGenerator to use as a source of randomness. + * \param parm A \p param_type object encapsulating the parameters of the \p uniform_real_distribution + * to draw from. + */ + template + __host__ __device__ + result_type operator()(UniformRandomNumberGenerator &urng, const param_type &parm); + + // property functions + + /*! This method returns the value of the parameter with which this \p uniform_real_distribution + * was constructed. + * + * \return The lower bound of this \p uniform_real_distribution's half-open interval. + */ + __host__ __device__ + result_type a(void) const; + + /*! This method returns the value of the parameter with which this \p uniform_real_distribution + * was constructed. + * + * \return The upper bound of this \p uniform_real_distribution's half-open interval. + */ + __host__ __device__ + result_type b(void) const; + + /*! This method returns a \p param_type object encapsulating the parameters with which this + * \p uniform_real_distribution was constructed. + * + * \return A \p param_type object encapsulating the half-open interval of this \p uniform_real_distribution. + */ + __host__ __device__ + param_type param(void) const; + + /*! This method changes the parameters of this \p uniform_real_distribution using the values encapsulated + * in a given \p param_type object. + * + * \param parm A \p param_type object encapsulating the new half-open interval of this \p uniform_real_distribution. + */ + __host__ __device__ + void param(const param_type &parm); + + /*! This method returns the smallest floating point number this \p uniform_real_distribution can potentially produce. 
+ * + * \return The lower bound of this \p uniform_real_distribution's half-open interval. + */ + __host__ __device__ + result_type min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const; + + /*! This method returns the smallest number larger than largest floating point number this \p uniform_real_distribution can potentially produce. + * + * \return The upper bound of this \p uniform_real_distribution's half-open interval. + */ + __host__ __device__ + result_type max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const; + + /*! \cond + */ + private: + param_type m_param; + + friend struct thrust::random::detail::random_core_access; + + __host__ __device__ + bool equal(const uniform_real_distribution &rhs) const; + + template + std::basic_ostream& stream_out(std::basic_ostream &os) const; + + template + std::basic_istream& stream_in(std::basic_istream &is); + /*! \endcond + */ +}; // end uniform_real_distribution + + +/*! This function checks two \p uniform_real_distributions for equality. + * \param lhs The first \p uniform_real_distribution to test. + * \param rhs The second \p uniform_real_distribution to test. + * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. + */ +template +__host__ __device__ +bool operator==(const uniform_real_distribution &lhs, + const uniform_real_distribution &rhs); + + +/*! This function checks two \p uniform_real_distributions for inequality. + * \param lhs The first \p uniform_real_distribution to test. + * \param rhs The second \p uniform_real_distribution to test. + * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. + */ +template +__host__ __device__ +bool operator!=(const uniform_real_distribution &lhs, + const uniform_real_distribution &rhs); + + +/*! This function streams a uniform_real_distribution to a \p std::basic_ostream. + * \param os The \p basic_ostream to stream out to. + * \param d The \p uniform_real_distribution to stream out. 
+ * \return \p os + */ +template +std::basic_ostream& +operator<<(std::basic_ostream &os, + const uniform_real_distribution &d); + + +/*! This function streams a uniform_real_distribution in from a std::basic_istream. + * \param is The \p basic_istream to stream from. + * \param d The \p uniform_real_distribution to stream in. + * \return \p is + */ +template +std::basic_istream& +operator>>(std::basic_istream &is, + uniform_real_distribution &d); + + +/*! \} // end random_number_distributions + */ + + +} // end random + +using random::uniform_real_distribution; + +} // end thrust + +#include + diff --git a/compat/thrust/random/xor_combine_engine.h b/compat/thrust/random/xor_combine_engine.h new file mode 100644 index 0000000..61eb5a5 --- /dev/null +++ b/compat/thrust/random/xor_combine_engine.h @@ -0,0 +1,271 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file xor_combine_engine.h + * \brief A pseudorandom number generator which produces pseudorandom + * numbers from two integer base engines by merging their + * pseudorandom numbers with bitwise exclusive-or. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include // for size_t + +namespace thrust +{ + +namespace random +{ + +/*! \addtogroup random_number_engine_adaptors + * \{ + */ + +/*! 
\class xor_combine_engine + * \brief An \p xor_combine_engine adapts two existing base random number engines and + * produces random values by combining the values produced by each. + * + * \tparam Engine1 The type of the first base random number engine to adapt. + * \tparam s1 The size of the first shift to use in the generation algorithm. + * \tparam Engine2 The type of the second base random number engine to adapt. + * \tparam s2 The size of the second shift to use in the generation algorithm. Defaults to \c 0. + * + * The following code snippet shows an example of using an \p xor_combine_engine instance: + * + * \code + * #include + * #include + * #include + * + * int main(void) + * { + * // create an xor_combine_engine from minstd_rand and minstd_rand0 + * // use a shift of 0 for each + * thrust::xor_combine_engine rng; + * + * // print a random number to standard output + * std::cout << rng() << std::endl; + * + * return 0; + * } + * \endcode + */ +template + class xor_combine_engine +{ + public: + // types + + /*! \typedef base1_type + * \brief The type of the first adapted base random number engine. + */ + typedef Engine1 base1_type; + + /*! \typedef base2_type + * \brief The type of the second adapted base random number engine. + */ + typedef Engine2 base2_type; + + /*! \typedef result_type + * \brief The type of the unsigned integer produced by this \p xor_combine_engine. + */ + typedef typename thrust::detail::eval_if< + (sizeof(typename base2_type::result_type) > sizeof(typename base1_type::result_type)), + thrust::detail::identity_, + thrust::detail::identity_ + >::type result_type; + + /*! The size of the first shift used in the generation algorithm. + */ + static const size_t shift1 = s1; + + /*! The size of the second shift used in the generation algorithm. + */ + static const size_t shift2 = s2; + + /*! The smallest value this \p xor_combine_engine may potentially produce. + */ + static const result_type min = 0; + + /*! 
The largest value this \p xor_combine_engine may potentially produce. + */ + static const result_type max = + detail::xor_combine_engine_max< + Engine1, s1, Engine2, s2, result_type + >::value; + + // constructors and seeding functions + + /*! This constructor constructs a new \p xor_combine_engine and constructs + * its adapted engines using their null constructors. + */ + __host__ __device__ + xor_combine_engine(void); + + /*! This constructor constructs a new \p xor_combine_engine using + * given \p base1_type and \p base2_type engines to initialize its adapted base engines. + * + * \param urng1 A \p base1_type to use to initialize this \p xor_combine_engine's + * first adapted base engine. + * \param urng2 A \p base2_type to use to initialize this \p xor_combine_engine's + * second adapted base engine. + */ + __host__ __device__ + xor_combine_engine(const base1_type &urng1, const base2_type &urng2); + + /*! This constructor initializes a new \p xor_combine_engine with a given seed. + * + * \param s The seed used to initialize this \p xor_combine_engine's adapted base engines. + */ + __host__ __device__ + xor_combine_engine(result_type s); + + /*! This method initializes the state of this \p xor_combine_engine's adapted base engines + * by using their \p default_seed values. + */ + __host__ __device__ + void seed(void); + + /*! This method initializes the state of this \p xor_combine_engine's adapted base engines + * by using the given seed. + * + * \param s The seed with which to initialize this \p xor_combine_engine's adapted base engines. + */ + __host__ __device__ + void seed(result_type s); + + // generating functions + + /*! This member function produces a new random value and updates this \p xor_combine_engine's state. + * \return A new random number. + */ + __host__ __device__ + result_type operator()(void); + + /*! This member function advances this \p xor_combine_engine's state a given number of times + * and discards the results. 
+ * + * \param z The number of random values to discard. + * \note This function is provided because an implementation may be able to accelerate it. + */ + __host__ __device__ + void discard(unsigned long long z); + + // property functions + + /*! This member function returns a const reference to this \p xor_combine_engine's + * first adapted base engine. + * + * \return A const reference to the first base engine this \p xor_combine_engine adapts. + */ + __host__ __device__ + const base1_type &base1(void) const; + + /*! This member function returns a const reference to this \p xor_combine_engine's + * second adapted base engine. + * + * \return A const reference to the second base engine this \p xor_combine_engine adapts. + */ + __host__ __device__ + const base2_type &base2(void) const; + + /*! \cond + */ + private: + base1_type m_b1; + base2_type m_b2; + + friend struct thrust::random::detail::random_core_access; + + __host__ __device__ + bool equal(const xor_combine_engine &rhs) const; + + template + std::basic_istream& stream_in(std::basic_istream &is); + + template + std::basic_ostream& stream_out(std::basic_ostream &os) const; + + /*! \endcond + */ +}; // end xor_combine_engine + + +/*! This function checks two \p xor_combine_engines for equality. + * \param lhs The first \p xor_combine_engine to test. + * \param rhs The second \p xor_combine_engine to test. + * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. + */ +template +__host__ __device__ +bool operator==(const xor_combine_engine &lhs, + const xor_combine_engine &rhs); + + +/*! This function checks two \p xor_combine_engines for inequality. + * \param lhs The first \p xor_combine_engine to test. + * \param rhs The second \p xor_combine_engine to test. + * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. + */ +template +__host__ __device__ +bool operator!=(const xor_combine_engine &lhs, + const xor_combine_engine &rhs); + + +/*! 
This function streams a xor_combine_engine to a \p std::basic_ostream. + * \param os The \p basic_ostream to stream out to. + * \param e The \p xor_combine_engine to stream out. + * \return \p os + */ +template +std::basic_ostream& +operator<<(std::basic_ostream &os, + const xor_combine_engine &e); + + +/*! This function streams a xor_combine_engine in from a std::basic_istream. + * \param is The \p basic_istream to stream from. + * \param e The \p xor_combine_engine to stream in. + * \return \p is + */ +template +std::basic_istream& +operator>>(std::basic_istream &is, + xor_combine_engine &e); + + +/*! \} // end random_number_engine_adaptors + */ + + +} // end random + +// import names into thrust:: +using random::xor_combine_engine; + +} // end thrust + +#include + diff --git a/compat/thrust/reduce.h b/compat/thrust/reduce.h new file mode 100644 index 0000000..1dc931f --- /dev/null +++ b/compat/thrust/reduce.h @@ -0,0 +1,779 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file reduce.h + * \brief Functions for reducing a range to a single value + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + + +/*! \addtogroup reductions + * \{ + */ + + +/*! \p reduce is a generalization of summation: it computes the sum (or some + * other binary operation) of all the elements in the range [first, + * last). 
This version of \p reduce uses \c 0 as the initial value of the + * reduction. \p reduce is similar to the C++ Standard Template Library's + * std::accumulate. The primary difference between the two functions + * is that std::accumulate guarantees the order of summation, while + * \p reduce requires associativity of the binary operation to parallelize + * the reduction. + * + * Note that \p reduce also assumes that the binary reduction operator (in this + * case operator+) is commutative. If the reduction operator is not commutative + * then \p thrust::reduce should not be used. Instead, one could use + * \p inclusive_scan (which does not require commutativity) and select the + * last element of the output array. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \return The result of the reduction. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator + * and if \c x and \c y are objects of \p InputIterator's \c value_type, + * then x + y is defined and is convertible to \p InputIterator's + * \c value_type. If \c T is \c InputIterator's \c value_type, then + * T(0) is defined. + * + * The following code snippet demonstrates how to use \p reduce to compute + * the sum of a sequence of integers using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * int data[6] = {1, 0, 2, 2, 1, 3}; + * int result = thrust::reduce(thrust::host, data, data + 6); + * + * // result == 9 + * \endcode + * + * \see http://www.sgi.com/tech/stl/accumulate.html + */ +template + typename thrust::iterator_traits::value_type + reduce(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last); + + +/*! 
\p reduce is a generalization of summation: it computes the sum (or some + * other binary operation) of all the elements in the range [first, + * last). This version of \p reduce uses \c 0 as the initial value of the + * reduction. \p reduce is similar to the C++ Standard Template Library's + * std::accumulate. The primary difference between the two functions + * is that std::accumulate guarantees the order of summation, while + * \p reduce requires associativity of the binary operation to parallelize + * the reduction. + * + * Note that \p reduce also assumes that the binary reduction operator (in this + * case operator+) is commutative. If the reduction operator is not commutative + * then \p thrust::reduce should not be used. Instead, one could use + * \p inclusive_scan (which does not require commutativity) and select the + * last element of the output array. + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \return The result of the reduction. + * + * \tparam InputIterator is a model of Input Iterator + * and if \c x and \c y are objects of \p InputIterator's \c value_type, + * then x + y is defined and is convertible to \p InputIterator's + * \c value_type. If \c T is \c InputIterator's \c value_type, then + * T(0) is defined. + * + * The following code snippet demonstrates how to use \p reduce to compute + * the sum of a sequence of integers. + * + * \code + * #include + * ... + * int data[6] = {1, 0, 2, 2, 1, 3}; + * int result = thrust::reduce(data, data + 6); + * + * // result == 9 + * \endcode + * + * \see http://www.sgi.com/tech/stl/accumulate.html + */ +template typename + thrust::iterator_traits::value_type reduce(InputIterator first, InputIterator last); + + +/*! \p reduce is a generalization of summation: it computes the sum (or some + * other binary operation) of all the elements in the range [first, + * last). This version of \p reduce uses \p init as the initial value of the + * reduction. 
\p reduce is similar to the C++ Standard Template Library's + * std::accumulate. The primary difference between the two functions + * is that std::accumulate guarantees the order of summation, while + * \p reduce requires associativity of the binary operation to parallelize + * the reduction. + * + * Note that \p reduce also assumes that the binary reduction operator (in this + * case operator+) is commutative. If the reduction operator is not commutative + * then \p thrust::reduce should not be used. Instead, one could use + * \p inclusive_scan (which does not require commutativity) and select the + * last element of the output array. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param init The initial value. + * \return The result of the reduction. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator + * and if \c x and \c y are objects of \p InputIterator's \c value_type, + * then x + y is defined and is convertible to \p T. + * \tparam T is convertible to \p InputIterator's \c value_type. + * + * The following code snippet demonstrates how to use \p reduce to compute + * the sum of a sequence of integers including an initialization value using the \p thrust::host + * execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * int data[6] = {1, 0, 2, 2, 1, 3}; + * int result = thrust::reduce(thrust::host, data, data + 6, 1); + * + * // result == 10 + * \endcode + * + * \see http://www.sgi.com/tech/stl/accumulate.html + */ +template + T reduce(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + T init); + + +/*! 
\p reduce is a generalization of summation: it computes the sum (or some + * other binary operation) of all the elements in the range [first, + * last). This version of \p reduce uses \p init as the initial value of the + * reduction. \p reduce is similar to the C++ Standard Template Library's + * std::accumulate. The primary difference between the two functions + * is that std::accumulate guarantees the order of summation, while + * \p reduce requires associativity of the binary operation to parallelize + * the reduction. + * + * Note that \p reduce also assumes that the binary reduction operator (in this + * case operator+) is commutative. If the reduction operator is not commutative + * then \p thrust::reduce should not be used. Instead, one could use + * \p inclusive_scan (which does not require commutativity) and select the + * last element of the output array. + * + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param init The initial value. + * \return The result of the reduction. + * + * \tparam InputIterator is a model of Input Iterator + * and if \c x and \c y are objects of \p InputIterator's \c value_type, + * then x + y is defined and is convertible to \p T. + * \tparam T is convertible to \p InputIterator's \c value_type. + * + * The following code snippet demonstrates how to use \p reduce to compute + * the sum of a sequence of integers including an initialization value. + * + * \code + * #include + * ... + * int data[6] = {1, 0, 2, 2, 1, 3}; + * int result = thrust::reduce(data, data + 6, 1); + * + * // result == 10 + * \endcode + * + * \see http://www.sgi.com/tech/stl/accumulate.html + */ +template + T reduce(InputIterator first, + InputIterator last, + T init); + + +/*! \p reduce is a generalization of summation: it computes the sum (or some + * other binary operation) of all the elements in the range [first, + * last). 
This version of \p reduce uses \p init as the initial value of the + * reduction and \p binary_op as the binary function used for summation. \p reduce + * is similar to the C++ Standard Template Library's std::accumulate. + * The primary difference between the two functions is that std::accumulate + * guarantees the order of summation, while \p reduce requires associativity of + * \p binary_op to parallelize the reduction. + * + * Note that \p reduce also assumes that the binary reduction operator (in this + * case \p binary_op) is commutative. If the reduction operator is not commutative + * then \p thrust::reduce should not be used. Instead, one could use + * \p inclusive_scan (which does not require commutativity) and select the + * last element of the output array. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param init The initial value. + * \param binary_op The binary function used to 'sum' values. + * \return The result of the reduction. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator + * and \c InputIterator's \c value_type is convertible to \c T. + * \tparam T is a model of Assignable, + * and is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type. + * \tparam BinaryFunction is a model of Binary Function, + * and \p BinaryFunction's \c result_type is convertible to \p T. + * + * The following code snippet demonstrates how to use \p reduce to + * compute the maximum value of a sequence of integers using the \p thrust::host execution policy + * for parallelization: + * + * \code + * #include + * #include + * #include + * ... 
+ * int data[6] = {1, 0, 2, 2, 1, 3}; + * int result = thrust::reduce(thrust::host, + * data, data + 6, + * -1, + * thrust::maximum()); + * // result == 3 + * \endcode + * + * \see http://www.sgi.com/tech/stl/accumulate.html + * \see transform_reduce + */ +template + T reduce(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + T init, + BinaryFunction binary_op); + + +/*! \p reduce is a generalization of summation: it computes the sum (or some + * other binary operation) of all the elements in the range [first, + * last). This version of \p reduce uses \p init as the initial value of the + * reduction and \p binary_op as the binary function used for summation. \p reduce + * is similar to the C++ Standard Template Library's std::accumulate. + * The primary difference between the two functions is that std::accumulate + * guarantees the order of summation, while \p reduce requires associativity of + * \p binary_op to parallelize the reduction. + * + * Note that \p reduce also assumes that the binary reduction operator (in this + * case \p binary_op) is commutative. If the reduction operator is not commutative + * then \p thrust::reduce should not be used. Instead, one could use + * \p inclusive_scan (which does not require commutativity) and select the + * last element of the output array. + * + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param init The initial value. + * \param binary_op The binary function used to 'sum' values. + * \return The result of the reduction. + * + * \tparam InputIterator is a model of Input Iterator + * and \c InputIterator's \c value_type is convertible to \c T. + * \tparam T is a model of Assignable, + * and is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type. + * \tparam BinaryFunction is a model of Binary Function, + * and \p BinaryFunction's \c result_type is convertible to \p T. 
+ * + * The following code snippet demonstrates how to use \p reduce to + * compute the maximum value of a sequence of integers. + * + * \code + * #include + * #include + * ... + * int data[6] = {1, 0, 2, 2, 1, 3}; + * int result = thrust::reduce(data, data + 6, + * -1, + * thrust::maximum()); + * // result == 3 + * \endcode + * + * \see http://www.sgi.com/tech/stl/accumulate.html + * \see transform_reduce + */ +template + T reduce(InputIterator first, + InputIterator last, + T init, + BinaryFunction binary_op); + + +/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs. + * For each group of consecutive keys in the range [keys_first, keys_last) + * that are equal, \p reduce_by_key copies the first element of the group to the + * \c keys_output. The corresponding values in the range are reduced using the + * \c plus and the result copied to \c values_output. + * + * This version of \p reduce_by_key uses the function object \c equal_to + * to test for equality and \c plus to reduce values with equal keys. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first The beginning of the input key range. + * \param keys_last The end of the input key range. + * \param values_first The beginning of the input value range. + * \param keys_output The beginning of the output key range. + * \param values_output The beginning of the output value range. + * \return A pair of iterators at end of the ranges [keys_output, keys_output_last) and [values_output, values_output_last). + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * \tparam InputIterator2 is a model of Input Iterator, + * \tparam OutputIterator1 is a model of Output Iterator and + * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. 
+ * \tparam OutputIterator2 is a model of Output Iterator and + * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. + * + * \pre The input ranges shall not overlap either output range. + * + * The following code snippet demonstrates how to use \p reduce_by_key to + * compact a sequence of key/value pairs and sum values with equal keys using the \p thrust::host + * execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys + * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values + * int C[N]; // output keys + * int D[N]; // output values + * + * thrust::pair new_end; + * new_end = thrust::reduce_by_key(thrust::host, A, A + N, B, C, D); + * + * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4. + * // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4. + * \endcode + * + * \see reduce + * \see unique_copy + * \see unique_by_key + * \see unique_by_key_copy + */ +template + thrust::pair + reduce_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output); + + +/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs. + * For each group of consecutive keys in the range [keys_first, keys_last) + * that are equal, \p reduce_by_key copies the first element of the group to the + * \c keys_output. The corresponding values in the range are reduced using the + * \c plus and the result copied to \c values_output. + * + * This version of \p reduce_by_key uses the function object \c equal_to + * to test for equality and \c plus to reduce values with equal keys. + * + * \param keys_first The beginning of the input key range. + * \param keys_last The end of the input key range. 
+ * \param values_first The beginning of the input value range. + * \param keys_output The beginning of the output key range. + * \param values_output The beginning of the output value range. + * \return A pair of iterators at end of the ranges [keys_output, keys_output_last) and [values_output, values_output_last). + * + * \tparam InputIterator1 is a model of Input Iterator, + * \tparam InputIterator2 is a model of Input Iterator, + * \tparam OutputIterator1 is a model of Output Iterator and + * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. + * \tparam OutputIterator2 is a model of Output Iterator and + * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. + * + * \pre The input ranges shall not overlap either output range. + * + * The following code snippet demonstrates how to use \p reduce_by_key to + * compact a sequence of key/value pairs and sum values with equal keys. + * + * \code + * #include + * ... + * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys + * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values + * int C[N]; // output keys + * int D[N]; // output values + * + * thrust::pair new_end; + * new_end = thrust::reduce_by_key(A, A + N, B, C, D); + * + * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4. + * // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4. + * \endcode + * + * \see reduce + * \see unique_copy + * \see unique_by_key + * \see unique_by_key_copy + */ +template + thrust::pair + reduce_by_key(InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output); + + +/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs. 
+ * For each group of consecutive keys in the range [keys_first, keys_last) + * that are equal, \p reduce_by_key copies the first element of the group to the + * \c keys_output. The corresponding values in the range are reduced using the + * \c plus and the result copied to \c values_output. + * + * This version of \p reduce_by_key uses the function object \c binary_pred + * to test for equality and \c plus to reduce values with equal keys. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first The beginning of the input key range. + * \param keys_last The end of the input key range. + * \param values_first The beginning of the input value range. + * \param keys_output The beginning of the output key range. + * \param values_output The beginning of the output value range. + * \param binary_pred The binary predicate used to determine equality. + * \return A pair of iterators at end of the ranges [keys_output, keys_output_last) and [values_output, values_output_last). + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * \tparam InputIterator2 is a model of Input Iterator, + * \tparam OutputIterator1 is a model of Output Iterator and + * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. + * \tparam OutputIterator2 is a model of Output Iterator and + * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. + * \tparam BinaryPredicate is a model of Binary Predicate. + * + * \pre The input ranges shall not overlap either output range. + * + * The following code snippet demonstrates how to use \p reduce_by_key to + * compact a sequence of key/value pairs and sum values with equal keys using the \p thrust::host + * execution policy for parallelization: + * + * \code + * #include + * #include + * ... 
+ * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys + * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values + * int C[N]; // output keys + * int D[N]; // output values + * + * thrust::pair new_end; + * thrust::equal_to binary_pred; + * new_end = thrust::reduce_by_key(thrust::host, A, A + N, B, C, D, binary_pred); + * + * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4. + * // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4. + * \endcode + * + * \see reduce + * \see unique_copy + * \see unique_by_key + * \see unique_by_key_copy + */ +template + thrust::pair + reduce_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred); + + +/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs. + * For each group of consecutive keys in the range [keys_first, keys_last) + * that are equal, \p reduce_by_key copies the first element of the group to the + * \c keys_output. The corresponding values in the range are reduced using the + * \c plus and the result copied to \c values_output. + * + * This version of \p reduce_by_key uses the function object \c binary_pred + * to test for equality and \c plus to reduce values with equal keys. + * + * \param keys_first The beginning of the input key range. + * \param keys_last The end of the input key range. + * \param values_first The beginning of the input value range. + * \param keys_output The beginning of the output key range. + * \param values_output The beginning of the output value range. + * \param binary_pred The binary predicate used to determine equality. + * \return A pair of iterators at end of the ranges [keys_output, keys_output_last) and [values_output, values_output_last). 
+ * + * \tparam InputIterator1 is a model of Input Iterator, + * \tparam InputIterator2 is a model of Input Iterator, + * \tparam OutputIterator1 is a model of Output Iterator and + * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. + * \tparam OutputIterator2 is a model of Output Iterator and + * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. + * \tparam BinaryPredicate is a model of Binary Predicate. + * + * \pre The input ranges shall not overlap either output range. + * + * The following code snippet demonstrates how to use \p reduce_by_key to + * compact a sequence of key/value pairs and sum values with equal keys. + * + * \code + * #include + * ... + * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys + * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values + * int C[N]; // output keys + * int D[N]; // output values + * + * thrust::pair new_end; + * thrust::equal_to binary_pred; + * new_end = thrust::reduce_by_key(A, A + N, B, C, D, binary_pred); + * + * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4. + * // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4. + * \endcode + * + * \see reduce + * \see unique_copy + * \see unique_by_key + * \see unique_by_key_copy + */ +template + thrust::pair + reduce_by_key(InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred); + + +/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs. + * For each group of consecutive keys in the range [keys_first, keys_last) + * that are equal, \p reduce_by_key copies the first element of the group to the + * \c keys_output. The corresponding values in the range are reduced using the + * \c BinaryFunction \c binary_op and the result copied to \c values_output. 
+ * Specifically, if consecutive key iterators \c i and \c (i + 1) are + * such that binary_pred(*i, *(i+1)) is \c true, then the corresponding + * values are reduced to a single value with \c binary_op. + * + * This version of \p reduce_by_key uses the function object \c binary_pred + * to test for equality and \c binary_op to reduce values with equal keys. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first The beginning of the input key range. + * \param keys_last The end of the input key range. + * \param values_first The beginning of the input value range. + * \param keys_output The beginning of the output key range. + * \param values_output The beginning of the output value range. + * \param binary_pred The binary predicate used to determine equality. + * \param binary_op The binary function used to accumulate values. + * \return A pair of iterators at end of the ranges [keys_output, keys_output_last) and [values_output, values_output_last). + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * \tparam InputIterator2 is a model of Input Iterator, + * \tparam OutputIterator1 is a model of Output Iterator and + * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. + * \tparam OutputIterator2 is a model of Output Iterator and + * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. + * \tparam BinaryPredicate is a model of Binary Predicate. + * \tparam BinaryFunction is a model of Binary Function + * and \c BinaryFunction's \c result_type is convertible to \c OutputIterator2's \c value_type. + * + * \pre The input ranges shall not overlap either output range. 
+ * + * The following code snippet demonstrates how to use \p reduce_by_key to + * compact a sequence of key/value pairs and sum values with equal keys using the \p thrust::host + * execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys + * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values + * int C[N]; // output keys + * int D[N]; // output values + * + * thrust::pair new_end; + * thrust::equal_to binary_pred; + * thrust::plus binary_op; + * new_end = thrust::reduce_by_key(thrust::host, A, A + N, B, C, D, binary_pred, binary_op); + * + * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4. + * // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4. + * \endcode + * + * \see reduce + * \see unique_copy + * \see unique_by_key + * \see unique_by_key_copy + */ +template + thrust::pair + reduce_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred, + BinaryFunction binary_op); + + +/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs. + * For each group of consecutive keys in the range [keys_first, keys_last) + * that are equal, \p reduce_by_key copies the first element of the group to the + * \c keys_output. The corresponding values in the range are reduced using the + * \c BinaryFunction \c binary_op and the result copied to \c values_output. + * Specifically, if consecutive key iterators \c i and \c (i + 1) are + * such that binary_pred(*i, *(i+1)) is \c true, then the corresponding + * values are reduced to a single value with \c binary_op. + * + * This version of \p reduce_by_key uses the function object \c binary_pred + * to test for equality and \c binary_op to reduce values with equal keys. 
+ * + * \param keys_first The beginning of the input key range. + * \param keys_last The end of the input key range. + * \param values_first The beginning of the input value range. + * \param keys_output The beginning of the output key range. + * \param values_output The beginning of the output value range. + * \param binary_pred The binary predicate used to determine equality. + * \param binary_op The binary function used to accumulate values. + * \return A pair of iterators at end of the ranges [keys_output, keys_output_last) and [values_output, values_output_last). + * + * \tparam InputIterator1 is a model of Input Iterator, + * \tparam InputIterator2 is a model of Input Iterator, + * \tparam OutputIterator1 is a model of Output Iterator and + * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. + * \tparam OutputIterator2 is a model of Output Iterator and + * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. + * \tparam BinaryPredicate is a model of Binary Predicate. + * \tparam BinaryFunction is a model of Binary Function + * and \c BinaryFunction's \c result_type is convertible to \c OutputIterator2's \c value_type. + * + * \pre The input ranges shall not overlap either output range. + * + * The following code snippet demonstrates how to use \p reduce_by_key to + * compact a sequence of key/value pairs and sum values with equal keys. + * + * \code + * #include + * ... + * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys + * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values + * int C[N]; // output keys + * int D[N]; // output values + * + * thrust::pair new_end; + * thrust::equal_to binary_pred; + * thrust::plus binary_op; + * new_end = thrust::reduce_by_key(A, A + N, B, C, D, binary_pred, binary_op); + * + * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4. 
+ * // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4. + * \endcode + * + * \see reduce + * \see unique_copy + * \see unique_by_key + * \see unique_by_key_copy + */ +template + thrust::pair + reduce_by_key(InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred, + BinaryFunction binary_op); + + +/*! \} // end reductions + */ + + +} // end namespace thrust + +#include + diff --git a/compat/thrust/remove.h b/compat/thrust/remove.h new file mode 100644 index 0000000..c538776 --- /dev/null +++ b/compat/thrust/remove.h @@ -0,0 +1,800 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file remove.h + * \brief Functions for removing elements from a range + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \addtogroup stream_compaction Stream Compaction + * \ingroup reordering + * \{ + * + */ + + +/*! \p remove removes from the range [first, last) all elements that are + * equal to \p value. That is, \p remove returns an iterator \p new_last such + * that the range [first, new_last) contains no elements equal to + * \p value. The iterators in the range [new_last,last) are all still + * dereferenceable, but the elements that they point to are unspecified. 
\p remove + * is stable, meaning that the relative order of elements that are not equal to + * \p value is unchanged. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the range of interest. + * \param last The end of the range of interest. + * \param value The value to remove from the range [first, last). + * Elements which are equal to value are removed from the sequence. + * \return A \p ForwardIterator pointing to the end of the resulting range of + * elements which are not equal to \p value. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable. + * \tparam T is a model of Equality Comparable, + * and objects of type \p T can be compared for equality with objects of \p ForwardIterator's \c value_type. + * + * The following code snippet demonstrates how to use \p remove to remove a number + * of interest from a range using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * const int N = 6; + * int A[N] = {3, 1, 4, 1, 5, 9}; + * int *new_end = thrust::remove(thrust::host, A, A + N, 1); + * // The first four values of A are now {3, 4, 5, 9} + * // Values beyond new_end are unspecified + * \endcode + * + * \note The meaning of "removal" is somewhat subtle. \p remove does not destroy any + * iterators, and does not change the distance between \p first and \p last. + * (There's no way that it could do anything of the sort.) So, for example, if + * \c V is a device_vector, remove(V.begin(), V.end(), 0) does not + * change V.size(): \c V will contain just as many elements as it did + * before. 
\p remove returns an iterator that points to the end of the resulting + * range after elements have been removed from it; it follows that the elements + * after that iterator are of no interest, and may be discarded. If you are + * removing elements from a + * Sequence, you may + * simply erase them. That is, a reasonable way of removing elements from a + * Sequence is + * S.erase(remove(S.begin(), S.end(), x), S.end()). + * + * \see http://www.sgi.com/tech/stl/remove.html + * \see remove_if + * \see remove_copy + * \see remove_copy_if + */ +template + ForwardIterator remove(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const T &value); + + +/*! \p remove removes from the range [first, last) all elements that are + * equal to \p value. That is, \p remove returns an iterator \p new_last such + * that the range [first, new_last) contains no elements equal to + * \p value. The iterators in the range [new_last,last) are all still + * dereferenceable, but the elements that they point to are unspecified. \p remove + * is stable, meaning that the relative order of elements that are not equal to + * \p value is unchanged. + * + * \param first The beginning of the range of interest. + * \param last The end of the range of interest. + * \param value The value to remove from the range [first, last). + * Elements which are equal to value are removed from the sequence. + * \return A \p ForwardIterator pointing to the end of the resulting range of + * elements which are not equal to \p value. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable. + * \tparam T is a model of Equality Comparable, + * and objects of type \p T can be compared for equality with objects of \p ForwardIterator's \c value_type. + * + * The following code snippet demonstrates how to use \p remove to remove a number + * of interest from a range. + * + * \code + * #include + * ... 
+ * const int N = 6; + * int A[N] = {3, 1, 4, 1, 5, 9}; + * int *new_end = thrust::remove(A, A + N, 1); + * // The first four values of A are now {3, 4, 5, 9} + * // Values beyond new_end are unspecified + * \endcode + * + * \note The meaning of "removal" is somewhat subtle. \p remove does not destroy any + * iterators, and does not change the distance between \p first and \p last. + * (There's no way that it could do anything of the sort.) So, for example, if + * \c V is a device_vector, remove(V.begin(), V.end(), 0) does not + * change V.size(): \c V will contain just as many elements as it did + * before. \p remove returns an iterator that points to the end of the resulting + * range after elements have been removed from it; it follows that the elements + * after that iterator are of no interest, and may be discarded. If you are + * removing elements from a + * Sequence, you may + * simply erase them. That is, a reasonable way of removing elements from a + * Sequence is + * S.erase(remove(S.begin(), S.end(), x), S.end()). + * + * \see http://www.sgi.com/tech/stl/remove.html + * \see remove_if + * \see remove_copy + * \see remove_copy_if + */ +template + ForwardIterator remove(ForwardIterator first, + ForwardIterator last, + const T &value); + + +/*! \p remove_copy copies elements that are not equal to \p value from the range + * [first, last) to a range beginning at \p result. The return value is + * the end of the resulting range. This operation is stable, meaning that the + * relative order of the elements that are copied is the same as in + * the range [first, last). + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the range of interest. + * \param last The end of the range of interest. + * \param result The resulting range is copied to the sequence beginning at this + * location. 
+ * \param value The value to omit from the copied range. + * \return An OutputIterator pointing to the end of the resulting range of elements + * which are not equal to \p value. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam T is a model of Equality Comparable, + * and objects of type \p T can be compared for equality with objects of \p InputIterator's \c value_type. + * + * \pre The range [first, last) shall not overlap the range [result, result + (last - first)). + * + * The following code snippet demonstrates how to use \p remove_copy to copy + * a sequence of numbers to an output range while omitting a value of interest using the \p thrust::host + * execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * const int N = 6; + * int V[N] = {-2, 0, -1, 0, 1, 2}; + * int result[N-2]; + * thrust::remove_copy(thrust::host, V, V + N, result, 0); + * // V remains {-2, 0, -1, 0, 1, 2} + * // result is now {-2, -1, 1, 2} + * \endcode + * + * \see http://www.sgi.com/tech/stl/remove_copy.html + * \see remove + * \see remove_if + * \see remove_copy_if + */ +template + OutputIterator remove_copy(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + const T &value); + + +/*! \p remove_copy copies elements that are not equal to \p value from the range + * [first, last) to a range beginning at \p result. The return value is + * the end of the resulting range. This operation is stable, meaning that the + * relative order of the elements that are copied is the same as in + * the range [first, last). + * + * \param first The beginning of the range of interest. + * \param last The end of the range of interest. 
+ * \param result The resulting range is copied to the sequence beginning at this + * location. + * \param value The value to omit from the copied range. + * \return An OutputIterator pointing to the end of the resulting range of elements + * which are not equal to \p value. + * + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam T is a model of Equality Comparable, + * and objects of type \p T can be compared for equality with objects of \p InputIterator's \c value_type. + * + * \pre The range [first, last) shall not overlap the range [result, result + (last - first)). + * + * The following code snippet demonstrates how to use \p remove_copy to copy + * a sequence of numbers to an output range while omitting a value of interest. + * + * \code + * #include + * ... + * const int N = 6; + * int V[N] = {-2, 0, -1, 0, 1, 2}; + * int result[N-2]; + * thrust::remove_copy(V, V + N, result, 0); + * // V remains {-2, 0, -1, 0, 1, 2} + * // result is now {-2, -1, 1, 2} + * \endcode + * + * \see http://www.sgi.com/tech/stl/remove_copy.html + * \see remove + * \see remove_if + * \see remove_copy_if + */ +template + OutputIterator remove_copy(InputIterator first, + InputIterator last, + OutputIterator result, + const T &value); + + +/*! \p remove_if removes from the range [first, last) every element \p x + * such that pred(x) is \c true. That is, \p remove_if returns an + * iterator \c new_last such that the range [first,new_last) contains + * no elements for which \p pred is \c true. The iterators in the range + * [new_last,last) are all still dereferenceable, but the elements that + * they point to are unspecified. \p remove_if is stable, meaning that the + * relative order of elements that are not removed is unchanged. 
+ * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the range of interest. + * \param last The end of the range of interest. + * \param pred A predicate to evaluate for each element of the range + * [first,last). Elements for which \p pred evaluates to + * \c true are removed from the sequence. + * \return A ForwardIterator pointing to the end of the resulting range of + * elements for which \p pred evaluated to \c false. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * \p ForwardIterator is mutable, + * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam Predicate is a model of Predicate. + * + * The following code snippet demonstrates how to use \p remove_if to remove + * all even numbers from an array of integers using the \p thrust::host execution policy for + * parallelization: + * + * \code + * #include + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int x) + * { + * return (x % 2) == 0; + * } + * }; + * ... + * const int N = 6; + * int A[N] = {1, 4, 2, 8, 5, 7}; + * int *new_end = thrust::remove_if(thrust::host, A, A + N, is_even()); + * // The first three values of A are now {1, 5, 7} + * // Values beyond new_end are unspecified + * \endcode + * + * \note The meaning of "removal" is somewhat subtle. \p remove_if does not + * destroy any iterators, and does not change the distance between \p first and + * \p last. (There's no way that it could do anything of the sort.) So, for + * example, if \c V is a device_vector, + * remove_if(V.begin(), V.end(), pred) does not change + * V.size(): \c V will contain just as many elements as it did before.
+ * \p remove_if returns an iterator that points to the end of the resulting + * range after elements have been removed from it; it follows that the elements + * after that iterator are of no interest, and may be discarded. If you are + * removing elements from a + * Sequence, you may + * simply erase them. That is, a reasonable way of removing elements from a + * Sequence is + * S.erase(remove_if(S.begin(), S.end(), pred), S.end()). + * + * \see http://www.sgi.com/tech/stl/remove_if.html + * \see remove + * \see remove_copy + * \see remove_copy_if + */ +template + ForwardIterator remove_if(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred); + + +/*! \p remove_if removes from the range [first, last) every element \p x + * such that pred(x) is \c true. That is, \p remove_if returns an + * iterator \c new_last such that the range [first,new_last) contains + * no elements for which \p pred is \c true. The iterators in the range + * [new_last,last) are all still dereferenceable, but the elements that + * they point to are unspecified. \p remove_if is stable, meaning that the + * relative order of elements that are not removed is unchanged. + * + * \param first The beginning of the range of interest. + * \param last The end of the range of interest. + * \param pred A predicate to evaluate for each element of the range + * [first,last). Elements for which \p pred evaluates to + * \c true are removed from the sequence. + * \return A ForwardIterator pointing to the end of the resulting range of + * elements for which \p pred evaluated to \c false. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * \p ForwardIterator is mutable, + * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam Predicate is a model of Predicate.
+ * + * The following code snippet demonstrates how to use \p remove_if to remove + * all even numbers from an array of integers. + * + * \code + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int x) + * { + * return (x % 2) == 0; + * } + * }; + * ... + * const int N = 6; + * int A[N] = {1, 4, 2, 8, 5, 7}; + * int *new_end = thrust::remove_if(A, A + N, is_even()); + * // The first three values of A are now {1, 5, 7} + * // Values beyond new_end are unspecified + * \endcode + * + * \note The meaning of "removal" is somewhat subtle. \p remove_if does not + * destroy any iterators, and does not change the distance between \p first and + * \p last. (There's no way that it could do anything of the sort.) So, for + * example, if \c V is a device_vector, + * remove_if(V.begin(), V.end(), pred) does not change + * V.size(): \c V will contain just as many elements as it did before. + * \p remove_if returns an iterator that points to the end of the resulting + * range after elements have been removed from it; it follows that the elements + * after that iterator are of no interest, and may be discarded. If you are + * removing elements from a + * Sequence, you may + * simply erase them. That is, a reasonable way of removing elements from a + * Sequence is + * S.erase(remove_if(S.begin(), S.end(), pred), S.end()). + * + * \see http://www.sgi.com/tech/stl/remove_if.html + * \see remove + * \see remove_copy + * \see remove_copy_if + */ +template + ForwardIterator remove_if(ForwardIterator first, + ForwardIterator last, + Predicate pred); + + +/*! \p remove_copy_if copies elements from the range [first,last) to a + * range beginning at \p result, except that elements for which \p pred is + * \c true are not copied. The return value is the end of the resulting range. + * This operation is stable, meaning that the relative order of the elements that + * are copied is the same as the range [first,last). 
+ * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the range of interest. + * \param last The end of the range of interest. + * \param result The resulting range is copied to the sequence beginning at this + * location. + * \param pred A predicate to evaluate for each element of the range [first,last). + * Elements for which \p pred evaluates to \c true are not copied + * to the resulting sequence. + * \return An OutputIterator pointing to the end of the resulting range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator, + * \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam Predicate is a model of Predicate. + * + * \pre The range [first, last) shall not overlap the range [result, result + (last - first)). + * + * The following code snippet demonstrates how to use \p remove_copy_if to copy + * a sequence of numbers to an output range while omitting even numbers using the \p thrust::host + * execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int x) + * { + * return (x % 2) == 0; + * } + * }; + * ...
+ * const int N = 6; + * int V[N] = {-2, 0, -1, 0, 1, 2}; + * int result[2]; + * thrust::remove_copy_if(thrust::host, V, V + N, result, is_even()); + * // V remains {-2, 0, -1, 0, 1, 2} + * // result is now {-1, 1} + * \endcode + * + * \see http://www.sgi.com/tech/stl/remove_copy_if.html + * \see remove + * \see remove_copy + * \see remove_if + */ +template + OutputIterator remove_copy_if(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred); + + +/*! \p remove_copy_if copies elements from the range [first,last) to a + * range beginning at \p result, except that elements for which \p pred is + * \c true are not copied. The return value is the end of the resulting range. + * This operation is stable, meaning that the relative order of the elements that + * are copied is the same as the range [first,last). + * + * \param first The beginning of the range of interest. + * \param last The end of the range of interest. + * \param result The resulting range is copied to the sequence beginning at this + * location. + * \param pred A predicate to evaluate for each element of the range [first,last). + * Elements for which \p pred evaluates to \c true are not copied + * to the resulting sequence. + * \return An OutputIterator pointing to the end of the resulting range. + * + * \tparam InputIterator is a model of Input Iterator, + * \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam Predicate is a model of Predicate. + * + * \pre The range [first, last) shall not overlap the range [result, result + (last - first)). + * + * The following code snippet demonstrates how to use \p remove_copy_if to copy + * a sequence of numbers to an output range while omitting even numbers.
+ * + * \code + * #include + * ... + * struct is_even + * { + * __host__ __device__ + * bool operator()(const int x) + * { + * return (x % 2) == 0; + * } + * }; + * ... + * const int N = 6; + * int V[N] = {-2, 0, -1, 0, 1, 2}; + * int result[2]; + * thrust::remove_copy_if(V, V + N, result, is_even()); + * // V remains {-2, 0, -1, 0, 1, 2} + * // result is now {-1, 1} + * \endcode + * + * \see http://www.sgi.com/tech/stl/remove_copy_if.html + * \see remove + * \see remove_copy + * \see remove_if + */ +template + OutputIterator remove_copy_if(InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred); + + +/*! \p remove_if removes from the range [first, last) every element \p x + * such that pred(x) is \c true. That is, \p remove_if returns an + * iterator \c new_last such that the range [first, new_last) contains + * no elements for which \p pred of the corresponding stencil value is \c true. + * The iterators in the range [new_last,last) are all still dereferenceable, + * but the elements that they point to are unspecified. \p remove_if is stable, + * meaning that the relative order of elements that are not removed is unchanged. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the range of interest. + * \param last The end of the range of interest. + * \param stencil The beginning of the stencil sequence. + * \param pred A predicate to evaluate for each element of the range + * [stencil, stencil + (last - first)). Elements for which \p pred evaluates to + * \c true are removed from the sequence [first, last). + * \return A ForwardIterator pointing to the end of the resulting range of + * elements for which \p pred evaluated to \c false. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator + * and \p ForwardIterator is mutable.
+ * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam Predicate is a model of Predicate. + * + * \pre The range [first, last) shall not overlap the range [stencil, stencil + (last - first)). + * + * The following code snippet demonstrates how to use \p remove_if to remove + * specific elements from an array of integers using the \p thrust::host execution policy for + * parallelization: + * + * \code + * #include + * #include + * ... + * const int N = 6; + * int A[N] = {1, 4, 2, 8, 5, 7}; + * int S[N] = {0, 1, 1, 1, 0, 0}; + * + * int *new_end = thrust::remove_if(thrust::host, A, A + N, S, thrust::identity()); + * // The first three values of A are now {1, 5, 7} + * // Values beyond new_end are unspecified + * \endcode + * + * \note The range [first, last) is not permitted to overlap with the range [stencil, stencil + (last - first)). + * + * \see http://www.sgi.com/tech/stl/remove_if.html + * \see remove + * \see remove_copy + * \see remove_copy_if + */ +template + ForwardIterator remove_if(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred); + + +/*! \p remove_if removes from the range [first, last) every element \p x + * such that pred(x) is \c true. That is, \p remove_if returns an + * iterator \c new_last such that the range [first, new_last) contains + * no elements for which \p pred of the corresponding stencil value is \c true. + * The iterators in the range [new_last,last) are all still dereferenceable, + * but the elements that they point to are unspecified. \p remove_if is stable, + * meaning that the relative order of elements that are not removed is unchanged. + * + * \param first The beginning of the range of interest.
+ * \param last The end of the range of interest. + * \param stencil The beginning of the stencil sequence. + * \param pred A predicate to evaluate for each element of the range + * [stencil, stencil + (last - first)). Elements for which \p pred evaluates to + * \c true are removed from the sequence [first, last). + * \return A ForwardIterator pointing to the end of the resulting range of + * elements for which \p pred evaluated to \c false. + * + * \tparam ForwardIterator is a model of Forward Iterator + * and \p ForwardIterator is mutable. + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam Predicate is a model of Predicate. + * + * \pre The range [first, last) shall not overlap the range [stencil, stencil + (last - first)). + * + * The following code snippet demonstrates how to use \p remove_if to remove + * specific elements from an array of integers. + * + * \code + * #include + * ... + * const int N = 6; + * int A[N] = {1, 4, 2, 8, 5, 7}; + * int S[N] = {0, 1, 1, 1, 0, 0}; + * + * int *new_end = thrust::remove_if(A, A + N, S, thrust::identity()); + * // The first three values of A are now {1, 5, 7} + * // Values beyond new_end are unspecified + * \endcode + * + * \note The range [first, last) is not permitted to overlap with the range [stencil, stencil + (last - first)). + * + * \see http://www.sgi.com/tech/stl/remove_if.html + * \see remove + * \see remove_copy + * \see remove_copy_if + */ +template + ForwardIterator remove_if(ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred); + + +/*! \p remove_copy_if copies elements from the range [first,last) to a + * range beginning at \p result, except that elements for which \p pred of the + * corresponding stencil value is \c true are not copied.
The return value is + * the end of the resulting range. This operation is stable, meaning that the + * relative order of the elements that are copied is the same as the + * range [first,last). + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the range of interest. + * \param last The end of the range of interest. + * \param stencil The beginning of the stencil sequence. + * \param result The resulting range is copied to the sequence beginning at this + * location. + * \param pred A predicate to evaluate for each element of the range [stencil, stencil + (last - first)). + * Elements of [first,last) for which \p pred of the corresponding stencil value evaluates to \c true are not copied + * to the resulting sequence. + * \return An OutputIterator pointing to the end of the resulting range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam Predicate is a model of Predicate. + * + * \pre The range [stencil, stencil + (last - first)) shall not overlap the range [result, result + (last - first)). + * + * The following code snippet demonstrates how to use \p remove_copy_if to copy + * a sequence of numbers to an output range while omitting specific elements using the \p thrust::host + * execution policy for parallelization. + * + * \code + * #include + * #include + * ...
+ * const int N = 6; + * int V[N] = {-2, 0, -1, 0, 1, 2}; + * int S[N] = { 1, 1, 0, 1, 0, 1}; + * int result[2]; + * thrust::remove_copy_if(thrust::host, V, V + N, S, result, thrust::identity()); + * // V remains {-2, 0, -1, 0, 1, 2} + * // result is now {-1, 1} + * \endcode + * + * \see http://www.sgi.com/tech/stl/remove_copy_if.html + * \see remove + * \see remove_copy + * \see remove_if + * \see copy_if + */ +template + OutputIterator remove_copy_if(const thrust::detail::execution_policy_base &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred); + + +/*! \p remove_copy_if copies elements from the range [first,last) to a + * range beginning at \p result, except that elements for which \p pred of the + * corresponding stencil value is \c true are not copied. The return value is + * the end of the resulting range. This operation is stable, meaning that the + * relative order of the elements that are copied is the same as the + * range [first,last). + * + * \param first The beginning of the range of interest. + * \param last The end of the range of interest. + * \param stencil The beginning of the stencil sequence. + * \param result The resulting range is copied to the sequence beginning at this + * location. + * \param pred A predicate to evaluate for each element of the range [stencil, stencil + (last - first)). + * Elements of [first,last) for which \p pred of the corresponding stencil value evaluates to \c true are not copied + * to the resulting sequence. + * \return An OutputIterator pointing to the end of the resulting range. + * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam Predicate is a model of Predicate.
+ * + * \pre The range [stencil, stencil + (last - first)) shall not overlap the range [result, result + (last - first)). + * + * The following code snippet demonstrates how to use \p remove_copy_if to copy + * a sequence of numbers to an output range while omitting specific elements. + * + * \code + * #include + * ... + * const int N = 6; + * int V[N] = {-2, 0, -1, 0, 1, 2}; + * int S[N] = { 1, 1, 0, 1, 0, 1}; + * int result[2]; + * thrust::remove_copy_if(V, V + N, S, result, thrust::identity()); + * // V remains {-2, 0, -1, 0, 1, 2} + * // result is now {-1, 1} + * \endcode + * + * \see http://www.sgi.com/tech/stl/remove_copy_if.html + * \see remove + * \see remove_copy + * \see remove_if + * \see copy_if + */ +template + OutputIterator remove_copy_if(InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred); + + +/*! \} // end stream_compaction + */ + + +} // end thrust + +#include + diff --git a/compat/thrust/replace.h b/compat/thrust/replace.h new file mode 100644 index 0000000..48e3e49 --- /dev/null +++ b/compat/thrust/replace.h @@ -0,0 +1,817 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file replace.h + * \brief Functions for replacing elements in a range with a particular value + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! 
\addtogroup transformations + * \addtogroup replacing + * \ingroup transformations + * \{ + */ + + +/*! \p replace replaces every element in the range [first, last) equal to \p old_value + * with \p new_value. That is: for every iterator \c i, if *i == old_value + * then it performs the assignment *i = new_value. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence of interest. + * \param last The end of the sequence of interest. + * \param old_value The value to replace. + * \param new_value The new value to replace \p old_value. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable. + * \tparam T is a model of Assignable, + * \p T is a model of EqualityComparable, + * objects of \p T may be compared for equality with objects of + * \p ForwardIterator's \c value_type, + * and \p T is convertible to \p ForwardIterator's \c value_type. + * + * The following code snippet demonstrates how to use \p replace to replace + * a value of interest in a \c device_vector with another using the \p thrust::device + * execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * + * ... + * + * thrust::device_vector A(4); + * A[0] = 1; + * A[1] = 2; + * A[2] = 3; + * A[3] = 1; + * + * thrust::replace(thrust::device, A.begin(), A.end(), 1, 99); + * + * // A contains [99, 2, 3, 99] + * \endcode + * + * \see http://www.sgi.com/tech/stl/replace.html + * \see \c replace_if + * \see \c replace_copy + * \see \c replace_copy_if + */ +template + void replace(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, ForwardIterator last, + const T &old_value, + const T &new_value); + + +/*! 
\p replace replaces every element in the range [first, last) equal to \p old_value + * with \p new_value. That is: for every iterator \c i, if *i == old_value + * then it performs the assignment *i = new_value. + * + * \param first The beginning of the sequence of interest. + * \param last The end of the sequence of interest. + * \param old_value The value to replace. + * \param new_value The new value to replace \p old_value. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable. + * \tparam T is a model of Assignable, + * \p T is a model of EqualityComparable, + * objects of \p T may be compared for equality with objects of + * \p ForwardIterator's \c value_type, + * and \p T is convertible to \p ForwardIterator's \c value_type. + * + * The following code snippet demonstrates how to use \p replace to replace + * a value of interest in a \c device_vector with another. + * + * \code + * #include + * #include + * + * ... + * + * thrust::device_vector A(4); + * A[0] = 1; + * A[1] = 2; + * A[2] = 3; + * A[3] = 1; + * + * thrust::replace(A.begin(), A.end(), 1, 99); + * + * // A contains [99, 2, 3, 99] + * \endcode + * + * \see http://www.sgi.com/tech/stl/replace.html + * \see \c replace_if + * \see \c replace_copy + * \see \c replace_copy_if + */ +template + void replace(ForwardIterator first, ForwardIterator last, const T &old_value, + const T &new_value); + + +/*! \p replace_if replaces every element in the range [first, last) for which + * \p pred returns \c true with \p new_value. That is: for every iterator \c i, if + * pred(*i) is \c true then it performs the assignment *i = new_value. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence of interest. + * \param last The end of the sequence of interest. 
+ * \param pred The predicate to test on every value of the range [first,last). + * \param new_value The new value to replace elements which pred(*i) evaluates + * to \c true. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * \p ForwardIterator is mutable, + * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam Predicate is a model of Predicate. + * \tparam T is a model of Assignable, + * and \p T is convertible to \p ForwardIterator's \c value_type. + * + * The following code snippet demonstrates how to use \p replace_if to replace + * a \c device_vector's negative elements with \c 0 using the \p thrust::device execution policy + * for parallelization: + * + * \code + * #include + * #include + * #include + * ... + * struct is_less_than_zero + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return x < 0; + * } + * }; + * + * ... + * + * thrust::device_vector A(4); + * A[0] = 1; + * A[1] = -3; + * A[2] = 2; + * A[3] = -1; + * + * is_less_than_zero pred; + * + * thrust::replace_if(thrust::device, A.begin(), A.end(), pred, 0); + * + * // A contains [1, 0, 2, 0] + * \endcode + * + * \see http://www.sgi.com/tech/stl/replace_if.html + * \see \c replace + * \see \c replace_copy + * \see \c replace_copy_if + */ +template + void replace_if(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, ForwardIterator last, + Predicate pred, + const T &new_value); + + +/*! \p replace_if replaces every element in the range [first, last) for which + * \p pred returns \c true with \p new_value. That is: for every iterator \c i, if + * pred(*i) is \c true then it performs the assignment *i = new_value. + * + * \param first The beginning of the sequence of interest. + * \param last The end of the sequence of interest. + * \param pred The predicate to test on every value of the range [first,last). 
+ * \param new_value The new value to replace elements which pred(*i) evaluates + * to \c true. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * \p ForwardIterator is mutable, + * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam Predicate is a model of Predicate. + * \tparam T is a model of Assignable, + * and \p T is convertible to \p ForwardIterator's \c value_type. + * + * The following code snippet demonstrates how to use \p replace_if to replace + * a \c device_vector's negative elements with \c 0. + * + * \code + * #include + * #include + * ... + * struct is_less_than_zero + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return x < 0; + * } + * }; + * + * ... + * + * thrust::device_vector A(4); + * A[0] = 1; + * A[1] = -3; + * A[2] = 2; + * A[3] = -1; + * + * is_less_than_zero pred; + * + * thrust::replace_if(A.begin(), A.end(), pred, 0); + * + * // A contains [1, 0, 2, 0] + * \endcode + * + * \see http://www.sgi.com/tech/stl/replace_if.html + * \see \c replace + * \see \c replace_copy + * \see \c replace_copy_if + */ +template + void replace_if(ForwardIterator first, ForwardIterator last, + Predicate pred, + const T &new_value); + + +/*! \p replace_if replaces every element in the range [first, last) for which + * pred(*s) returns \c true with \p new_value. That is: for every iterator + * \c i in the range [first, last), and \c s in the range [stencil, stencil + (last - first)), + * if pred(*s) is \c true then it performs the assignment *i = new_value. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence of interest. + * \param last The end of the sequence of interest. + * \param stencil The beginning of the stencil sequence. + * \param pred The predicate to test on every value of the range [first,last). 
+ * \param new_value The new value to replace elements which pred(*i) evaluates + * to \c true. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable. + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam Predicate is a model of Predicate. + * \tparam T is a model of Assignable, + * and \p T is convertible to \p ForwardIterator's \c value_type. + * + * The following code snippet demonstrates how to use \p replace_if to replace + * a \c device_vector's element with \c 0 when its corresponding stencil element is less than zero + * using the \p thrust::device execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * + * struct is_less_than_zero + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return x < 0; + * } + * }; + * + * ... + * + * thrust::device_vector A(4); + * A[0] = 10; + * A[1] = 20; + * A[2] = 30; + * A[3] = 40; + * + * thrust::device_vector S(4); + * S[0] = -1; + * S[1] = 0; + * S[2] = -1; + * S[3] = 0; + * + * is_less_than_zero pred; + * thrust::replace_if(thrust::device, A.begin(), A.end(), S.begin(), pred, 0); + * + * // A contains [0, 20, 0, 40] + * \endcode + * + * \see http://www.sgi.com/tech/stl/replace_if.html + * \see \c replace + * \see \c replace_copy + * \see \c replace_copy_if + */ +template + void replace_if(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, ForwardIterator last, + InputIterator stencil, + Predicate pred, + const T &new_value); + + +/*! \p replace_if replaces every element in the range [first, last) for which + * pred(*s) returns \c true with \p new_value. 
That is: for every iterator + * \c i in the range [first, last), and \c s in the range [stencil, stencil + (last - first)), + * if pred(*s) is \c true then it performs the assignment *i = new_value. + * + * \param first The beginning of the sequence of interest. + * \param last The end of the sequence of interest. + * \param stencil The beginning of the stencil sequence. + * \param pred The predicate to test on every value of the range [first,last). + * \param new_value The new value to replace elements which pred(*i) evaluates + * to \c true. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable. + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam Predicate is a model of Predicate. + * \tparam T is a model of Assignable, + * and \p T is convertible to \p ForwardIterator's \c value_type. + * + * The following code snippet demonstrates how to use \p replace_if to replace + * a \c device_vector's element with \c 0 when its corresponding stencil element is less than zero. + * + * \code + * #include + * #include + * + * struct is_less_than_zero + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return x < 0; + * } + * }; + * + * ... + * + * thrust::device_vector A(4); + * A[0] = 10; + * A[1] = 20; + * A[2] = 30; + * A[3] = 40; + * + * thrust::device_vector S(4); + * S[0] = -1; + * S[1] = 0; + * S[2] = -1; + * S[3] = 0; + * + * is_less_than_zero pred; + * thrust::replace_if(A.begin(), A.end(), S.begin(), pred, 0); + * + * // A contains [0, 20, 0, 40] + * \endcode + * + * \see http://www.sgi.com/tech/stl/replace_if.html + * \see \c replace + * \see \c replace_copy + * \see \c replace_copy_if + */ +template + void replace_if(ForwardIterator first, ForwardIterator last, + InputIterator stencil, + Predicate pred, + const T &new_value); + + +/*! 
\p replace_copy copies elements from the range [first, last) to the range + * [result, result + (last-first)), except that any element equal to \p old_value + * is not copied; \p new_value is copied instead. + * + * More precisely, for every integer \c n such that 0 <= n < last-first, \p replace_copy + * performs the assignment *(result+n) = new_value if *(first+n) == old_value, + * and *(result+n) = *(first+n) otherwise. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence to copy from. + * \param last The end of the sequence to copy from. + * \param result The beginning of the sequence to copy to. + * \param old_value The value to replace. + * \param new_value The replacement value for which *i == old_value evaluates to \c true. + * \return result + (last-first) + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam T is a model of Assignable, + * \p T is a model of Equality Comparable, + * \p T may be compared for equality with \p InputIterator's \c value_type, + * and \p T is convertible to \p OutputIterator's \c value_type. + * + * \pre \p first may equal \p result, but the ranges [first, last) and [result, result + (last - first)) shall not overlap otherwise. + * + * \code + * #include + * #include + * #include + * ... 
+ * thrust::device_vector A(4); + * A[0] = 1; + * A[1] = 2; + * A[2] = 3; + * A[3] = 1; + * + * thrust::device_vector B(4); + * + * thrust::replace_copy(thrust::device, A.begin(), A.end(), B.begin(), 1, 99); + * + * // B contains [99, 2, 3, 99] + * \endcode + * + * \see http://www.sgi.com/tech/stl/replace_copy.html + * \see \c copy + * \see \c replace + * \see \c replace_if + * \see \c replace_copy_if + */ +template + OutputIterator replace_copy(const thrust::detail::execution_policy_base &exec, + InputIterator first, InputIterator last, + OutputIterator result, + const T &old_value, + const T &new_value); + + +/*! \p replace_copy copies elements from the range [first, last) to the range + * [result, result + (last-first)), except that any element equal to \p old_value + * is not copied; \p new_value is copied instead. + * + * More precisely, for every integer \c n such that 0 <= n < last-first, \p replace_copy + * performs the assignment *(result+n) = new_value if *(first+n) == old_value, + * and *(result+n) = *(first+n) otherwise. + * + * \param first The beginning of the sequence to copy from. + * \param last The end of the sequence to copy from. + * \param result The beginning of the sequence to copy to. + * \param old_value The value to replace. + * \param new_value The replacement value for which *i == old_value evaluates to \c true. + * \return result + (last-first) + * + * \tparam InputIterator is a model of Input Iterator. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam T is a model of Assignable, + * \p T is a model of Equality Comparable, + * \p T may be compared for equality with \p InputIterator's \c value_type, + * and \p T is convertible to \p OutputIterator's \c value_type. + * + * \pre \p first may equal \p result, but the ranges [first, last) and [result, result + (last - first)) shall not overlap otherwise. + * + * \code + * #include + * #include + * ... 
+ * thrust::device_vector A(4); + * A[0] = 1; + * A[1] = 2; + * A[2] = 3; + * A[3] = 1; + * + * thrust::device_vector B(4); + * + * thrust::replace_copy(A.begin(), A.end(), B.begin(), 1, 99); + * + * // B contains [99, 2, 3, 99] + * \endcode + * + * \see http://www.sgi.com/tech/stl/replace_copy.html + * \see \c copy + * \see \c replace + * \see \c replace_if + * \see \c replace_copy_if + */ +template + OutputIterator replace_copy(InputIterator first, InputIterator last, + OutputIterator result, const T &old_value, + const T &new_value); + + +/*! \p replace_copy_if copies elements from the range [first, last) to the range + * [result, result + (last-first)), except that any element for which \p pred + * is \c true is not copied; \p new_value is copied instead. + * + * More precisely, for every integer \c n such that 0 <= n < last-first, + * \p replace_copy_if performs the assignment *(result+n) = new_value if + * pred(*(first+n)), and *(result+n) = *(first+n) otherwise. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence to copy from. + * \param last The end of the sequence to copy from. + * \param result The beginning of the sequence to copy to. + * \param pred The predicate to test on every value of the range [first,last). + * \param new_value The replacement value to assign pred(*i) evaluates to \c true. + * \return result + (last-first) + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam Predicate is a model of Predicate. + * \tparam T is a model of Assignable, + * and \p T is convertible to \p OutputIterator's \c value_type. 
+ * + * \pre \p first may equal \p result, but the ranges [first, last) and [result, result + (last - first)) shall not overlap otherwise. + * + * \code + * #include + * #include + * #include + * + * struct is_less_than_zero + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return x < 0; + * } + * }; + * + * ... + * + * thrust::device_vector A(4); + * A[0] = 1; + * A[1] = -3; + * A[2] = 2; + * A[3] = -1; + + * thrust::device_vector B(4); + * is_less_than_zero pred; + * + * thrust::replace_copy_if(thrust::device, A.begin(), A.end(), B.begin(), pred, 0); + * + * // B contains [1, 0, 2, 0] + * \endcode + * + * \see http://www.sgi.com/tech/stl/replace_copy_if.html + * \see \c replace + * \see \c replace_if + * \see \c replace_copy + */ +template + OutputIterator replace_copy_if(const thrust::detail::execution_policy_base &exec, + InputIterator first, InputIterator last, + OutputIterator result, + Predicate pred, + const T &new_value); + + +/*! \p replace_copy_if copies elements from the range [first, last) to the range + * [result, result + (last-first)), except that any element for which \p pred + * is \c true is not copied; \p new_value is copied instead. + * + * More precisely, for every integer \c n such that 0 <= n < last-first, + * \p replace_copy_if performs the assignment *(result+n) = new_value if + * pred(*(first+n)), and *(result+n) = *(first+n) otherwise. + * + * \param first The beginning of the sequence to copy from. + * \param last The end of the sequence to copy from. + * \param result The beginning of the sequence to copy to. + * \param pred The predicate to test on every value of the range [first,last). + * \param new_value The replacement value to assign pred(*i) evaluates to \c true. + * \return result + (last-first) + * + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam OutputIterator is a model of Output Iterator. 
+ * \tparam Predicate is a model of Predicate. + * \tparam T is a model of Assignable, + * and \p T is convertible to \p OutputIterator's \c value_type. + * + * \pre \p first may equal \p result, but the ranges [first, last) and [result, result + (last - first)) shall not overlap otherwise. + * + * \code + * #include + * #include + * + * struct is_less_than_zero + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return x < 0; + * } + * }; + * + * ... + * + * thrust::device_vector A(4); + * A[0] = 1; + * A[1] = -3; + * A[2] = 2; + * A[3] = -1; + + * thrust::device_vector B(4); + * is_less_than_zero pred; + * + * thrust::replace_copy_if(A.begin(), A.end(), B.begin(), pred, 0); + * + * // B contains [1, 0, 2, 0] + * \endcode + * + * \see http://www.sgi.com/tech/stl/replace_copy_if.html + * \see \c replace + * \see \c replace_if + * \see \c replace_copy + */ +template + OutputIterator replace_copy_if(InputIterator first, InputIterator last, + OutputIterator result, + Predicate pred, + const T &new_value); + + +/*! This version of \p replace_copy_if copies elements from the range [first, last) to the range + * [result, result + (last-first)), except that any element whose corresponding stencil + * element causes \p pred to be \c true is not copied; \p new_value is copied instead. + * + * More precisely, for every integer \c n such that 0 <= n < last-first, + * \p replace_copy_if performs the assignment *(result+n) = new_value if + * pred(*(stencil+n)), and *(result+n) = *(first+n) otherwise. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence to copy from. + * \param last The end of the sequence to copy from. + * \param stencil The beginning of the stencil sequence. + * \param result The beginning of the sequence to copy to. 
+ * \param pred The predicate to test on every value of the range [stencil, stencil + (last - first)). + * \param new_value The replacement value to assign when pred(*s) evaluates to \c true. + * \return result + (last-first) + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator. + * \tparam InputIterator2 is a model of Input Iterator + * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam Predicate is a model of Predicate. + * \tparam T is a model of Assignable, + * and \p T is convertible to \p OutputIterator's \c value_type. + * + * \pre \p first may equal \p result, but the ranges [first, last) and [result, result + (last - first)) shall not overlap otherwise. + * \pre \p stencil may equal \p result, but the ranges [stencil, stencil + (last - first)) and [result, result + (last - first)) shall not overlap otherwise. + * + * \code + * #include + * #include + * #include + * + * struct is_less_than_zero + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return x < 0; + * } + * }; + * + * ... + * + * thrust::device_vector A(4); + * A[0] = 10; + * A[1] = 20; + * A[2] = 30; + * A[3] = 40; + * + * thrust::device_vector S(4); + * S[0] = -1; + * S[1] = 0; + * S[2] = -1; + * S[3] = 0; + * + * thrust::device_vector B(4); + * is_less_than_zero pred; + * + * thrust::replace_copy_if(thrust::device, A.begin(), A.end(), S.begin(), B.begin(), pred, 0); + * + * // B contains [0, 20, 0, 40] + * \endcode + * + * \see \c replace_copy + * \see \c replace_if + */ +template + OutputIterator replace_copy_if(const thrust::detail::execution_policy_base &exec, + InputIterator1 first, InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred, + const T &new_value); + + +/*!
This version of \p replace_copy_if copies elements from the range [first, last) to the range + * [result, result + (last-first)), except that any element whose corresponding stencil + * element causes \p pred to be \c true is not copied; \p new_value is copied instead. + * + * More precisely, for every integer \c n such that 0 <= n < last-first, + * \p replace_copy_if performs the assignment *(result+n) = new_value if + * pred(*(stencil+n)), and *(result+n) = *(first+n) otherwise. + * + * \param first The beginning of the sequence to copy from. + * \param last The end of the sequence to copy from. + * \param stencil The beginning of the stencil sequence. + * \param result The beginning of the sequence to copy to. + * \param pred The predicate to test on every value of the range [stencil, stencil + (last - first)). + * \param new_value The replacement value to assign when pred(*s) evaluates to \c true. + * \return result + (last-first) + * + * \tparam InputIterator1 is a model of Input Iterator. + * \tparam InputIterator2 is a model of Input Iterator + * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam Predicate is a model of Predicate. + * \tparam T is a model of Assignable, + * and \p T is convertible to \p OutputIterator's \c value_type. + * + * \pre \p first may equal \p result, but the ranges [first, last) and [result, result + (last - first)) shall not overlap otherwise. + * \pre \p stencil may equal \p result, but the ranges [stencil, stencil + (last - first)) and [result, result + (last - first)) shall not overlap otherwise. + * + * \code + * #include + * #include + * + * struct is_less_than_zero + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return x < 0; + * } + * }; + * + * ... 
+ * + * thrust::device_vector A(4); + * A[0] = 10; + * A[1] = 20; + * A[2] = 30; + * A[3] = 40; + * + * thrust::device_vector S(4); + * S[0] = -1; + * S[1] = 0; + * S[2] = -1; + * S[3] = 0; + * + * thrust::device_vector B(4); + * is_less_than_zero pred; + * + * thrust::replace_copy_if(A.begin(), A.end(), S.begin(), B.begin(), pred, 0); + * + * // B contains [0, 20, 0, 40] + * \endcode + * + * \see \c replace_copy + * \see \c replace_if + */ +template + OutputIterator replace_copy_if(InputIterator1 first, InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred, + const T &new_value); + + +/*! \} // end replacing + * \} // transformations + */ + + +} // end thrust + +#include + diff --git a/compat/thrust/reverse.h b/compat/thrust/reverse.h new file mode 100644 index 0000000..ba50c5d --- /dev/null +++ b/compat/thrust/reverse.h @@ -0,0 +1,213 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file reverse.h + * \brief Reverses the order of a range + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \addtogroup reordering + * \ingroup algorithms + */ + + +/*! \p reverse reverses a range. That is: for every i such that + * 0 <= i < (last - first) / 2, it exchanges *(first + i) + * and *(last - (i + 1)). + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization.
+ * \param first The beginning of the range to reverse. + * \param last The end of the range to reverse. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam BidirectionalIterator is a model of Bidirectional Iterator and + * \p BidirectionalIterator is mutable. + * + * The following code snippet demonstrates how to use \p reverse to reverse a + * \p device_vector of integers using the \p thrust::device execution policy for + * parallelization: + * + * \code + * #include + * #include + * ... + * const int N = 6; + * int data[N] = {0, 1, 2, 3, 4, 5}; + * thrust::device_vector v(data, data + N); + * thrust::reverse(thrust::device, v.begin(), v.end()); + * // v is now {5, 4, 3, 2, 1, 0} + * \endcode + * + * \see http://www.sgi.com/tech/stl/reverse.html + * \see \p reverse_copy + * \see \p reverse_iterator + */ +template + void reverse(const thrust::detail::execution_policy_base &exec, + BidirectionalIterator first, + BidirectionalIterator last); + + +/*! \p reverse reverses a range. That is: for every i such that + * 0 <= i < (last - first) / 2, it exchanges *(first + i) + * and *(last - (i + 1)). + * + * \param first The beginning of the range to reverse. + * \param last The end of the range to reverse. + * + * \tparam BidirectionalIterator is a model of Bidirectional Iterator and + * \p BidirectionalIterator is mutable. + * + * The following code snippet demonstrates how to use \p reverse to reverse a + * \p device_vector of integers. + * + * \code + * #include + * ... + * const int N = 6; + * int data[N] = {0, 1, 2, 3, 4, 5}; + * thrust::device_vector v(data, data + N); + * thrust::reverse(v.begin(), v.end()); + * // v is now {5, 4, 3, 2, 1, 0} + * \endcode + * + * \see http://www.sgi.com/tech/stl/reverse.html + * \see \p reverse_copy + * \see \p reverse_iterator + */ +template + void reverse(BidirectionalIterator first, + BidirectionalIterator last); + + +/*!
\p reverse_copy differs from \ref reverse only in that the reversed range + * is written to a different output range, rather than inplace. + * + * \p reverse_copy copies elements from the range [first, last) to the + * range [result, result + (last - first)) such that the copy is a + * reverse of the original range. Specifically: for every i such that + * 0 <= i < (last - first), \p reverse_copy performs the assignment + * *(result + (last - first) - 1 - i) = *(first + i). + * + * The return value is result + (last - first). + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the range to reverse. + * \param last The end of the range to reverse. + * \param result The beginning of the output range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam BidirectionalIterator is a model of Bidirectional Iterator, + * and \p BidirectionalIterator's \p value_type is convertible to \p OutputIterator's \p value_type. + * \tparam OutputIterator is a model of Output Iterator. + * + * \pre The range [first, last) and the range [result, result + (last - first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p reverse_copy to reverse + * an input \p device_vector of integers to an output \p device_vector using the \p thrust::device + * execution policy for parallelization: + * + * \code + * #include + * #include + * ...
+ * const int N = 6; + * int data[N] = {0, 1, 2, 3, 4, 5}; + * thrust::device_vector input(data, data + N); + * thrust::device_vector output(N); + * thrust::reverse_copy(thrust::device, input.begin(), input.end(), output.begin()); + * // input is still {0, 1, 2, 3, 4, 5} + * // output is now {5, 4, 3, 2, 1, 0} + * \endcode + * + * \see http://www.sgi.com/tech/stl/reverse_copy.html + * \see \p reverse + * \see \p reverse_iterator + */ +template + OutputIterator reverse_copy(const thrust::detail::execution_policy_base &exec, + BidirectionalIterator first, + BidirectionalIterator last, + OutputIterator result); + + +/*! \p reverse_copy differs from \ref reverse only in that the reversed range + * is written to a different output range, rather than inplace. + * + * \p reverse_copy copies elements from the range [first, last) to the + * range [result, result + (last - first)) such that the copy is a + * reverse of the original range. Specifically: for every i such that + * 0 <= i < (last - first), \p reverse_copy performs the assignment + * *(result + (last - first) - 1 - i) = *(first + i). + * + * The return value is result + (last - first). + * + * \param first The beginning of the range to reverse. + * \param last The end of the range to reverse. + * \param result The beginning of the output range. + * + * \tparam BidirectionalIterator is a model of Bidirectional Iterator, + * and \p BidirectionalIterator's \p value_type is convertible to \p OutputIterator's \p value_type. + * \tparam OutputIterator is a model of Output Iterator. + * + * \pre The range [first, last) and the range [result, result + (last - first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p reverse_copy to reverse + * an input \p device_vector of integers to an output \p device_vector. + * + * \code + * #include + * ...
+ * const int N = 6; + * int data[N] = {0, 1, 2, 3, 4, 5}; + * thrust::device_vector input(data, data + N); + * thrust::device_vector output(N); + * thrust::reverse_copy(input.begin(), input.end(), output.begin()); + * // input is still {0, 1, 2, 3, 4, 5} + * // output is now {5, 4, 3, 2, 1, 0} + * \endcode + * + * \see http://www.sgi.com/tech/stl/reverse_copy.html + * \see \p reverse + * \see \p reverse_iterator + */ +template + OutputIterator reverse_copy(BidirectionalIterator first, + BidirectionalIterator last, + OutputIterator result); + + +/*! \} // end reordering + */ + + +} // end thrust + +#include + diff --git a/compat/thrust/scan.h b/compat/thrust/scan.h new file mode 100644 index 0000000..95074e6 --- /dev/null +++ b/compat/thrust/scan.h @@ -0,0 +1,1552 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file scan.h + * \brief Functions for computing prefix sums + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \addtogroup algorithms + */ + + +/*! \addtogroup prefixsums Prefix Sums + * \ingroup algorithms + * \{ + */ + + +/*! \p inclusive_scan computes an inclusive prefix sum operation. The + * term 'inclusive' means that each result includes the corresponding + * input operand in the partial sum. More precisely, *first is + * assigned to *result and the sum of *first and + * *(first + 1) is assigned to *(result + 1), and so on.
+ * This version of \p inclusive_scan assumes plus as the associative operator. + * When the input and output sequences are the same, the scan is performed + * in-place. + + * \p inclusive_scan is similar to \c std::partial_sum in the STL. The primary + * difference between the two functions is that \c std::partial_sum guarantees + * a serial summation order, while \p inclusive_scan requires associativity of + * the binary operation to parallelize the prefix sum. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param result The beginning of the output sequence. + * \return The end of the output sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator + * and \c InputIterator's \c value_type is convertible to + * \c OutputIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator, + * and if \c x and \c y are objects of \c OutputIterator's + * \c value_type, then x + y is defined. If \c T is + * \c OutputIterator's \c value_type, then T(0) is + * defined. + * + * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p inclusive_scan to compute an in-place + * prefix sum using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * ... 
+ * + * int data[6] = {1, 0, 2, 2, 1, 3}; + * + * thrust::inclusive_scan(thrust::host, data, data + 6, data); // in-place scan + * + * // data is now {1, 1, 3, 5, 6, 9} + * \endcode + * + * \see http://www.sgi.com/tech/stl/partial_sum.html + * + */ +template + OutputIterator inclusive_scan(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result); + + +/*! \p inclusive_scan computes an inclusive prefix sum operation. The + * term 'inclusive' means that each result includes the corresponding + * input operand in the partial sum. More precisely, *first is + * assigned to *result and the sum of *first and + * *(first + 1) is assigned to *(result + 1), and so on. + * This version of \p inclusive_scan assumes plus as the associative operator. + * When the input and output sequences are the same, the scan is performed + * in-place. + + * \p inclusive_scan is similar to \c std::partial_sum in the STL. The primary + * difference between the two functions is that \c std::partial_sum guarantees + * a serial summation order, while \p inclusive_scan requires associativity of + * the binary operation to parallelize the prefix sum. + * + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param result The beginning of the output sequence. + * \return The end of the output sequence. + * + * \tparam InputIterator is a model of Input Iterator + * and \c InputIterator's \c value_type is convertible to + * \c OutputIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator, + * and if \c x and \c y are objects of \c OutputIterator's + * \c value_type, then x + y is defined. If \c T is + * \c OutputIterator's \c value_type, then T(0) is + * defined. + * + * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. 
+ * + * The following code snippet demonstrates how to use \p inclusive_scan + * + * \code + * #include + * + * int data[6] = {1, 0, 2, 2, 1, 3}; + * + * thrust::inclusive_scan(data, data + 6, data); // in-place scan + * + * // data is now {1, 1, 3, 5, 6, 9} + * \endcode + * + * \see http://www.sgi.com/tech/stl/partial_sum.html + * + */ +template + OutputIterator inclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result); + + +/*! \p inclusive_scan computes an inclusive prefix sum operation. The + * term 'inclusive' means that each result includes the corresponding + * input operand in the partial sum. When the input and output sequences + * are the same, the scan is performed in-place. + * + * \p inclusive_scan is similar to \c std::partial_sum in the STL. The primary + * difference between the two functions is that \c std::partial_sum guarantees + * a serial summation order, while \p inclusive_scan requires associativity of + * the binary operation to parallelize the prefix sum. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param result The beginning of the output sequence. + * \param binary_op The associative operator used to 'sum' values. + * \return The end of the output sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator + * and \c InputIterator's \c value_type is convertible to + * \c OutputIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator + * and \c OutputIterator's \c value_type is convertible to + * both \c AssociativeOperator's \c first_argument_type and + * \c second_argument_type.
+ * \tparam AssociativeOperator is a model of Binary Function + * and \c AssociativeOperator's \c result_type is + * convertible to \c OutputIterator's \c value_type. + * + * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p inclusive_scan to compute an in-place + * prefix sum using the \p thrust::host execution policy for parallelization: + * + * \code + * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; + * + * thrust::maximum binary_op; + * + * thrust::inclusive_scan(thrust::host, data, data + 10, data, binary_op); // in-place scan + * + * // data is now {-5, 0, 2, 2, 2, 4, 4, 4, 4, 8} + * \endcode + * + * \see http://www.sgi.com/tech/stl/partial_sum.html + */ +template + OutputIterator inclusive_scan(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + AssociativeOperator binary_op); + + +/*! \p inclusive_scan computes an inclusive prefix sum operation. The + * term 'inclusive' means that each result includes the corresponding + * input operand in the partial sum. When the input and output sequences + * are the same, the scan is performed in-place. + * + * \p inclusive_scan is similar to \c std::partial_sum in the STL. The primary + * difference between the two functions is that \c std::partial_sum guarantees + * a serial summation order, while \p inclusive_scan requires associativity of + * the binary operation to parallelize the prefix sum. + * + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param result The beginning of the output sequence. + * \param binary_op The associatve operator used to 'sum' values. + * \return The end of the output sequence. 
+ * + * \tparam InputIterator is a model of Input Iterator + * and \c InputIterator's \c value_type is convertible to + * \c OutputIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator + * and \c OutputIterator's \c value_type is convertible to + * both \c AssociativeOperator's \c first_argument_type and + * \c second_argument_type. + * \tparam AssociativeOperator is a model of Binary Function + * and \c AssociativeOperator's \c result_type is + * convertible to \c OutputIterator's \c value_type. + * + * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p inclusive_scan + * + * \code + * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; + * + * thrust::maximum binary_op; + * + * thrust::inclusive_scan(data, data + 10, data, binary_op); // in-place scan + * + * // data is now {-5, 0, 2, 2, 2, 4, 4, 4, 4, 8} + * \endcode + * + * \see http://www.sgi.com/tech/stl/partial_sum.html + */ +template + OutputIterator inclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result, + AssociativeOperator binary_op); + + +/*! \p exclusive_scan computes an exclusive prefix sum operation. The + * term 'exclusive' means that each result does not include the + * corresponding input operand in the partial sum. More precisely, + * 0 is assigned to *result and the sum of + * 0 and *first is assigned to *(result + 1), + * and so on. This version of \p exclusive_scan assumes plus as the + * associative operator and \c 0 as the initial value. When the input and + * output sequences are the same, the scan is performed in-place. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. 
+ * \param result The beginning of the output sequence. + * \return The end of the output sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator + * and \c InputIterator's \c value_type is convertible to + * \c OutputIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator, + * and if \c x and \c y are objects of \c OutputIterator's + * \c value_type, then x + y is defined. If \c T is + * \c OutputIterator's \c value_type, then T(0) is + * defined. + * + * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p exclusive_scan to compute an in-place + * prefix sum using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * + * int data[6] = {1, 0, 2, 2, 1, 3}; + * + * thrust::exclusive_scan(thrust::host, data, data + 6, data); // in-place scan + * + * // data is now {0, 1, 1, 3, 5, 6} + * \endcode + * + * \see http://www.sgi.com/tech/stl/partial_sum.html + */ +template + OutputIterator exclusive_scan(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result); + + +/*! \p exclusive_scan computes an exclusive prefix sum operation. The + * term 'exclusive' means that each result does not include the + * corresponding input operand in the partial sum. More precisely, + * 0 is assigned to *result and the sum of + * 0 and *first is assigned to *(result + 1), + * and so on. This version of \p exclusive_scan assumes plus as the + * associative operator and \c 0 as the initial value. When the input and + * output sequences are the same, the scan is performed in-place. + * + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. 
+ * \param result The beginning of the output sequence. + * \return The end of the output sequence. + * + * \tparam InputIterator is a model of Input Iterator + * and \c InputIterator's \c value_type is convertible to + * \c OutputIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator, + * and if \c x and \c y are objects of \c OutputIterator's + * \c value_type, then x + y is defined. If \c T is + * \c OutputIterator's \c value_type, then T(0) is + * defined. + * + * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p exclusive_scan + * + * \code + * #include + * + * int data[6] = {1, 0, 2, 2, 1, 3}; + * + * thrust::exclusive_scan(data, data + 6, data); // in-place scan + * + * // data is now {0, 1, 1, 3, 5, 6} + * \endcode + * + * \see http://www.sgi.com/tech/stl/partial_sum.html + */ +template + OutputIterator exclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result); + + +/*! \p exclusive_scan computes an exclusive prefix sum operation. The + * term 'exclusive' means that each result does not include the + * corresponding input operand in the partial sum. More precisely, + * \p init is assigned to *result and the sum of \p init and + * *first is assigned to *(result + 1), and so on. + * This version of \p exclusive_scan assumes plus as the associative + * operator but requires an initial value \p init. When the input and + * output sequences are the same, the scan is performed in-place. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param result The beginning of the output sequence. + * \param init The initial value. + * \return The end of the output sequence. 
+ * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator + * and \c InputIterator's \c value_type is convertible to + * \c OutputIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator, + * and if \c x and \c y are objects of \c OutputIterator's + * \c value_type, then x + y is defined. + * \tparam T is convertible to \c OutputIterator's \c value_type. + * + * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p exclusive_scan to compute an in-place + * prefix sum using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * + * int data[6] = {1, 0, 2, 2, 1, 3}; + * + * thrust::exclusive_scan(thrust::host, data, data + 6, data, 4); // in-place scan + * + * // data is now {4, 5, 5, 7, 9, 10} + * \endcode + * + * \see http://www.sgi.com/tech/stl/partial_sum.html + */ +template + OutputIterator exclusive_scan(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + T init); + + +/*! \p exclusive_scan computes an exclusive prefix sum operation. The + * term 'exclusive' means that each result does not include the + * corresponding input operand in the partial sum. More precisely, + * \p init is assigned to *result and the sum of \p init and + * *first is assigned to *(result + 1), and so on. + * This version of \p exclusive_scan assumes plus as the associative + * operator but requires an initial value \p init. When the input and + * output sequences are the same, the scan is performed in-place. + * + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param result The beginning of the output sequence. + * \param init The initial value. 
+ * \return The end of the output sequence. + * + * \tparam InputIterator is a model of Input Iterator + * and \c InputIterator's \c value_type is convertible to + * \c OutputIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator, + * and if \c x and \c y are objects of \c OutputIterator's + * \c value_type, then x + y is defined. + * \tparam T is convertible to \c OutputIterator's \c value_type. + * + * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p exclusive_scan + * + * \code + * #include <thrust/scan.h> + * + * int data[6] = {1, 0, 2, 2, 1, 3}; + * + * thrust::exclusive_scan(data, data + 6, data, 4); // in-place scan + * + * // data is now {4, 5, 5, 7, 9, 10} + * \endcode + * + * \see http://www.sgi.com/tech/stl/partial_sum.html + */ +template + OutputIterator exclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result, + T init); + + +/*! \p exclusive_scan computes an exclusive prefix sum operation. The + * term 'exclusive' means that each result does not include the + * corresponding input operand in the partial sum. More precisely, + * \p init is assigned to \*result and the value + * binary_op(init, \*first) is assigned to \*(result + 1), + * and so on. This version of the function requires both an associative + * operator and an initial value \p init. When the input and output + * sequences are the same, the scan is performed in-place. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param result The beginning of the output sequence. + * \param init The initial value. + * \param binary_op The associative operator used to 'sum' values. 
+ * \return The end of the output sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator + * and \c InputIterator's \c value_type is convertible to + * \c OutputIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator + * and \c OutputIterator's \c value_type is convertible to + * both \c AssociativeOperator's \c first_argument_type and + * \c second_argument_type. + * \tparam T is convertible to \c OutputIterator's \c value_type. + * \tparam AssociativeOperator is a model of Binary Function + * and \c AssociativeOperator's \c result_type is + * convertible to \c OutputIterator's \c value_type. + * + * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p exclusive_scan to compute an in-place + * prefix sum using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... + * + * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; + * + * thrust::maximum binary_op; + * + * thrust::exclusive_scan(thrust::host, data, data + 10, data, 1, binary_op); // in-place scan + * + * // data is now {1, 1, 1, 2, 2, 2, 4, 4, 4, 4 } + * \endcode + * + * \see http://www.sgi.com/tech/stl/partial_sum.html + */ +template + OutputIterator exclusive_scan(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + T init, + AssociativeOperator binary_op); + + +/*! \p exclusive_scan computes an exclusive prefix sum operation. The + * term 'exclusive' means that each result does not include the + * corresponding input operand in the partial sum. More precisely, + * \p init is assigned to \*result and the value + * binary_op(init, \*first) is assigned to \*(result + 1), + * and so on. 
This version of the function requires both an associative + * operator and an initial value \p init. When the input and output + * sequences are the same, the scan is performed in-place. + * + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param result The beginning of the output sequence. + * \param init The initial value. + * \param binary_op The associative operator used to 'sum' values. + * \return The end of the output sequence. + * + * \tparam InputIterator is a model of Input Iterator + * and \c InputIterator's \c value_type is convertible to + * \c OutputIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator + * and \c OutputIterator's \c value_type is convertible to + * both \c AssociativeOperator's \c first_argument_type and + * \c second_argument_type. + * \tparam T is convertible to \c OutputIterator's \c value_type. + * \tparam AssociativeOperator is a model of Binary Function + * and \c AssociativeOperator's \c result_type is + * convertible to \c OutputIterator's \c value_type. + * + * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p exclusive_scan + * + * \code + * #include <thrust/scan.h> + * #include <thrust/functional.h> + * + * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; + * + * thrust::maximum<int> binary_op; + * + * thrust::exclusive_scan(data, data + 10, data, 1, binary_op); // in-place scan + * + * // data is now {1, 1, 1, 2, 2, 2, 4, 4, 4, 4 } + * \endcode + * + * \see http://www.sgi.com/tech/stl/partial_sum.html + */ +template + OutputIterator exclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result, + T init, + AssociativeOperator binary_op); + + +/*! \addtogroup segmentedprefixsums Segmented Prefix Sums + * \ingroup prefixsums + * \{ + */ + + +/*! 
\p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix + * sum operation. The term 'inclusive' means that each result includes + * the corresponding input operand in the partial sum. The term 'segmented' + * means that the partial sums are broken into distinct segments. In other + * words, within each segment a separate inclusive scan operation is computed. + * Refer to the code sample below for example usage. + * + * This version of \p inclusive_scan_by_key assumes \c equal_to as the binary + * predicate used to compare adjacent keys. Specifically, consecutive iterators + * i and i+1 in the range [first1, last1) + * belong to the same segment if *i == *(i+1), and belong to + * different segments otherwise. + * + * This version of \p inclusive_scan_by_key assumes \c plus as the associative + * operator used to perform the prefix sum. When the input and output sequences + * are the same, the scan is performed in-place. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the key sequence. + * \param last1 The end of the key sequence. + * \param first2 The beginning of the input value sequence. + * \param result The beginning of the output value sequence. + * \return The end of the output sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator + * \tparam InputIterator2 is a model of Input Iterator + * and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator, + * and if \c x and \c y are objects of \c OutputIterator's \c value_type, then + * binary_op(x,y) is defined. + * + * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. 
+ * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1)) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p inclusive_scan_by_key using the \p thrust::host + * execution policy for parallelization: + * + * \code + * #include <thrust/scan.h> + * #include <thrust/execution_policy.h> + * ... + * + * int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; + * + * thrust::inclusive_scan_by_key(thrust::host, keys, keys + 10, data, data); // in-place scan + * + * // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4}; + * \endcode + * + * \see inclusive_scan + * \see exclusive_scan_by_key + * + */ +template + OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result); + + +/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix + * sum operation. The term 'inclusive' means that each result includes + * the corresponding input operand in the partial sum. The term 'segmented' + * means that the partial sums are broken into distinct segments. In other + * words, within each segment a separate inclusive scan operation is computed. + * Refer to the code sample below for example usage. + * + * This version of \p inclusive_scan_by_key assumes \c equal_to as the binary + * predicate used to compare adjacent keys. Specifically, consecutive iterators + * i and i+1 in the range [first1, last1) + * belong to the same segment if *i == *(i+1), and belong to + * different segments otherwise. + * + * This version of \p inclusive_scan_by_key assumes \c plus as the associative + * operator used to perform the prefix sum. When the input and output sequences + * are the same, the scan is performed in-place. + * + * \param first1 The beginning of the key sequence. + * \param last1 The end of the key sequence. 
+ * \param first2 The beginning of the input value sequence. + * \param result The beginning of the output value sequence. + * \return The end of the output sequence. + * + * \tparam InputIterator1 is a model of Input Iterator + * \tparam InputIterator2 is a model of Input Iterator + * and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator, + * and if \c x and \c y are objects of \c OutputIterator's \c value_type, then + * binary_op(x,y) is defined. + * + * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1)) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p inclusive_scan_by_key + * + * \code + * #include <thrust/scan.h> + * + * int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; + * + * thrust::inclusive_scan_by_key(keys, keys + 10, data, data); // in-place scan + * + * // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4}; + * \endcode + * + * \see inclusive_scan + * \see exclusive_scan_by_key + * + */ +template + OutputIterator inclusive_scan_by_key(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result); + + +/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix + * sum operation. The term 'inclusive' means that each result includes + * the corresponding input operand in the partial sum. The term 'segmented' + * means that the partial sums are broken into distinct segments. In other + * words, within each segment a separate inclusive scan operation is computed. + * Refer to the code sample below for example usage. 
+ * + * This version of \p inclusive_scan_by_key uses the binary predicate + * \c pred to compare adjacent keys. Specifically, consecutive iterators + * i and i+1 in the range [first1, last1) + * belong to the same segment if binary_pred(*i, *(i+1)) is true, and belong to + * different segments otherwise. + * + * This version of \p inclusive_scan_by_key assumes \c plus as the associative + * operator used to perform the prefix sum. When the input and output sequences + * are the same, the scan is performed in-place. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the key sequence. + * \param last1 The end of the key sequence. + * \param first2 The beginning of the input value sequence. + * \param result The beginning of the output value sequence. + * \param binary_pred The binary predicate used to determine equality of keys. + * \return The end of the output sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator + * \tparam InputIterator2 is a model of Input Iterator + * and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator, + * and if \c x and \c y are objects of \c OutputIterator's \c value_type, then + * binary_op(x,y) is defined. + * \tparam BinaryPredicate is a model of Binary Predicate. + * + * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1) and range [result, result + (last1 - first1)) shall not overlap otherwise. 
+ * + * The following code snippet demonstrates how to use \p inclusive_scan_by_key using the \p thrust::host + * execution policy for parallelization: + * + * \code + * #include <thrust/scan.h> + * #include <thrust/functional.h> + * #include <thrust/execution_policy.h> + * ... + * + * int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; + * + * thrust::equal_to<int> binary_pred; + * + * thrust::inclusive_scan_by_key(thrust::host, keys, keys + 10, data, data, binary_pred); // in-place scan + * + * // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4}; + * \endcode + * + * \see inclusive_scan + * \see exclusive_scan_by_key + * + */ +template + OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryPredicate binary_pred); + + +/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix + * sum operation. The term 'inclusive' means that each result includes + * the corresponding input operand in the partial sum. The term 'segmented' + * means that the partial sums are broken into distinct segments. In other + * words, within each segment a separate inclusive scan operation is computed. + * Refer to the code sample below for example usage. + * + * This version of \p inclusive_scan_by_key uses the binary predicate + * \p binary_pred to compare adjacent keys. Specifically, consecutive iterators + * i and i+1 in the range [first1, last1) + * belong to the same segment if binary_pred(*i, *(i+1)) is true, and belong to + * different segments otherwise. + * + * This version of \p inclusive_scan_by_key assumes \c plus as the associative + * operator used to perform the prefix sum. When the input and output sequences + * are the same, the scan is performed in-place. + * + * \param first1 The beginning of the key sequence. + * \param last1 The end of the key sequence. + * \param first2 The beginning of the input value sequence. 
+ * \param result The beginning of the output value sequence. + * \param binary_pred The binary predicate used to determine equality of keys. + * \return The end of the output sequence. + * + * \tparam InputIterator1 is a model of Input Iterator + * \tparam InputIterator2 is a model of Input Iterator + * and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator, + * and if \c x and \c y are objects of \c OutputIterator's \c value_type, then + * binary_op(x,y) is defined. + * \tparam BinaryPredicate is a model of Binary Predicate. + * + * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1)) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p inclusive_scan_by_key + * + * \code + * #include <thrust/scan.h> + * #include <thrust/functional.h> + * + * int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; + * + * thrust::equal_to<int> binary_pred; + * + * thrust::inclusive_scan_by_key(keys, keys + 10, data, data, binary_pred); // in-place scan + * + * // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4}; + * \endcode + * + * \see inclusive_scan + * \see exclusive_scan_by_key + * + */ +template + OutputIterator inclusive_scan_by_key(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryPredicate binary_pred); + + +/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix + * sum operation. The term 'inclusive' means that each result includes + * the corresponding input operand in the partial sum. The term 'segmented' + * means that the partial sums are broken into distinct segments. 
In other + * words, within each segment a separate inclusive scan operation is computed. + * Refer to the code sample below for example usage. + * + * This version of \p inclusive_scan_by_key uses the binary predicate + * \c pred to compare adjacent keys. Specifically, consecutive iterators + * i and i+1 in the range [first1, last1) + * belong to the same segment if binary_pred(*i, *(i+1)) is true, and belong to + * different segments otherwise. + * + * This version of \p inclusive_scan_by_key uses the associative operator + * \c binary_op to perform the prefix sum. When the input and output sequences + * are the same, the scan is performed in-place. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the key sequence. + * \param last1 The end of the key sequence. + * \param first2 The beginning of the input value sequence. + * \param result The beginning of the output value sequence. + * \param binary_pred The binary predicate used to determine equality of keys. + * \param binary_op The associatve operator used to 'sum' values. + * \return The end of the output sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator + * \tparam InputIterator2 is a model of Input Iterator + * and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator, + * and if \c x and \c y are objects of \c OutputIterator's \c value_type, then + * binary_op(x,y) is defined. + * \tparam BinaryPredicate is a model of Binary Predicate. + * \tparam AssociativeOperator is a model of Binary Function + * and \c AssociativeOperator's \c result_type is + * convertible to \c OutputIterator's \c value_type. 
+ * + * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1)) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p inclusive_scan_by_key using the \p thrust::host + * execution policy for parallelization: + * + * \code + * #include <thrust/scan.h> + * #include <thrust/functional.h> + * #include <thrust/execution_policy.h> + * ... + * + * int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; + * + * thrust::equal_to<int> binary_pred; + * thrust::plus<int> binary_op; + * + * thrust::inclusive_scan_by_key(thrust::host, keys, keys + 10, data, data, binary_pred, binary_op); // in-place scan + * + * // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4}; + * \endcode + * + * \see inclusive_scan + * \see exclusive_scan_by_key + * + */ +template + OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryPredicate binary_pred, + AssociativeOperator binary_op); + + +/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix + * sum operation. The term 'inclusive' means that each result includes + * the corresponding input operand in the partial sum. The term 'segmented' + * means that the partial sums are broken into distinct segments. In other + * words, within each segment a separate inclusive scan operation is computed. + * Refer to the code sample below for example usage. + * + * This version of \p inclusive_scan_by_key uses the binary predicate + * \p binary_pred to compare adjacent keys. Specifically, consecutive iterators + * i and i+1 in the range [first1, last1) + * belong to the same segment if binary_pred(*i, *(i+1)) is true, and belong to + * different segments otherwise. 
+ * + * This version of \p inclusive_scan_by_key uses the associative operator + * \c binary_op to perform the prefix sum. When the input and output sequences + * are the same, the scan is performed in-place. + * + * \param first1 The beginning of the key sequence. + * \param last1 The end of the key sequence. + * \param first2 The beginning of the input value sequence. + * \param result The beginning of the output value sequence. + * \param binary_pred The binary predicate used to determine equality of keys. + * \param binary_op The associatve operator used to 'sum' values. + * \return The end of the output sequence. + * + * \tparam InputIterator1 is a model of Input Iterator + * \tparam InputIterator2 is a model of Input Iterator + * and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator, + * and if \c x and \c y are objects of \c OutputIterator's \c value_type, then + * binary_op(x,y) is defined. + * \tparam BinaryPredicate is a model of Binary Predicate. + * \tparam AssociativeOperator is a model of Binary Function + * and \c AssociativeOperator's \c result_type is + * convertible to \c OutputIterator's \c value_type. + * + * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1) and range [result, result + (last1 - first1)) shall not overlap otherwise. 
+ * + * The following code snippet demonstrates how to use \p inclusive_scan_by_key: + * + * \code + * #include <thrust/scan.h> + * #include <thrust/functional.h> + * + * int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; + * + * thrust::equal_to<int> binary_pred; + * thrust::plus<int> binary_op; + * + * thrust::inclusive_scan_by_key(keys, keys + 10, data, data, binary_pred, binary_op); // in-place scan + * + * // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4}; + * \endcode + * + * \see inclusive_scan + * \see exclusive_scan_by_key + * + */ +template + OutputIterator inclusive_scan_by_key(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryPredicate binary_pred, + AssociativeOperator binary_op); + + +/*! \p exclusive_scan_by_key computes an exclusive segmented prefix sum. + * + * This version of \p exclusive_scan_by_key uses the value \c 0 to + * initialize the exclusive scan operation. + * + * This version of \p exclusive_scan_by_key assumes \c plus as the associative + * operator used to perform the prefix sum. When the input and output sequences + * are the same, the scan is performed in-place. + * + * This version of \p exclusive_scan_by_key assumes \c equal_to as the binary + * predicate used to compare adjacent keys. Specifically, consecutive iterators + * i and i+1 in the range [first1, last1) + * belong to the same segment if *i == *(i+1), and belong to + * different segments otherwise. + * + * Refer to the most general form of \p exclusive_scan_by_key for additional details. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the key sequence. + * \param last1 The end of the key sequence. + * \param first2 The beginning of the input value sequence. + * \param result The beginning of the output value sequence. 
+ * + * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1)) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p exclusive_scan_by_key using the + * \p thrust::host execution policy for parallelization: + * + * \code + * #include <thrust/scan.h> + * #include <thrust/execution_policy.h> + * ... + * + * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; + * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + * + * thrust::exclusive_scan_by_key(thrust::host, keys, keys + 10, vals, vals); // in-place scan + * + * // vals is now {0, 1, 2, 0, 1, 0, 0, 1, 2, 3}; + * \endcode + * + * \see exclusive_scan + * + */ +template + OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result); + + +/*! \p exclusive_scan_by_key computes an exclusive segmented prefix sum. + * + * This version of \p exclusive_scan_by_key uses the value \c 0 to + * initialize the exclusive scan operation. + * + * This version of \p exclusive_scan_by_key assumes \c plus as the associative + * operator used to perform the prefix sum. When the input and output sequences + * are the same, the scan is performed in-place. + * + * This version of \p exclusive_scan_by_key assumes \c equal_to as the binary + * predicate used to compare adjacent keys. Specifically, consecutive iterators + * i and i+1 in the range [first1, last1) + * belong to the same segment if *i == *(i+1), and belong to + * different segments otherwise. + * + * Refer to the most general form of \p exclusive_scan_by_key for additional details. + * + * \param first1 The beginning of the key sequence. + * \param last1 The end of the key sequence. + * \param first2 The beginning of the input value sequence. 
+ * \param result The beginning of the output value sequence. + * + * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1)) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p exclusive_scan_by_key. + * + * \code + * #include <thrust/scan.h> + * + * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; + * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + * + * thrust::exclusive_scan_by_key(keys, keys + 10, vals, vals); // in-place scan + * + * // vals is now {0, 1, 2, 0, 1, 0, 0, 1, 2, 3}; + * \endcode + * + * \see exclusive_scan + * + */ +template + OutputIterator exclusive_scan_by_key(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result); + + +/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix + * sum operation. The term 'exclusive' means that each result does not include + * the corresponding input operand in the partial sum. The term 'segmented' + * means that the partial sums are broken into distinct segments. In other + * words, within each segment a separate exclusive scan operation is computed. + * Refer to the code sample below for example usage. + * + * This version of \p exclusive_scan_by_key uses the value \c init to + * initialize the exclusive scan operation. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the key sequence. + * \param last1 The end of the key sequence. + * \param first2 The beginning of the input value sequence. + * \param result The beginning of the output value sequence. + * \param init The initial value of the exclusive sum. + * \return The end of the output sequence. 
+ * + * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1)) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p exclusive_scan_by_key using the \p + * thrust::host execution policy for parallelization: + * + * \code + * #include <thrust/scan.h> + * #include <thrust/functional.h> + * #include <thrust/execution_policy.h> + * ... + * + * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; + * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + * + * int init = 5; + * + * thrust::exclusive_scan_by_key(thrust::host, keys, keys + 10, vals, vals, init); // in-place scan + * + * // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8}; + * \endcode + * + * \see exclusive_scan + * \see inclusive_scan_by_key + * + */ +template + OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + T init); + + +/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix + * sum operation. The term 'exclusive' means that each result does not include + * the corresponding input operand in the partial sum. The term 'segmented' + * means that the partial sums are broken into distinct segments. In other + * words, within each segment a separate exclusive scan operation is computed. + * Refer to the code sample below for example usage. + * + * This version of \p exclusive_scan_by_key uses the value \c init to + * initialize the exclusive scan operation. + * + * \param first1 The beginning of the key sequence. + * \param last1 The end of the key sequence. + * \param first2 The beginning of the input value sequence. + * \param result The beginning of the output value sequence. + * \param init The initial value of the exclusive sum. 
+ * \return The end of the output sequence. + * + * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1)) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p exclusive_scan_by_key: + * + * \code + * #include <thrust/scan.h> + * #include <thrust/functional.h> + * + * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; + * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + * + * int init = 5; + * + * thrust::exclusive_scan_by_key(keys, keys + 10, vals, vals, init); // in-place scan + * + * // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8}; + * \endcode + * + * \see exclusive_scan + * \see inclusive_scan_by_key + * + */ +template + OutputIterator exclusive_scan_by_key(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + T init); + + +/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix + * sum operation. The term 'exclusive' means that each result does not include + * the corresponding input operand in the partial sum. The term 'segmented' + * means that the partial sums are broken into distinct segments. In other + * words, within each segment a separate exclusive scan operation is computed. + * Refer to the code sample below for example usage. + * + * This version of \p exclusive_scan_by_key uses the value \c init to + * initialize the exclusive scan operation. + * + * This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred + * to compare adjacent keys. Specifically, consecutive iterators i and + * i+1 in the range [first1, last1) belong to the same segment if + * binary_pred(*i, *(i+1)) is true, and belong to different segments otherwise. + * + * The algorithm's execution is parallelized as determined by \p exec. 
+ * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the key sequence. + * \param last1 The end of the key sequence. + * \param first2 The beginning of the input value sequence. + * \param result The beginning of the output value sequence. + * \param init The initial value of the exclusive sum. + * \param binary_pred The binary predicate used to determine equality of keys. + * \return The end of the output sequence. + * + * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1)) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p exclusive_scan_by_key using the + * \p thrust::host execution policy for parallelization: + * + * \code + * #include <thrust/scan.h> + * #include <thrust/functional.h> + * #include <thrust/execution_policy.h> + * ... + * + * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; + * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + * + * int init = 5; + * + * thrust::equal_to<int> binary_pred; + * + * thrust::exclusive_scan_by_key(thrust::host, keys, keys + 10, vals, vals, init, binary_pred); // in-place scan + * + * // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8}; + * \endcode + * + * \see exclusive_scan + * \see inclusive_scan_by_key + * + */ +template + OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + T init, + BinaryPredicate binary_pred); + + +/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix + * sum operation. The term 'exclusive' means that each result does not include + * the corresponding input operand in the partial sum. The term 'segmented' + * means that the partial sums are broken into distinct segments. 
In other + * words, within each segment a separate exclusive scan operation is computed. + * Refer to the code sample below for example usage. + * + * This version of \p exclusive_scan_by_key uses the value \c init to + * initialize the exclusive scan operation. + * + * This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred + * to compare adjacent keys. Specifically, consecutive iterators i and + * i+1 in the range [first1, last1) belong to the same segment if + * binary_pred(*i, *(i+1)) is true, and belong to different segments otherwise. + * + * \param first1 The beginning of the key sequence. + * \param last1 The end of the key sequence. + * \param first2 The beginning of the input value sequence. + * \param result The beginning of the output value sequence. + * \param init The initial value of the exclusive sum. + * \param binary_pred The binary predicate used to determine equality of keys. + * \return The end of the output sequence. + * + * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1)) and the range [result, result + (last1 - first1)) shall not overlap otherwise. 
+ * + * The following code snippet demonstrates how to use \p exclusive_scan_by_key: + * + * \code + * #include <thrust/scan.h> + * #include <thrust/functional.h> + * + * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; + * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + * + * int init = 5; + * + * thrust::equal_to<int> binary_pred; + * + * thrust::exclusive_scan_by_key(keys, keys + 10, vals, vals, init, binary_pred); // in-place scan + * + * // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8}; + * \endcode + * + * \see exclusive_scan + * \see inclusive_scan_by_key + * + */ +template + OutputIterator exclusive_scan_by_key(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + T init, + BinaryPredicate binary_pred); + + +/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix + * sum operation. The term 'exclusive' means that each result does not include + * the corresponding input operand in the partial sum. The term 'segmented' + * means that the partial sums are broken into distinct segments. In other + * words, within each segment a separate exclusive scan operation is computed. + * Refer to the code sample below for example usage. + * + * This version of \p exclusive_scan_by_key uses the value \c init to + * initialize the exclusive scan operation. + * + * This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred + * to compare adjacent keys. Specifically, consecutive iterators i and + * i+1 in the range [first1, last1) belong to the same segment if + * binary_pred(*i, *(i+1)) is true, and belong to different segments otherwise. + * + * This version of \p exclusive_scan_by_key uses the associative operator + * \c binary_op to perform the prefix sum. When the input and output sequences + * are the same, the scan is performed in-place. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. 
+ * \param first1 The beginning of the key sequence. + * \param last1 The end of the key sequence. + * \param first2 The beginning of the input value sequence. + * \param result The beginning of the output value sequence. + * \param init The initial value of the exclusive sum. + * \param binary_pred The binary predicate used to determine equality of keys. + * \param binary_op The associative operator used to 'sum' values. + * \return The end of the output sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator + * \tparam InputIterator2 is a model of Input Iterator + * and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator, + * and if \c x and \c y are objects of \c OutputIterator's \c value_type, then + * binary_op(x,y) is defined. + * \tparam T is convertible to \c OutputIterator's \c value_type. + * \tparam BinaryPredicate is a model of Binary Predicate. + * \tparam AssociativeOperator is a model of Binary Function + * and \c AssociativeOperator's \c result_type is convertible to \c OutputIterator's \c value_type. + * + * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1)) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p exclusive_scan_by_key using the + * \p thrust::host execution policy for parallelization: + * + * \code + * #include <thrust/scan.h> + * #include <thrust/functional.h> + * #include <thrust/execution_policy.h> + * ... 
+ * + * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; + * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + * + * int init = 5; + * + * thrust::equal_to<int> binary_pred; + * thrust::plus<int> binary_op; + * + * thrust::exclusive_scan_by_key(thrust::host, keys, keys + 10, vals, vals, init, binary_pred, binary_op); // in-place scan + * + * // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8}; + * \endcode + * + * \see exclusive_scan + * \see inclusive_scan_by_key + * + */ +template + OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + T init, + BinaryPredicate binary_pred, + AssociativeOperator binary_op); + + +/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix + * sum operation. The term 'exclusive' means that each result does not include + * the corresponding input operand in the partial sum. The term 'segmented' + * means that the partial sums are broken into distinct segments. In other + * words, within each segment a separate exclusive scan operation is computed. + * Refer to the code sample below for example usage. + * + * This version of \p exclusive_scan_by_key uses the value \c init to + * initialize the exclusive scan operation. + * + * This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred + * to compare adjacent keys. Specifically, consecutive iterators i and + * i+1 in the range [first1, last1) belong to the same segment if + * binary_pred(*i, *(i+1)) is true, and belong to different segments otherwise. + * + * This version of \p exclusive_scan_by_key uses the associative operator + * \c binary_op to perform the prefix sum. When the input and output sequences + * are the same, the scan is performed in-place. + * + * \param first1 The beginning of the key sequence. + * \param last1 The end of the key sequence. + * \param first2 The beginning of the input value sequence. 
+ * \param result The beginning of the output value sequence. + * \param init The initial value of the exclusive sum. + * \param binary_pred The binary predicate used to determine equality of keys. + * \param binary_op The associative operator used to 'sum' values. + * \return The end of the output sequence. + * + * \tparam InputIterator1 is a model of Input Iterator + * \tparam InputIterator2 is a model of Input Iterator + * and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type. + * \tparam OutputIterator is a model of Output Iterator, + * and if \c x and \c y are objects of \c OutputIterator's \c value_type, then + * binary_op(x,y) is defined. + * \tparam T is convertible to \c OutputIterator's \c value_type. + * \tparam BinaryPredicate is a model of Binary Predicate. + * \tparam AssociativeOperator is a model of Binary Function + * and \c AssociativeOperator's \c result_type is convertible to \c OutputIterator's \c value_type. + * + * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. + * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1)) and the range [result, result + (last1 - first1)) shall not overlap otherwise. 
+ * + * The following code snippet demonstrates how to use \p exclusive_scan_by_key: + * + * \code + * #include <thrust/scan.h> + * #include <thrust/functional.h> + * + * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; + * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + * + * int init = 5; + * + * thrust::equal_to<int> binary_pred; + * thrust::plus<int> binary_op; + * + * thrust::exclusive_scan_by_key(keys, keys + 10, vals, vals, init, binary_pred, binary_op); // in-place scan + * + * // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8}; + * \endcode + * + * \see exclusive_scan + * \see inclusive_scan_by_key + * + */ +template + OutputIterator exclusive_scan_by_key(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + T init, + BinaryPredicate binary_pred, + AssociativeOperator binary_op); + + +/*! \} // end segmentedprefixsums + */ + + +/*! \} // end prefix sums + */ + + +} // end namespace thrust + +#include + diff --git a/compat/thrust/scatter.h b/compat/thrust/scatter.h new file mode 100644 index 0000000..59604ca --- /dev/null +++ b/compat/thrust/scatter.h @@ -0,0 +1,420 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file scatter.h + * \brief Irregular copying to a destination range + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \addtogroup scattering + * \ingroup copying + * \{ + */ + + +/*! 
\p scatter copies elements from a source range into an output array + * according to a map. For each iterator \c i in the range [\p first, \p last), + * the value \c *i is assigned to output[*(map + (i - first))]. The + * output iterator must permit random access. If the same index + * appears more than once in the range [map, map + (last - first)), + * the result is undefined. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first Beginning of the sequence of values to scatter. + * \param last End of the sequence of values to scatter. + * \param map Beginning of the sequence of output indices. + * \param result Destination of the source elements. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c value_type. + * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. + * \tparam RandomAccessIterator must be a model of Random Access iterator. + * + * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[first,last)` for all iterators `i` in the range `[map,map + (last - first))`. + * + * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[map,map + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. + * + * \pre The expression `result[*i]` shall be valid for all iterators in the range `[map,map + (last - first))`. + * + * The following code snippet demonstrates how to use \p scatter to + * reorder a range using the \p thrust::device execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... 
+ * // mark even indices with a 1; odd indices with a 0 + * int values[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; + * thrust::device_vector d_values(values, values + 10); + * + * // scatter all even indices into the first half of the + * // range, and odd indices vice versa + * int map[10] = {0, 5, 1, 6, 2, 7, 3, 8, 4, 9}; + * thrust::device_vector d_map(map, map + 10); + * + * thrust::device_vector d_output(10); + * thrust::scatter(thrust::device, + * d_values.begin(), d_values.end(), + * d_map.begin(), d_output.begin()); + * // d_output is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0} + * \endcode + * + * \note \p scatter is the inverse of thrust::gather. + */ +template + void scatter(const thrust::detail::execution_policy_base &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 map, + RandomAccessIterator result); + + +/*! \p scatter copies elements from a source range into an output array + * according to a map. For each iterator \c i in the range [\p first, \p last), + * the value \c *i is assigned to output[*(map + (i - first))]. The + * output iterator must permit random access. If the same index + * appears more than once in the range [map, map + (last - first)), + * the result is undefined. + * + * \param first Beginning of the sequence of values to scatter. + * \param last End of the sequence of values to scatter. + * \param map Beginning of the sequence of output indices. + * \param result Destination of the source elements. + * + * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c value_type. + * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. + * \tparam RandomAccessIterator must be a model of Random Access iterator. 
+ * + * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[first,last)` for all iterators `i` in the range `[map,map + (last - first))`. + * + * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[map,map + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. + * + * \pre The expression `result[*i]` shall be valid for all iterators in the range `[map,map + (last - first))`. + * + * The following code snippet demonstrates how to use \p scatter to + * reorder a range. + * + * \code + * #include + * #include + * ... + * // mark even indices with a 1; odd indices with a 0 + * int values[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; + * thrust::device_vector d_values(values, values + 10); + * + * // scatter all even indices into the first half of the + * // range, and odd indices vice versa + * int map[10] = {0, 5, 1, 6, 2, 7, 3, 8, 4, 9}; + * thrust::device_vector d_map(map, map + 10); + * + * thrust::device_vector d_output(10); + * thrust::scatter(d_values.begin(), d_values.end(), + * d_map.begin(), d_output.begin()); + * // d_output is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0} + * \endcode + * + * \note \p scatter is the inverse of thrust::gather. + */ +template + void scatter(InputIterator1 first, + InputIterator1 last, + InputIterator2 map, + RandomAccessIterator result); + + +/*! \p scatter_if conditionally copies elements from a source range into an + * output array according to a map. For each iterator \c i in the + * range [first, last) such that *(stencil + (i - first)) is + * true, the value \c *i is assigned to output[*(map + (i - first))]. + * The output iterator must permit random access. If the same index + * appears more than once in the range [map, map + (last - first)) + * the result is undefined. + * + * The algorithm's execution is parallelized as determined by \p exec. 
+ * + * \param exec The execution policy to use for parallelization. + * \param first Beginning of the sequence of values to scatter. + * \param last End of the sequence of values to scatter. + * \param map Beginning of the sequence of output indices. + * \param stencil Beginning of the sequence of predicate values. + * \param output Beginning of the destination range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c value_type. + * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. + * \tparam InputIterator3 must be a model of Input Iterator and \c InputIterator3's \c value_type must be convertible to \c bool. + * \tparam RandomAccessIterator must be a model of Random Access iterator. + * + * \pre The iterator `output + i` shall not refer to any element referenced by any iterator `j` in the range `[first,last)` for all iterators `i` in the range `[map,map + (last - first))`. + * + * \pre The iterator `output + i` shall not refer to any element referenced by any iterator `j` in the range `[map,map + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. + * + * \pre The iterator `output + i` shall not refer to any element referenced by any iterator `j` in the range `[stencil,stencil + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. + * + * \pre The expression `output[*i]` shall be valid for all iterators `i` in the range `[map,map + (last - first))` for which the following condition holds: `*(stencil + i) != false`. + * + * \code + * #include <thrust/scatter.h> + * #include <thrust/execution_policy.h> + * ... 
+ * int V[8] = {10, 20, 30, 40, 50, 60, 70, 80}; + * int M[8] = {0, 5, 1, 6, 2, 7, 3, 4}; + * int S[8] = {1, 0, 1, 0, 1, 0, 1, 0}; + * int D[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + * + * thrust::scatter_if(thrust::host, V, V + 8, M, S, D); + * + * // D contains [10, 30, 50, 70, 0, 0, 0, 0]; + * \endcode + * + * \note \p scatter_if is the inverse of thrust::gather_if. + */ +template + void scatter_if(const thrust::detail::execution_policy_base &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 map, + InputIterator3 stencil, + RandomAccessIterator output); + + +/*! \p scatter_if conditionally copies elements from a source range into an + * output array according to a map. For each iterator \c i in the + * range [first, last) such that *(stencil + (i - first)) is + * true, the value \c *i is assigned to output[*(map + (i - first))]. + * The output iterator must permit random access. If the same index + * appears more than once in the range [map, map + (last - first)) + * the result is undefined. + * + * \param first Beginning of the sequence of values to scatter. + * \param last End of the sequence of values to scatter. + * \param map Beginning of the sequence of output indices. + * \param stencil Beginning of the sequence of predicate values. + * \param output Beginning of the destination range. + * + * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c value_type. + * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. + * \tparam InputIterator3 must be a model of Input Iterator and \c InputIterator3's \c value_type must be convertible to \c bool. + * \tparam RandomAccessIterator must be a model of Random Access iterator. 
+ * + * \pre The iterator `output + i` shall not refer to any element referenced by any iterator `j` in the range `[first,last)` for all iterators `i` in the range `[map,map + (last - first))`. + * + * \pre The iterator `output + i` shall not refer to any element referenced by any iterator `j` in the range `[map,map + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. + * + * \pre The iterator `output + i` shall not refer to any element referenced by any iterator `j` in the range `[stencil,stencil + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. + * + * \pre The expression `output[*i]` shall be valid for all iterators `i` in the range `[map,map + (last - first))` for which the following condition holds: `*(stencil + i) != false`. + * + * \code + * #include <thrust/scatter.h> + * ... + * int V[8] = {10, 20, 30, 40, 50, 60, 70, 80}; + * int M[8] = {0, 5, 1, 6, 2, 7, 3, 4}; + * int S[8] = {1, 0, 1, 0, 1, 0, 1, 0}; + * int D[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + * + * thrust::scatter_if(V, V + 8, M, S, D); + * + * // D contains [10, 30, 50, 70, 0, 0, 0, 0]; + * \endcode + * + * \note \p scatter_if is the inverse of thrust::gather_if. + */ +template + void scatter_if(InputIterator1 first, + InputIterator1 last, + InputIterator2 map, + InputIterator3 stencil, + RandomAccessIterator output); + + +/*! \p scatter_if conditionally copies elements from a source range into an + * output array according to a map. For each iterator \c i in the + * range [first, last) such that pred(*(stencil + (i - first))) is + * \c true, the value \c *i is assigned to output[*(map + (i - first))]. + * The output iterator must permit random access. If the same index + * appears more than once in the range [map, map + (last - first)) + * the result is undefined. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. 
+ * \param first Beginning of the sequence of values to scatter. + * \param last End of the sequence of values to scatter. + * \param map Beginning of the sequence of output indices. + * \param stencil Beginning of the sequence of predicate values. + * \param output Beginning of the destination range. + * \param pred Predicate to apply to the stencil values. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c value_type. + * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. + * \tparam InputIterator3 must be a model of Input Iterator and \c InputIterator3's \c value_type must be convertible to \c Predicate's \c argument_type. + * \tparam RandomAccessIterator must be a model of Random Access iterator. + * \tparam Predicate must be a model of Predicate. + * + * \pre The iterator `output + *i` shall not refer to any element referenced by any iterator `j` in the range `[first,last)` for all iterators `i` in the range `[map,map + (last - first))`. + * + * \pre The iterator `output + *i` shall not refer to any element referenced by any iterator `j` in the range `[map,map + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. + * + * \pre The iterator `output + *i` shall not refer to any element referenced by any iterator `j` in the range `[stencil,stencil + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. + * + * \pre The expression `output[*i]` shall be valid for all iterators `i` in the range `[map,map + (last - first))` for which the following condition holds: `pred(*(stencil + (i - map))) != false`.
+ * + * \code + * #include + * #include + * + * struct is_even + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return (x % 2) == 0; + * } + * }; + * + * ... + * + * int V[8] = {10, 20, 30, 40, 50, 60, 70, 80}; + * int M[8] = {0, 5, 1, 6, 2, 7, 3, 4}; + * int S[8] = {2, 1, 2, 1, 2, 1, 2, 1}; + * int D[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + * + * is_even pred; + * thrust::scatter_if(thrust::host, V, V + 8, M, S, D, pred); + * + * // D contains [10, 30, 50, 70, 0, 0, 0, 0]; + * \endcode + * + * \note \p scatter_if is the inverse of thrust::gather_if. + */ +template + void scatter_if(const thrust::detail::execution_policy_base &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 map, + InputIterator3 stencil, + RandomAccessIterator output, + Predicate pred); + + +/*! \p scatter_if conditionally copies elements from a source range into an + * output array according to a map. For each iterator \c i in the + * range [first, last) such that pred(*(stencil + (i - first))) is + * \c true, the value \c *i is assigned to output[*(map + (i - first))]. + * The output iterator must permit random access. If the same index + * appears more than once in the range [map, map + (last - first)) + * the result is undefined. + * + * \param first Beginning of the sequence of values to scatter. + * \param last End of the sequence of values to scatter. + * \param map Beginning of the sequence of output indices. + * \param stencil Beginning of the sequence of predicate values. + * \param output Beginning of the destination range. + * \param pred Predicate to apply to the stencil values. + * + * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c value_type. + * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. 
+ * \tparam InputIterator3 must be a model of Input Iterator and \c InputIterator3's \c value_type must be convertible to \c Predicate's \c argument_type. + * \tparam RandomAccessIterator must be a model of Random Access iterator. + * \tparam Predicate must be a model of Predicate. + * + * \pre The iterator `output + *i` shall not refer to any element referenced by any iterator `j` in the range `[first,last)` for all iterators `i` in the range `[map,map + (last - first))`. + * + * \pre The iterator `output + *i` shall not refer to any element referenced by any iterator `j` in the range `[map,map + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. + * + * \pre The iterator `output + *i` shall not refer to any element referenced by any iterator `j` in the range `[stencil,stencil + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. + * + * \pre The expression `output[*i]` shall be valid for all iterators `i` in the range `[map,map + (last - first))` for which the following condition holds: `pred(*(stencil + (i - map))) != false`. + * + * \code + * #include <thrust/scatter.h> + * + * struct is_even + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return (x % 2) == 0; + * } + * }; + * + * ... + * + * int V[8] = {10, 20, 30, 40, 50, 60, 70, 80}; + * int M[8] = {0, 5, 1, 6, 2, 7, 3, 4}; + * int S[8] = {2, 1, 2, 1, 2, 1, 2, 1}; + * int D[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + * + * is_even pred; + * thrust::scatter_if(V, V + 8, M, S, D, pred); + * + * // D contains [10, 30, 50, 70, 0, 0, 0, 0]; + * \endcode + * + * \note \p scatter_if is the inverse of thrust::gather_if. + */ +template + void scatter_if(InputIterator1 first, + InputIterator1 last, + InputIterator2 map, + InputIterator3 stencil, + RandomAccessIterator output, + Predicate pred); + + +/*!
\} // end scattering + */ + + +} // end namespace thrust + +#include + diff --git a/compat/thrust/sequence.h b/compat/thrust/sequence.h new file mode 100644 index 0000000..6c54a5b --- /dev/null +++ b/compat/thrust/sequence.h @@ -0,0 +1,293 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file sequence.h + * \brief Fills a range with a sequence of numbers + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \addtogroup transformations + * \{ + */ + + +/*! \p sequence fills the range [first, last) with a sequence of numbers. + * + * For each iterator \c i in the range [first, last), this version of + * \p sequence performs the assignment *i = (i - first). + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable, + * and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then x + y is defined, + * and if \c T is \p ForwardIterator's \c value_type, then T(0) is defined. 
+ * + * The following code snippet demonstrates how to use \p sequence to fill a range + * with a sequence of numbers using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * const int N = 10; + * int A[N]; + * thrust::sequence(thrust::host, A, A + 10); + * // A is now {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} + * \endcode + * + * \note Unlike the similar C++ STL function \c std::iota, \p sequence offers no + * guarantee on order of execution. + * + * \see http://www.sgi.com/tech/stl/iota.html + */ +template + void sequence(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last); + + +/*! \p sequence fills the range [first, last) with a sequence of numbers. + * + * For each iterator \c i in the range [first, last), this version of + * \p sequence performs the assignment *i = (i - first). + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable, + * and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then x + y is defined, + * and if \c T is \p ForwardIterator's \c value_type, then T(0) is defined. + * + * The following code snippet demonstrates how to use \p sequence to fill a range + * with a sequence of numbers. + * + * \code + * #include + * ... + * const int N = 10; + * int A[N]; + * thrust::sequence(A, A + 10); + * // A is now {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} + * \endcode + * + * \note Unlike the similar C++ STL function \c std::iota, \p sequence offers no + * guarantee on order of execution. + * + * \see http://www.sgi.com/tech/stl/iota.html + */ +template + void sequence(ForwardIterator first, + ForwardIterator last); + + +/*! \p sequence fills the range [first, last) with a sequence of numbers. 
+ * + * For each iterator \c i in the range [first, last), this version of + * \p sequence performs the assignment *i = init + (i - first). + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param init The first value of the sequence of numbers. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable, + * and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then x + y is defined, + * and if \c T is \p ForwardIterator's \c value_type, then T(0) is defined. + * \tparam T is a model of Assignable, + * and \p T is convertible to \p ForwardIterator's \c value_type. + * + * The following code snippet demonstrates how to use \p sequence to fill a range + * with a sequence of numbers starting from the value 1 using the \p thrust::host execution + * policy for parallelization: + * + * \code + * #include + * #include + * ... + * const int N = 10; + * int A[N]; + * thrust::sequence(thrust::host, A, A + 10, 1); + * // A is now {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} + * \endcode + * + * \note Unlike the similar C++ STL function \c std::iota, \p sequence offers no + * guarantee on order of execution. + * + * \see http://www.sgi.com/tech/stl/iota.html + */ +template + void sequence(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + T init); + + +/*! \p sequence fills the range [first, last) with a sequence of numbers. + * + * For each iterator \c i in the range [first, last), this version of + * \p sequence performs the assignment *i = init + (i - first). + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param init The first value of the sequence of numbers. 
+ * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable, + * and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then x + y is defined, + * and if \c T is \p ForwardIterator's \c value_type, then T(0) is defined. + * \tparam T is a model of Assignable, + * and \p T is convertible to \p ForwardIterator's \c value_type. + * + * The following code snippet demonstrates how to use \p sequence to fill a range + * with a sequence of numbers starting from the value 1. + * + * \code + * #include + * ... + * const int N = 10; + * int A[N]; + * thrust::sequence(A, A + 10, 1); + * // A is now {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} + * \endcode + * + * \note Unlike the similar C++ STL function \c std::iota, \p sequence offers no + * guarantee on order of execution. + * + * \see http://www.sgi.com/tech/stl/iota.html + */ +template + void sequence(ForwardIterator first, + ForwardIterator last, + T init); + + +/*! \p sequence fills the range [first, last) with a sequence of numbers. + * + * For each iterator \c i in the range [first, last), this version of + * \p sequence performs the assignment *i = init + step * (i - first). + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param init The first value of the sequence of numbers + * \param step The difference between consecutive elements. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable, + * and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then x + y is defined, + * and if \c T is \p ForwardIterator's \c value_type, then T(0) is defined. + * \tparam T is a model of Assignable, + * and \p T is convertible to \p ForwardIterator's \c value_type. 
+ * + * The following code snippet demonstrates how to use \p sequence to fill a range + * with a sequence of numbers starting from the value 1 with a step size of 3 using the \p thrust::host + * execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * const int N = 10; + * int A[N]; + * thrust::sequence(thrust::host, A, A + 10, 1, 3); + * // A is now {1, 4, 7, 10, 13, 16, 19, 22, 25, 28} + * \endcode + * + * \note Unlike the similar C++ STL function \c std::iota, \p sequence offers no + * guarantee on order of execution. + * + * \see http://www.sgi.com/tech/stl/iota.html + */ +template + void sequence(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + T init, + T step); + + +/*! \p sequence fills the range [first, last) with a sequence of numbers. + * + * For each iterator \c i in the range [first, last), this version of + * \p sequence performs the assignment *i = init + step * (i - first). + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param init The first value of the sequence of numbers + * \param step The difference between consecutive elements. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable, + * and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then x + y is defined, + * and if \c T is \p ForwardIterator's \c value_type, then T(0) is defined. + * \tparam T is a model of Assignable, + * and \p T is convertible to \p ForwardIterator's \c value_type. + * + * The following code snippet demonstrates how to use \p sequence to fill a range + * with a sequence of numbers starting from the value 1 with a step size of 3. + * + * \code + * #include + * ... 
+ * const int N = 10; + * int A[N]; + * thrust::sequence(A, A + 10, 1, 3); + * // A is now {1, 4, 7, 10, 13, 16, 19, 22, 25, 28} + * \endcode + * + * \note Unlike the similar C++ STL function \c std::iota, \p sequence offers no + * guarantee on order of execution. + * + * \see http://www.sgi.com/tech/stl/iota.html + */ +template + void sequence(ForwardIterator first, + ForwardIterator last, + T init, + T step); + + +/*! \} // end transformations + */ + + +} // end namespace thrust + +#include + diff --git a/compat/thrust/set_operations.h b/compat/thrust/set_operations.h new file mode 100644 index 0000000..a7ee624 --- /dev/null +++ b/compat/thrust/set_operations.h @@ -0,0 +1,2947 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file set_operations.h + * \brief Set theoretic operations for sorted ranges + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ + + +/*! \addtogroup set_operations Set Operations + * \ingroup algorithms + * \{ + */ + + +/*! \p set_difference constructs a sorted range that is the set difference of the sorted + * ranges [first1, last1) and [first2, last2). The return value is the + * end of the output range. + * + * In the simplest case, \p set_difference performs the "difference" operation from set + * theory: the output range contains a copy of every element that is contained in + * [first1, last1) and not contained in [first2, last2).
The general case + * is more complicated, because the input ranges may contain duplicate elements. + * The generalization is that if [first1, last1) contains \c m elements + * that are equivalent to each other and if [first2, last2) contains \c n + * elements that are equivalent to them, the last max(m-n,0) elements from + * the range [first1, last1) shall be copied to the output range. + * + * This version of \p set_difference compares elements using \c operator<. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the first input range. + * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. + * \param last2 The end of the second input range. + * \param result The beginning of the output range. + * \return The end of the output range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator.
+ * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. + * \pre The resulting range shall not overlap with either input range. + * + * The following code snippet demonstrates how to use \p set_difference to compute the + * set difference of two sets of integers sorted in ascending order using the \p thrust::host execution + * policy for parallelization: + * + * \code + * #include <thrust/set_operations.h> + * #include <thrust/execution_policy.h> + * ... + * int A1[7] = {0, 1, 3, 4, 5, 6, 9}; + * int A2[5] = {1, 3, 5, 7, 9}; + * + * int result[3]; + * + * int *result_end = thrust::set_difference(thrust::host, A1, A1 + 7, A2, A2 + 5, result); + * // result is now {0, 4, 6} + * \endcode + * + * \see http://www.sgi.com/tech/stl/set_difference.html + * \see \p includes + * \see \p set_union + * \see \p set_intersection + * \see \p set_symmetric_difference + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator set_difference(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result); + + +/*! \p set_difference constructs a sorted range that is the set difference of the sorted + * ranges [first1, last1) and [first2, last2). The return value is the + * end of the output range. + * + * In the simplest case, \p set_difference performs the "difference" operation from set + * theory: the output range contains a copy of every element that is contained in + * [first1, last1) and not contained in [first2, last2). The general case + * is more complicated, because the input ranges may contain duplicate elements. + * The generalization is that if [first1, last1) contains \c m elements + * that are equivalent to each other and if [first2, last2) contains \c n + * elements that are equivalent to them, the last max(m-n,0) elements from + * the range [first1, last1) shall be copied to the output range.
+ * + * This version of \p set_difference compares elements using \c operator<. + * + * \param first1 The beginning of the first input range. + * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. + * \param last2 The end of the second input range. + * \param result The beginning of the output range. + * \return The end of the output range. + * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. + * \pre The resulting range shall not overlap with either input range. + * + * The following code snippet demonstrates how to use \p set_difference to compute the + * set difference of two sets of integers sorted in ascending order. + * + * \code + * #include <thrust/set_operations.h> + * ...
+ * int A1[7] = {0, 1, 3, 4, 5, 6, 9}; + * int A2[5] = {1, 3, 5, 7, 9}; + * + * int result[3]; + * + * int *result_end = thrust::set_difference(A1, A1 + 7, A2, A2 + 5, result); + * // result is now {0, 4, 6} + * \endcode + * + * \see http://www.sgi.com/tech/stl/set_difference.html + * \see \p includes + * \see \p set_union + * \see \p set_intersection + * \see \p set_symmetric_difference + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator set_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result); + + +/*! \p set_difference constructs a sorted range that is the set difference of the sorted + * ranges [first1, last1) and [first2, last2). The return value is the + * end of the output range. + * + * In the simplest case, \p set_difference performs the "difference" operation from set + * theory: the output range contains a copy of every element that is contained in + * [first1, last1) and not contained in [first2, last2). The general case + * is more complicated, because the input ranges may contain duplicate elements. + * The generalization is that if [first1, last1) contains \c m elements + * that are equivalent to each other and if [first2, last2) contains \c n + * elements that are equivalent to them, the last max(m-n,0) elements from + * the range [first1, last1) shall be copied to the output range. + * + * This version of \p set_difference compares elements using a function object \p comp. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the first input range. + * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. + * \param last2 The end of the second input range. + * \param result The beginning of the output range. + * \param comp Comparison operator.
+ * \return The end of the output range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1's \c value_type is convertible to \p StrictWeakCompare's \c first_argument_type, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2's \c value_type is convertible to \p StrictWeakCompare's \c second_argument_type, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering. + * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. + * \pre The resulting range shall not overlap with either input range. + * + * The following code snippet demonstrates how to use \p set_difference to compute the + * set difference of two sets of integers sorted in descending order using the \p thrust::host execution + * policy for parallelization: + * + * \code + * #include <thrust/set_operations.h> + * #include <thrust/functional.h> + * #include <thrust/execution_policy.h> + * ... + * int A1[7] = {9, 6, 5, 4, 3, 1, 0}; + * int A2[5] = {9, 7, 5, 3, 1}; + * + * int result[3]; + * + * int *result_end = thrust::set_difference(thrust::host, A1, A1 + 7, A2, A2 + 5, result, thrust::greater<int>()); + * // result is now {6, 4, 0} + * \endcode + * + * \see http://www.sgi.com/tech/stl/set_difference.html + * \see \p includes + * \see \p set_union + * \see \p set_intersection + * \see \p set_symmetric_difference + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator set_difference(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakCompare comp); + + +/*!
\p set_difference constructs a sorted range that is the set difference of the sorted + * ranges [first1, last1) and [first2, last2). The return value is the + * end of the output range. + * + * In the simplest case, \p set_difference performs the "difference" operation from set + * theory: the output range contains a copy of every element that is contained in + * [first1, last1) and not contained in [first2, last2). The general case + * is more complicated, because the input ranges may contain duplicate elements. + * The generalization is that if [first1, last1) contains \c m elements + * that are equivalent to each other and if [first2, last2) contains \c n + * elements that are equivalent to them, the last max(m-n,0) elements from + * the range [first1, last1) shall be copied to the output range. + * + * This version of \p set_difference compares elements using a function object \p comp. + * + * \param first1 The beginning of the first input range. + * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. + * \param last2 The end of the second input range. + * \param result The beginning of the output range. + * \param comp Comparison operator. + * \return The end of the output range. + * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1's \c value_type is convertible to \p StrictWeakCompare's \c first_argument_type, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2's \c value_type is convertible to \p StrictWeakCompare's \c second_argument_type, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering.
+ * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. + * \pre The resulting range shall not overlap with either input range. + * + * The following code snippet demonstrates how to use \p set_difference to compute the + * set difference of two sets of integers sorted in descending order. + * + * \code + * #include <thrust/set_operations.h> + * #include <thrust/functional.h> + * ... + * int A1[7] = {9, 6, 5, 4, 3, 1, 0}; + * int A2[5] = {9, 7, 5, 3, 1}; + * + * int result[3]; + * + * int *result_end = thrust::set_difference(A1, A1 + 7, A2, A2 + 5, result, thrust::greater<int>()); + * // result is now {6, 4, 0} + * \endcode + * + * \see http://www.sgi.com/tech/stl/set_difference.html + * \see \p includes + * \see \p set_union + * \see \p set_intersection + * \see \p set_symmetric_difference + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator set_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakCompare comp); + + +/*! \p set_intersection constructs a sorted range that is the + * intersection of sorted ranges [first1, last1) and + * [first2, last2). The return value is the end of the + * output range. + * + * In the simplest case, \p set_intersection performs the + * "intersection" operation from set theory: the output range + * contains a copy of every element that is contained in both + * [first1, last1) and [first2, last2). The + * general case is more complicated, because the input ranges may + * contain duplicate elements. The generalization is that if a value + * appears \c m times in [first1, last1) and \c n times in + * [first2, last2) (where \c m may be zero), then it + * appears min(m,n) times in the output range. + * \p set_intersection is stable, meaning both that elements are + * copied from the first range rather than the second, and that the + * relative order of elements in the output range is the same as in + * the first input range.
+ * + * This version of \p set_intersection compares objects using + * \c operator<. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the first input range. + * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. + * \param last2 The end of the second input range. + * \param result The beginning of the output range. + * \return The end of the output range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. + * \pre The resulting range shall not overlap with either input range.
+ * + * The following code snippet demonstrates how to use \p set_intersection to compute the + * set intersection of two sets of integers sorted in ascending order using the \p thrust::host execution + * policy for parallelization: + * + * \code + * #include + * #include + * ... + * int A1[6] = {1, 3, 5, 7, 9, 11}; + * int A2[7] = {1, 1, 2, 3, 5, 8, 13}; + * + * int result[7]; + * + * int *result_end = thrust::set_intersection(thrust::host, A1, A1 + 6, A2, A2 + 7, result); + * // result is now {1, 3, 5} + * \endcode + * + * \see http://www.sgi.com/tech/stl/set_intersection.html + * \see \p includes + * \see \p set_union + * \see \p set_difference + * \see \p set_symmetric_difference + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator set_intersection(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result); + + +/*! \p set_intersection constructs a sorted range that is the + * intersection of sorted ranges [first1, last1) and + * [first2, last2). The return value is the end of the + * output range. + * + * In the simplest case, \p set_intersection performs the + * "intersection" operation from set theory: the output range + * contains a copy of every element that is contained in both + * [first1, last1) and [first2, last2). The + * general case is more complicated, because the input ranges may + * contain duplicate elements. The generalization is that if a value + * appears \c m times in [first1, last1) and \c n times in + * [first2, last2) (where \c m may be zero), then it + * appears min(m,n) times in the output range. + * \p set_intersection is stable, meaning both that elements are + * copied from the first range rather than the second, and that the + * relative order of elements in the output range is the same as in + * the first input range. 
+ * + * This version of \p set_intersection compares objects using + * \c operator<. + * + * \param first1 The beginning of the first input range. + * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. + * \param last2 The end of the second input range. + * \param result The beginning of the output range. + * \return The end of the output range. + * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. + * \pre The resulting range shall not overlap with either input range. + * + * The following code snippet demonstrates how to use \p set_intersection to compute the + * set intersection of two sets of integers sorted in ascending order. + * + * \code + * #include + * ... 
+ * int A1[6] = {1, 3, 5, 7, 9, 11}; + * int A2[7] = {1, 1, 2, 3, 5, 8, 13}; + * + * int result[7]; + * + * int *result_end = thrust::set_intersection(A1, A1 + 6, A2, A2 + 7, result); + * // result is now {1, 3, 5} + * \endcode + * + * \see http://www.sgi.com/tech/stl/set_intersection.html + * \see \p includes + * \see \p set_union + * \see \p set_difference + * \see \p set_symmetric_difference + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator set_intersection(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result); + + +/*! \p set_intersection constructs a sorted range that is the + * intersection of sorted ranges [first1, last1) and + * [first2, last2). The return value is the end of the + * output range. + * + * In the simplest case, \p set_intersection performs the + * "intersection" operation from set theory: the output range + * contains a copy of every element that is contained in both + * [first1, last1) and [first2, last2). The + * general case is more complicated, because the input ranges may + * contain duplicate elements. The generalization is that if a value + * appears \c m times in [first1, last1) and \c n times in + * [first2, last2) (where \c m may be zero), then it + * appears min(m,n) times in the output range. + * \p set_intersection is stable, meaning both that elements are + * copied from the first range rather than the second, and that the + * relative order of elements in the output range is the same as in + * the first input range. + * + * This version of \p set_intersection compares elements using a function object \p comp. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the first input range. + * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. 
+ * \param last2 The end of the second input range. + * \param result The beginning of the output range. + * \param comp Comparison operator. + * \return The end of the output range. + * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. + * \pre The resulting range shall not overlap with either input range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering. + * + * The following code snippet demonstrates how to use \p set_intersection to compute + * the set intersection of sets of integers sorted in descending order using the \p thrust::host execution + * policy for parallelization: + * + * \code + * #include + * #include + * ... 
+ * int A1[6] = {11, 9, 7, 5, 3, 1}; + * int A2[7] = {13, 8, 5, 3, 2, 1, 1}; + * + * int result[3]; + * + * int *result_end = thrust::set_intersection(thrust::host, A1, A1 + 6, A2, A2 + 7, result, thrust::greater<int>()); + * // result is now {5, 3, 1} + * \endcode + * + * \see http://www.sgi.com/tech/stl/set_intersection.html + * \see \p includes + * \see \p set_union + * \see \p set_difference + * \see \p set_symmetric_difference + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator set_intersection(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakCompare comp); + + +/*! \p set_intersection constructs a sorted range that is the + * intersection of sorted ranges [first1, last1) and + * [first2, last2). The return value is the end of the + * output range. + * + * In the simplest case, \p set_intersection performs the + * "intersection" operation from set theory: the output range + * contains a copy of every element that is contained in both + * [first1, last1) and [first2, last2). The + * general case is more complicated, because the input ranges may + * contain duplicate elements. The generalization is that if a value + * appears \c m times in [first1, last1) and \c n times in + * [first2, last2) (where \c m may be zero), then it + * appears min(m,n) times in the output range. + * \p set_intersection is stable, meaning both that elements are + * copied from the first range rather than the second, and that the + * relative order of elements in the output range is the same as in + * the first input range. + * + * This version of \p set_intersection compares elements using a function object \p comp. + * + * \param first1 The beginning of the first input range. + * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. + * \param last2 The end of the second input range. 
+ * \param result The beginning of the output range. + * \param comp Comparison operator. + * \return The end of the output range. + * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. + * \pre The resulting range shall not overlap with either input range. + * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering. + * + * The following code snippet demonstrates how to use \p set_intersection to compute + * the set intersection of sets of integers sorted in descending order. + * + * \code + * #include + * ... 
+ * int A1[6] = {11, 9, 7, 5, 3, 1}; + * int A2[7] = {13, 8, 5, 3, 2, 1, 1}; + * + * int result[3]; + * + * int *result_end = thrust::set_intersection(A1, A1 + 6, A2, A2 + 7, result, thrust::greater<int>()); + * // result is now {5, 3, 1} + * \endcode + * + * \see http://www.sgi.com/tech/stl/set_intersection.html + * \see \p includes + * \see \p set_union + * \see \p set_difference + * \see \p set_symmetric_difference + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator set_intersection(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakCompare comp); + + +/*! \p set_symmetric_difference constructs a sorted range that is the set symmetric + * difference of the sorted ranges [first1, last1) and [first2, last2). + * The return value is the end of the output range. + * + * In the simplest case, \p set_symmetric_difference performs a set theoretic calculation: + * it constructs the union of the two sets A - B and B - A, where A and B are the two + * input ranges. That is, the output range contains a copy of every element that is + * contained in [first1, last1) but not [first2, last2), and a copy of + * every element that is contained in [first2, last2) but not [first1, last1). + * The general case is more complicated, because the input ranges may contain duplicate elements. + * The generalization is that if [first1, last1) contains \c m elements that are + * equivalent to each other and [first2, last2) contains \c n elements that are + * equivalent to them, then |m - n| of those elements shall be copied to the output + * range: the last m - n elements from [first1, last1) if m > n, and + * the last n - m of these elements from [first2, last2) if m < n. + * + * This version of \p set_symmetric_difference compares elements using \c operator<. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. 
+ * \param first1 The beginning of the first input range. + * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. + * \param last2 The end of the second input range. + * \param result The beginning of the output range. + * \return The end of the output range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. + * \pre The resulting range shall not overlap with either input range. + * + * The following code snippet demonstrates how to use \p set_symmetric_difference to compute + * the symmetric difference of two sets of integers sorted in ascending order using the \p thrust::host + * execution policy for parallelization: + * + * \code + * #include + * #include + * ... 
+ * int A1[7] = {0, 1, 2, 2, 4, 6, 7}; + * int A2[5] = {1, 1, 2, 5, 8}; + * + * int result[8]; + * + * int *result_end = thrust::set_symmetric_difference(thrust::host, A1, A1 + 7, A2, A2 + 5, result); + * // result = {0, 1, 2, 4, 5, 6, 7, 8} + * \endcode + * + * \see http://www.sgi.com/tech/stl/set_symmetric_difference.html + * \see \p merge + * \see \p includes + * \see \p set_difference + * \see \p set_union + * \see \p set_intersection + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator set_symmetric_difference(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result); + + +/*! \p set_symmetric_difference constructs a sorted range that is the set symmetric + * difference of the sorted ranges [first1, last1) and [first2, last2). + * The return value is the end of the output range. + * + * In the simplest case, \p set_symmetric_difference performs a set theoretic calculation: + * it constructs the union of the two sets A - B and B - A, where A and B are the two + * input ranges. That is, the output range contains a copy of every element that is + * contained in [first1, last1) but not [first2, last2), and a copy of + * every element that is contained in [first2, last2) but not [first1, last1). + * The general case is more complicated, because the input ranges may contain duplicate elements. + * The generalization is that if [first1, last1) contains \c m elements that are + * equivalent to each other and [first2, last2) contains \c n elements that are + * equivalent to them, then |m - n| of those elements shall be copied to the output + * range: the last m - n elements from [first1, last1) if m > n, and + * the last n - m of these elements from [first2, last2) if m < n. + * + * This version of \p set_symmetric_difference compares elements using \c operator<. + * + * \param first1 The beginning of the first input range. 
+ * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. + * \param last2 The end of the second input range. + * \param result The beginning of the output range. + * \return The end of the output range. + * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. + * \pre The resulting range shall not overlap with either input range. + * + * The following code snippet demonstrates how to use \p set_symmetric_difference to compute + * the symmetric difference of two sets of integers sorted in ascending order. + * + * \code + * #include + * ... 
+ * int A1[7] = {0, 1, 2, 2, 4, 6, 7}; + * int A2[5] = {1, 1, 2, 5, 8}; + * + * int result[8]; + * + * int *result_end = thrust::set_symmetric_difference(A1, A1 + 7, A2, A2 + 5, result); + * // result = {0, 1, 2, 4, 5, 6, 7, 8} + * \endcode + * + * \see http://www.sgi.com/tech/stl/set_symmetric_difference.html + * \see \p merge + * \see \p includes + * \see \p set_difference + * \see \p set_union + * \see \p set_intersection + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator set_symmetric_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result); + + +/*! \p set_symmetric_difference constructs a sorted range that is the set symmetric + * difference of the sorted ranges [first1, last1) and [first2, last2). + * The return value is the end of the output range. + * + * In the simplest case, \p set_symmetric_difference performs a set theoretic calculation: + * it constructs the union of the two sets A - B and B - A, where A and B are the two + * input ranges. That is, the output range contains a copy of every element that is + * contained in [first1, last1) but not [first2, last2), and a copy of + * every element that is contained in [first2, last2) but not [first1, last1). + * The general case is more complicated, because the input ranges may contain duplicate elements. + * The generalization is that if [first1, last1) contains \c m elements that are + * equivalent to each other and [first2, last2) contains \c n elements that are + * equivalent to them, then |m - n| of those elements shall be copied to the output + * range: the last m - n elements from [first1, last1) if m > n, and + * the last n - m of these elements from [first2, last2) if m < n. + * + * This version of \p set_symmetric_difference compares elements using a function object \p comp. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. 
+ * \param first1 The beginning of the first input range. + * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. + * \param last2 The end of the second input range. + * \param result The beginning of the output range. + * \param comp Comparison operator. + * \return The end of the output range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering. + * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. + * \pre The resulting range shall not overlap with either input range. + * + * The following code snippet demonstrates how to use \p set_symmetric_difference to compute + * the symmetric difference of two sets of integers sorted in descending order using the \p thrust::host + * execution policy for parallelization: + * + * \code + * #include + * #include + * ... 
+ * int A1[7] = {7, 6, 4, 2, 2, 1, 0}; + * int A2[5] = {8, 5, 2, 1, 1}; + * + * int result[8]; + * + * int *result_end = thrust::set_symmetric_difference(thrust::host, A1, A1 + 7, A2, A2 + 5, result, thrust::greater<int>()); + * // result = {8, 7, 6, 5, 4, 2, 1, 0} + * \endcode + * + * \see http://www.sgi.com/tech/stl/set_symmetric_difference.html + * \see \p merge + * \see \p includes + * \see \p set_difference + * \see \p set_union + * \see \p set_intersection + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator set_symmetric_difference(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakCompare comp); + + +/*! \p set_symmetric_difference constructs a sorted range that is the set symmetric + * difference of the sorted ranges [first1, last1) and [first2, last2). + * The return value is the end of the output range. + * + * In the simplest case, \p set_symmetric_difference performs a set theoretic calculation: + * it constructs the union of the two sets A - B and B - A, where A and B are the two + * input ranges. That is, the output range contains a copy of every element that is + * contained in [first1, last1) but not [first2, last2), and a copy of + * every element that is contained in [first2, last2) but not [first1, last1). + * The general case is more complicated, because the input ranges may contain duplicate elements. + * The generalization is that if [first1, last1) contains \c m elements that are + * equivalent to each other and [first2, last2) contains \c n elements that are + * equivalent to them, then |m - n| of those elements shall be copied to the output + * range: the last m - n elements from [first1, last1) if m > n, and + * the last n - m of these elements from [first2, last2) if m < n. + * + * This version of \p set_symmetric_difference compares elements using a function object \p comp. 
+ * + * \param first1 The beginning of the first input range. + * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. + * \param last2 The end of the second input range. + * \param result The beginning of the output range. + * \param comp Comparison operator. + * \return The end of the output range. + * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering. + * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. + * \pre The resulting range shall not overlap with either input range. + * + * The following code snippet demonstrates how to use \p set_symmetric_difference to compute + * the symmetric difference of two sets of integers sorted in descending order. + * + * \code + * #include + * ... 
+ * int A1[7] = {7, 6, 4, 2, 2, 1, 0}; + * int A2[5] = {8, 5, 2, 1, 1}; + * + * int result[8]; + * + * int *result_end = thrust::set_symmetric_difference(A1, A1 + 7, A2, A2 + 5, result, thrust::greater<int>()); + * // result = {8, 7, 6, 5, 4, 2, 1, 0} + * \endcode + * + * \see http://www.sgi.com/tech/stl/set_symmetric_difference.html + * \see \p merge + * \see \p includes + * \see \p set_difference + * \see \p set_union + * \see \p set_intersection + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator set_symmetric_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakCompare comp); + + +/*! \p set_union constructs a sorted range that is the union of the sorted ranges + * [first1, last1) and [first2, last2). The return value is the + * end of the output range. + * + * In the simplest case, \p set_union performs the "union" operation from set + * theory: the output range contains a copy of every element that is contained in + * [first1, last1), [first2, last2), or both. The general case + * is more complicated, because the input ranges may contain duplicate elements. + * The generalization is that if [first1, last1) contains \c m elements + * that are equivalent to each other and if [first2, last2) contains \c n + * elements that are equivalent to them, then all \c m elements from the first + * range shall be copied to the output range, in order, and then max(n - m, 0) + * elements from the second range shall be copied to the output, in order. + * + * This version of \p set_union compares elements using \c operator<. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the first input range. + * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. + * \param last2 The end of the second input range. 
+ * \param result The beginning of the output range. + * \return The end of the output range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. + * \pre The resulting range shall not overlap with either input range. + * + * The following code snippet demonstrates how to use \p set_union to compute the union of + * two sets of integers sorted in ascending order using the \p thrust::host execution policy for + * parallelization: + * + * \code + * #include + * #include + * ... 
+ * int A1[7] = {0, 2, 4, 6, 8, 10, 12}; + * int A2[5] = {1, 3, 5, 7, 9}; + * + * int result[12]; + * + * int *result_end = thrust::set_union(thrust::host, A1, A1 + 7, A2, A2 + 5, result); + * // result = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12} + * \endcode + * + * \see http://www.sgi.com/tech/stl/set_union.html + * \see \p merge + * \see \p includes + * \see \p set_difference + * \see \p set_intersection + * \see \p set_symmetric_difference + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator set_union(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result); + + +/*! \p set_union constructs a sorted range that is the union of the sorted ranges + * [first1, last1) and [first2, last2). The return value is the + * end of the output range. + * + * In the simplest case, \p set_union performs the "union" operation from set + * theory: the output range contains a copy of every element that is contained in + * [first1, last1), [first2, last2), or both. The general case + * is more complicated, because the input ranges may contain duplicate elements. + * The generalization is that if [first1, last1) contains \c m elements + * that are equivalent to each other and if [first2, last2) contains \c n + * elements that are equivalent to them, then all \c m elements from the first + * range shall be copied to the output range, in order, and then max(n - m, 0) + * elements from the second range shall be copied to the output, in order. + * + * This version of \p set_union compares elements using \c operator<. + * + * \param first1 The beginning of the first input range. + * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. + * \param last2 The end of the second input range. + * \param result The beginning of the output range. + * \return The end of the output range. 
+ * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. + * \pre The resulting range shall not overlap with either input range. + * + * The following code snippet demonstrates how to use \p set_union to compute the union of + * two sets of integers sorted in ascending order. + * + * \code + * #include + * ... + * int A1[7] = {0, 2, 4, 6, 8, 10, 12}; + * int A2[5] = {1, 3, 5, 7, 9}; + * + * int result[12]; + * + * int *result_end = thrust::set_union(A1, A1 + 7, A2, A2 + 5, result); + * // result = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12} + * \endcode + * + * \see http://www.sgi.com/tech/stl/set_union.html + * \see \p merge + * \see \p includes + * \see \p set_difference + * \see \p set_intersection + * \see \p set_symmetric_difference + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator set_union(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result); + + +/*! 
 \p set_union constructs a sorted range that is the union of the sorted ranges + * [first1, last1) and [first2, last2). The return value is the + * end of the output range. + * + * In the simplest case, \p set_union performs the "union" operation from set + * theory: the output range contains a copy of every element that is contained in + * [first1, last1), [first2, last2), or both. The general case + * is more complicated, because the input ranges may contain duplicate elements. + * The generalization is that if [first1, last1) contains \c m elements + * that are equivalent to each other and if [first2, last2) contains \c n + * elements that are equivalent to them, then all \c m elements from the first + * range shall be copied to the output range, in order, and then max(n - m, 0) + * elements from the second range shall be copied to the output, in order. + * + * This version of \p set_union compares elements using a function object \p comp. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the first input range. + * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. + * \param last2 The end of the second input range. + * \param result The beginning of the output range. + * \param comp Comparison operator. + * \return The end of the output range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1's \c value_type is convertible to \p StrictWeakCompare's \c first_argument_type, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2's \c value_type is convertible to \p StrictWeakCompare's \c second_argument_type.
+ * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering. + * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. + * \pre The resulting range shall not overlap with either input range. + * + * The following code snippet demonstrates how to use \p set_union to compute the union of + * two sets of integers sorted in descending order using the \p thrust::host execution policy for + * parallelization: + * + * \code + * #include + * #include + * #include + * ... + * int A1[7] = {12, 10, 8, 6, 4, 2, 0}; + * int A2[5] = {9, 7, 5, 3, 1}; + * + * int result[12]; + * + * int *result_end = thrust::set_union(thrust::host, A1, A1 + 7, A2, A2 + 5, result, thrust::greater()); + * // result = {12, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0} + * \endcode + * + * \see http://www.sgi.com/tech/stl/set_union.html + * \see \p merge + * \see \p includes + * \see \p set_union + * \see \p set_intersection + * \see \p set_symmetric_difference + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator set_union(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakCompare comp); + + +/*! \p set_union constructs a sorted range that is the union of the sorted ranges + * [first1, last1) and [first2, last2). The return value is the + * end of the output range. + * + * In the simplest case, \p set_union performs the "union" operation from set + * theory: the output range contains a copy of every element that is contained in + * [first1, last1), [first2, last2), or both. The general case + * is more complicated, because the input ranges may contain duplicate elements.
+ * The generalization is that if [first1, last1) contains \c m elements + * that are equivalent to each other and if [first2, last2) contains \c n + * elements that are equivalent to them, then all \c m elements from the first + * range shall be copied to the output range, in order, and then max(n - m, 0) + * elements from the second range shall be copied to the output, in order. + * + * This version of \p set_union compares elements using a function object \p comp. + * + * \param first1 The beginning of the first input range. + * \param last1 The end of the first input range. + * \param first2 The beginning of the second input range. + * \param last2 The end of the second input range. + * \param result The beginning of the output range. + * \param comp Comparison operator. + * \return The end of the output range. + * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1's \c value_type is convertible to \p StrictWeakCompare's \c first_argument_type, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2's \c value_type is convertible to \p StrictWeakCompare's \c second_argument_type, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering. + * + * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. + * \pre The resulting range shall not overlap with either input range. + * + * The following code snippet demonstrates how to use \p set_union to compute the union of + * two sets of integers sorted in descending order. + * + * \code + * #include + * #include + * ...
+ * int A1[7] = {12, 10, 8, 6, 4, 2, 0}; + * int A2[5] = {9, 7, 5, 3, 1}; + * + * int result[12]; + * + * int *result_end = thrust::set_union(A1, A1 + 7, A2, A2 + 5, result, thrust::greater()); + * // result = {12, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0} + * \endcode + * + * \see http://www.sgi.com/tech/stl/set_union.html + * \see \p merge + * \see \p includes + * \see \p set_union + * \see \p set_intersection + * \see \p set_symmetric_difference + * \see \p sort + * \see \p is_sorted + */ +template + OutputIterator set_union(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakCompare comp); + + +/*! \p set_difference_by_key performs a key-value difference operation from set theory. + * \p set_difference_by_key constructs a sorted range that is the difference of the sorted + * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated + * with each element from the input and output key ranges is a value element. The associated input + * value ranges need not be sorted. + * + * In the simplest case, \p set_difference_by_key performs the "difference" operation from set + * theory: the keys output range contains a copy of every element that is contained in + * [keys_first1, keys_last1) and not contained in [keys_first2, keys_last2). + * The general case is more complicated, because the input ranges may contain duplicate elements. + * The generalization is that if [keys_first1, keys_last1) contains \c m elements + * that are equivalent to each other and if [keys_first2, keys_last2) contains \c n + * elements that are equivalent to them, the last max(m-n,0) elements from + * [keys_first1, keys_last1) range shall be copied to the output range.
+ * + * Each time a key element from [keys_first1, keys_last1) or + * [keys_first2, keys_last2) is copied to the keys output range, the + * corresponding value element is copied from the corresponding values input range (beginning at + * \p values_first1 or \p values_first2) to the values output range. + * + * This version of \p set_difference_by_key compares key elements using \c operator<. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys. + * \param values_first1 The beginning of the first input range of values. + * \param values_first2 The beginning of the second input range of values. + * \param keys_result The beginning of the output range of keys. + * \param values_result The beginning of the output range of values. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
+ * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam InputIterator4 is a model of Input Iterator, + * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use \p set_difference_by_key to compute the + * set difference of two sets of integers sorted in ascending order with their values using the \p thrust::host + * execution policy for parallelization: + * + * \code + * #include + * #include + * ... 
+ * int A_keys[7] = {0, 1, 3, 4, 5, 6, 9}; + * int A_vals[7] = {0, 0, 0, 0, 0, 0, 0}; + * + * int B_keys[5] = {1, 3, 5, 7, 9}; + * int B_vals[5] = {1, 1, 1, 1, 1}; + * + * int keys_result[3]; + * int vals_result[3]; + * + * thrust::pair end = thrust::set_difference_by_key(thrust::host, A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result); + * // keys_result is now {0, 4, 6} + * // vals_result is now {0, 0, 0} + * \endcode + * + * \see \p set_union_by_key + * \see \p set_intersection_by_key + * \see \p set_symmetric_difference_by_key + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + set_difference_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result); + + +/*! \p set_difference_by_key performs a key-value difference operation from set theory. + * \p set_difference_by_key constructs a sorted range that is the difference of the sorted + * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated + * with each element from the input and output key ranges is a value element. The associated input + * value ranges need not be sorted. + * + * In the simplest case, \p set_difference_by_key performs the "difference" operation from set + * theory: the keys output range contains a copy of every element that is contained in + * [keys_first1, keys_last1) and not contained in [keys_first2, keys_last2). + * The general case is more complicated, because the input ranges may contain duplicate elements.
+ * The generalization is that if [keys_first1, keys_last1) contains \c m elements + * that are equivalent to each other and if [keys_first2, keys_last2) contains \c n + * elements that are equivalent to them, the last max(m-n,0) elements from + * [keys_first1, keys_last1) range shall be copied to the output range. + * + * Each time a key element from [keys_first1, keys_last1) or + * [keys_first2, keys_last2) is copied to the keys output range, the + * corresponding value element is copied from the corresponding values input range (beginning at + * \p values_first1 or \p values_first2) to the values output range. + * + * This version of \p set_difference_by_key compares key elements using \c operator<. + * + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys. + * \param values_first1 The beginning of the first input range of values. + * \param values_first2 The beginning of the second input range of values. + * \param keys_result The beginning of the output range of keys. + * \param values_result The beginning of the output range of values. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values. + * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
+ * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam InputIterator4 is a model of Input Iterator, + * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use \p set_difference_by_key to compute the + * set difference of two sets of integers sorted in ascending order with their values. + * + * \code + * #include + * ... 
+ * int A_keys[7] = {0, 1, 3, 4, 5, 6, 9}; + * int A_vals[7] = {0, 0, 0, 0, 0, 0, 0}; + * + * int B_keys[5] = {1, 3, 5, 7, 9}; + * int B_vals[5] = {1, 1, 1, 1, 1}; + * + * int keys_result[3]; + * int vals_result[3]; + * + * thrust::pair end = thrust::set_difference_by_key(A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result); + * // keys_result is now {0, 4, 6} + * // vals_result is now {0, 0, 0} + * \endcode + * + * \see \p set_union_by_key + * \see \p set_intersection_by_key + * \see \p set_symmetric_difference_by_key + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + set_difference_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result); + + +/*! \p set_difference_by_key performs a key-value difference operation from set theory. + * \p set_difference_by_key constructs a sorted range that is the difference of the sorted + * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated + * with each element from the input and output key ranges is a value element. The associated input + * value ranges need not be sorted. + * + * In the simplest case, \p set_difference_by_key performs the "difference" operation from set + * theory: the keys output range contains a copy of every element that is contained in + * [keys_first1, keys_last1) and not contained in [keys_first2, keys_last2). + * The general case is more complicated, because the input ranges may contain duplicate elements. + * The generalization is that if [keys_first1, keys_last1) contains \c m elements + * that are equivalent to each other and if [keys_first2, keys_last2) contains \c n + * elements that are equivalent to them, the last max(m-n,0) elements from + * [keys_first1, keys_last1) range shall be copied to the output range.
+ * + * Each time a key element from [keys_first1, keys_last1) or + * [keys_first2, keys_last2) is copied to the keys output range, the + * corresponding value element is copied from the corresponding values input range (beginning at + * \p values_first1 or \p values_first2) to the values output range. + * + * This version of \p set_difference_by_key compares key elements using a function object \p comp. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys. + * \param values_first1 The beginning of the first input range of values. + * \param values_first2 The beginning of the second input range of values. + * \param keys_result The beginning of the output range of keys. + * \param values_result The beginning of the output range of values. + * \param comp Comparison operator. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
+ * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam InputIterator4 is a model of Input Iterator, + * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering. + * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use \p set_difference_by_key to compute the + * set difference of two sets of integers sorted in descending order with their values using the \p thrust::host + * execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... 
+ * int A_keys[7] = {9, 6, 5, 4, 3, 1, 0}; + * int A_vals[7] = {0, 0, 0, 0, 0, 0, 0}; + * + * int B_keys[5] = {9, 7, 5, 3, 1}; + * int B_vals[5] = {1, 1, 1, 1, 1}; + * + * int keys_result[3]; + * int vals_result[3]; + * + * thrust::pair end = thrust::set_difference_by_key(thrust::host, A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result, thrust::greater()); + * // keys_result is now {6, 4, 0} + * // vals_result is now {0, 0, 0} + * \endcode + * + * \see \p set_union_by_key + * \see \p set_intersection_by_key + * \see \p set_symmetric_difference_by_key + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + set_difference_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakCompare comp); + + +/*! \p set_difference_by_key performs a key-value difference operation from set theory. + * \p set_difference_by_key constructs a sorted range that is the difference of the sorted + * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated + * with each element from the input and output key ranges is a value element. The associated input + * value ranges need not be sorted. + * + * In the simplest case, \p set_difference_by_key performs the "difference" operation from set + * theory: the keys output range contains a copy of every element that is contained in + * [keys_first1, keys_last1) and not contained in [keys_first2, keys_last2). + * The general case is more complicated, because the input ranges may contain duplicate elements.
+ * The generalization is that if [keys_first1, keys_last1) contains \c m elements + * that are equivalent to each other and if [keys_first2, keys_last2) contains \c n + * elements that are equivalent to them, the last max(m-n,0) elements from + * [keys_first1, keys_last1) range shall be copied to the output range. + * + * Each time a key element from [keys_first1, keys_last1) or + * [keys_first2, keys_last2) is copied to the keys output range, the + * corresponding value element is copied from the corresponding values input range (beginning at + * \p values_first1 or \p values_first2) to the values output range. + * + * This version of \p set_difference_by_key compares key elements using a function object \p comp. + * + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys. + * \param values_first1 The beginning of the first input range of values. + * \param values_first2 The beginning of the second input range of values. + * \param keys_result The beginning of the output range of keys. + * \param values_result The beginning of the output range of values. + * \param comp Comparison operator. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values. + * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
+ * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam InputIterator4 is a model of Input Iterator, + * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering. + * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use \p set_difference_by_key to compute the + * set difference of two sets of integers sorted in descending order with their values. + * + * \code + * #include + * #include + * ... 
+ * int A_keys[7] = {9, 6, 5, 4, 3, 1, 0}; + * int A_vals[7] = {0, 0, 0, 0, 0, 0, 0}; + * + * int B_keys[5] = {9, 7, 5, 3, 1}; + * int B_vals[5] = {1, 1, 1, 1, 1}; + * + * int keys_result[3]; + * int vals_result[3]; + * + * thrust::pair end = thrust::set_difference_by_key(A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result, thrust::greater()); + * // keys_result is now {6, 4, 0} + * // vals_result is now {0, 0, 0} + * \endcode + * + * \see \p set_union_by_key + * \see \p set_intersection_by_key + * \see \p set_symmetric_difference_by_key + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + set_difference_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakCompare comp); + + +/*! \p set_intersection_by_key performs a key-value intersection operation from set theory. + * \p set_intersection_by_key constructs a sorted range that is the intersection of the sorted + * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated + * with each element from the input and output key ranges is a value element. The associated input + * value ranges need not be sorted. + * + * In the simplest case, \p set_intersection_by_key performs the "intersection" operation from set + * theory: the keys output range contains a copy of every element that is contained in both + * [keys_first1, keys_last1) and [keys_first2, keys_last2). + * The general case is more complicated, because the input ranges may contain duplicate elements. + * The generalization is that if an element appears \c m times in [keys_first1, keys_last1) + * and \c n times in [keys_first2, keys_last2) (where \c m may be zero), then it + * appears min(m,n) times in the keys output range.
+ * \p set_intersection_by_key is stable, meaning both that elements are copied from the first + * input range rather than the second, and that the relative order of elements in the output range + * is the same as the first input range. + * + * Each time a key element is copied from [keys_first1, keys_last1) to the keys output range, + * the corresponding value element is copied from [values_first1, values_last1) to the values + * output range. + * + * This version of \p set_intersection_by_key compares objects using \c operator<. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys. + * \param values_first1 The beginning of the first input range of values. + * \param keys_result The beginning of the output range of keys. + * \param values_result The beginning of the output range of values. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values. + * + * \note Unlike the other key-value set operations, \p set_intersection_by_key is unique in that it has no + * \c values_first2 parameter because elements from the second input range are never copied to the output range. + * + * \tparam DerivedPolicy The name of the derived execution policy. 
+ * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use \p set_intersection_by_key to compute the + * set intersection of two sets of integers sorted in ascending order with their values using the \p thrust::host + * execution policy for parallelization: + * + * \code + * #include + * #include + * ... 
+ * int A_keys[6] = {1, 3, 5, 7, 9, 11}; + * int A_vals[6] = {0, 0, 0, 0, 0, 0}; + * + * int B_keys[7] = {1, 1, 2, 3, 5, 8, 13}; + * + * int keys_result[7]; + * int vals_result[7]; + * + * thrust::pair end = thrust::set_intersection_by_key(thrust::host, A_keys, A_keys + 6, B_keys, B_keys + 7, A_vals, keys_result, vals_result); + * + * // keys_result is now {1, 3, 5} + * // vals_result is now {0, 0, 0} + * \endcode + * + * \see \p set_union_by_key + * \see \p set_difference_by_key + * \see \p set_symmetric_difference_by_key + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + set_intersection_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + OutputIterator1 keys_result, + OutputIterator2 values_result); + + +/*! \p set_intersection_by_key performs a key-value intersection operation from set theory. + * \p set_intersection_by_key constructs a sorted range that is the intersection of the sorted + * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated + * with each element from the input and output key ranges is a value element. The associated input + * value ranges need not be sorted. + * + * In the simplest case, \p set_intersection_by_key performs the "intersection" operation from set + * theory: the keys output range contains a copy of every element that is contained in both + * [keys_first1, keys_last1) and [keys_first2, keys_last2). + * The general case is more complicated, because the input ranges may contain duplicate elements. + * The generalization is that if an element appears \c m times in [keys_first1, keys_last1) + * and \c n times in [keys_first2, keys_last2) (where \c m may be zero), then it + * appears min(m,n) times in the keys output range.
+ * \p set_intersection_by_key is stable, meaning both that elements are copied from the first + * input range rather than the second, and that the relative order of elements in the output range + * is the same as the first input range. + * + * Each time a key element is copied from [keys_first1, keys_last1) to the keys output range, + * the corresponding value element is copied from the values input range beginning at \p values_first1 + * to the values output range. + * + * This version of \p set_intersection_by_key compares objects using \c operator<. + * + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys. + * \param values_first1 The beginning of the first input range of values. + * \param keys_result The beginning of the output range of keys. + * \param values_result The beginning of the output range of values. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values. + * + * \note Unlike the other key-value set operations, \p set_intersection_by_key is unique in that it has no + * \c values_first2 parameter because elements from the second input range are never copied to the output range. + * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
+ * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use \p set_intersection_by_key to compute the + * set intersection of two sets of integers sorted in ascending order with their values. + * + * \code + * #include + * ... + * int A_keys[6] = {1, 3, 5, 7, 9, 11}; + * int A_vals[6] = {0, 0, 0, 0, 0, 0}; + * + * int B_keys[7] = {1, 1, 2, 3, 5, 8, 13}; + * + * int keys_result[7]; + * int vals_result[7]; + * + * thrust::pair end = thrust::set_intersection_by_key(A_keys, A_keys + 6, B_keys, B_keys + 7, A_vals, keys_result, vals_result); + * + * // keys_result is now {1, 3, 5} + * // vals_result is now {0, 0, 0} + * \endcode + * + * \see \p set_union_by_key + * \see \p set_difference_by_key + * \see \p set_symmetric_difference_by_key + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + set_intersection_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + OutputIterator1 keys_result, + OutputIterator2 values_result); + + +/*!
\p set_intersection_by_key performs a key-value intersection operation from set theory. + * \p set_intersection_by_key constructs a sorted range that is the intersection of the sorted + * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated + * with each element from the input and output key ranges is a value element. The associated input + * value ranges need not be sorted. + * + * In the simplest case, \p set_intersection_by_key performs the "intersection" operation from set + * theory: the keys output range contains a copy of every element that is contained in both + * [keys_first1, keys_last1) and [keys_first2, keys_last2). + * The general case is more complicated, because the input ranges may contain duplicate elements. + * The generalization is that if an element appears \c m times in [keys_first1, keys_last1) + * and \c n times in [keys_first2, keys_last2) (where \c m may be zero), then it + * appears min(m,n) times in the keys output range. + * \p set_intersection_by_key is stable, meaning both that elements are copied from the first + * input range rather than the second, and that the relative order of elements in the output range + * is the same as the first input range. + * + * Each time a key element is copied from [keys_first1, keys_last1) to the keys output range, + * the corresponding value element is copied from the values input range beginning at \p values_first1 + * to the values output range. + * + * This version of \p set_intersection_by_key compares objects using a function object \p comp. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys.
+ * \param values_first1 The beginning of the first input range of values. + * \param keys_result The beginning of the output range of keys. + * \param values_result The beginning of the output range of values. + * \param comp Comparison operator. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values. + * + * \note Unlike the other key-value set operations, \p set_intersection_by_key is unique in that it has no + * \c values_first2 parameter because elements from the second input range are never copied to the output range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering.
+ * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use \p set_intersection_by_key to compute the + * set intersection of two sets of integers sorted in descending order with their values using the + * \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... + * int A_keys[6] = {11, 9, 7, 5, 3, 1}; + * int A_vals[6] = { 0, 0, 0, 0, 0, 0}; + * + * int B_keys[7] = {13, 8, 5, 3, 2, 1, 1}; + * + * int keys_result[7]; + * int vals_result[7]; + * + * thrust::pair end = thrust::set_intersection_by_key(thrust::host, A_keys, A_keys + 6, B_keys, B_keys + 7, A_vals, keys_result, vals_result, thrust::greater()); + * + * // keys_result is now {5, 3, 1} + * // vals_result is now {0, 0, 0} + * \endcode + * + * \see \p set_union_by_key + * \see \p set_difference_by_key + * \see \p set_symmetric_difference_by_key + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + set_intersection_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakCompare comp); + + +/*! \p set_intersection_by_key performs a key-value intersection operation from set theory. + * \p set_intersection_by_key constructs a sorted range that is the intersection of the sorted + * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated + * with each element from the input and output key ranges is a value element. The associated input + * value ranges need not be sorted. 
+ * + * In the simplest case, \p set_intersection_by_key performs the "intersection" operation from set + * theory: the keys output range contains a copy of every element that is contained in both + * [keys_first1, keys_last1) and [keys_first2, keys_last2). + * The general case is more complicated, because the input ranges may contain duplicate elements. + * The generalization is that if an element appears \c m times in [keys_first1, keys_last1) + * and \c n times in [keys_first2, keys_last2) (where \c m may be zero), then it + * appears min(m,n) times in the keys output range. + * \p set_intersection_by_key is stable, meaning both that elements are copied from the first + * input range rather than the second, and that the relative order of elements in the output range + * is the same as the first input range. + * + * Each time a key element is copied from [keys_first1, keys_last1) to the keys output range, + * the corresponding value element is copied from the values input range beginning at \p values_first1 + * to the values output range. + * + * This version of \p set_intersection_by_key compares objects using a function object \p comp. + * + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys. + * \param values_first1 The beginning of the first input range of values. + * \param keys_result The beginning of the output range of keys. + * \param values_result The beginning of the output range of values. + * \param comp Comparison operator. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values.
+ * + * \note Unlike the other key-value set operations, \p set_intersection_by_key is unique in that it has no + * \c values_first2 parameter because elements from the second input range are never copied to the output range. + * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering. + * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use \p set_intersection_by_key to compute the + * set intersection of two sets of integers sorted in descending order with their values. + * + * \code + * #include + * #include + * ...
+ * int A_keys[6] = {11, 9, 7, 5, 3, 1}; + * int A_vals[6] = { 0, 0, 0, 0, 0, 0}; + * + * int B_keys[7] = {13, 8, 5, 3, 2, 1, 1}; + * + * int keys_result[7]; + * int vals_result[7]; + * + * thrust::pair end = thrust::set_intersection_by_key(A_keys, A_keys + 6, B_keys, B_keys + 7, A_vals, keys_result, vals_result, thrust::greater()); + * + * // keys_result is now {5, 3, 1} + * // vals_result is now {0, 0, 0} + * \endcode + * + * \see \p set_union_by_key + * \see \p set_difference_by_key + * \see \p set_symmetric_difference_by_key + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + set_intersection_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakCompare comp); + + +/*! \p set_symmetric_difference_by_key performs a key-value symmetric difference operation from set theory. + * \p set_symmetric_difference_by_key constructs a sorted range that is the symmetric difference of the sorted + * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated + * with each element from the input and output key ranges is a value element. The associated input + * value ranges need not be sorted. + * + * In the simplest case, \p set_symmetric_difference_by_key performs a set theoretic calculation: + * it constructs the union of the two sets A - B and B - A, where A and B are the two + * input ranges. That is, the output range contains a copy of every element that is + * contained in [keys_first1, keys_last1) but not [keys_first2, keys_last2), and a copy of + * every element that is contained in [keys_first2, keys_last2) but not [keys_first1, keys_last1). + * The general case is more complicated, because the input ranges may contain duplicate elements.
+ * The generalization is that if [keys_first1, keys_last1) contains \c m elements that are + * equivalent to each other and [keys_first2, keys_last2) contains \c n elements that are + * equivalent to them, then |m - n| of those elements shall be copied to the output + * range: the last m - n elements from [keys_first1, keys_last1) if m > n, and + * the last n - m of these elements from [keys_first2, keys_last2) if m < n. + * + * Each time a key element is copied from [keys_first1, keys_last1) or + * [keys_first2, keys_last2) to the keys output range, the + * corresponding value element is copied from the corresponding values input range (beginning at + * \p values_first1 or \p values_first2) to the values output range. + * + * This version of \p set_symmetric_difference_by_key compares key elements using \c operator<. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys. + * \param values_first1 The beginning of the first input range of values. + * \param values_first2 The beginning of the second input range of values. + * \param keys_result The beginning of the output range of keys. + * \param values_result The beginning of the output range of values. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values. + * + * \tparam DerivedPolicy The name of the derived execution policy.
+ * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam InputIterator4 is a model of Input Iterator, + * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use \p set_symmetric_difference_by_key to compute the + * symmetric difference of two sets of integers sorted in ascending order with their values using the + * \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * ...
+ * int A_keys[7] = {0, 1, 2, 2, 4, 6, 7}; + * int A_vals[7] = {0, 0, 0, 0, 0, 0, 0}; + * + * int B_keys[5] = {1, 1, 2, 5, 8}; + * int B_vals[5] = {1, 1, 1, 1, 1}; + * + * int keys_result[8]; + * int vals_result[8]; + * + * thrust::pair end = thrust::set_symmetric_difference_by_key(thrust::host, A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result); + * // keys_result is now {0, 1, 2, 4, 5, 6, 7, 8} + * // vals_result is now {0, 1, 0, 0, 1, 0, 0, 1} + * \endcode + * + * \see \p set_union_by_key + * \see \p set_intersection_by_key + * \see \p set_difference_by_key + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + set_symmetric_difference_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result); + + +/*! \p set_symmetric_difference_by_key performs a key-value symmetric difference operation from set theory. + * \p set_symmetric_difference_by_key constructs a sorted range that is the symmetric difference of the sorted + * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated + * with each element from the input and output key ranges is a value element. The associated input + * value ranges need not be sorted. + * + * In the simplest case, \p set_symmetric_difference_by_key performs a set theoretic calculation: + * it constructs the union of the two sets A - B and B - A, where A and B are the two + * input ranges. That is, the output range contains a copy of every element that is + * contained in [keys_first1, keys_last1) but not [keys_first2, keys_last2), and a copy of + * every element that is contained in [keys_first2, keys_last2) but not [keys_first1, keys_last1). + * The general case is more complicated, because the input ranges may contain duplicate elements.
+ * The generalization is that if [keys_first1, keys_last1) contains \c m elements that are + * equivalent to each other and [keys_first2, keys_last2) contains \c n elements that are + * equivalent to them, then |m - n| of those elements shall be copied to the output + * range: the last m - n elements from [keys_first1, keys_last1) if m > n, and + * the last n - m of these elements from [keys_first2, keys_last2) if m < n. + * + * Each time a key element is copied from [keys_first1, keys_last1) or + * [keys_first2, keys_last2) to the keys output range, the + * corresponding value element is copied from the corresponding values input range (beginning at + * \p values_first1 or \p values_first2) to the values output range. + * + * This version of \p set_symmetric_difference_by_key compares key elements using \c operator<. + * + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys. + * \param values_first1 The beginning of the first input range of values. + * \param values_first2 The beginning of the second input range of values. + * \param keys_result The beginning of the output range of keys. + * \param values_result The beginning of the output range of values. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values.
+ * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam InputIterator4 is a model of Input Iterator, + * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use \p set_symmetric_difference_by_key to compute the + * symmetric difference of two sets of integers sorted in ascending order with their values. + * + * \code + * #include + * ...
+ * int A_keys[7] = {0, 1, 2, 2, 4, 6, 7}; + * int A_vals[7] = {0, 0, 0, 0, 0, 0, 0}; + * + * int B_keys[5] = {1, 1, 2, 5, 8}; + * int B_vals[5] = {1, 1, 1, 1, 1}; + * + * int keys_result[8]; + * int vals_result[8]; + * + * thrust::pair end = thrust::set_symmetric_difference_by_key(A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result); + * // keys_result is now {0, 1, 2, 4, 5, 6, 7, 8} + * // vals_result is now {0, 1, 0, 0, 1, 0, 0, 1} + * \endcode + * + * \see \p set_union_by_key + * \see \p set_intersection_by_key + * \see \p set_difference_by_key + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + set_symmetric_difference_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result); + + +/*! \p set_symmetric_difference_by_key performs a key-value symmetric difference operation from set theory. + * \p set_symmetric_difference_by_key constructs a sorted range that is the symmetric difference of the sorted + * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated + * with each element from the input and output key ranges is a value element. The associated input + * value ranges need not be sorted. + * + * In the simplest case, \p set_symmetric_difference_by_key performs a set theoretic calculation: + * it constructs the union of the two sets A - B and B - A, where A and B are the two + * input ranges. That is, the output range contains a copy of every element that is + * contained in [keys_first1, keys_last1) but not [keys_first2, keys_last2), and a copy of + * every element that is contained in [keys_first2, keys_last2) but not [keys_first1, keys_last1). + * The general case is more complicated, because the input ranges may contain duplicate elements.
+ * The generalization is that if [keys_first1, keys_last1) contains \c m elements that are + * equivalent to each other and [keys_first2, keys_last2) contains \c n elements that are + * equivalent to them, then |m - n| of those elements shall be copied to the output + * range: the last m - n elements from [keys_first1, keys_last1) if m > n, and + * the last n - m of these elements from [keys_first2, keys_last2) if m < n. + * + * Each time a key element is copied from [keys_first1, keys_last1) or + * [keys_first2, keys_last2) to the keys output range, the + * corresponding value element is copied from the corresponding values input range (beginning at + * \p values_first1 or \p values_first2) to the values output range. + * + * This version of \p set_symmetric_difference_by_key compares key elements using a function object \c comp. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys. + * \param values_first1 The beginning of the first input range of values. + * \param values_first2 The beginning of the second input range of values. + * \param keys_result The beginning of the output range of keys. + * \param values_result The beginning of the output range of values. + * \param comp Comparison operator. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values. + * + * \tparam DerivedPolicy The name of the derived execution policy.
+ * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam InputIterator4 is a model of Input Iterator, + * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering. + * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use \p set_symmetric_difference_by_key to compute the + * symmetric difference of two sets of integers sorted in descending order with their values using the + * \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ...
+ * int A_keys[7] = {7, 6, 4, 2, 2, 1, 0}; + * int A_vals[7] = {0, 0, 0, 0, 0, 0, 0}; + * + * int B_keys[5] = {8, 5, 2, 1, 1}; + * int B_vals[5] = {1, 1, 1, 1, 1}; + * + * int keys_result[8]; + * int vals_result[8]; + * + * thrust::pair end = thrust::set_symmetric_difference_by_key(thrust::host, A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result, thrust::greater<int>()); + * // keys_result is now {8, 7, 6, 5, 4, 2, 1, 0} + * // vals_result is now {1, 0, 0, 1, 0, 0, 1, 0} + * \endcode + * + * \see \p set_union_by_key + * \see \p set_intersection_by_key + * \see \p set_difference_by_key + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + set_symmetric_difference_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakCompare comp); + + +/*! \p set_symmetric_difference_by_key performs a key-value symmetric difference operation from set theory. + * \p set_symmetric_difference_by_key constructs a sorted range that is the symmetric difference of the sorted + * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated + * with each element from the input and output key ranges is a value element. The associated input + * value ranges need not be sorted. + * + * In the simplest case, \p set_symmetric_difference_by_key performs a set theoretic calculation: + * it constructs the union of the two sets A - B and B - A, where A and B are the two + * input ranges. That is, the output range contains a copy of every element that is + * contained in [keys_first1, keys_last1) but not [keys_first2, keys_last2), and a copy of + * every element that is contained in [keys_first2, keys_last2) but not [keys_first1, keys_last1).
+ * The general case is more complicated, because the input ranges may contain duplicate elements. + * The generalization is that if [keys_first1, keys_last1) contains \c m elements that are + * equivalent to each other and [keys_first2, keys_last2) contains \c n elements that are + * equivalent to them, then |m - n| of those elements shall be copied to the output + * range: the last m - n elements from [keys_first1, keys_last1) if m > n, and + * the last n - m of these elements from [keys_first2, keys_last2) if m < n. + * + * Each time a key element is copied from [keys_first1, keys_last1) or + * [keys_first2, keys_last2) to the keys output range, the + * corresponding value element is copied from the corresponding values input range (beginning at + * \p values_first1 or \p values_first2) to the values output range. + * + * This version of \p set_symmetric_difference_by_key compares key elements using a function object \c comp. + * + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys. + * \param values_first1 The beginning of the first input range of values. + * \param values_first2 The beginning of the second input range of values. + * \param keys_result The beginning of the output range of keys. + * \param values_result The beginning of the output range of values. + * \param comp Comparison operator. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values.
+ * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam InputIterator4 is a model of Input Iterator, + * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering. + * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use \p set_symmetric_difference_by_key to compute the + * symmetric difference of two sets of integers sorted in descending order with their values. + * + * \code + * #include + * #include + * ...
+ * int A_keys[7] = {7, 6, 4, 2, 2, 1, 0}; + * int A_vals[7] = {0, 0, 0, 0, 0, 0, 0}; + * + * int B_keys[5] = {8, 5, 2, 1, 1}; + * int B_vals[5] = {1, 1, 1, 1, 1}; + * + * int keys_result[8]; + * int vals_result[8]; + * + * thrust::pair<int*,int*> end = thrust::set_symmetric_difference_by_key(A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result, thrust::greater<int>()); + * // keys_result is now {8, 7, 6, 5, 4, 2, 1, 0} + * // vals_result is now {1, 0, 0, 1, 0, 0, 1, 0} + * \endcode + * + * \see \p set_union_by_key + * \see \p set_intersection_by_key + * \see \p set_difference_by_key + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + set_symmetric_difference_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakCompare comp); + + +/*! \p set_union_by_key performs a key-value union operation from set theory. + * \p set_union_by_key constructs a sorted range that is the union of the sorted + * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated + * with each element from the input and output key ranges is a value element. The associated input + * value ranges need not be sorted. + * + * In the simplest case, \p set_union_by_key performs the "union" operation from set theory: + * the output range contains a copy of every element that is contained in + * [keys_first1, keys_last1), [keys_first2, keys_last2), or both. The general case + * is more complicated, because the input ranges may contain duplicate elements.
+ * The generalization is that if [keys_first1, keys_last1) contains \c m elements + * that are equivalent to each other and if [keys_first2, keys_last2) contains \c n + * elements that are equivalent to them, then all \c m elements from the first + * range shall be copied to the output range, in order, and then max(n - m, 0) + * elements from the second range shall be copied to the output, in order. + * + * Each time a key element from [keys_first1, keys_last1) or + * [keys_first2, keys_last2) is copied to the keys output range, the + * corresponding value element is copied from the corresponding values input range (beginning at + * \p values_first1 or \p values_first2) to the values output range. + * + * This version of \p set_union_by_key compares key elements using \c operator<. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys. + * \param values_first1 The beginning of the first input range of values. + * \param values_first2 The beginning of the second input range of values. + * \param keys_result The beginning of the output range of keys. + * \param values_result The beginning of the output range of values. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values. + * + * \tparam DerivedPolicy The name of the derived execution policy.
+ * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam InputIterator4 is a model of Input Iterator, + * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use \p set_union_by_key to compute the + * union of two sets of integers sorted in ascending order with their values using the + * \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * ...
+ * int A_keys[7] = {0, 2, 4, 6, 8, 10, 12}; + * int A_vals[7] = {0, 0, 0, 0, 0, 0, 0}; + * + * int B_keys[5] = {1, 3, 5, 7, 9}; + * int B_vals[5] = {1, 1, 1, 1, 1}; + * + * int keys_result[12]; + * int vals_result[12]; + * + * thrust::pair<int*,int*> end = thrust::set_union_by_key(thrust::host, A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result); + * // keys_result is now {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12} + * // vals_result is now {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0} + * \endcode + * + * \see \p set_symmetric_difference_by_key + * \see \p set_intersection_by_key + * \see \p set_difference_by_key + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + set_union_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result); + + +/*! \p set_union_by_key performs a key-value union operation from set theory. + * \p set_union_by_key constructs a sorted range that is the union of the sorted + * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated + * with each element from the input and output key ranges is a value element. The associated input + * value ranges need not be sorted. + * + * In the simplest case, \p set_union_by_key performs the "union" operation from set theory: + * the output range contains a copy of every element that is contained in + * [keys_first1, keys_last1), [keys_first2, keys_last2), or both. The general case + * is more complicated, because the input ranges may contain duplicate elements.
+ * The generalization is that if [keys_first1, keys_last1) contains \c m elements + * that are equivalent to each other and if [keys_first2, keys_last2) contains \c n + * elements that are equivalent to them, then all \c m elements from the first + * range shall be copied to the output range, in order, and then max(n - m, 0) + * elements from the second range shall be copied to the output, in order. + * + * Each time a key element from [keys_first1, keys_last1) or + * [keys_first2, keys_last2) is copied to the keys output range, the + * corresponding value element is copied from the corresponding values input range (beginning at + * \p values_first1 or \p values_first2) to the values output range. + * + * This version of \p set_union_by_key compares key elements using \c operator<. + * + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys. + * \param values_first1 The beginning of the first input range of values. + * \param values_first2 The beginning of the second input range of values. + * \param keys_result The beginning of the output range of keys. + * \param values_result The beginning of the output range of values. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values. + * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
+ * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam InputIterator4 is a model of Input Iterator, + * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use \p set_union_by_key to compute the + * union of two sets of integers sorted in ascending order with their values. + * + * \code + * #include + * ...
+ * int A_keys[7] = {0, 2, 4, 6, 8, 10, 12}; + * int A_vals[7] = {0, 0, 0, 0, 0, 0, 0}; + * + * int B_keys[5] = {1, 3, 5, 7, 9}; + * int B_vals[5] = {1, 1, 1, 1, 1}; + * + * int keys_result[12]; + * int vals_result[12]; + * + * thrust::pair<int*,int*> end = thrust::set_union_by_key(A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result); + * // keys_result is now {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12} + * // vals_result is now {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0} + * \endcode + * + * \see \p set_symmetric_difference_by_key + * \see \p set_intersection_by_key + * \see \p set_difference_by_key + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + set_union_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result); + + +/*! \p set_union_by_key performs a key-value union operation from set theory. + * \p set_union_by_key constructs a sorted range that is the union of the sorted + * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated + * with each element from the input and output key ranges is a value element. The associated input + * value ranges need not be sorted. + * + * In the simplest case, \p set_union_by_key performs the "union" operation from set theory: + * the output range contains a copy of every element that is contained in + * [keys_first1, keys_last1), [keys_first2, keys_last2), or both. The general case + * is more complicated, because the input ranges may contain duplicate elements.
+ * The generalization is that if [keys_first1, keys_last1) contains \c m elements + * that are equivalent to each other and if [keys_first2, keys_last2) contains \c n + * elements that are equivalent to them, then all \c m elements from the first + * range shall be copied to the output range, in order, and then max(n - m, 0) + * elements from the second range shall be copied to the output, in order. + * + * Each time a key element from [keys_first1, keys_last1) or + * [keys_first2, keys_last2) is copied to the keys output range, the + * corresponding value element is copied from the corresponding values input range (beginning at + * \p values_first1 or \p values_first2) to the values output range. + * + * This version of \p set_union_by_key compares key elements using a function object \c comp. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys. + * \param values_first1 The beginning of the first input range of values. + * \param values_first2 The beginning of the second input range of values. + * \param keys_result The beginning of the output range of keys. + * \param values_result The beginning of the output range of values. + * \param comp Comparison operator. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values. + * + * \tparam DerivedPolicy The name of the derived execution policy.
+ * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam InputIterator4 is a model of Input Iterator, + * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering. + * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use \p set_union_by_key to compute the + * union of two sets of integers sorted in descending order with their values using the + * \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ...
+ * int A_keys[7] = {12, 10, 8, 6, 4, 2, 0}; + * int A_vals[7] = { 0, 0, 0, 0, 0, 0, 0}; + * + * int B_keys[5] = {9, 7, 5, 3, 1}; + * int B_vals[5] = {1, 1, 1, 1, 1}; + * + * int keys_result[12]; + * int vals_result[12]; + * + * thrust::pair<int*,int*> end = thrust::set_union_by_key(thrust::host, A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result, thrust::greater<int>()); + * // keys_result is now {12, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0} + * // vals_result is now { 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0} + * \endcode + * + * \see \p set_symmetric_difference_by_key + * \see \p set_intersection_by_key + * \see \p set_difference_by_key + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + set_union_by_key(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakCompare comp); + + +/*! \p set_union_by_key performs a key-value union operation from set theory. + * \p set_union_by_key constructs a sorted range that is the union of the sorted + * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated + * with each element from the input and output key ranges is a value element. The associated input + * value ranges need not be sorted. + * + * In the simplest case, \p set_union_by_key performs the "union" operation from set theory: + * the output range contains a copy of every element that is contained in + * [keys_first1, keys_last1), [keys_first2, keys_last2), or both. The general case + * is more complicated, because the input ranges may contain duplicate elements.
+ * The generalization is that if [keys_first1, keys_last1) contains \c m elements + * that are equivalent to each other and if [keys_first2, keys_last2) contains \c n + * elements that are equivalent to them, then all \c m elements from the first + * range shall be copied to the output range, in order, and then max(n - m, 0) + * elements from the second range shall be copied to the output, in order. + * + * Each time a key element from [keys_first1, keys_last1) or + * [keys_first2, keys_last2) is copied to the keys output range, the + * corresponding value element is copied from the corresponding values input range (beginning at + * \p values_first1 or \p values_first2) to the values output range. + * + * This version of \p set_union_by_key compares key elements using a function object \c comp. + * + * \param keys_first1 The beginning of the first input range of keys. + * \param keys_last1 The end of the first input range of keys. + * \param keys_first2 The beginning of the second input range of keys. + * \param keys_last2 The end of the second input range of keys. + * \param values_first1 The beginning of the first input range of values. + * \param values_first2 The beginning of the second input range of values. + * \param keys_result The beginning of the output range of keys. + * \param values_result The beginning of the output range of values. + * \param comp Comparison operator. + * \return A \p pair \c p such that p.first is the end of the output range of keys, + * and such that p.second is the end of the output range of values.
+ * + * \tparam InputIterator1 is a model of Input Iterator, + * \p InputIterator1 and \p InputIterator2 have the same \c value_type, + * \p InputIterator1's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator2 is a model of Input Iterator, + * \p InputIterator2 and \p InputIterator1 have the same \c value_type, + * \p InputIterator2's \c value_type is a model of LessThan Comparable, + * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, + * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. + * \tparam InputIterator3 is a model of Input Iterator, + * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam InputIterator4 is a model of Input Iterator, + * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. + * \tparam OutputIterator1 is a model of Output Iterator. + * \tparam OutputIterator2 is a model of Output Iterator. + * \tparam StrictWeakCompare is a model of Strict Weak Ordering. + * + * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. + * \pre The resulting ranges shall not overlap with any input range. + * + * The following code snippet demonstrates how to use \p set_union_by_key to compute the + * union of two sets of integers sorted in descending order with their values. + * + * \code + * #include + * #include + * ...
+ * int A_keys[7] = {12, 10, 8, 6, 4, 2, 0}; + * int A_vals[7] = { 0, 0, 0, 0, 0, 0, 0}; + * + * int B_keys[5] = {9, 7, 5, 3, 1}; + * int B_vals[5] = {1, 1, 1, 1, 1}; + * + * int keys_result[12]; + * int vals_result[12]; + * + * thrust::pair<int*,int*> end = thrust::set_union_by_key(A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result, thrust::greater<int>()); + * // keys_result is now {12, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0} + * // vals_result is now { 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0} + * \endcode + * + * \see \p set_symmetric_difference_by_key + * \see \p set_intersection_by_key + * \see \p set_difference_by_key + * \see \p sort_by_key + * \see \p is_sorted + */ +template + thrust::pair + set_union_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakCompare comp); + + +/*! \} // end set_operations + */ + + +} // end thrust + +#include + diff --git a/compat/thrust/sort.h b/compat/thrust/sort.h new file mode 100644 index 0000000..e8edfcd --- /dev/null +++ b/compat/thrust/sort.h @@ -0,0 +1,1349 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*!
\file sort.h + * \brief Functions for reorganizing ranges into sorted order + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \addtogroup sorting + * \ingroup algorithms + * \{ + */ + + +/*! \p sort sorts the elements in [first, last) into + * ascending order, meaning that if \c i and \c j are any two valid + * iterators in [first, last) such that \c i precedes \c j, + * then \c *j is not less than \c *i. Note: \c sort is not guaranteed + * to be stable. That is, suppose that \c *i and \c *j are equivalent: + * neither one is less than the other. It is not guaranteed that the + * relative order of these two elements will be preserved by \p sort. + * + * This version of \p sort compares objects using \c operator<. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam RandomAccessIterator is a model of Random Access Iterator, + * \p RandomAccessIterator is mutable, + * and \p RandomAccessIterator's \c value_type is a model of LessThan Comparable, + * and the ordering relation on \p RandomAccessIterator's \c value_type is a strict weak ordering, as defined in the + * LessThan Comparable requirements. + * + * The following code snippet demonstrates how to use \p sort to sort + * a sequence of integers using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * ... 
+ * const int N = 6; + * int A[N] = {1, 4, 2, 8, 5, 7}; + * thrust::sort(thrust::host, A, A + N); + * // A is now {1, 2, 4, 5, 7, 8} + * \endcode + * + * \see http://www.sgi.com/tech/stl/sort.html + * \see \p stable_sort + * \see \p sort_by_key + */ +template + void sort(const thrust::detail::execution_policy_base &exec, + RandomAccessIterator first, + RandomAccessIterator last); + + +/*! \p sort sorts the elements in [first, last) into + * ascending order, meaning that if \c i and \c j are any two valid + * iterators in [first, last) such that \c i precedes \c j, + * then \c *j is not less than \c *i. Note: \c sort is not guaranteed + * to be stable. That is, suppose that \c *i and \c *j are equivalent: + * neither one is less than the other. It is not guaranteed that the + * relative order of these two elements will be preserved by \p sort. + * + * This version of \p sort compares objects using \c operator<. + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * + * \tparam RandomAccessIterator is a model of Random Access Iterator, + * \p RandomAccessIterator is mutable, + * and \p RandomAccessIterator's \c value_type is a model of LessThan Comparable, + * and the ordering relation on \p RandomAccessIterator's \c value_type is a strict weak ordering, as defined in the + * LessThan Comparable requirements. + * + * The following code snippet demonstrates how to use \p sort to sort + * a sequence of integers. + * + * \code + * #include + * ... + * const int N = 6; + * int A[N] = {1, 4, 2, 8, 5, 7}; + * thrust::sort(A, A + N); + * // A is now {1, 2, 4, 5, 7, 8} + * \endcode + * + * \see http://www.sgi.com/tech/stl/sort.html + * \see \p stable_sort + * \see \p sort_by_key + */ +template + void sort(RandomAccessIterator first, + RandomAccessIterator last); + + +/*! 
\p sort sorts the elements in [first, last) into + * ascending order, meaning that if \c i and \c j are any two valid + * iterators in [first, last) such that \c i precedes \c j, + * then \c *j is not less than \c *i. Note: \c sort is not guaranteed + * to be stable. That is, suppose that \c *i and \c *j are equivalent: + * neither one is less than the other. It is not guaranteed that the + * relative order of these two elements will be preserved by \p sort. + * + * This version of \p sort compares objects using a function object + * \p comp. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param comp Comparison operator. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam RandomAccessIterator is a model of Random Access Iterator, + * \p RandomAccessIterator is mutable, + * and \p RandomAccessIterator's \c value_type is convertible to \p StrictWeakOrdering's + * \c first_argument_type and \c second_argument_type. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. + * + * The following code demonstrates how to sort integers in descending order + * using the greater comparison operator using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... + * const int N = 6; + * int A[N] = {1, 4, 2, 8, 5, 7}; + * thrust::sort(thrust::host, A, A + N, thrust::greater()); + * // A is now {8, 7, 5, 4, 2, 1}; + * \endcode + * + * \see http://www.sgi.com/tech/stl/sort.html + * \see \p stable_sort + * \see \p sort_by_key + */ +template + void sort(const thrust::detail::execution_policy_base &exec, + RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp); + + +/*! 
\p sort sorts the elements in [first, last) into + * ascending order, meaning that if \c i and \c j are any two valid + * iterators in [first, last) such that \c i precedes \c j, + * then \c *j is not less than \c *i. Note: \c sort is not guaranteed + * to be stable. That is, suppose that \c *i and \c *j are equivalent: + * neither one is less than the other. It is not guaranteed that the + * relative order of these two elements will be preserved by \p sort. + * + * This version of \p sort compares objects using a function object + * \p comp. + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param comp Comparison operator. + * + * \tparam RandomAccessIterator is a model of Random Access Iterator, + * \p RandomAccessIterator is mutable, + * and \p RandomAccessIterator's \c value_type is convertible to \p StrictWeakOrdering's + * \c first_argument_type and \c second_argument_type. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. + * + * The following code demonstrates how to sort integers in descending order + * using the greater comparison operator. + * + * \code + * #include + * #include + * ... + * const int N = 6; + * int A[N] = {1, 4, 2, 8, 5, 7}; + * thrust::sort(A, A + N, thrust::greater()); + * // A is now {8, 7, 5, 4, 2, 1}; + * \endcode + * + * \see http://www.sgi.com/tech/stl/sort.html + * \see \p stable_sort + * \see \p sort_by_key + */ +template + void sort(RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp); + + +/*! \p stable_sort is much like \c sort: it sorts the elements in + * [first, last) into ascending order, meaning that if \c i + * and \c j are any two valid iterators in [first, last) such + * that \c i precedes \c j, then \c *j is not less than \c *i. + * + * As the name suggests, \p stable_sort is stable: it preserves the + * relative ordering of equivalent elements. 
That is, if \c x and \c y + * are elements in [first, last) such that \c x precedes \c y, + * and if the two elements are equivalent (neither x < y nor + * y < x) then a postcondition of \p stable_sort is that \c x + * still precedes \c y. + * + * This version of \p stable_sort compares objects using \c operator<. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam RandomAccessIterator is a model of Random Access Iterator, + * \p RandomAccessIterator is mutable, + * and \p RandomAccessIterator's \c value_type is a model of LessThan Comparable, + * and the ordering relation on \p RandomAccessIterator's \c value_type is a strict weak ordering, as defined in the + * LessThan Comparable requirements. + * + * The following code snippet demonstrates how to use \p stable_sort to sort + * a sequence of integers using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * const int N = 6; + * int A[N] = {1, 4, 2, 8, 5, 7}; + * thrust::stable_sort(thrust::host, A, A + N); + * // A is now {1, 2, 4, 5, 7, 8} + * \endcode + * + * \see http://www.sgi.com/tech/stl/stable_sort.html + * \see \p sort + * \see \p stable_sort_by_key + */ +template + void stable_sort(const thrust::detail::execution_policy_base &exec, + RandomAccessIterator first, + RandomAccessIterator last); + + +/*! \p stable_sort is much like \c sort: it sorts the elements in + * [first, last) into ascending order, meaning that if \c i + * and \c j are any two valid iterators in [first, last) such + * that \c i precedes \c j, then \c *j is not less than \c *i. + * + * As the name suggests, \p stable_sort is stable: it preserves the + * relative ordering of equivalent elements.
That is, if \c x and \c y + * are elements in [first, last) such that \c x precedes \c y, + * and if the two elements are equivalent (neither x < y nor + * y < x) then a postcondition of \p stable_sort is that \c x + * still precedes \c y. + * + * This version of \p stable_sort compares objects using \c operator<. + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * + * \tparam RandomAccessIterator is a model of Random Access Iterator, + * \p RandomAccessIterator is mutable, + * and \p RandomAccessIterator's \c value_type is a model of LessThan Comparable, + * and the ordering relation on \p RandomAccessIterator's \c value_type is a strict weak ordering, as defined in the + * LessThan Comparable requirements. + * + * The following code snippet demonstrates how to use \p stable_sort to sort + * a sequence of integers. + * + * \code + * #include + * ... + * const int N = 6; + * int A[N] = {1, 4, 2, 8, 5, 7}; + * thrust::stable_sort(A, A + N); + * // A is now {1, 2, 4, 5, 7, 8} + * \endcode + * + * \see http://www.sgi.com/tech/stl/stable_sort.html + * \see \p sort + * \see \p stable_sort_by_key + */ +template + void stable_sort(RandomAccessIterator first, + RandomAccessIterator last); + + +/*! \p stable_sort is much like \c sort: it sorts the elements in + * [first, last) into ascending order, meaning that if \c i + * and \c j are any two valid iterators in [first, last) such + * that \c i precedes \c j, then \c *j is not less than \c *i. + * + * As the name suggests, \p stable_sort is stable: it preserves the + * relative ordering of equivalent elements. That is, if \c x and \c y + * are elements in [first, last) such that \c x precedes \c y, + * and if the two elements are equivalent (neither x < y nor + * y < x) then a postcondition of \p stable_sort is that \c x + * still precedes \c y. + * + * This version of \p stable_sort compares objects using a function object + * \p comp.
+ * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param comp Comparison operator. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam RandomAccessIterator is a model of Random Access Iterator, + * \p RandomAccessIterator is mutable, + * and \p RandomAccessIterator's \c value_type is convertible to \p StrictWeakOrdering's + * \c first_argument_type and \c second_argument_type. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. + * + * The following code demonstrates how to sort integers in descending order + * using the greater comparison operator using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... + * const int N = 6; + * int A[N] = {1, 4, 2, 8, 5, 7}; + * thrust::stable_sort(thrust::host, A, A + N, thrust::greater()); + * // A is now {8, 7, 5, 4, 2, 1}; + * \endcode + * + * \see http://www.sgi.com/tech/stl/stable_sort.html + * \see \p sort + * \see \p stable_sort_by_key + */ +template + void stable_sort(const thrust::detail::execution_policy_base &exec, + RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp); + + +/*! \p stable_sort is much like \c sort: it sorts the elements in + * [first, last) into ascending order, meaning that if \c i + * and \c j are any two valid iterators in [first, last) such + * that \c i precedes \c j, then \c *j is not less than \c *i. + * + * As the name suggests, \p stable_sort is stable: it preserves the + * relative ordering of equivalent elements. That is, if \c x and \c y + * are elements in [first, last) such that \c x precedes \c y, + * and if the two elements are equivalent (neither x < y nor + * y < x) then a postcondition of \p stable_sort is that \c x + * still precedes \c y.
+ * + * This version of \p stable_sort compares objects using a function object + * \p comp. + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param comp Comparison operator. + * + * \tparam RandomAccessIterator is a model of Random Access Iterator, + * \p RandomAccessIterator is mutable, + * and \p RandomAccessIterator's \c value_type is convertible to \p StrictWeakOrdering's + * \c first_argument_type and \c second_argument_type. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. + * + * The following code demonstrates how to sort integers in descending order + * using the greater comparison operator. + * + * \code + * #include + * #include + * ... + * const int N = 6; + * int A[N] = {1, 4, 2, 8, 5, 7}; + * thrust::stable_sort(A, A + N, thrust::greater()); + * // A is now {8, 7, 5, 4, 2, 1}; + * \endcode + * + * \see http://www.sgi.com/tech/stl/stable_sort.html + * \see \p sort + * \see \p stable_sort_by_key + */ +template + void stable_sort(RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp); + + +/////////////// +// Key Value // +/////////////// + + +/*! \p sort_by_key performs a key-value sort. That is, \p sort_by_key sorts the + * elements in [keys_first, keys_last) and [values_first, + * values_first + (keys_last - keys_first)) into ascending key order, + * meaning that if \c i and \c j are any two valid iterators in [keys_first, + * keys_last) such that \c i precedes \c j, and \c p and \c q are iterators + * in [values_first, values_first + (keys_last - keys_first)) + * corresponding to \c i and \c j respectively, then \c *j is not less than + * \c *i. + * + * Note: \c sort_by_key is not guaranteed to be stable. That is, suppose that + * \c *i and \c *j are equivalent: neither one is less than the other. It is not + * guaranteed that the relative order of these two keys or the relative + * order of their corresponding values will be preserved by \p sort_by_key.
+ * + * This version of \p sort_by_key compares key objects using \c operator<. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first The beginning of the key sequence. + * \param keys_last The end of the key sequence. + * \param values_first The beginning of the value sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam RandomAccessIterator1 is a model of Random Access Iterator, + * \p RandomAccessIterator1 is mutable, + * and \p RandomAccessIterator1's \c value_type is a model of LessThan Comparable, + * and the ordering relation on \p RandomAccessIterator1's \c value_type is a strict weak ordering, as defined in the + * LessThan Comparable requirements. + * \tparam RandomAccessIterator2 is a model of Random Access Iterator, + * and \p RandomAccessIterator2 is mutable. + * + * \pre The range [keys_first, keys_last)) shall not overlap the range [values_first, values_first + (keys_last - keys_first)). + * + * The following code snippet demonstrates how to use \p sort_by_key to sort + * an array of character values using integers as sorting keys using the \p thrust::host execution policy + * for parallelization: + * + * \code + * #include + * #include + * ... + * const int N = 6; + * int keys[N] = { 1, 4, 2, 8, 5, 7}; + * char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'}; + * thrust::sort_by_key(thrust::host, keys, keys + N, values); + * // keys is now { 1, 2, 4, 5, 7, 8} + * // values is now {'a', 'c', 'b', 'e', 'f', 'd'} + * \endcode + * + * \see http://www.sgi.com/tech/stl/sort.html + * \see \p stable_sort_by_key + * \see \p sort + */ +template + void sort_by_key(const thrust::detail::execution_policy_base &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first); + + +/*! \p sort_by_key performs a key-value sort. 
That is, \p sort_by_key sorts the + * elements in [keys_first, keys_last) and [values_first, + * values_first + (keys_last - keys_first)) into ascending key order, + * meaning that if \c i and \c j are any two valid iterators in [keys_first, + * keys_last) such that \c i precedes \c j, and \c p and \c q are iterators + * in [values_first, values_first + (keys_last - keys_first)) + * corresponding to \c i and \c j respectively, then \c *j is not less than + * \c *i. + * + * Note: \c sort_by_key is not guaranteed to be stable. That is, suppose that + * \c *i and \c *j are equivalent: neither one is less than the other. It is not + * guaranteed that the relative order of these two keys or the relative + * order of their corresponding values will be preserved by \p sort_by_key. + * + * This version of \p sort_by_key compares key objects using \c operator<. + * + * \param keys_first The beginning of the key sequence. + * \param keys_last The end of the key sequence. + * \param values_first The beginning of the value sequence. + * + * \tparam RandomAccessIterator1 is a model of Random Access Iterator, + * \p RandomAccessIterator1 is mutable, + * and \p RandomAccessIterator1's \c value_type is a model of LessThan Comparable, + * and the ordering relation on \p RandomAccessIterator1's \c value_type is a strict weak ordering, as defined in the + * LessThan Comparable requirements. + * \tparam RandomAccessIterator2 is a model of Random Access Iterator, + * and \p RandomAccessIterator2 is mutable. + * + * \pre The range [keys_first, keys_last)) shall not overlap the range [values_first, values_first + (keys_last - keys_first)). + * + * The following code snippet demonstrates how to use \p sort_by_key to sort + * an array of character values using integers as sorting keys. + * + * \code + * #include + * ... 
+ * const int N = 6; + * int keys[N] = { 1, 4, 2, 8, 5, 7}; + * char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'}; + * thrust::sort_by_key(keys, keys + N, values); + * // keys is now { 1, 2, 4, 5, 7, 8} + * // values is now {'a', 'c', 'b', 'e', 'f', 'd'} + * \endcode + * + * \see http://www.sgi.com/tech/stl/sort.html + * \see \p stable_sort_by_key + * \see \p sort + */ +template + void sort_by_key(RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first); + + +/*! \p sort_by_key performs a key-value sort. That is, \p sort_by_key sorts the + * elements in [keys_first, keys_last) and [values_first, + * values_first + (keys_last - keys_first)) into ascending key order, + * meaning that if \c i and \c j are any two valid iterators in [keys_first, + * keys_last) such that \c i precedes \c j, and \c p and \c q are iterators + * in [values_first, values_first + (keys_last - keys_first)) + * corresponding to \c i and \c j respectively, then \c *j is not less than + * \c *i. + * + * Note: \c sort_by_key is not guaranteed to be stable. That is, suppose that + * \c *i and \c *j are equivalent: neither one is less than the other. It is not + * guaranteed that the relative order of these two keys or the relative + * order of their corresponding values will be preserved by \p sort_by_key. + * + * This version of \p sort_by_key compares key objects using a function object + * \c comp. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first The beginning of the key sequence. + * \param keys_last The end of the key sequence. + * \param values_first The beginning of the value sequence. + * \param comp Comparison operator. + * + * \tparam DerivedPolicy The name of the derived execution policy. 
+ * \tparam RandomAccessIterator1 is a model of Random Access Iterator, + * \p RandomAccessIterator1 is mutable, + * and \p RandomAccessIterator1's \c value_type is convertible to \p StrictWeakOrdering's + * \c first_argument_type and \c second_argument_type. + * \tparam RandomAccessIterator2 is a model of Random Access Iterator, + * and \p RandomAccessIterator2 is mutable. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. + * + * \pre The range [keys_first, keys_last) shall not overlap the range [values_first, values_first + (keys_last - keys_first)). + * + * The following code snippet demonstrates how to use \p sort_by_key to sort + * an array of character values using integers as sorting keys using the \p thrust::host execution policy + * for parallelization. The keys are sorted in descending order using the greater comparison operator. + * + * \code + * #include + * #include + * ... + * const int N = 6; + * int keys[N] = { 1, 4, 2, 8, 5, 7}; + * char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'}; + * thrust::sort_by_key(thrust::host, keys, keys + N, values, thrust::greater()); + * // keys is now { 8, 7, 5, 4, 2, 1} + * // values is now {'d', 'f', 'e', 'b', 'c', 'a'} + * \endcode + * + * \see http://www.sgi.com/tech/stl/sort.html + * \see \p stable_sort_by_key + * \see \p sort + */ +template + void sort_by_key(const thrust::detail::execution_policy_base &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp); + + +/*! \p sort_by_key performs a key-value sort.
That is, \p sort_by_key sorts the + * elements in [keys_first, keys_last) and [values_first, + * values_first + (keys_last - keys_first)) into ascending key order, + * meaning that if \c i and \c j are any two valid iterators in [keys_first, + * keys_last) such that \c i precedes \c j, and \c p and \c q are iterators + * in [values_first, values_first + (keys_last - keys_first)) + * corresponding to \c i and \c j respectively, then \c *j is not less than + * \c *i. + * + * Note: \c sort_by_key is not guaranteed to be stable. That is, suppose that + * \c *i and \c *j are equivalent: neither one is less than the other. It is not + * guaranteed that the relative order of these two keys or the relative + * order of their corresponding values will be preserved by \p sort_by_key. + * + * This version of \p sort_by_key compares key objects using a function object + * \c comp. + * + * \param keys_first The beginning of the key sequence. + * \param keys_last The end of the key sequence. + * \param values_first The beginning of the value sequence. + * \param comp Comparison operator. + * + * \tparam RandomAccessIterator1 is a model of Random Access Iterator, + * \p RandomAccessIterator1 is mutable, + * and \p RandomAccessIterator1's \c value_type is convertible to \p StrictWeakOrdering's + * \c first_argument_type and \c second_argument_type. + * \tparam RandomAccessIterator2 is a model of Random Access Iterator, + * and \p RandomAccessIterator2 is mutable. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. + * + * \pre The range [keys_first, keys_last)) shall not overlap the range [values_first, values_first + (keys_last - keys_first)). + * + * The following code snippet demonstrates how to use \p sort_by_key to sort + * an array of character values using integers as sorting keys. The keys + * are sorted in descending order using the greater comparison operator. + * + * \code + * #include + * ... 
+ * const int N = 6; + * int keys[N] = { 1, 4, 2, 8, 5, 7}; + * char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'}; + * thrust::sort_by_key(keys, keys + N, values, thrust::greater()); + * // keys is now { 8, 7, 5, 4, 2, 1} + * // values is now {'d', 'f', 'e', 'b', 'c', 'a'} + * \endcode + * + * \see http://www.sgi.com/tech/stl/sort.html + * \see \p stable_sort_by_key + * \see \p sort + */ +template + void sort_by_key(RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp); + + +/*! \p stable_sort_by_key performs a key-value sort. That is, \p stable_sort_by_key + * sorts the elements in [keys_first, keys_last) and [values_first, + * values_first + (keys_last - keys_first)) into ascending key order, + * meaning that if \c i and \c j are any two valid iterators in [keys_first, + * keys_last) such that \c i precedes \c j, and \c p and \c q are iterators + * in [values_first, values_first + (keys_last - keys_first)) + * corresponding to \c i and \c j respectively, then \c *j is not less than + * \c *i. + * + * As the name suggests, \p stable_sort_by_key is stable: it preserves the + * relative ordering of equivalent elements. That is, if \c x and \c y + * are elements in [keys_first, keys_last) such that \c x precedes \c y, + * and if the two elements are equivalent (neither x < y nor + * y < x) then a postcondition of \p stable_sort_by_key is that \c x + * still precedes \c y. + * + * This version of \p stable_sort_by_key compares key objects using \c operator<. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first The beginning of the key sequence. + * \param keys_last The end of the key sequence. + * \param values_first The beginning of the value sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. 
+ * \tparam RandomAccessIterator1 is a model of Random Access Iterator, + * \p RandomAccessIterator1 is mutable, + * and \p RandomAccessIterator1's \c value_type is a model of LessThan Comparable, + * and the ordering relation on \p RandomAccessIterator1's \c value_type is a strict weak ordering, as defined in the + * LessThan Comparable requirements. + * \tparam RandomAccessIterator2 is a model of Random Access Iterator, + * and \p RandomAccessIterator2 is mutable. + * + * \pre The range [keys_first, keys_last)) shall not overlap the range [values_first, values_first + (keys_last - keys_first)). + * + * The following code snippet demonstrates how to use \p stable_sort_by_key to sort + * an array of characters using integers as sorting keys using the \p thrust::host execution policy for + * parallelization: + * + * \code + * #include + * #include + * ... + * const int N = 6; + * int keys[N] = { 1, 4, 2, 8, 5, 7}; + * char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'}; + * thrust::stable_sort_by_key(thrust::host, keys, keys + N, values); + * // keys is now { 1, 2, 4, 5, 7, 8} + * // values is now {'a', 'c', 'b', 'e', 'f', 'd'} + * \endcode + * + * \see http://www.sgi.com/tech/stl/sort.html + * \see \p sort_by_key + * \see \p stable_sort + */ +template + void stable_sort_by_key(const thrust::detail::execution_policy_base &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first); + + +/*! \p stable_sort_by_key performs a key-value sort. That is, \p stable_sort_by_key + * sorts the elements in [keys_first, keys_last) and [values_first, + * values_first + (keys_last - keys_first)) into ascending key order, + * meaning that if \c i and \c j are any two valid iterators in [keys_first, + * keys_last) such that \c i precedes \c j, and \c p and \c q are iterators + * in [values_first, values_first + (keys_last - keys_first)) + * corresponding to \c i and \c j respectively, then \c *j is not less than + * \c *i. 
+ * + * As the name suggests, \p stable_sort_by_key is stable: it preserves the + * relative ordering of equivalent elements. That is, if \c x and \c y + * are elements in [keys_first, keys_last) such that \c x precedes \c y, + * and if the two elements are equivalent (neither x < y nor + * y < x) then a postcondition of \p stable_sort_by_key is that \c x + * still precedes \c y. + * + * This version of \p stable_sort_by_key compares key objects using \c operator<. + * + * \param keys_first The beginning of the key sequence. + * \param keys_last The end of the key sequence. + * \param values_first The beginning of the value sequence. + * + * \tparam RandomAccessIterator1 is a model of Random Access Iterator, + * \p RandomAccessIterator1 is mutable, + * and \p RandomAccessIterator1's \c value_type is a model of LessThan Comparable, + * and the ordering relation on \p RandomAccessIterator1's \c value_type is a strict weak ordering, as defined in the + * LessThan Comparable requirements. + * \tparam RandomAccessIterator2 is a model of Random Access Iterator, + * and \p RandomAccessIterator2 is mutable. + * + * \pre The range [keys_first, keys_last)) shall not overlap the range [values_first, values_first + (keys_last - keys_first)). + * + * The following code snippet demonstrates how to use \p stable_sort_by_key to sort + * an array of characters using integers as sorting keys. + * + * \code + * #include + * ... + * const int N = 6; + * int keys[N] = { 1, 4, 2, 8, 5, 7}; + * char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'}; + * thrust::stable_sort_by_key(keys, keys + N, values); + * // keys is now { 1, 2, 4, 5, 7, 8} + * // values is now {'a', 'c', 'b', 'e', 'f', 'd'} + * \endcode + * + * \see http://www.sgi.com/tech/stl/sort.html + * \see \p sort_by_key + * \see \p stable_sort + */ +template + void stable_sort_by_key(RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first); + + +/*! 
\p stable_sort_by_key performs a key-value sort. That is, \p stable_sort_by_key + * sorts the elements in [keys_first, keys_last) and [values_first, + * values_first + (keys_last - keys_first)) into ascending key order, + * meaning that if \c i and \c j are any two valid iterators in [keys_first, + * keys_last) such that \c i precedes \c j, and \c p and \c q are iterators + * in [values_first, values_first + (keys_last - keys_first)) + * corresponding to \c i and \c j respectively, then \c *j is not less than + * \c *i. + * + * As the name suggests, \p stable_sort_by_key is stable: it preserves the + * relative ordering of equivalent elements. That is, if \c x and \c y + * are elements in [keys_first, keys_last) such that \c x precedes \c y, + * and if the two elements are equivalent (neither x < y nor + * y < x) then a postcondition of \p stable_sort_by_key is that \c x + * still precedes \c y. + * + * This version of \p stable_sort_by_key compares key objects using the function + * object \p comp. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first The beginning of the key sequence. + * \param keys_last The end of the key sequence. + * \param values_first The beginning of the value sequence. + * \param comp Comparison operator. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam RandomAccessIterator1 is a model of Random Access Iterator, + * \p RandomAccessIterator1 is mutable, + * and \p RandomAccessIterator1's \c value_type is convertible to \p StrictWeakOrdering's + * \c first_argument_type and \c second_argument_type. + * \tparam RandomAccessIterator2 is a model of Random Access Iterator, + * and \p RandomAccessIterator2 is mutable. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. 
+ * + * \pre The range [keys_first, keys_last) shall not overlap the range [values_first, values_first + (keys_last - keys_first)). + * + * The following code snippet demonstrates how to use \p stable_sort_by_key to sort + * an array of character values using integers as sorting keys using the \p thrust::host execution policy for + * parallelization. The keys are sorted in descending order using the greater comparison operator. + * + * \code + * #include + * #include + * ... + * const int N = 6; + * int keys[N] = { 1, 4, 2, 8, 5, 7}; + * char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'}; + * thrust::stable_sort_by_key(thrust::host, keys, keys + N, values, thrust::greater()); + * // keys is now { 8, 7, 5, 4, 2, 1} + * // values is now {'d', 'f', 'e', 'b', 'c', 'a'} + * \endcode + * + * + * \see http://www.sgi.com/tech/stl/sort.html + * \see \p sort_by_key + * \see \p stable_sort + */ +template + void stable_sort_by_key(const thrust::detail::execution_policy_base &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp); + + +/*! \p stable_sort_by_key performs a key-value sort. That is, \p stable_sort_by_key + * sorts the elements in [keys_first, keys_last) and [values_first, + * values_first + (keys_last - keys_first)) into ascending key order, + * meaning that if \c i and \c j are any two valid iterators in [keys_first, + * keys_last) such that \c i precedes \c j, and \c p and \c q are iterators + * in [values_first, values_first + (keys_last - keys_first)) + * corresponding to \c i and \c j respectively, then \c *j is not less than + * \c *i. + * + * As the name suggests, \p stable_sort_by_key is stable: it preserves the + * relative ordering of equivalent elements.
That is, if \c x and \c y + * are elements in [keys_first, keys_last) such that \c x precedes \c y, + * and if the two elements are equivalent (neither x < y nor + * y < x) then a postcondition of \p stable_sort_by_key is that \c x + * still precedes \c y. + * + * This version of \p stable_sort_by_key compares key objects using the function + * object \p comp. + * + * \param keys_first The beginning of the key sequence. + * \param keys_last The end of the key sequence. + * \param values_first The beginning of the value sequence. + * \param comp Comparison operator. + * + * \tparam RandomAccessIterator1 is a model of Random Access Iterator, + * \p RandomAccessIterator1 is mutable, + * and \p RandomAccessIterator1's \c value_type is convertible to \p StrictWeakOrdering's + * \c first_argument_type and \c second_argument_type. + * \tparam RandomAccessIterator2 is a model of Random Access Iterator, + * and \p RandomAccessIterator2 is mutable. + * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. + * + * \pre The range [keys_first, keys_last) shall not overlap the range [values_first, values_first + (keys_last - keys_first)). + * + * The following code snippet demonstrates how to use \p stable_sort_by_key to sort + * an array of character values using integers as sorting keys. The keys + * are sorted in descending order using the greater comparison operator. + * + * \code + * #include + * ... + * const int N = 6; + * int keys[N] = { 1, 4, 2, 8, 5, 7}; + * char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'}; + * thrust::stable_sort_by_key(keys, keys + N, values, thrust::greater()); + * // keys is now { 8, 7, 5, 4, 2, 1} + * // values is now {'d', 'f', 'e', 'b', 'c', 'a'} + * \endcode + * + * + * \see http://www.sgi.com/tech/stl/sort.html + * \see \p sort_by_key + * \see \p stable_sort + */ +template + void stable_sort_by_key(RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp); + + +/*!
\} // end sorting + */ + + +/*! \addtogroup reductions + * \{ + * \addtogroup predicates + * \{ + */ + + +/*! \p is_sorted returns \c true if the range [first, last) is + * sorted in ascending order, and \c false otherwise. + * + * Specifically, this version of \p is_sorted returns \c false if for + * some iterator \c i in the range [first, last - 1) the + * expression *(i + 1) < *i is \c true. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \return \c true, if the sequence is sorted; \c false, otherwise. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * \p ForwardIterator's \c value_type is a model of LessThan Comparable, + * and the ordering on objects of \p ForwardIterator's \c value_type is a strict weak ordering, as defined + * in the LessThan Comparable requirements. + * + * + * The following code demonstrates how to use \p is_sorted to test whether the + * contents of a \c device_vector are stored in ascending order using the \p thrust::device execution policy + * for parallelization: + * + * \code + * #include + * #include + * #include + * #include + * ... 
+ * thrust::device_vector v(6); + * v[0] = 1; + * v[1] = 4; + * v[2] = 2; + * v[3] = 8; + * v[4] = 5; + * v[5] = 7; + * + * bool result = thrust::is_sorted(thrust::device, v.begin(), v.end()); + * + * // result == false + * + * thrust::sort(v.begin(), v.end()); + * result = thrust::is_sorted(thrust::device, v.begin(), v.end()); + * + * // result == true + * \endcode + * + * \see http://www.sgi.com/tech/stl/is_sorted.html + * \see is_sorted_until + * \see \c sort + * \see \c stable_sort + * \see \c less + */ +template + bool is_sorted(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last); + + +/*! \p is_sorted returns \c true if the range [first, last) is + * sorted in ascending order, and \c false otherwise. + * + * Specifically, this version of \p is_sorted returns \c false if for + * some iterator \c i in the range [first, last - 1) the + * expression *(i + 1) < *i is \c true. + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \return \c true, if the sequence is sorted; \c false, otherwise. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * \p ForwardIterator's \c value_type is a model of LessThan Comparable, + * and the ordering on objects of \p ForwardIterator's \c value_type is a strict weak ordering, as defined + * in the LessThan Comparable requirements. + * + * + * The following code demonstrates how to use \p is_sorted to test whether the + * contents of a \c device_vector are stored in ascending order. + * + * \code + * #include + * #include + * #include + * ... 
+ * thrust::device_vector v(6); + * v[0] = 1; + * v[1] = 4; + * v[2] = 2; + * v[3] = 8; + * v[4] = 5; + * v[5] = 7; + * + * bool result = thrust::is_sorted(v.begin(), v.end()); + * + * // result == false + * + * thrust::sort(v.begin(), v.end()); + * result = thrust::is_sorted(v.begin(), v.end()); + * + * // result == true + * \endcode + * + * \see http://www.sgi.com/tech/stl/is_sorted.html + * \see is_sorted_until + * \see \c sort + * \see \c stable_sort + * \see \c less + */ +template + bool is_sorted(ForwardIterator first, + ForwardIterator last); + + +/*! \p is_sorted returns \c true if the range [first, last) is sorted in ascending + * order according to a user-defined comparison operation, and \c false otherwise. + * + * Specifically, this version of \p is_sorted returns \c false if for some iterator \c i in + * the range [first, last - 1) the expression comp(*(i + 1), *i) is \c true. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param comp Comparison operator. + * \return \c true, if the sequence is sorted according to comp; \c false, otherwise. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator's \c value_type is convertible to both \p Compare's \c first_argument_type + * and \c second_argument_type. + * \tparam Compare is a model of Strict Weak Ordering. + * + * The following code snippet demonstrates how to use \p is_sorted to test whether the + * contents of a \c device_vector are stored in descending order using the \p thrust::device execution + * policy for parallelization: + * + * \code + * #include + * #include + * #include + * #include + * ...
+ * thrust::device_vector v(6); + * v[0] = 1; + * v[1] = 4; + * v[2] = 2; + * v[3] = 8; + * v[4] = 5; + * v[5] = 7; + * + * thrust::greater comp; + * bool result = thrust::is_sorted(thrust::device, v.begin(), v.end(), comp); + * + * // result == false + * + * thrust::sort(v.begin(), v.end(), comp); + * result = thrust::is_sorted(thrust::device, v.begin(), v.end(), comp); + * + * // result == true + * \endcode + * + * \see http://www.sgi.com/tech/stl/is_sorted.html + * \see \c sort + * \see \c stable_sort + * \see \c less + */ +template + bool is_sorted(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + Compare comp); + + +/*! \p is_sorted returns \c true if the range [first, last) is sorted in ascending + * order according to a user-defined comparison operation, and \c false otherwise. + * + * Specifically, this version of \p is_sorted returns \c false if for some iterator \c i in + * the range [first, last - 1) the expression comp(*(i + 1), *i) is \c true. + * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param comp Comparison operator. + * \return \c true, if the sequence is sorted according to comp; \c false, otherwise. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator's \c value_type is convertible to both \c StrictWeakOrdering's \c first_argument_type + * and \c second_argument_type. + * \tparam Compare is a model of Strict Weak Ordering. + * + * The following code snippet demonstrates how to use \p is_sorted to test whether the + * contents of a \c device_vector are stored in descending order. + * + * \code + * #include + * #include + * #include + * ...
+ * thrust::device_vector v(6); + * v[0] = 1; + * v[1] = 4; + * v[2] = 2; + * v[3] = 8; + * v[4] = 5; + * v[5] = 7; + * + * thrust::greater comp; + * bool result = thrust::is_sorted(v.begin(), v.end(), comp); + * + * // result == false + * + * thrust::sort(v.begin(), v.end(), comp); + * result = thrust::is_sorted(v.begin(), v.end(), comp); + * + * // result == true + * \endcode + * + * \see http://www.sgi.com/tech/stl/is_sorted.html + * \see \c sort + * \see \c stable_sort + * \see \c less + */ +template + bool is_sorted(ForwardIterator first, + ForwardIterator last, + Compare comp); + + +/*! This version of \p is_sorted_until returns the last iterator \c i in [first,last] for + * which the range [first,i) is sorted using \c operator<. If distance(first,last) < 2, + * \p is_sorted_until simply returns \p last. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the range of interest. + * \param last The end of the range of interest. + * \return The last iterator in the input range for which it is sorted. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator and + * \p ForwardIterator's \c value_type is a model of LessThan Comparable. + * + * The following code snippet demonstrates how to use \p is_sorted_until to find the first position + * in an array where the data becomes unsorted using the \p thrust::host execution policy for + * parallelization: + * + * \code + * #include + * #include + * + * ...
+ * + * int A[8] = {0, 1, 2, 3, 0, 1, 2, 3}; + * + * int * B = thrust::is_sorted_until(thrust::host, A, A + 8); + * + * // B - A is 4 + * // [A, B) is sorted + * \endcode + * + * \see \p is_sorted + * \see \p sort + * \see \p sort_by_key + * \see \p stable_sort + * \see \p stable_sort_by_key + */ +template + ForwardIterator is_sorted_until(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last); + + +/*! This version of \p is_sorted_until returns the last iterator \c i in [first,last] for + * which the range [first,i) is sorted using \c operator<. If distance(first,last) < 2, + * \p is_sorted_until simply returns \p last. + * + * \param first The beginning of the range of interest. + * \param last The end of the range of interest. + * \return The last iterator in the input range for which it is sorted. + * + * \tparam ForwardIterator is a model of Forward Iterator and + * \p ForwardIterator's \c value_type is a model of LessThan Comparable. + * + * The following code snippet demonstrates how to use \p is_sorted_until to find the first position + * in an array where the data becomes unsorted: + * + * \code + * #include + * + * ... + * + * int A[8] = {0, 1, 2, 3, 0, 1, 2, 3}; + * + * int * B = thrust::is_sorted_until(A, A + 8); + * + * // B - A is 4 + * // [A, B) is sorted + * \endcode + * + * \see \p is_sorted + * \see \p sort + * \see \p sort_by_key + * \see \p stable_sort + * \see \p stable_sort_by_key + */ +template + ForwardIterator is_sorted_until(ForwardIterator first, + ForwardIterator last); + + +/*! This version of \p is_sorted_until returns the last iterator \c i in [first,last] for + * which the range [first,i) is sorted using the function object \c comp. If distance(first,last) < 2, + * \p is_sorted_until simply returns \p last. + * + * The algorithm's execution is parallelized as determined by \p exec.
+ * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the range of interest. + * \param last The end of the range of interest. + * \param comp The function object to use for comparison. + * \return The last iterator in the input range for which it is sorted. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator and + * \p ForwardIterator's \c value_type is convertible to \p Compare's \c argument_type. + * \tparam Compare is a model of Strict Weak Ordering. + * + * The following code snippet demonstrates how to use \p is_sorted_until to find the first position + * in an array where the data becomes unsorted in descending order using the \p thrust::host execution + * policy for parallelization: + * + * \code + * #include + * #include + * #include + * + * ... + * + * int A[8] = {3, 2, 1, 0, 3, 2, 1, 0}; + * + * thrust::greater comp; + * int * B = thrust::is_sorted_until(thrust::host, A, A + 8, comp); + * + * // B - A is 4 + * // [A, B) is sorted in descending order + * \endcode + * + * \see \p is_sorted + * \see \p sort + * \see \p sort_by_key + * \see \p stable_sort + * \see \p stable_sort_by_key + */ +template + ForwardIterator is_sorted_until(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + Compare comp); + + +/*! This version of \p is_sorted_until returns the last iterator \c i in [first,last] for + * which the range [first,i) is sorted using the function object \c comp. If distance(first,last) < 2, + * \p is_sorted_until simply returns \p last. + * + * \param first The beginning of the range of interest. + * \param last The end of the range of interest. + * \param comp The function object to use for comparison. + * \return The last iterator in the input range for which it is sorted.
+ * + * \tparam ForwardIterator is a model of Forward Iterator and + * \p ForwardIterator's \c value_type is convertible to \p Compare's \c argument_type. + * \tparam Compare is a model of Strict Weak Ordering. + * + * The following code snippet demonstrates how to use \p is_sorted_until to find the first position + * in an array where the data becomes unsorted in descending order: + * + * \code + * #include + * #include + * + * ... + * + * int A[8] = {3, 2, 1, 0, 3, 2, 1, 0}; + * + * thrust::greater comp; + * int * B = thrust::is_sorted_until(A, A + 8, comp); + * + * // B - A is 4 + * // [A, B) is sorted in descending order + * \endcode + * + * \see \p is_sorted + * \see \p sort + * \see \p sort_by_key + * \see \p stable_sort + * \see \p stable_sort_by_key + */ +template + ForwardIterator is_sorted_until(ForwardIterator first, + ForwardIterator last, + Compare comp); + + +/*! \} // end predicates + * \} // end reductions + */ + + +} // end namespace thrust + +#include + diff --git a/compat/thrust/swap.h b/compat/thrust/swap.h new file mode 100644 index 0000000..085e546 --- /dev/null +++ b/compat/thrust/swap.h @@ -0,0 +1,190 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file swap.h + * \brief Functions for swapping the value of elements + */ + +#pragma once + +#include +#include + +// empty Doxygen comment below so namespace thrust's documentation will be extracted + +/*! 
+ */ +namespace thrust +{ + +/*! \addtogroup utility + * \{ + */ + +/*! \addtogroup swap + * \{ + */ + +/*! \p swap assigns the contents of \c a to \c b and the + * contents of \c b to \c a. This is used as a primitive operation + * by many other algorithms. + * + * \param a The first value of interest. After completion, + * the value of b will be returned here. + * \param b The second value of interest. After completion, + * the value of a will be returned here. + * + * \tparam Assignable is a model of Assignable. + * + * The following code snippet demonstrates how to use \p swap to + * swap the contents of two variables. + * + * \code + * #include + * ... + * int x = 1; + * int y = 2; + * thrust::swap(x,y); + * + * // x == 2, y == 1 + * \endcode + */ +template +__host__ __device__ +inline void swap(Assignable1 &a, Assignable2 &b); + +/*! \} // swap + */ + +/*! \} // utility + */ + + +/*! \addtogroup copying + * \{ + */ + + +/*! \p swap_ranges swaps each of the elements in the range [first1, last1) + * with the corresponding element in the range [first2, first2 + (last1 - first1)). + * That is, for each integer \c n such that 0 <= n < (last1 - first1), it swaps + * *(first1 + n) and *(first2 + n). The return value is + * first2 + (last1 - first1). + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the first sequence to swap. + * \param last1 One position past the last element of the first sequence to swap. + * \param first2 The beginning of the second sequence to swap. + * \return An iterator pointing to one position past the last element of the second + * sequence to swap. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator1 is a model of Forward Iterator, + * and \p ForwardIterator1's \c value_type must be convertible to \p ForwardIterator2's \c value_type.
+ * \tparam ForwardIterator2 is a model of Forward Iterator, + * and \p ForwardIterator2's \c value_type must be convertible to \p ForwardIterator1's \c value_type. + * + * \pre \p first1 may equal \p first2, but the range [first1, last1) shall not overlap the range [first2, first2 + (last1 - first1)) otherwise. + * + * The following code snippet demonstrates how to use \p swap_ranges to + * swap the contents of two \c thrust::device_vectors using the \p thrust::device execution + * policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... + * thrust::device_vector v1(2), v2(2); + * v1[0] = 1; + * v1[1] = 2; + * v2[0] = 3; + * v2[1] = 4; + * + * thrust::swap_ranges(thrust::device, v1.begin(), v1.end(), v2.begin()); + * + * // v1[0] == 3, v1[1] == 4, v2[0] == 1, v2[1] == 2 + * \endcode + * + * \see http://www.sgi.com/tech/stl/swap_ranges.html + * \see \c swap + */ +template + ForwardIterator2 swap_ranges(const thrust::detail::execution_policy_base &exec, + ForwardIterator1 first1, + ForwardIterator1 last1, + ForwardIterator2 first2); + + +/*! \p swap_ranges swaps each of the elements in the range [first1, last1) + * with the corresponding element in the range [first2, first2 + (last1 - first1)). + * That is, for each integer \c n such that 0 <= n < (last1 - first1), it swaps + * *(first1 + n) and *(first2 + n). The return value is + * first2 + (last1 - first1). + * + * \param first1 The beginning of the first sequence to swap. + * \param last1 One position past the last element of the first sequence to swap. + * \param first2 The beginning of the second sequence to swap. + * \return An iterator pointing to one position past the last element of the second + * sequence to swap. + * + * \tparam ForwardIterator1 is a model of Forward Iterator, + * and \p ForwardIterator1's \c value_type must be convertible to \p ForwardIterator2's \c value_type. 
+ * \tparam ForwardIterator2 is a model of Forward Iterator, + * and \p ForwardIterator2's \c value_type must be convertible to \p ForwardIterator1's \c value_type. + * + * \pre \p first1 may equal \p first2, but the range [first1, last1) shall not overlap the range [first2, first2 + (last1 - first1)) otherwise. + * + * The following code snippet demonstrates how to use \p swap_ranges to + * swap the contents of two \c thrust::device_vectors. + * + * \code + * #include + * #include + * ... + * thrust::device_vector v1(2), v2(2); + * v1[0] = 1; + * v1[1] = 2; + * v2[0] = 3; + * v2[1] = 4; + * + * thrust::swap_ranges(v1.begin(), v1.end(), v2.begin()); + * + * // v1[0] == 3, v1[1] == 4, v2[0] == 1, v2[1] == 2 + * \endcode + * + * \see http://www.sgi.com/tech/stl/swap_ranges.html + * \see \c swap + */ +template + ForwardIterator2 swap_ranges(ForwardIterator1 first1, + ForwardIterator1 last1, + ForwardIterator2 first2); + + +/*! \} // copying + */ + + +} // end thrust + +#include + diff --git a/compat/thrust/system/cpp/detail/adjacent_difference.h b/compat/thrust/system/cpp/detail/adjacent_difference.h new file mode 100644 index 0000000..ea212ff --- /dev/null +++ b/compat/thrust/system/cpp/detail/adjacent_difference.h @@ -0,0 +1,54 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file adjacent_difference.h + * \brief C++ implementation of adjacent_difference. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + +template +OutputIterator adjacent_difference(execution_policy &, + InputIterator first, + InputIterator last, + OutputIterator result, + BinaryFunction binary_op) +{ + return thrust::system::detail::internal::scalar::adjacent_difference(first, last, result, binary_op); +} + +} // end namespace detail +} // end namespace cpp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cpp/detail/assign_value.h b/compat/thrust/system/cpp/detail/assign_value.h new file mode 100644 index 0000000..847fc97 --- /dev/null +++ b/compat/thrust/system/cpp/detail/assign_value.h @@ -0,0 +1,42 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + +template +__host__ __device__ + void assign_value(thrust::system::cpp::detail::execution_policy &, Pointer1 dst, Pointer2 src) +{ + *thrust::raw_pointer_cast(dst) = *thrust::raw_pointer_cast(src); +} // end assign_value() + +} // end detail +} // end cpp +} // end system +} // end thrust + diff --git a/compat/thrust/system/cpp/detail/binary_search.h b/compat/thrust/system/cpp/detail/binary_search.h new file mode 100644 index 0000000..37af539 --- /dev/null +++ b/compat/thrust/system/cpp/detail/binary_search.h @@ -0,0 +1,77 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file binary_search.h + * \brief C++ implementation of binary search algorithms. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + +template +ForwardIterator lower_bound(tag, + ForwardIterator first, + ForwardIterator last, + const T& val, + StrictWeakOrdering comp) +{ + return thrust::system::detail::internal::scalar::lower_bound(first, last, val, comp); +} + + +template +ForwardIterator upper_bound(tag, + ForwardIterator first, + ForwardIterator last, + const T& val, + StrictWeakOrdering comp) +{ + return thrust::system::detail::internal::scalar::upper_bound(first, last, val, comp); +} + +template +bool binary_search(tag, + ForwardIterator first, + ForwardIterator last, + const T& val, + StrictWeakOrdering comp) +{ + return thrust::system::detail::internal::scalar::binary_search(first, last, val, comp); +} + +} // end namespace detail +} // end namespace cpp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cpp/detail/copy.h b/compat/thrust/system/cpp/detail/copy.h new file mode 100644 index 0000000..7299bbb --- /dev/null +++ b/compat/thrust/system/cpp/detail/copy.h @@ -0,0 +1,61 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file copy.h + * \brief C++ implementations of copy functions. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + +template + OutputIterator copy(tag, + InputIterator first, + InputIterator last, + OutputIterator result) +{ + return thrust::system::detail::internal::scalar::copy(first, last, result); +} + +template + OutputIterator copy_n(tag, + InputIterator first, + Size n, + OutputIterator result) +{ + return thrust::system::detail::internal::scalar::copy_n(first, n, result); +} + +} // end namespace detail +} // end namespace cpp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cpp/detail/copy_if.h b/compat/thrust/system/cpp/detail/copy_if.h new file mode 100644 index 0000000..2faadfa --- /dev/null +++ b/compat/thrust/system/cpp/detail/copy_if.h @@ -0,0 +1,51 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + + +template + OutputIterator copy_if(tag, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred) +{ + return thrust::system::detail::internal::scalar::copy_if(first, last, stencil, result, pred); +} + +} // end detail +} // end cpp +} // end system +} // end thrust + diff --git a/compat/thrust/system/cpp/detail/count.h b/compat/thrust/system/cpp/detail/count.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cpp/detail/count.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cpp/detail/dispatch/sort.h b/compat/thrust/system/cpp/detail/dispatch/sort.h new file mode 100644 index 0000000..2a03cf6 --- /dev/null +++ b/compat/thrust/system/cpp/detail/dispatch/sort.h @@ -0,0 +1,119 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ +namespace dispatch +{ + +//////////////// +// Radix Sort // +//////////////// + +template +void stable_sort(RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp, + thrust::detail::true_type) +{ + thrust::system::detail::internal::scalar::stable_radix_sort(first, last); + + // if comp is greater then reverse the keys + typedef typename thrust::iterator_traits::value_type KeyType; + const static bool reverse = thrust::detail::is_same >::value; + + if (reverse) + thrust::reverse(first, last); +} + +template +void stable_sort_by_key(RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + StrictWeakOrdering comp, + thrust::detail::true_type) +{ + // if comp is greater then reverse the keys and values + typedef typename thrust::iterator_traits::value_type KeyType; + const static bool reverse = thrust::detail::is_same >::value; + + // note, we also have to reverse the (unordered) input to preserve stability + if (reverse) + { + thrust::reverse(first1, last1); + thrust::reverse(first2, first2 + (last1 - first1)); + } + + thrust::system::detail::internal::scalar::stable_radix_sort_by_key(first1, last1, first2); + + if (reverse) + { + thrust::reverse(first1, last1); + thrust::reverse(first2, first2 + (last1 - first1)); + } +} + +//////////////// +// Merge Sort // +//////////////// + +template +void stable_sort(RandomAccessIterator 
first, + RandomAccessIterator last, + StrictWeakOrdering comp, + thrust::detail::false_type) +{ + thrust::system::detail::internal::scalar::stable_merge_sort(first, last, comp); +} + +template +void stable_sort_by_key(RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + StrictWeakOrdering comp, + thrust::detail::false_type) +{ + thrust::system::detail::internal::scalar::stable_merge_sort_by_key(first1, last1, first2, comp); +} + +} // end namespace dispatch +} // end namespace detail +} // end namespace cpp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cpp/detail/equal.h b/compat/thrust/system/cpp/detail/equal.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cpp/detail/equal.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cpp/detail/execution_policy.h b/compat/thrust/system/cpp/detail/execution_policy.h new file mode 100644 index 0000000..229ff5c --- /dev/null +++ b/compat/thrust/system/cpp/detail/execution_policy.h @@ -0,0 +1,84 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +// put the canonical tag in the same ns as the backend's entry points +namespace cpp +{ +namespace detail +{ + +// this awkward sequence of definitions arise +// from the desire both for tag to derive +// from execution_policy and for execution_policy +// to convert to tag (when execution_policy is not +// an ancestor of tag) + +// forward declaration of tag +struct tag; + +// forward declaration of execution_policy +template struct execution_policy; + +// specialize execution_policy for tag +template<> + struct execution_policy + : thrust::execution_policy +{}; + +// tag's definition comes before the +// generic definition of execution_policy +struct tag : execution_policy {}; + +// allow conversion to tag when it is not a successor +template + struct execution_policy + : thrust::execution_policy +{ + // allow conversion to tag + inline operator tag () const + { + return tag(); + } +}; + +} // end detail + +// alias execution_policy and tag here +using thrust::system::cpp::detail::execution_policy; +using thrust::system::cpp::detail::tag; + +} // end cpp +} // end system + +// alias items at top-level +namespace cpp +{ + +using thrust::system::cpp::execution_policy; +using thrust::system::cpp::tag; + +} // end cpp +} // end thrust + diff --git a/compat/thrust/system/cpp/detail/extrema.h b/compat/thrust/system/cpp/detail/extrema.h new file mode 100644 index 0000000..3eab6d4 --- /dev/null +++ b/compat/thrust/system/cpp/detail/extrema.h @@ -0,0 +1,72 @@ +/* + * 
Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file extrema.h + * \brief C++ implementations of extrema functions. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + +template +ForwardIterator min_element(execution_policy &, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) +{ + return thrust::system::detail::internal::scalar::min_element(first, last, comp); +} + + +template +ForwardIterator max_element(execution_policy &, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) +{ + return thrust::system::detail::internal::scalar::max_element(first, last, comp); +} + + +template +thrust::pair minmax_element(execution_policy &, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) +{ + return thrust::system::detail::internal::scalar::minmax_element(first, last, comp); +} + +} // end namespace detail +} // end namespace cpp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cpp/detail/fill.h b/compat/thrust/system/cpp/detail/fill.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cpp/detail/fill.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except 
in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cpp/detail/find.h b/compat/thrust/system/cpp/detail/find.h new file mode 100644 index 0000000..9698524 --- /dev/null +++ b/compat/thrust/system/cpp/detail/find.h @@ -0,0 +1,52 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file find.h + * \brief C++ implementation of find_if. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + +template +InputIterator find_if(tag, + InputIterator first, + InputIterator last, + Predicate pred) +{ + return thrust::system::detail::internal::scalar::find_if(first, last, pred); +} + +} // end namespace detail +} // end namespace cpp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cpp/detail/for_each.h b/compat/thrust/system/cpp/detail/for_each.h new file mode 100644 index 0000000..8d4e1c7 --- /dev/null +++ b/compat/thrust/system/cpp/detail/for_each.h @@ -0,0 +1,60 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + + +template +InputIterator for_each(thrust::system::cpp::detail::execution_policy &, + InputIterator first, + InputIterator last, + UnaryFunction f) +{ + return thrust::system::detail::internal::scalar::for_each(first, last, f); +} + +template +InputIterator for_each_n(thrust::system::cpp::detail::execution_policy &, + InputIterator first, + Size n, + UnaryFunction f) +{ + return thrust::system::detail::internal::scalar::for_each_n(first, n, f); +} + +} // end namespace detail +} // end namespace cpp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cpp/detail/gather.h b/compat/thrust/system/cpp/detail/gather.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cpp/detail/gather.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cpp/detail/generate.h b/compat/thrust/system/cpp/detail/generate.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cpp/detail/generate.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cpp/detail/get_value.h b/compat/thrust/system/cpp/detail/get_value.h new file mode 100644 index 0000000..5ddb2c8 --- /dev/null +++ b/compat/thrust/system/cpp/detail/get_value.h @@ -0,0 +1,45 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + + +template +__host__ __device__ + typename thrust::iterator_value::type + get_value(thrust::system::cpp::detail::execution_policy &, Pointer ptr) +{ + return *thrust::raw_pointer_cast(ptr); +} // end get_value() + + +} // end detail +} // end cpp +} // end system +} // end thrust + diff --git a/compat/thrust/system/cpp/detail/inner_product.h b/compat/thrust/system/cpp/detail/inner_product.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cpp/detail/inner_product.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cpp/detail/iter_swap.h b/compat/thrust/system/cpp/detail/iter_swap.h new file mode 100644 index 0000000..257276f --- /dev/null +++ b/compat/thrust/system/cpp/detail/iter_swap.h @@ -0,0 +1,46 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + + +template +__host__ __device__ + void iter_swap(tag, Pointer1 a, Pointer2 b) +{ + using thrust::swap; + swap(*thrust::raw_pointer_cast(a), *thrust::raw_pointer_cast(b)); +} // end iter_swap() + + +} // end detail +} // end cpp +} // end system +} // end thrust + diff --git a/compat/thrust/system/cpp/detail/logical.h b/compat/thrust/system/cpp/detail/logical.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cpp/detail/logical.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cpp/detail/malloc_and_free.h b/compat/thrust/system/cpp/detail/malloc_and_free.h new file mode 100644 index 0000000..4f8ae82 --- /dev/null +++ b/compat/thrust/system/cpp/detail/malloc_and_free.h @@ -0,0 +1,54 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include // for malloc & free +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + + +// note that malloc returns a raw pointer to avoid +// depending on the heavyweight thrust/system/cpp/memory.h header +template + void *malloc(execution_policy &, std::size_t n) +{ + return std::malloc(n); +} // end malloc() + + +template + void free(execution_policy &, Pointer ptr) +{ + std::free(thrust::raw_pointer_cast(ptr)); +} // end free() + + +} // end detail +} // end cpp +} // end system +} // end thrust + diff --git a/compat/thrust/system/cpp/detail/memory.inl b/compat/thrust/system/cpp/detail/memory.inl new file mode 100644 index 0000000..7f9a48d --- /dev/null +++ b/compat/thrust/system/cpp/detail/memory.inl @@ -0,0 +1,92 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +namespace thrust +{ + +// XXX WAR an issue with MSVC 2005 (cl v14.00) incorrectly implementing +// pointer_raw_pointer for pointer by specializing it here +#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && (_MSC_VER <= 1400) +namespace detail +{ + +template + struct pointer_raw_pointer< thrust::cpp::pointer > +{ + typedef typename thrust::cpp::pointer::raw_pointer type; +}; // end pointer_raw_pointer + +} // end detail +#endif + +namespace system +{ +namespace cpp +{ + + +template + template + reference & + reference + ::operator=(const reference &other) +{ + return super_t::operator=(other); +} // end reference::operator=() + +template + reference & + reference + ::operator=(const value_type &x) +{ + return super_t::operator=(x); +} // end reference::operator=() + +template +__host__ __device__ +void swap(reference a, reference b) +{ + a.swap(b); +} // end swap() + +pointer malloc(std::size_t n) +{ + tag t; + return pointer(thrust::system::cpp::detail::malloc(t, n)); +} // end malloc() + +template +pointer malloc(std::size_t n) +{ + pointer raw_ptr = thrust::system::cpp::malloc(sizeof(T) * n); + return pointer(reinterpret_cast(raw_ptr.get())); +} // end malloc() + +void free(pointer ptr) +{ + tag t; + return thrust::system::cpp::detail::free(t, ptr); +} // end free() + +} // end cpp +} // end system +} // end thrust + diff --git a/compat/thrust/system/cpp/detail/merge.h b/compat/thrust/system/cpp/detail/merge.h new file mode 100644 index 0000000..7f01c07 --- /dev/null +++ 
b/compat/thrust/system/cpp/detail/merge.h @@ -0,0 +1,76 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + +template +OutputIterator merge(execution_policy &, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + return thrust::system::detail::internal::scalar::merge(first1, last1, first2, last2, result, comp); +} + +template +thrust::pair + merge_by_key(execution_policy &, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakOrdering comp) +{ + return thrust::system::detail::internal::scalar::merge_by_key(keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp); +} + +} // end namespace detail +} // end namespace cpp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cpp/detail/mismatch.h b/compat/thrust/system/cpp/detail/mismatch.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cpp/detail/mismatch.h @@ -0,0 +1,22 @@ +/* + * 
Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cpp/detail/par.h b/compat/thrust/system/cpp/detail/par.h new file mode 100644 index 0000000..953e527 --- /dev/null +++ b/compat/thrust/system/cpp/detail/par.h @@ -0,0 +1,66 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + + +struct par_t : thrust::system::cpp::detail::execution_policy +{ + par_t() : thrust::system::cpp::detail::execution_policy() {} + + template + thrust::detail::execute_with_allocator + operator()(Allocator &alloc) const + { + return thrust::detail::execute_with_allocator(alloc); + } +}; + + +} // end detail + + +static const detail::par_t par; + + +} // end cpp +} // end system + + +// alias par here +namespace cpp +{ + + +using thrust::system::cpp::par; + + +} // end cpp +} // end thrust + diff --git a/compat/thrust/system/cpp/detail/partition.h b/compat/thrust/system/cpp/detail/partition.h new file mode 100644 index 0000000..25a4f1c --- /dev/null +++ b/compat/thrust/system/cpp/detail/partition.h @@ -0,0 +1,95 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! 
\file partition.h + * \brief cpp implementations of partition functions + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + +template + ForwardIterator stable_partition(tag, + ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + return thrust::system::detail::internal::scalar::stable_partition(first, last, pred); +} + +template + ForwardIterator stable_partition(tag, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred) +{ + return thrust::system::detail::internal::scalar::stable_partition(first, last, stencil, pred); +} + +template + thrust::pair + stable_partition_copy(tag, + InputIterator first, + InputIterator last, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + return thrust::system::detail::internal::scalar::stable_partition_copy(first, last, out_true, out_false, pred); +} + +template + thrust::pair + stable_partition_copy(tag, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + return thrust::system::detail::internal::scalar::stable_partition_copy(first, last, stencil, out_true, out_false, pred); +} + +} // end namespace detail +} // end namespace cpp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cpp/detail/reduce.h b/compat/thrust/system/cpp/detail/reduce.h new file mode 100644 index 0000000..5428206 --- /dev/null +++ b/compat/thrust/system/cpp/detail/reduce.h @@ -0,0 +1,54 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file reduce.h + * \brief C++ implementation of reduce algorithms. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + +template + OutputType reduce(execution_policy &, + InputIterator begin, + InputIterator end, + OutputType init, + BinaryFunction binary_op) +{ + return thrust::system::detail::internal::scalar::reduce(begin, end, init, binary_op); +} + +} // end namespace detail +} // end namespace cpp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cpp/detail/reduce_by_key.h b/compat/thrust/system/cpp/detail/reduce_by_key.h new file mode 100644 index 0000000..22dc2d9 --- /dev/null +++ b/compat/thrust/system/cpp/detail/reduce_by_key.h @@ -0,0 +1,57 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + +template + thrust::pair + reduce_by_key(execution_policy &, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred, + BinaryFunction binary_op) +{ + return thrust::system::detail::internal::scalar::reduce_by_key(keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op); +} + +} // end namespace detail +} // end namespace cpp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cpp/detail/remove.h b/compat/thrust/system/cpp/detail/remove.h new file mode 100644 index 0000000..cf2202b --- /dev/null +++ b/compat/thrust/system/cpp/detail/remove.h @@ -0,0 +1,88 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + +template + ForwardIterator remove_if(tag, + ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + return thrust::system::detail::internal::scalar::remove_if(first, last, pred); +} + + +template + ForwardIterator remove_if(tag, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred) +{ + return thrust::system::detail::internal::scalar::remove_if(first, last, stencil, pred); +} + + +template + OutputIterator remove_copy_if(tag, + InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred) +{ + return thrust::system::detail::internal::scalar::remove_copy_if(first, last, result, pred); +} + + + +template + OutputIterator remove_copy_if(tag, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred) +{ + return thrust::system::detail::internal::scalar::remove_copy_if(first, last, stencil, result, pred); +} + +} // end namespace detail +} // end namespace cpp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cpp/detail/replace.h b/compat/thrust/system/cpp/detail/replace.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cpp/detail/replace.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cpp/detail/reverse.h b/compat/thrust/system/cpp/detail/reverse.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cpp/detail/reverse.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cpp/detail/scan.h b/compat/thrust/system/cpp/detail/scan.h new file mode 100644 index 0000000..d4bae1e --- /dev/null +++ b/compat/thrust/system/cpp/detail/scan.h @@ -0,0 +1,70 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! 
\file scan.h + * \brief C++ implementations of scan functions. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + +template + OutputIterator inclusive_scan(execution_policy &, + InputIterator first, + InputIterator last, + OutputIterator result, + BinaryFunction binary_op) +{ + return thrust::system::detail::internal::scalar::inclusive_scan(first, last, result, binary_op); +} + + +template + OutputIterator exclusive_scan(execution_policy &, + InputIterator first, + InputIterator last, + OutputIterator result, + T init, + BinaryFunction binary_op) +{ + return thrust::system::detail::internal::scalar::exclusive_scan(first, last, result, init, binary_op); +} + +} // end namespace detail +} // end namespace cpp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cpp/detail/scan_by_key.h b/compat/thrust/system/cpp/detail/scan_by_key.h new file mode 100644 index 0000000..4165d84 --- /dev/null +++ b/compat/thrust/system/cpp/detail/scan_by_key.h @@ -0,0 +1,71 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + +template + OutputIterator inclusive_scan_by_key(tag, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryPredicate binary_pred, + BinaryFunction binary_op) +{ + return thrust::system::detail::internal::scalar::inclusive_scan_by_key(first1, last1, first2, result, binary_pred, binary_op); +} + + +template + OutputIterator exclusive_scan_by_key(tag, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + T init, + BinaryPredicate binary_pred, + BinaryFunction binary_op) +{ + return thrust::system::detail::internal::scalar::exclusive_scan_by_key(first1, last1, first2, result, init, binary_pred, binary_op); +} + +} // end namespace detail +} // end namespace cpp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cpp/detail/scatter.h b/compat/thrust/system/cpp/detail/scatter.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cpp/detail/scatter.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cpp/detail/sequence.h b/compat/thrust/system/cpp/detail/sequence.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cpp/detail/sequence.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cpp/detail/set_operations.h b/compat/thrust/system/cpp/detail/set_operations.h new file mode 100644 index 0000000..07ce712 --- /dev/null +++ b/compat/thrust/system/cpp/detail/set_operations.h @@ -0,0 +1,105 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + + +template + OutputIterator set_difference(execution_policy &, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + return thrust::system::detail::internal::scalar::set_difference(first1, last1, first2, last2, result, comp); +} + + +template + OutputIterator set_intersection(execution_policy &, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + return thrust::system::detail::internal::scalar::set_intersection(first1, last1, first2, last2, result, comp); +} + + +template + OutputIterator set_symmetric_difference(execution_policy &, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + return thrust::system::detail::internal::scalar::set_symmetric_difference(first1, last1, first2, last2, result, comp); +} + + +template + OutputIterator set_union(execution_policy &, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + return thrust::system::detail::internal::scalar::set_union(first1, last1, first2, last2, result, comp); +} + + +} // end namespace detail +} // end namespace cpp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cpp/detail/sort.h b/compat/thrust/system/cpp/detail/sort.h new file mode 100644 index 0000000..60244e2 --- /dev/null +++ b/compat/thrust/system/cpp/detail/sort.h @@ -0,0 +1,60 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + +template + void stable_sort(execution_policy &, + RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp) +{ + thrust::system::detail::internal::scalar::stable_sort(first, last, comp); +} + +template + void stable_sort_by_key(execution_policy &, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp) +{ + thrust::system::detail::internal::scalar::stable_sort_by_key(keys_first, keys_last, values_first, comp); +} + +} // end namespace detail +} // end namespace cpp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cpp/detail/swap_ranges.h b/compat/thrust/system/cpp/detail/swap_ranges.h new file mode 100644 index 0000000..a834a2c --- /dev/null +++ b/compat/thrust/system/cpp/detail/swap_ranges.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// cpp has no special swap_ranges + diff --git a/compat/thrust/system/cpp/detail/tabulate.h b/compat/thrust/system/cpp/detail/tabulate.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cpp/detail/tabulate.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cpp/detail/temporary_buffer.h b/compat/thrust/system/cpp/detail/temporary_buffer.h new file mode 100644 index 0000000..628bd75 --- /dev/null +++ b/compat/thrust/system/cpp/detail/temporary_buffer.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system has no special temporary buffer functions + diff --git a/compat/thrust/system/cpp/detail/transform.h b/compat/thrust/system/cpp/detail/transform.h new file mode 100644 index 0000000..5909d4a --- /dev/null +++ b/compat/thrust/system/cpp/detail/transform.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// cpp has no special transform + diff --git a/compat/thrust/system/cpp/detail/transform_reduce.h b/compat/thrust/system/cpp/detail/transform_reduce.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cpp/detail/transform_reduce.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cpp/detail/transform_scan.h b/compat/thrust/system/cpp/detail/transform_scan.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cpp/detail/transform_scan.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cpp/detail/uninitialized_copy.h b/compat/thrust/system/cpp/detail/uninitialized_copy.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cpp/detail/uninitialized_copy.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cpp/detail/uninitialized_fill.h b/compat/thrust/system/cpp/detail/uninitialized_fill.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cpp/detail/uninitialized_fill.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cpp/detail/unique.h b/compat/thrust/system/cpp/detail/unique.h new file mode 100644 index 0000000..cf74049 --- /dev/null +++ b/compat/thrust/system/cpp/detail/unique.h @@ -0,0 +1,61 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + +template + ForwardIterator unique(execution_policy &, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate binary_pred) +{ + return thrust::system::detail::internal::scalar::unique(first, last, binary_pred); +} + +template + OutputIterator unique_copy(execution_policy &, + InputIterator first, + InputIterator last, + OutputIterator output, + BinaryPredicate binary_pred) +{ + return thrust::system::detail::internal::scalar::unique_copy(first, last, output, binary_pred); +} + +} // end namespace detail +} // end namespace cpp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cpp/detail/unique_by_key.h b/compat/thrust/system/cpp/detail/unique_by_key.h new file mode 100644 index 0000000..a9f13d6 --- /dev/null +++ b/compat/thrust/system/cpp/detail/unique_by_key.h @@ -0,0 +1,70 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + +template + thrust::pair + unique_by_key(execution_policy &, + ForwardIterator1 keys_first, + ForwardIterator1 keys_last, + ForwardIterator2 values_first, + BinaryPredicate binary_pred) +{ + return thrust::system::detail::internal::scalar::unique_by_key(keys_first, keys_last, values_first, binary_pred); +} + + +template + thrust::pair + unique_by_key_copy(execution_policy &, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred) +{ + return thrust::system::detail::internal::scalar::unique_by_key_copy(keys_first, keys_last, values_first, keys_output, values_output, binary_pred); +} + +} // end namespace detail +} // end namespace cpp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cpp/detail/vector.inl b/compat/thrust/system/cpp/detail/vector.inl new file mode 100644 index 0000000..03bffcd --- /dev/null +++ b/compat/thrust/system/cpp/detail/vector.inl @@ -0,0 +1,97 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ + +template + vector + ::vector() + : super_t() +{} + +template + vector + ::vector(size_type n) + : super_t(n) +{} + +template + vector + ::vector(size_type n, const value_type &value) + : super_t(n,value) +{} + +template + vector + ::vector(const vector &x) + : super_t(x) +{} + +template + template + vector + ::vector(const thrust::detail::vector_base &x) + : super_t(x) +{} + +template + template + vector + ::vector(const std::vector &x) + : super_t(x) +{} + +template + template + vector + ::vector(InputIterator first, InputIterator last) + : super_t(first,last) +{} + +template + template + vector & + vector + ::operator=(const std::vector &x) +{ + super_t::operator=(x); + return *this; +} + +template + template + vector & + vector + ::operator=(const thrust::detail::vector_base &x) +{ + super_t::operator=(x); + return *this; +} + +} // end cpp +} // end system +} // end thrust + diff --git a/compat/thrust/system/cpp/execution_policy.h b/compat/thrust/system/cpp/execution_policy.h new file mode 100644 index 0000000..f192eb9 --- /dev/null +++ b/compat/thrust/system/cpp/execution_policy.h @@ -0,0 +1,157 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +/*! \file thrust/system/cpp/execution_policy.h + * \brief Execution policies for Thrust's standard C++ system. 
+ */ + +#include + +// get the execution policies definitions first +#include + +// get the definition of par +#include + +// now get all the algorithm definitions + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +// define these entities here for the purpose of Doxygenating them +// they are actually defined elsewhere +#if 0 +namespace thrust +{ +namespace system +{ +namespace cpp +{ + + +/*! \addtogroup execution_policies + * \{ + */ + + +/*! \p thrust::system::cpp::execution_policy is the base class for all Thrust parallel execution + * policies which are derived from Thrust's standard C++ backend system. + */ +template +struct execution_policy : thrust::execution_policy +{}; + + +/*! \p thrust::system::cpp::tag is a type representing Thrust's standard C++ backend system in C++'s type system. + * Iterators "tagged" with a type which is convertible to \p cpp::tag assert that they may be + * "dispatched" to algorithm implementations in the \p cpp system. + */ +struct tag : thrust::system::cpp::execution_policy { unspecified }; + + +/*! + * \p thrust::system::cpp::par is the parallel execution policy associated with Thrust's standard + * C++ backend system. + * + * Instead of relying on implicit algorithm dispatch through iterator system tags, users may + * directly target Thrust's C++ backend system by providing \p thrust::cpp::par as an algorithm + * parameter. + * + * Explicit dispatch can be useful in avoiding the introduction of data copies into containers such + * as \p thrust::cpp::vector. + * + * The type of \p thrust::cpp::par is implementation-defined. 
+ * + * The following code snippet demonstrates how to use \p thrust::cpp::par to explicitly dispatch an + * invocation of \p thrust::for_each to the standard C++ backend system: + * + * \code + * #include + * #include + * #include + * + * struct printf_functor + * { + * __host__ __device__ + * void operator()(int x) + * { + * printf("%d\n", x); + * } + * }; + * ... + * int vec[3]; + * vec[0] = 0; vec[1] = 1; vec[2] = 2; + * + * thrust::for_each(thrust::cpp::par, vec, vec + 3, printf_functor()); + * + * // 0 1 2 is printed to standard output in some unspecified order + * \endcode + */ +static const unspecified par; + + +/*! \} + */ + + +} // end cpp +} // end system +} // end thrust +#endif + + diff --git a/compat/thrust/system/cpp/memory.h b/compat/thrust/system/cpp/memory.h new file mode 100644 index 0000000..f3a58b8 --- /dev/null +++ b/compat/thrust/system/cpp/memory.h @@ -0,0 +1,414 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/cpp/memory.h + * \brief Managing memory associated with Thrust's standard C++ system. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ + +template class pointer; + +} // end cpp +} // end system +} // end thrust + + +/*! 
\cond + */ + +// specialize std::iterator_traits to avoid problems with the name of +// pointer's constructor shadowing its nested pointer type +// do this before pointer is defined so the specialization is correctly +// used inside the definition +namespace std +{ + +template + struct iterator_traits > +{ + private: + typedef thrust::system::cpp::pointer ptr; + + public: + typedef typename ptr::iterator_category iterator_category; + typedef typename ptr::value_type value_type; + typedef typename ptr::difference_type difference_type; + typedef ptr pointer; + typedef typename ptr::reference reference; +}; // end iterator_traits + +} // end std + +/*! \endcond + */ + + +namespace thrust +{ +namespace system +{ + +/*! \addtogroup system_backends Systems + * \ingroup system + * \{ + */ + +/*! \namespace thrust::system::cpp + * \brief \p thrust::system::cpp is the namespace containing functionality for allocating, manipulating, + * and deallocating memory available to Thrust's standard C++ backend system. + * The identifiers are provided in a separate namespace underneath thrust::system + * for import convenience but are also aliased in the top-level thrust::cpp + * namespace for easy access. + * + */ +namespace cpp +{ + +// forward declaration of reference for pointer +template class reference; + +/*! \cond + */ + +// XXX nvcc + msvc have trouble instantiating reference below +// this is a workaround +namespace detail +{ + +template + struct reference_msvc_workaround +{ + typedef thrust::system::cpp::reference type; +}; // end reference_msvc_workaround + +} // end detail + +/*! \endcond + */ + + +/*! \p pointer stores a pointer to an object allocated in memory available to the cpp system. + * This type provides type safety when dispatching standard algorithms on ranges resident + * in cpp memory. + * + * \p pointer has pointer semantics: it may be dereferenced and manipulated with pointer arithmetic. 
+ * + * \p pointer can be created with the function \p cpp::malloc, or by explicitly calling its constructor + * with a raw pointer. + * + * The raw pointer encapsulated by a \p pointer may be obtained by either its get member function + * or the \p raw_pointer_cast function. + * + * \note \p pointer is not a "smart" pointer; it is the programmer's responsibility to deallocate memory + * pointed to by \p pointer. + * + * \tparam T specifies the type of the pointee. + * + * \see cpp::malloc + * \see cpp::free + * \see raw_pointer_cast + */ +template + class pointer + : public thrust::pointer< + T, + thrust::system::cpp::tag, + thrust::system::cpp::reference, + thrust::system::cpp::pointer + > +{ + /*! \cond + */ + + private: + typedef thrust::pointer< + T, + thrust::system::cpp::tag, + //thrust::system::cpp::reference, + typename detail::reference_msvc_workaround::type, + thrust::system::cpp::pointer + > super_t; + + /*! \endcond + */ + + public: + // note that cpp::pointer's member functions need __host__ __device__ + // to interoperate with nvcc + iterators' dereference member function + + /*! \p pointer's no-argument constructor initializes its encapsulated pointer to \c 0. + */ + __host__ __device__ + pointer() : super_t() {} + + /*! This constructor allows construction of a pointer from a T*. + * + * \param ptr A raw pointer to copy from, presumed to point to a location in memory + * accessible by the \p cpp system. + * \tparam OtherT \p OtherT shall be convertible to \p T. + */ + template + __host__ __device__ + explicit pointer(OtherT *ptr) : super_t(ptr) {} + + /*! This constructor allows construction from another pointer-like object with related type. + * + * \param other The \p OtherPointer to copy. + * \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible + * to \p thrust::system::cpp::tag and its element type shall be convertible to \p T. 
+ */ + template + __host__ __device__ + pointer(const OtherPointer &other, + typename thrust::detail::enable_if_pointer_is_convertible< + OtherPointer, + pointer + >::type * = 0) : super_t(other) {} + + /*! Assignment operator allows assigning from another pointer-like object with related type. + * + * \param other The other pointer-like object to assign from. + * \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible + * to \p thrust::system::cpp::tag and its element type shall be convertible to \p T. + */ + template + __host__ __device__ + typename thrust::detail::enable_if_pointer_is_convertible< + OtherPointer, + pointer, + pointer & + >::type + operator=(const OtherPointer &other) + { + return super_t::operator=(other); + } +}; // end pointer + + +/*! \p reference is a wrapped reference to an object stored in memory available to the \p cpp system. + * \p reference is the type of the result of dereferencing a \p cpp::pointer. + * + * \tparam T Specifies the type of the referenced object. + */ +template + class reference + : public thrust::reference< + T, + thrust::system::cpp::pointer, + thrust::system::cpp::reference + > +{ + /*! \cond + */ + + private: + typedef thrust::reference< + T, + thrust::system::cpp::pointer, + thrust::system::cpp::reference + > super_t; + + /*! \endcond + */ + + public: + /*! \cond + */ + + typedef typename super_t::value_type value_type; + typedef typename super_t::pointer pointer; + + /*! \endcond + */ + + /*! This constructor initializes this \p reference to refer to an object + * pointed to by the given \p pointer. After this \p reference is constructed, + * it shall refer to the object pointed to by \p ptr. + * + * \param ptr A \p pointer to copy from. + */ + __host__ __device__ + explicit reference(const pointer &ptr) + : super_t(ptr) + {} + + /*! This constructor accepts a const reference to another \p reference of related type. 
+ * After this \p reference is constructed, it shall refer to the same object as \p other. + * + * \param other A \p reference to copy from. + * \tparam OtherT The element type of the other \p reference. + * + * \note This constructor is templated primarily to allow initialization of reference + * from reference. + */ + template + __host__ __device__ + reference(const reference &other, + typename thrust::detail::enable_if_convertible< + typename reference::pointer, + pointer + >::type * = 0) + : super_t(other) + {} + + /*! Copy assignment operator copy assigns from another \p reference of related type. + * + * \param other The other \p reference to assign from. + * \return *this + * \tparam OtherT The element type of the other \p reference. + */ + template + reference &operator=(const reference &other); + + /*! Assignment operator assigns from a \p value_type. + * + * \param x The \p value_type to assign from. + * \return *this + */ + reference &operator=(const value_type &x); +}; // end reference + +/*! Exchanges the values of two objects referred to by \p reference. + * \param x The first \p reference of interest. + * \param y The second \p reference of interest. + */ +template +__host__ __device__ +void swap(reference x, reference y); + +/*! Allocates an area of memory available to Thrust's cpp system. + * \param n Number of bytes to allocate. + * \return A cpp::pointer pointing to the beginning of the newly + * allocated memory. A null cpp::pointer is returned if + * an error occurs. + * \note The cpp::pointer returned by this function must be + * deallocated with \p cpp::free. + * \see cpp::free + * \see std::malloc + */ +inline pointer malloc(std::size_t n); + +/*! Allocates a typed area of memory available to Thrust's cpp system. + * \param n Number of elements to allocate. + * \return A cpp::pointer pointing to the beginning of the newly + * allocated elements. A null cpp::pointer is returned if + * an error occurs. 
+ * \note The cpp::pointer returned by this function must be + * deallocated with \p cpp::free. + * \see cpp::free + * \see std::malloc + */ +template +inline pointer malloc(std::size_t n); + +/*! Deallocates an area of memory previously allocated by cpp::malloc. + * \param ptr A cpp::pointer pointing to the beginning of an area + * of memory previously allocated with cpp::malloc. + * \see cpp::malloc + * \see std::free + */ +inline void free(pointer ptr); + +// XXX upon c++11 +// template using allocator = thrust::detail::malloc_allocator >; + +/*! \p cpp::allocator is the default allocator used by the \p cpp system's containers such as + * cpp::vector if no user-specified allocator is provided. \p cpp::allocator allocates + * (deallocates) storage with \p cpp::malloc (\p cpp::free). + */ +template + struct allocator + : thrust::detail::malloc_allocator< + T, + tag, + pointer + > +{ + /*! The \p rebind metafunction provides the type of an \p allocator + * instantiated with another type. + * + * \tparam U The other type to use for instantiation. + */ + template + struct rebind + { + /*! The typedef \p other gives the type of the rebound \p allocator. + */ + typedef allocator other; + }; + + /*! No-argument constructor has no effect. + */ + __host__ __device__ + inline allocator() {} + + /*! Copy constructor has no effect. + */ + __host__ __device__ + inline allocator(const allocator &) {} + + /*! Constructor from other \p allocator has no effect. + */ + template + __host__ __device__ + inline allocator(const allocator &) {} + + /*! Destructor has no effect. + */ + __host__ __device__ + inline ~allocator() {} +}; // end allocator + +} // end cpp + +/*! \} + */ + +} // end system + +/*! \namespace thrust::cpp + * \brief \p thrust::cpp is a top-level alias for thrust::system::cpp. 
+ */ +namespace cpp +{ + +using thrust::system::cpp::pointer; +using thrust::system::cpp::reference; +using thrust::system::cpp::malloc; +using thrust::system::cpp::free; +using thrust::system::cpp::allocator; + +} // end cpp + +} // end thrust + +#include + diff --git a/compat/thrust/system/cpp/vector.h b/compat/thrust/system/cpp/vector.h new file mode 100644 index 0000000..4282df9 --- /dev/null +++ b/compat/thrust/system/cpp/vector.h @@ -0,0 +1,149 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/cpp/vector.h + * \brief A dynamically-sizable array of elements which reside in memory available to + * Thrust's standard C++ system. + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +// forward declaration of host_vector +template class host_vector; + +namespace system +{ +namespace cpp +{ + +// XXX upon c++11 +// template > using vector = thrust::detail::vector_base; + +/*! \p cpp::vector is a container that supports random access to elements, + * constant time removal of elements at the end, and linear time insertion + * and removal of elements at the beginning or in the middle. The number of + * elements in a \p cpp::vector may vary dynamically; memory management is + * automatic. The elements contained in a \p cpp::vector reside in memory + * available to the \p cpp system. + * + * \tparam T The element type of the \p cpp::vector. 
+ * \tparam Allocator The allocator type of the \p cpp::vector. Defaults to \p cpp::allocator. + * + * \see http://www.sgi.com/tech/stl/Vector.html + * \see host_vector For the documentation of the complete interface which is + * shared by \p cpp::vector + * \see device_vector + */ +template > + class vector + : public thrust::detail::vector_base +{ + /*! \cond + */ + private: + typedef thrust::detail::vector_base super_t; + /*! \endcond + */ + + public: + + /*! \cond + */ + typedef typename super_t::size_type size_type; + typedef typename super_t::value_type value_type; + + /*! \endcond + */ + + /*! This constructor creates an empty \p cpp::vector. + */ + vector(); + + /*! This constructor creates a \p cpp::vector with \p n default-constructed elements. + * \param n The size of the \p cpp::vector to create. + */ + explicit vector(size_type n); + + /*! This constructor creates a \p cpp::vector with \p n copies of \p value. + * \param n The size of the \p cpp::vector to create. + * \param value An element to copy. + */ + explicit vector(size_type n, const value_type &value); + + /*! Copy constructor copies from another \p cpp::vector. + * \param x The other \p cpp::vector to copy. + */ + vector(const vector &x); + + /*! This constructor copies from another Thrust vector-like object. + * \param x The other object to copy from. + */ + template + vector(const thrust::detail::vector_base &x); + + /*! This constructor copies from a \c std::vector. + * \param x The \c std::vector to copy from. + */ + template + vector(const std::vector &x); + + /*! This constructor creates a \p cpp::vector by copying from a range. + * \param first The beginning of the range. + * \param last The end of the range. + */ + template + vector(InputIterator first, InputIterator last); + + // XXX vector_base should take a Derived type so we don't have to define these superfluous assigns + + /*! Assignment operator assigns from a \c std::vector. + * \param x The \c std::vector to assign from. 
+ * \return *this + */ + template + vector &operator=(const std::vector &x); + + /*! Assignment operator assigns from another Thrust vector-like object. + * \param x The other object to assign from. + * \return *this + */ + template + vector &operator=(const thrust::detail::vector_base &x); +}; // end vector + +} // end cpp +} // end system + +// alias system::cpp names at top-level +namespace cpp +{ + +using thrust::system::cpp::vector; + +} // end cpp + +} // end thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/adjacent_difference.h b/compat/thrust/system/cuda/detail/adjacent_difference.h new file mode 100644 index 0000000..ec51794 --- /dev/null +++ b/compat/thrust/system/cuda/detail/adjacent_difference.h @@ -0,0 +1,51 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file adjacent_difference.h + * \brief CUDA implementation of adjacent_difference. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + +template +OutputIterator adjacent_difference(execution_policy &exec, + InputIterator first, InputIterator last, + OutputIterator result, + BinaryFunction binary_op); + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/adjacent_difference.inl b/compat/thrust/system/cuda/detail/adjacent_difference.inl new file mode 100644 index 0000000..9e4756a --- /dev/null +++ b/compat/thrust/system/cuda/detail/adjacent_difference.inl @@ -0,0 +1,197 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ + +template +struct last_index_in_each_interval : public thrust::unary_function +{ + typedef typename Decomposition::index_type index_type; + + Decomposition decomp; + + last_index_in_each_interval(Decomposition decomp) : decomp(decomp) {} + + __host__ __device__ + index_type operator()(index_type interval) + { + return decomp[interval].end() - 1; + } +}; + +template +struct adjacent_difference_closure +{ + InputIterator1 input; + InputIterator2 input_copy; + OutputIterator output; + BinaryFunction binary_op; + Decomposition decomp; + Context context; + + typedef Context context_type; + + adjacent_difference_closure(InputIterator1 input, + InputIterator2 input_copy, + OutputIterator output, + BinaryFunction binary_op, + Decomposition decomp, + Context context = Context()) + : input(input), input_copy(input_copy), output(output), binary_op(binary_op), decomp(decomp), context(context) {} + + __device__ __thrust_forceinline__ + void operator()(void) + { + typedef typename thrust::iterator_value::type InputType; + typedef typename Decomposition::index_type index_type; + + // this block processes results in [range.begin(), range.end()) + thrust::system::detail::internal::index_range range = decomp[context.block_index()]; + + input_copy += context.block_index() - 1; + + // prime the temp values for all threads so we don't need to launch a default constructor + InputType next_left = (context.block_index() == 0) ? 
*input : *input_copy; + + index_type base = range.begin(); + index_type i = range.begin() + context.thread_index(); + + if (i < range.end()) + { + if (context.thread_index() > 0) + { + InputIterator1 temp = input + (i - 1); + next_left = *temp; + } + } + + input += i; + output += i; + + while (base < range.end()) + { + InputType curr_left = next_left; + + if (i + context.block_dimension() < range.end()) + { + InputIterator1 temp = input + (context.block_dimension() - 1); + next_left = *temp; + } + + context.barrier(); + + if (i < range.end()) + { + if (i == 0) + *output = *input; + else + { + InputType x = *input; + *output = binary_op(x, curr_left); + } + } + + i += context.block_dimension(); + base += context.block_dimension(); + input += context.block_dimension(); + output += context.block_dimension(); + } + } +}; + +} // end namespace detail + + +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN + +template +OutputIterator adjacent_difference(execution_policy &exec, + InputIterator first, InputIterator last, + OutputIterator result, + BinaryFunction binary_op) +{ + typedef typename thrust::iterator_value::type InputType; + typedef typename thrust::iterator_difference::type IndexType; + typedef thrust::system::detail::internal::uniform_decomposition Decomposition; + + IndexType n = last - first; + + if (n == 0) + return result; + + Decomposition decomp = default_decomposition(last - first); + + // allocate temporary storage + thrust::detail::temporary_array temp(exec, decomp.size() - 1); + + // gather last value in each interval + detail::last_index_in_each_interval unary_op(decomp); + thrust::gather(exec, + thrust::make_transform_iterator(thrust::counting_iterator(0), unary_op), + thrust::make_transform_iterator(thrust::counting_iterator(0), unary_op) + (decomp.size() - 1), + first, + temp.begin()); + + + typedef typename thrust::detail::temporary_array::iterator InputIterator2; + typedef detail::blocked_thread_array Context; + typedef 
detail::adjacent_difference_closure Closure; + + Closure closure(first, temp.begin(), result, binary_op, decomp); + + detail::launch_closure(closure, decomp.size()); + + return result + n; +} + +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/assign_value.h b/compat/thrust/system/cuda/detail/assign_value.h new file mode 100644 index 0000000..c90cf65 --- /dev/null +++ b/compat/thrust/system/cuda/detail/assign_value.h @@ -0,0 +1,198 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + + +// XXX WAR an issue with msvc 2005 (cl v14.00) which creates multiply-defined +// symbols resulting from assign_value +#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && (_MSC_VER <= 1400) + +namespace +{ + +template +inline __host__ __device__ + void assign_value_msvc2005_war(thrust::cuda::execution_policy &exec, Pointer1 dst, Pointer2 src) +{ + // XXX war nvbugs/881631 + struct war_nvbugs_881631 + { + __host__ inline static void host_path(thrust::cuda::execution_policy &exec, Pointer1 dst, Pointer2 src) + { + thrust::copy(exec, src, src + 1, dst); + } + + __device__ inline static void device_path(thrust::cuda::execution_policy &, Pointer1 dst, Pointer2 src) + { + *thrust::raw_pointer_cast(dst) = *thrust::raw_pointer_cast(src); + } + }; + +#ifndef __CUDA_ARCH__ + war_nvbugs_881631::host_path(exec,dst,src); +#else + war_nvbugs_881631::device_path(exec,dst,src); +#endif // __CUDA_ARCH__ +} // end assign_value_msvc2005_war() + +} // end anon namespace + +template +inline __host__ __device__ + void assign_value(thrust::cuda::execution_policy &exec, Pointer1 dst, Pointer2 src) +{ + return assign_value_msvc2005_war(exec,dst,src); +} // end assign_value() + +#else + +template +inline __host__ __device__ + void assign_value(thrust::cuda::execution_policy &exec, Pointer1 dst, Pointer2 src) +{ + // XXX war nvbugs/881631 + struct war_nvbugs_881631 + { + __host__ inline static void host_path(thrust::cuda::execution_policy &exec, Pointer1 dst, Pointer2 src) + { + thrust::copy(exec, src, src + 1, dst); + } + + __device__ inline static void device_path(thrust::cuda::execution_policy &, Pointer1 dst, Pointer2 src) + { + *thrust::raw_pointer_cast(dst) = *thrust::raw_pointer_cast(src); + } + }; + +#ifndef __CUDA_ARCH__ + war_nvbugs_881631::host_path(exec,dst,src); +#else + war_nvbugs_881631::device_path(exec,dst,src); 
+#endif // __CUDA_ARCH__ +} // end assign_value() + +#endif // msvc 2005 WAR + + +// XXX WAR an issue with msvc 2005 (cl v14.00) which creates multiply-defined +// symbols resulting from assign_value +#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && (_MSC_VER <= 1400) + +namespace +{ + + +template +inline __host__ __device__ + void assign_value_msvc2005_war(cross_system &systems, Pointer1 dst, Pointer2 src) +{ + // XXX war nvbugs/881631 + struct war_nvbugs_881631 + { + __host__ inline static void host_path(cross_system &systems, Pointer1 dst, Pointer2 src) + { + // rotate the systems so that they are ordered the same as (src, dst) + // for the call to thrust::copy + cross_system rotated_systems = systems.rotate(); + thrust::copy(rotated_systems, src, src + 1, dst); + } + + __device__ inline static void device_path(cross_system &systems, Pointer1 dst, Pointer2 src) + { + // XXX forward the true cuda::execution_policy inside systems here + // instead of materializing a tag + thrust::cuda::tag cuda_tag; + thrust::system::cuda::detail::assign_value(cuda_tag, dst, src); + } + }; + +#if __CUDA_ARCH__ + war_nvbugs_881631::device_path(systems,dst,src); +#else + war_nvbugs_881631::host_path(systems,dst,src); +#endif +} // end assign_value_msvc2005_war + + +} // end anon namespace + + +template +inline __host__ __device__ + void assign_value(cross_system &systems, Pointer1 dst, Pointer2 src) +{ + return assign_value_msvc2005_war(systems,dst,src); +} // end assign_value() + + +#else + + +template +inline __host__ __device__ + void assign_value(cross_system &systems, Pointer1 dst, Pointer2 src) +{ + // XXX war nvbugs/881631 + struct war_nvbugs_881631 + { + __host__ inline static void host_path(cross_system &systems, Pointer1 dst, Pointer2 src) + { + // rotate the systems so that they are ordered the same as (src, dst) + // for the call to thrust::copy + cross_system rotated_systems = systems.rotate(); + thrust::copy(rotated_systems, src, src + 1, dst); + } + + 
__device__ inline static void device_path(cross_system &systems, Pointer1 dst, Pointer2 src) + { + // XXX forward the true cuda::execution_policy inside systems here + // instead of materializing a tag + thrust::cuda::tag cuda_tag; + thrust::system::cuda::detail::assign_value(cuda_tag, dst, src); + } + }; + +#if __CUDA_ARCH__ + war_nvbugs_881631::device_path(systems,dst,src); +#else + war_nvbugs_881631::host_path(systems,dst,src); +#endif +} // end assign_value() + + +#endif // msvc 2005 WAR + + +} // end detail +} // end cuda +} // end system +} // end thrust + diff --git a/compat/thrust/system/cuda/detail/binary_search.h b/compat/thrust/system/cuda/detail/binary_search.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/binary_search.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/block/copy.h b/compat/thrust/system/cuda/detail/block/copy.h new file mode 100644 index 0000000..9cc786b --- /dev/null +++ b/compat/thrust/system/cuda/detail/block/copy.h @@ -0,0 +1,223 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file copy.h + * \brief CUDA implementation of device-to-device copy, + * based on Gregory Diamos' memcpy code. + */ + +#pragma once + +#include + +#include + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace block +{ + +namespace trivial_copy_detail +{ + + +template + inline __device__ thrust::pair quotient_and_remainder(Size n, Size d) +{ + Size quotient = n / d; + Size remainder = n - d * quotient; + return thrust::make_pair(quotient,remainder); +} // end quotient_and_remainder() + + +// assumes the addresses dst & src are aligned to T boundaries +template +__device__ __thrust_forceinline__ +void aligned_copy(Context context, T *dst, const T *src, unsigned int num_elements) +{ + for(unsigned int i = context.thread_index(); + i < num_elements; + i += context.block_dimension()) + { + dst[i] = src[i]; + } +} // end aligned_copy() + + +} // end namespace trivial_copy_detail + + +template +__device__ __thrust_forceinline__ +void trivial_copy(Context context, void* destination_, const void* source_, size_t num_bytes) +{ + // reinterpret at bytes + char* destination = reinterpret_cast(destination_); + const char* source = reinterpret_cast(source_); + + // TODO replace this with uint64 +#if THRUST_DEVICE_COMPILER != THRUST_DEVICE_COMPILER_NVCC + typedef long long int2; + typedef long long uint2; +#endif // THRUST_DEVICE_COMPILER_NVCC + + // check alignment + // XXX can we do this in three steps? + // 1. copy until alignment is met + // 2. go hog wild + // 3. 
get the remainder + if(reinterpret_cast(destination) % sizeof(uint2) != 0 || reinterpret_cast(source) % sizeof(uint2) != 0) + { + for(unsigned int i = context.thread_index(); i < num_bytes; i += context.block_dimension()) + { + destination[i] = source[i]; + } + } + else + { + // it's aligned; do a wide copy + + // this pair stores the number of int2s in the aligned portion of the arrays + // and the number of bytes in the remainder + const thrust::pair num_wide_elements_and_remainder_bytes = trivial_copy_detail::quotient_and_remainder(num_bytes, sizeof(int2)); + + // copy int2 elements + trivial_copy_detail::aligned_copy(context, + reinterpret_cast(destination), + reinterpret_cast(source), + num_wide_elements_and_remainder_bytes.first); + + // XXX we could copy int elements here + + // copy remainder byte by byte + + // to find the beginning of the remainder arrays, we need to point at the beginning, and then skip the number of bytes in the aligned portion + // this is sizeof(int2) times the number of int2s comprising the aligned portion + const char *remainder_first = reinterpret_cast(source + sizeof(int2) * num_wide_elements_and_remainder_bytes.first); + char *remainder_result = reinterpret_cast(destination + sizeof(int2) * num_wide_elements_and_remainder_bytes.first); + + trivial_copy_detail::aligned_copy(context, remainder_result, remainder_first, num_wide_elements_and_remainder_bytes.second); + } +} // end trivial_copy() + + +namespace detail +{ +namespace dispatch +{ + +template + __thrust_forceinline__ __device__ + RandomAccessIterator2 copy(Context context, + RandomAccessIterator1 first, + RandomAccessIterator1 last, + RandomAccessIterator2 result, + thrust::detail::true_type is_trivial_copy) +{ + typedef typename thrust::iterator_value::type T; + + const T *src = &thrust::raw_reference_cast(*first); + T *dst = &thrust::raw_reference_cast(*result); + + size_t n = (last - first); + thrust::system::cuda::detail::block::trivial_copy(context, dst, src, n * 
sizeof(T)); + return result + n; +} // end copy() + +template + __thrust_forceinline__ __device__ + RandomAccessIterator2 copy(Context context, + RandomAccessIterator1 first, + RandomAccessIterator1 last, + RandomAccessIterator2 result, + thrust::detail::false_type is_trivial_copy) +{ + RandomAccessIterator2 end_of_output = result + (last - first); + + // advance iterators + first += context.thread_index(); + result += context.thread_index(); + + for(; + first < last; + first += context.block_dimension(), + result += context.block_dimension()) + { + *result = *first; + } // end for + + return end_of_output; +} // end copy() + +} // end namespace dispatch +} // end namespace detail + +template + __thrust_forceinline__ __device__ + RandomAccessIterator2 copy(Context context, + RandomAccessIterator1 first, + RandomAccessIterator1 last, + RandomAccessIterator2 result) +{ + return detail::dispatch::copy(context, first, last, result, +#if __CUDA_ARCH__ < 200 + // does not work reliably on pre-Fermi due to "Warning: ... 
assuming global memory space" issues + thrust::detail::false_type() +#else + typename thrust::detail::dispatch::is_trivial_copy::type() +#endif + ); +} // end copy() + + +template +inline __device__ +RandomAccessIterator2 copy_n(Context &ctx, RandomAccessIterator1 first, Size n, RandomAccessIterator2 result) +{ + for(Size i = ctx.thread_index(); i < n; i += ctx.block_dimension()) + { + result[i] = first[i]; + } + + ctx.barrier(); + + return result + n; +} + + +} // end namespace block +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/block/exclusive_scan.h b/compat/thrust/system/cuda/detail/block/exclusive_scan.h new file mode 100644 index 0000000..580a757 --- /dev/null +++ b/compat/thrust/system/cuda/detail/block/exclusive_scan.h @@ -0,0 +1,74 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace block +{ + + +template +inline __device__ +typename thrust::iterator_value::type + inplace_exclusive_scan(Context &ctx, RandomAccessIterator first, T init, BinaryFunction op) +{ + // perform an inclusive scan, then shift right + block::inplace_inclusive_scan(ctx, first, op); + + typename thrust::iterator_value::type carry = first[ctx.block_dimension() - 1]; + + ctx.barrier(); + + typename thrust::iterator_value::type left = (ctx.thread_index() == 0) ? init : first[ctx.thread_index() - 1]; + + ctx.barrier(); + + first[ctx.thread_index()] = left; + + ctx.barrier(); + + return carry; +} + + +template +inline __device__ + typename thrust::iterator_value::type + inplace_exclusive_scan(Context &ctx, Iterator first, T init) +{ + return block::inplace_exclusive_scan(ctx, first, init, thrust::plus::type>()); +} + + +} // end namespace block +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/block/inclusive_scan.h b/compat/thrust/system/cuda/detail/block/inclusive_scan.h new file mode 100644 index 0000000..012f7cd --- /dev/null +++ b/compat/thrust/system/cuda/detail/block/inclusive_scan.h @@ -0,0 +1,191 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace block +{ + +template +__device__ __thrust_forceinline__ +void inclusive_scan(Context context, + InputIterator first, + BinaryFunction binary_op) +{ + // TODO generalize to arbitrary n + // TODO support dynamic block_size + const unsigned int block_size = Context::ThreadsPerBlock::value; + + typename thrust::iterator_value::type val = first[context.thread_index()]; + + if(block_size > 1) { if (context.thread_index() >= 1) { val = binary_op(first[context.thread_index() - 1], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } + if(block_size > 2) { if (context.thread_index() >= 2) { val = binary_op(first[context.thread_index() - 2], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } + if(block_size > 4) { if (context.thread_index() >= 4) { val = binary_op(first[context.thread_index() - 4], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } + if(block_size > 8) { if (context.thread_index() >= 8) { val = binary_op(first[context.thread_index() - 8], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } + if(block_size > 16) { if (context.thread_index() >= 16) { val = binary_op(first[context.thread_index() - 16], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } + if(block_size > 32) { if (context.thread_index() >= 32) { val = binary_op(first[context.thread_index() - 32], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } + if(block_size > 64) { if (context.thread_index() >= 64) { val = binary_op(first[context.thread_index() - 64], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } + if(block_size > 128) { if (context.thread_index() >= 128) { val = binary_op(first[context.thread_index() - 128], val); } 
context.barrier(); first[context.thread_index()] = val; context.barrier(); } + if(block_size > 256) { if (context.thread_index() >= 256) { val = binary_op(first[context.thread_index() - 256], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } + if(block_size > 512) { if (context.thread_index() >= 512) { val = binary_op(first[context.thread_index() - 512], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } + if(block_size > 1024) { if (context.thread_index() >= 1024) { val = binary_op(first[context.thread_index() - 1024], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } +} // end inclusive_scan() + + +template +__device__ __thrust_forceinline__ +void inclusive_scan_n(Context context, + InputIterator first, + Size n, + BinaryFunction binary_op) +{ + // TODO support n > context.block_dimension() + typename thrust::iterator_value::type val = first[context.thread_index()]; + + for (unsigned int i = 1; i < n; i <<= 1) + { + if (context.thread_index() < n && context.thread_index() >= i) + val = binary_op(first[context.thread_index() - i], val); + + context.barrier(); + + first[context.thread_index()] = val; + + context.barrier(); + } +} // end inclusive_scan() + + +template +__device__ __thrust_forceinline__ +void inclusive_scan_by_flag(Context context, + InputIterator1 first1, + InputIterator2 first2, + BinaryFunction binary_op) +{ + // TODO generalize to arbitrary n + // TODO support dynamic block_size + const unsigned int block_size = Context::ThreadsPerBlock::value; + + typename thrust::iterator_value::type flg = first1[context.thread_index()]; + typename thrust::iterator_value::type val = first2[context.thread_index()]; + + if(block_size > 1) { if (context.thread_index() >= 1) { if (!flg) { flg |= first1[context.thread_index() - 1]; val = binary_op(first2[context.thread_index() - 1], val); } } context.barrier(); first1[context.thread_index()] = flg; 
first2[context.thread_index()] = val; context.barrier(); } + if(block_size > 2) { if (context.thread_index() >= 2) { if (!flg) { flg |= first1[context.thread_index() - 2]; val = binary_op(first2[context.thread_index() - 2], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } + if(block_size > 4) { if (context.thread_index() >= 4) { if (!flg) { flg |= first1[context.thread_index() - 4]; val = binary_op(first2[context.thread_index() - 4], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } + if(block_size > 8) { if (context.thread_index() >= 8) { if (!flg) { flg |= first1[context.thread_index() - 8]; val = binary_op(first2[context.thread_index() - 8], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } + if(block_size > 16) { if (context.thread_index() >= 16) { if (!flg) { flg |= first1[context.thread_index() - 16]; val = binary_op(first2[context.thread_index() - 16], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } + if(block_size > 32) { if (context.thread_index() >= 32) { if (!flg) { flg |= first1[context.thread_index() - 32]; val = binary_op(first2[context.thread_index() - 32], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } + if(block_size > 64) { if (context.thread_index() >= 64) { if (!flg) { flg |= first1[context.thread_index() - 64]; val = binary_op(first2[context.thread_index() - 64], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } + if(block_size > 128) { if (context.thread_index() >= 128) { if (!flg) { flg |= first1[context.thread_index() - 128]; val = binary_op(first2[context.thread_index() - 128], val); } } 
context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } + if(block_size > 256) { if (context.thread_index() >= 256) { if (!flg) { flg |= first1[context.thread_index() - 256]; val = binary_op(first2[context.thread_index() - 256], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } + if(block_size > 512) { if (context.thread_index() >= 512) { if (!flg) { flg |= first1[context.thread_index() - 512]; val = binary_op(first2[context.thread_index() - 512], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } + if(block_size > 1024) { if (context.thread_index() >= 1024) { if (!flg) { flg |= first1[context.thread_index() - 1024]; val = binary_op(first2[context.thread_index() - 1024], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } +} // end inclusive_scan_by_flag() + + +template +__device__ __thrust_forceinline__ +void inclusive_scan_by_flag_n(Context context, + InputIterator1 first1, + InputIterator2 first2, + Size n, + BinaryFunction binary_op) +{ + // TODO support n > context.block_dimension() + typename thrust::iterator_value::type flg = first1[context.thread_index()]; + typename thrust::iterator_value::type val = first2[context.thread_index()]; + + for (unsigned int i = 1; i < n; i <<= 1) + { + if (context.thread_index() < n && context.thread_index() >= i) + { + if (!flg) + { + flg |= first1[context.thread_index() - i]; + val = binary_op(first2[context.thread_index() - i], val); + } + } + + context.barrier(); + + first1[context.thread_index()] = flg; + first2[context.thread_index()] = val; + + context.barrier(); + } +} // end inclusive_scan_by_flag() + + +template +__device__ __thrust_forceinline__ +void inplace_inclusive_scan(Context &ctx, RandomAccessIterator first, BinaryFunction op) +{ 
+ typename thrust::iterator_value::type x = first[ctx.thread_index()]; + + for(unsigned int offset = 1; offset < ctx.block_dimension(); offset *= 2) + { + if(ctx.thread_index() >= offset) + { + x = op(first[ctx.thread_index() - offset], x); + } + + ctx.barrier(); + + first[ctx.thread_index()] = x; + + ctx.barrier(); + } +} + + +template +__device__ __thrust_forceinline__ +void inplace_inclusive_scan(Context &ctx, RandomAccessIterator first) +{ + block::inplace_inclusive_scan(ctx, first, thrust::plus::type>()); +} + + +} // end namespace block +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/block/merge.h b/compat/thrust/system/cuda/detail/block/merge.h new file mode 100644 index 0000000..9af0b7b --- /dev/null +++ b/compat/thrust/system/cuda/detail/block/merge.h @@ -0,0 +1,74 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace block +{ + +template +__device__ __thrust_forceinline__ + RandomAccessIterator3 merge(Context context, + RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + RandomAccessIterator2 last2, + RandomAccessIterator3 result, + StrictWeakOrdering comp); + +// XXX assumes that context.block_dimension() <= n1 and +// context.block_dimension() <= n2 +// This algorithm is analogous to inplace_merge +// but instead of working on the ranges +// [first, middle) and [middle, last) +// it works on the ranges +// [first, first + n1) and [first + n1, first + n1 + n2) +template +__device__ __thrust_forceinline__ + void inplace_merge_by_key_n(Context context, + RandomAccessIterator1 keys_first, + RandomAccessIterator2 values_first, + Size1 n1, + Size2 n2, + StrictWeakOrdering comp); + +} // end namespace block +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/block/merge.inl b/compat/thrust/system/cuda/detail/block/merge.inl new file mode 100644 index 0000000..5eae2b5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/block/merge.inl @@ -0,0 +1,168 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace block +{ + +template +__device__ __thrust_forceinline__ + RandomAccessIterator3 merge(Context context, + RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + RandomAccessIterator2 last2, + RandomAccessIterator3 result, + StrictWeakOrdering comp) +{ + typedef typename thrust::iterator_difference::type difference1; + typedef typename thrust::iterator_difference::type difference2; + + difference1 n1 = last1 - first1; + difference2 n2 = last2 - first2; + + // find the rank of each element in the other array + difference2 rank2 = 0; + if(context.thread_index() < n1) + { + RandomAccessIterator1 x = first1; + x += context.thread_index(); + + // lower_bound ensures that x sorts before any equivalent element of input2 + // this ensures stability + rank2 = thrust::system::detail::generic::scalar::lower_bound(first2, last2, raw_reference_cast(*x), comp) - first2; + } // end if + + difference1 rank1 = 0; + if(context.thread_index() < n2) + { + RandomAccessIterator2 x = first2 + context.thread_index(); + + // upper_bound ensures that x sorts before any equivalent element of input1 + // this ensures stability + rank1 = thrust::system::detail::generic::scalar::upper_bound(first1, last1, raw_reference_cast(*x), comp) - first1; + } // end if + + if(context.thread_index() < n1) + { + // scatter each element from input1 + RandomAccessIterator1 src = first1 + context.thread_index(); + RandomAccessIterator3 dst = result + context.thread_index() + rank2; + + *dst = *src; + } + + if(context.thread_index() < n2) + { + // scatter each element from input2 + RandomAccessIterator2 src = first2 + context.thread_index(); + RandomAccessIterator3 dst = result + context.thread_index() + rank1; + + *dst = *src; + } + + return result + n1 + n2; +} // end merge + + +template +__device__ __thrust_forceinline__ + void 
inplace_merge_by_key_n(Context context, + RandomAccessIterator1 keys_first, + RandomAccessIterator2 values_first, + Size1 n1, + Size2 n2, + StrictWeakOrdering comp) +{ + RandomAccessIterator1 input1 = keys_first; + RandomAccessIterator1 input2 = keys_first + n1; + + RandomAccessIterator2 input1val = values_first; + RandomAccessIterator2 input2val = values_first + n1; + + typedef typename thrust::iterator_value::type KeyType; + typedef typename thrust::iterator_value::type ValueType; + + // XXX use uninitialized here + KeyType inp1 = input1[context.thread_index()]; ValueType inp1val = input1val[context.thread_index()]; + KeyType inp2 = input2[context.thread_index()]; ValueType inp2val = input2val[context.thread_index()]; + + // to merge input1 and input2, use binary search to find the rank of inp1 & inp2 in arrays input2 & input1, respectively + // as before, the "end" variables point to one element after the last element of the arrays + + // start by looking through input2 for inp1's rank + unsigned int start_1 = 0; + + // don't do the search if our value is beyond the end of input1 + if(context.thread_index() < n1) + { + start_1 = thrust::system::detail::generic::scalar::lower_bound_n(input2, n2, inp1, comp) - input2; + } // end if + + // now look through input1 for inp2's rank + unsigned int start_2 = 0; + + // don't do the search if our value is beyond the end of input2 + if(context.thread_index() < n2) + { + // upper_bound ensures that equivalent elements in the first range sort before the second + start_2 = thrust::system::detail::generic::scalar::upper_bound_n(input1, n1, inp2, comp) - input1; + } // end if + + context.barrier(); + + // Write back into the right position to the input arrays; can be done in place since we read in + // the input arrays into registers before. 
+ if(context.thread_index() < n1) + { + input1[start_1 + context.thread_index()] = inp1; + input1val[start_1 + context.thread_index()] = inp1val; + } // end if + + if(context.thread_index() < n2) + { + input1[start_2 + context.thread_index()] = inp2; + input1val[start_2 + context.thread_index()] = inp2val; + } // end if +} // end inplace_merge_by_key_n() + + +} // end namespace block +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/block/merging_sort.h b/compat/thrust/system/cuda/detail/block/merging_sort.h new file mode 100644 index 0000000..8f8f999 --- /dev/null +++ b/compat/thrust/system/cuda/detail/block/merging_sort.h @@ -0,0 +1,199 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! 
\file merging_sort.h + * \brief Block version of merge sort + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace block +{ + + +template +__device__ void conditional_swap(RandomAccessIterator1 keys_first, + RandomAccessIterator2 values_first, + const unsigned int i, + const unsigned int end, + bool pred, + Compare comp) +{ + typedef typename thrust::iterator_traits::value_type KeyType; + typedef typename thrust::iterator_traits::value_type ValueType; + + if(pred && i+1 +__device__ void transposition_sort(Context context, + RandomAccessIterator1 keys_first, + RandomAccessIterator2 values_first, + const unsigned int i, + const unsigned int end, + const unsigned int size, + Compare comp) +{ + const bool is_odd = i&0x1; + + for(unsigned int round=size/2; round>0; --round) + { + // ODDS + conditional_swap(keys_first, values_first, i, end, is_odd, comp); + context.barrier(); + + // EVENS + conditional_swap(keys_first, values_first, i, end, !is_odd, comp); + context.barrier(); + } +} + +template +__device__ void merge(Context context, + RandomAccessIterator1 keys_first, + RandomAccessIterator2 values_first, + const unsigned int i, + const unsigned int n, + unsigned int begin, + unsigned int end, + unsigned int h, + StrictWeakOrdering cmp) +{ + // INVARIANT: Every element i resides within a sequence [begin,end) + // of length h which is already sorted + while( h::value_type KeyType; + typedef typename thrust::iterator_traits::value_type ValueType; + + KeyType key; + ValueType value; + + unsigned int rank = i - begin; + + // prevent out-of-bounds access + if(i < new_end) + { + key = keys_first[i]; + + if(begin==new_begin) // in the left side of merging pair + { + RandomAccessIterator1 result = thrust::system::detail::generic::scalar::lower_bound_n(keys_first+end, new_end-end, key, cmp); + rank += (result - (keys_first+end)); + } + else // in the right side of merging pair + { + 
RandomAccessIterator1 result = thrust::system::detail::generic::scalar::upper_bound_n(keys_first+new_begin, begin-new_begin, key, cmp); + rank += (result - (keys_first+new_begin)); + } + + value = values_first[i]; + } + + context.barrier(); + + if(i < new_end) + { + keys_first[new_begin+rank] = key; + values_first[new_begin+rank] = value; + } + + context.barrier(); + + begin = new_begin; + end = new_end; + } +} + + +/*! Block-wise implementation of merge sort. + * It provides the same external interface as odd_even_sort. + */ +template +__device__ void merging_sort(Context context, + RandomAccessIterator1 keys_first, + RandomAccessIterator2 values_first, + const unsigned int n, + StrictWeakOrdering comp) +{ + // Phase 1: Sort subsequences of length 32 using odd-even + // transposition sort. The code below assumes that h is a + // power of 2. Empirically, 32 delivers best results, + // which is not surprising since that's the warp width. + unsigned int i = context.thread_index(); + unsigned int h = 32; + unsigned int begin=i&(~(h-1)), end=min(n,begin+h); + + transposition_sort(context, keys_first, values_first, i, end, h, comp); + + // Phase 2: Apply merge tree to produce final sorted results + merge(context, keys_first, values_first, i, n, begin, end, h, comp); +} // end merging_sort() + + +} // end namespace block +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/block/odd_even_sort.h b/compat/thrust/system/cuda/detail/block/odd_even_sort.h new file mode 100644 index 0000000..0fa0ea0 --- /dev/null +++ b/compat/thrust/system/cuda/detail/block/odd_even_sort.h @@ -0,0 +1,151 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file odd_even_sort.h + * \brief Block versions of Batcher's Odd-Even Merge Sort + */ + +#pragma once + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace block +{ + + +/*! Block-wise implementation of Batcher's Odd-Even Merge Sort + * This implementation is based on Nadathur Satish's. + */ +template + __device__ void odd_even_sort(KeyType *keys, + ValueType *data, + const unsigned int n, + StrictWeakOrdering comp) +{ + for(unsigned int p = blockDim.x>>1; p > 0; p >>= 1) + { + unsigned int q = blockDim.x>>1, r = 0, d = p; + + while(q >= p) + { + unsigned int j = threadIdx.x + d; + + // if j lies beyond the end of the array, we consider it "sorted" wrt i + // regardless of whether i lies beyond the end of the array + if(threadIdx.x < (blockDim.x-d) && (threadIdx.x & p) == r && j < n) + { + KeyType xikey = keys[threadIdx.x]; + KeyType xjkey = keys[j]; + + ValueType xivalue = data[threadIdx.x]; + ValueType xjvalue = data[j]; + + // does xj sort before xi? 
+ if(comp(xjkey, xikey)) + { + keys[threadIdx.x] = xjkey; + keys[j] = xikey; + + data[threadIdx.x] = xjvalue; + data[j] = xivalue; + } // end if + } // end if + + d = q - p; + q >>= 1; + r = p; + + __syncthreads(); + } // end while + } // end for p +} // end odd_even_sort() + +template + __device__ void stable_odd_even_sort(KeyType *keys, + ValueType *data, + const unsigned int n, + StrictWeakOrdering comp) +{ + for(unsigned int i = 0; + i < blockDim.x>>1; + ++i) + { + bool thread_is_odd = threadIdx.x & 0x1; + + // do odds first + if(thread_is_odd && threadIdx.x + 1 < n) + { + KeyType xikey = keys[threadIdx.x]; + KeyType xjkey = keys[threadIdx.x + 1]; + + ValueType xivalue = data[threadIdx.x]; + ValueType xjvalue = data[threadIdx.x + 1]; + + // does xj sort before xi? + if(comp(xjkey, xikey)) + { + keys[threadIdx.x] = xjkey; + keys[threadIdx.x + 1] = xikey; + + data[threadIdx.x] = xjvalue; + data[threadIdx.x + 1] = xivalue; + } // end if + } // end if + + __syncthreads(); + + // do evens second + if(!thread_is_odd && threadIdx.x + 1 < n) + { + KeyType xikey = keys[threadIdx.x]; + KeyType xjkey = keys[threadIdx.x + 1]; + + ValueType xivalue = data[threadIdx.x]; + ValueType xjvalue = data[threadIdx.x + 1]; + + // does xj sort before xi? 
+ if(comp(xjkey, xikey)) + { + keys[threadIdx.x] = xjkey; + keys[threadIdx.x + 1] = xikey; + + data[threadIdx.x] = xjvalue; + data[threadIdx.x + 1] = xivalue; + } // end if + } // end if + + __syncthreads(); + } // end for i +} // end stable_odd_even_sort() + + +} // end namespace block +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/block/reduce.h b/compat/thrust/system/cuda/detail/block/reduce.h new file mode 100644 index 0000000..e0a1901 --- /dev/null +++ b/compat/thrust/system/cuda/detail/block/reduce.h @@ -0,0 +1,67 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace block +{ + +/* Reduces [data, data + n) using binary_op and stores the result in data[0] + * + * Upon return the elements in [data + 1, data + n) have unspecified values. 
+ */ +template +__device__ __thrust_forceinline__ +void reduce_n(Context context, ValueIterator data, unsigned int n, BinaryFunction binary_op) +{ + if (context.block_dimension() < n) + { + for (unsigned int i = context.block_dimension() + context.thread_index(); i < n; i += context.block_dimension()) + data[context.thread_index()] = binary_op(data[context.thread_index()], data[i]); + + context.barrier(); + } + + while (n > 1) + { + unsigned int half = n / 2; + + if (context.thread_index() < half) + data[context.thread_index()] = binary_op(data[context.thread_index()], data[n - context.thread_index() - 1]); + + context.barrier(); + + n = n - half; + } +} + +} // end namespace block +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/copy.h b/compat/thrust/system/cuda/detail/copy.h new file mode 100644 index 0000000..8f7ee97 --- /dev/null +++ b/compat/thrust/system/cuda/detail/copy.h @@ -0,0 +1,79 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + + +template + OutputIterator copy(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result); + + +template + OutputIterator copy(cross_system exec, + InputIterator first, + InputIterator last, + OutputIterator result); + + +template + OutputIterator copy_n(execution_policy &exec, + InputIterator first, + Size n, + OutputIterator result); + + +template + OutputIterator copy_n(cross_system exec, + InputIterator first, + Size n, + OutputIterator result); + + +} // end detail +} // end cuda +} // end system +} // end thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/copy.inl b/compat/thrust/system/cuda/detail/copy.inl new file mode 100644 index 0000000..125eebd --- /dev/null +++ b/compat/thrust/system/cuda/detail/copy.inl @@ -0,0 +1,88 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + + +template + OutputIterator copy(execution_policy &system, + InputIterator first, + InputIterator last, + OutputIterator result) +{ + return thrust::system::cuda::detail::copy_device_to_device(system,first,last,result); +} // end copy() + + +template + OutputIterator copy(cross_system systems, + InputIterator first, + InputIterator last, + OutputIterator result) +{ + return thrust::system::cuda::detail::copy_cross_system(systems,first,last,result); +} // end copy() + + +template + OutputIterator copy_n(execution_policy &system, + InputIterator first, + Size n, + OutputIterator result) +{ + return thrust::system::cuda::detail::copy_device_to_device(system,first,first+n,result); +} // end copy_n() + + +template + OutputIterator copy_n(cross_system systems, + InputIterator first, + Size n, + OutputIterator result) +{ + return thrust::system::cuda::detail::copy_cross_system_n(systems,first,n,result); +} // end copy_n() + + +} // end detail +} // end cuda +} // end system +} // end thrust + diff --git a/compat/thrust/system/cuda/detail/copy_cross_system.h b/compat/thrust/system/cuda/detail/copy_cross_system.h new file mode 100644 index 0000000..f68ea3c --- /dev/null +++ b/compat/thrust/system/cuda/detail/copy_cross_system.h @@ -0,0 +1,59 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + + +template + OutputIterator copy_cross_system(cross_system systems, + InputIterator begin, + InputIterator end, + OutputIterator result); + + +template + OutputIterator copy_cross_system_n(cross_system systems, + InputIterator begin, + Size n, + OutputIterator result); + + +} // end detail +} // end cuda +} // end system +} // end thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/copy_cross_system.inl b/compat/thrust/system/cuda/detail/copy_cross_system.inl new file mode 100644 index 0000000..861cb2c --- /dev/null +++ b/compat/thrust/system/cuda/detail/copy_cross_system.inl @@ -0,0 +1,301 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + +// XXX WAR circular #inclusion problem +template class temporary_array; + +} // end detail + +namespace system +{ +namespace cuda +{ +namespace detail +{ + + +// general input to random access case +template + RandomAccessIterator copy_cross_system(cross_system systems, + InputIterator begin, + InputIterator end, + RandomAccessIterator result, + thrust::incrementable_traversal_tag, + thrust::random_access_traversal_tag) +{ + //std::cerr << std::endl; + //std::cerr << "general copy_host_to_device(): InputIterator: " << typeid(InputIterator).name() << std::endl; + //std::cerr << "general copy_host_to_device(): OutputIterator: " << typeid(OutputIterator).name() << std::endl; + + typedef typename thrust::iterator_value::type InputType; + + // allocate temporary storage in System1 + thrust::detail::temporary_array temp(systems.system1,begin,end); + return thrust::copy(systems, temp.begin(), temp.end(), result); +} + +template + RandomAccessIterator copy_cross_system_n(cross_system systems, + InputIterator first, + Size n, + RandomAccessIterator result, + thrust::incrementable_traversal_tag, + thrust::random_access_traversal_tag) +{ + typedef typename thrust::iterator_value::type InputType; + + // allocate and copy to temporary storage System1 + thrust::detail::temporary_array temp(systems.system1, first, n); + + // recurse + return copy_cross_system(systems, temp.begin(), temp.end(), result); +} + + +// random access to general output case +template + OutputIterator copy_cross_system(cross_system systems, + RandomAccessIterator begin, + RandomAccessIterator end, + OutputIterator result, + thrust::random_access_traversal_tag, + thrust::incrementable_traversal_tag) +{ + typedef typename thrust::iterator_value::type InputType; + + // copy to temporary storage in System2 + thrust::detail::temporary_array temp(systems.system2, systems.system1, begin, 
end); + + return thrust::copy(systems.system2, temp.begin(), temp.end(), result); +} + +template + OutputIterator copy_cross_system_n(cross_system systems, + RandomAccessIterator first, + Size n, + OutputIterator result, + thrust::random_access_traversal_tag, + thrust::incrementable_traversal_tag) +{ + typedef typename thrust::iterator_value::type InputType; + + // copy to temporary storage in System2 + thrust::detail::temporary_array temp(systems.system2, systems.system1, first, n); + + // copy temp to result + return thrust::copy(systems.system2, temp.begin(), temp.end(), result); +} + + +// trivial copy +template + RandomAccessIterator2 copy_cross_system(cross_system systems, + RandomAccessIterator1 begin, + RandomAccessIterator1 end, + RandomAccessIterator2 result, + thrust::random_access_traversal_tag, + thrust::random_access_traversal_tag, + thrust::detail::true_type) // trivial copy +{ +// std::cerr << std::endl; +// std::cerr << "random access copy_device_to_host(): trivial" << std::endl; +// std::cerr << "general copy_device_to_host(): RandomAccessIterator1: " << typeid(RandomAccessIterator1).name() << std::endl; +// std::cerr << "general copy_device_to_host(): RandomAccessIterator2: " << typeid(RandomAccessIterator2).name() << std::endl; + + // how many elements to copy? 
+ typename thrust::iterator_traits::difference_type n = end - begin; + + thrust::system::cuda::detail::trivial_copy_n(systems, begin, n, result); + + return result + n; +} + + +namespace detail +{ + +// random access non-trivial iterator to random access iterator +template + RandomAccessIterator2 non_trivial_random_access_copy_cross_system(cross_system systems, + RandomAccessIterator1 begin, + RandomAccessIterator1 end, + RandomAccessIterator2 result, + thrust::detail::false_type) // InputIterator is non-trivial +{ + // copy the input to a temporary input system buffer of OutputType + typedef typename thrust::iterator_value::type OutputType; + + // allocate temporary storage in System1 + thrust::detail::temporary_array temp(systems.system1, begin, end); + + // recurse + return copy_cross_system(systems, temp.begin(), temp.end(), result); +} + +template + RandomAccessIterator2 non_trivial_random_access_copy_cross_system(cross_system systems, + RandomAccessIterator1 begin, + RandomAccessIterator1 end, + RandomAccessIterator2 result, + thrust::detail::true_type) // InputIterator is trivial +{ + typename thrust::iterator_difference::type n = thrust::distance(begin, end); + + // allocate temporary storage in System2 + // retain the input's type for the intermediate storage + // do not initialize the storage (the 0 does this) + typedef typename thrust::iterator_value::type InputType; + thrust::detail::temporary_array temp(0, systems.system2, n); + + // force a trivial (memcpy) copy of the input to the temporary + // note that this will not correctly account for copy constructors + // but there's nothing we can do about that + // XXX one thing we might try is to use pinned memory for the temporary storage + // this might allow us to correctly account for copy constructors + thrust::system::cuda::detail::trivial_copy_n(systems, begin, n, temp.begin()); + + // finally, copy to the result + return thrust::copy(systems.system2, temp.begin(), temp.end(), result); +} + +} // 
end detail + + +// random access iterator to random access host iterator with non-trivial copy +template + RandomAccessIterator2 copy_cross_system(cross_system systems, + RandomAccessIterator1 begin, + RandomAccessIterator1 end, + RandomAccessIterator2 result, + thrust::random_access_traversal_tag, + thrust::random_access_traversal_tag, + thrust::detail::false_type) // is_trivial_copy +{ + // dispatch a non-trivial random access cross system copy based on whether or not the InputIterator is trivial + return detail::non_trivial_random_access_copy_cross_system(systems, begin, end, result, + typename thrust::detail::is_trivial_iterator::type()); +} + +// random access iterator to random access iterator +template + RandomAccessIterator2 copy_cross_system(cross_system systems, + RandomAccessIterator1 begin, + RandomAccessIterator1 end, + RandomAccessIterator2 result, + thrust::random_access_traversal_tag input_traversal, + thrust::random_access_traversal_tag output_traversal) +{ + // dispatch on whether this is a trivial copy + return copy_cross_system(systems, begin, end, result, input_traversal, output_traversal, + typename thrust::detail::dispatch::is_trivial_copy::type()); +} + +template + RandomAccessIterator2 copy_cross_system_n(cross_system systems, + RandomAccessIterator1 first, + Size n, + RandomAccessIterator2 result, + thrust::random_access_traversal_tag input_traversal, + thrust::random_access_traversal_tag output_traversal) +{ + // implement with copy_cross_system + return copy_cross_system(systems, first, first + n, result, input_traversal, output_traversal); +} + +///////////////// +// Entry Point // +///////////////// + +template + OutputIterator copy_cross_system(cross_system systems, + InputIterator begin, + InputIterator end, + OutputIterator result) +{ + return copy_cross_system(systems, begin, end, result, + typename thrust::iterator_traversal::type(), + typename thrust::iterator_traversal::type()); +} + +template + OutputIterator 
copy_cross_system_n(cross_system systems, + InputIterator begin, + Size n, + OutputIterator result) +{ + return copy_cross_system_n(systems, begin, n, result, + typename thrust::iterator_traversal::type(), + typename thrust::iterator_traversal::type()); +} + +} // end detail +} // end cuda +} // end system +} // end thrust + diff --git a/compat/thrust/system/cuda/detail/copy_device_to_device.h b/compat/thrust/system/cuda/detail/copy_device_to_device.h new file mode 100644 index 0000000..a7d8df8 --- /dev/null +++ b/compat/thrust/system/cuda/detail/copy_device_to_device.h @@ -0,0 +1,51 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file copy_device_to_device.h + * \brief Device implementations for copying on the device. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + + +template + OutputIterator copy_device_to_device(execution_policy &exec, + InputIterator begin, + InputIterator end, + OutputIterator result); + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/copy_device_to_device.inl b/compat/thrust/system/cuda/detail/copy_device_to_device.inl new file mode 100644 index 0000000..c8263c5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/copy_device_to_device.inl @@ -0,0 +1,127 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ + +template + OutputIterator copy_device_to_device(execution_policy &exec, + InputIterator begin, + InputIterator end, + OutputIterator result, + thrust::detail::false_type) +{ + // general case (mixed types) + typedef typename thrust::iterator_traits::value_type InputType; + +#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC + return thrust::transform(exec, begin, end, result, thrust::identity()); +#else + // we're not compiling with nvcc: copy [begin, end) to temp host memory + typename thrust::iterator_traits::difference_type n = thrust::distance(begin, end); + + thrust::host_system_tag temp_exec; + thrust::detail::temporary_array temp1(temp_exec, begin, end); + + // transform temp1 to OutputType in host memory + typedef typename thrust::iterator_traits::value_type OutputType; + + thrust::detail::temporary_array temp2(temp_exec, temp1.begin(), temp1.end()); + + // copy temp2 to device + result = thrust::system::cuda::detail::copy_cross_system(temp2.begin(), temp2.end(), result); + + return result; +#endif // THRUST_DEVICE_COMPILER_NVCC +} + + +template + OutputIterator copy_device_to_device(execution_policy &exec, + InputIterator begin, + InputIterator end, + OutputIterator result, + thrust::detail::true_type) +{ + // specialization for device to device when the value_types match, operator= is not overloaded, + // and the iterators are pointers + + // how many elements to copy? 
+ typename thrust::iterator_traits::difference_type n = end - begin; + + thrust::system::cuda::detail::trivial_copy_n(exec, begin, n, result); + + return result + n; +} + +} // end namespace detail + +///////////////// +// Entry Point // +///////////////// + +template + OutputIterator copy_device_to_device(execution_policy &exec, + InputIterator begin, + InputIterator end, + OutputIterator result) +{ + typedef typename thrust::iterator_traits::value_type InputType; + typedef typename thrust::iterator_traits::value_type OutputType; + + const bool use_trivial_copy = + thrust::detail::is_same::value + && thrust::detail::is_trivial_iterator::value + && thrust::detail::is_trivial_iterator::value; + + // XXX WAR unused variable warning + (void) use_trivial_copy; + + return detail::copy_device_to_device(exec, begin, end, result, + thrust::detail::integral_constant()); + +} + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/copy_if.h b/compat/thrust/system/cuda/detail/copy_if.h new file mode 100644 index 0000000..5ed0f6c --- /dev/null +++ b/compat/thrust/system/cuda/detail/copy_if.h @@ -0,0 +1,49 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + +template + OutputIterator copy_if(execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred); + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/copy_if.inl b/compat/thrust/system/cuda/detail/copy_if.inl new file mode 100644 index 0000000..15ea7fa --- /dev/null +++ b/compat/thrust/system/cuda/detail/copy_if.inl @@ -0,0 +1,212 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + +template +struct copy_if_intervals_closure +{ + InputIterator1 input; + InputIterator2 stencil; + InputIterator3 offsets; + Decomposition decomp; + OutputIterator output; + + typedef Context context_type; + context_type context; + + copy_if_intervals_closure(InputIterator1 input, + InputIterator2 stencil, + InputIterator3 offsets, + Decomposition decomp, + OutputIterator output, + Context context = Context()) + : input(input), stencil(stencil), offsets(offsets), decomp(decomp), output(output), context(context) {} + + __device__ __thrust_forceinline__ + void operator()(void) + { + typedef typename thrust::iterator_value::type OutputType; + + typedef unsigned int PredicateType; + + const unsigned int CTA_SIZE = context_type::ThreadsPerBlock::value; + + thrust::plus binary_op; + + __shared__ PredicateType sdata[CTA_SIZE]; context.barrier(); + + typedef typename Decomposition::index_type IndexType; + + // this block processes results in [range.begin(), range.end()) + thrust::system::detail::internal::index_range range = decomp[context.block_index()]; + + IndexType base = range.begin(); + + PredicateType predicate = 0; + + // advance input iterators to this thread's starting position + input += base + context.thread_index(); + stencil += base + context.thread_index(); + + // advance output to this interval's starting position + if (context.block_index() != 0) + { + InputIterator3 temp = offsets + (context.block_index() - 1); + output += *temp; + } + + // process full blocks + while(base + CTA_SIZE <= range.end()) + { + // read data + sdata[context.thread_index()] = predicate = *stencil; + + context.barrier(); + + // scan block + block::inclusive_scan(context, sdata, binary_op); 
+ + // write data + if (predicate) + { + OutputIterator temp2 = output + (sdata[context.thread_index()] - 1); + *temp2 = *input; + } + + // advance inputs by CTA_SIZE + base += CTA_SIZE; + input += CTA_SIZE; + stencil += CTA_SIZE; + + // advance output by number of true predicates + output += sdata[CTA_SIZE - 1]; + + context.barrier(); + } + + // process partially full block at end of input (if necessary) + if (base < range.end()) + { + // read data + if (base + context.thread_index() < range.end()) + sdata[context.thread_index()] = predicate = *stencil; + else + sdata[context.thread_index()] = predicate = 0; + + context.barrier(); + + // scan block + block::inclusive_scan(context, sdata, binary_op); + + // write data + if (predicate) // expects predicate=false for >= interval_end + { + OutputIterator temp2 = output + (sdata[context.thread_index()] - 1); + *temp2 = *input; + } + } + } +}; // copy_if_intervals_closure + + +template + OutputIterator copy_if(execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator output, + Predicate pred) +{ + typedef typename thrust::iterator_difference::type IndexType; + typedef typename thrust::iterator_value::type OutputType; + + if (first == last) + return output; + + typedef thrust::system::detail::internal::uniform_decomposition Decomposition; + typedef thrust::detail::temporary_array IndexArray; + + Decomposition decomp = default_decomposition(last - first); + + // storage for per-block predicate counts + IndexArray block_results(exec, decomp.size()); + + // convert stencil into an iterator that produces integral values in {0,1} + typedef typename thrust::detail::predicate_to_integral PredicateToIndexTransform; + typedef thrust::transform_iterator PredicateToIndexIterator; + + PredicateToIndexIterator predicate_stencil(stencil, PredicateToIndexTransform(pred)); + + // compute number of true values in each interval + thrust::system::cuda::detail::reduce_intervals(exec, 
predicate_stencil, block_results.begin(), thrust::plus(), decomp); + + // scan the partial sums + thrust::inclusive_scan(exec, block_results.begin(), block_results.end(), block_results.begin(), thrust::plus()); + + // copy values to output + const unsigned int ThreadsPerBlock = 256; + typedef typename IndexArray::iterator InputIterator3; + typedef detail::statically_blocked_thread_array Context; + typedef copy_if_intervals_closure Closure; + Closure closure(first, predicate_stencil, block_results.begin(), decomp, output); + detail::launch_closure(closure, decomp.size(), ThreadsPerBlock); + + return output + block_results[decomp.size() - 1]; +} // end copy_if() + + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END + diff --git a/compat/thrust/system/cuda/detail/count.h b/compat/thrust/system/cuda/detail/count.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/count.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/cuda_launch_config.h b/compat/thrust/system/cuda/detail/cuda_launch_config.h new file mode 100644 index 0000000..b7f0ca2 --- /dev/null +++ b/compat/thrust/system/cuda/detail/cuda_launch_config.h @@ -0,0 +1,384 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + + +// XXX define our own device_properties_t to avoid errors when #including +// this file in the absence of a CUDA installation +struct device_properties_t +{ + // mirror the type and spelling of cudaDeviceProp's members + // keep these alphabetized + int major; + int maxGridSize[3]; + int maxThreadsPerBlock; + int maxThreadsPerMultiProcessor; + int minor; + int multiProcessorCount; + int regsPerBlock; + size_t sharedMemPerBlock; + int warpSize; +}; + + +// XXX define our own function_attributes_t to avoid errors when #including +// this file in the absence of a CUDA installation +struct function_attributes_t +{ + // mirror the type and spelling of cudaFuncAttributes' members + // keep these alphabetized + size_t constSizeBytes; + size_t localSizeBytes; + int maxThreadsPerBlock; + int numRegs; + size_t sharedSizeBytes; +}; + + +/*!
Computes a block size in number of threads for a CUDA kernel using an occupancy-promoting heuristic. + * \param attributes The cudaFuncAttributes corresponding to a __global__ function of interest on a GPU of interest. + * \param properties The cudaDeviceProp corresponding to a GPU on which to launch the __global__ function of interest. + * \return A CUDA block size, in number of threads, which the resources of the GPU's streaming multiprocessor can + * accommodate and which is intended to promote occupancy. The result is equivalent to the one performed by + * the "CUDA Occupancy Calculator". + * \note The __global__ function of interest is presumed to use 0 bytes of dynamically-allocated __shared__ memory. + */ +inline __host__ __device__ +std::size_t block_size_with_maximum_potential_occupancy(const function_attributes_t &attributes, + const device_properties_t &properties); + +/*! Computes a block size in number of threads for a CUDA kernel using an occupancy-promoting heuristic. + * Use this version of the function when a CUDA block's dynamically-allocated __shared__ memory requirements + * vary with the size of the block. + * \param attributes The cudaFuncAttributes corresponding to a __global__ function of interest on a GPU of interest. + * \param properties The cudaDeviceProp corresponding to a GPU on which to launch the __global__ function of interest. + * \param block_size_to_dynamic_smem_size A unary function which maps an integer CUDA block size to the number of bytes + * of dynamically-allocated __shared__ memory required by a CUDA block of that size. + * \return A CUDA block size, in number of threads, which the resources of the GPU's streaming multiprocessor can + * accommodate and which is intended to promote occupancy. The result is equivalent to the one performed by + * the "CUDA Occupancy Calculator".
+ */ +template +inline __host__ __device__ +std::size_t block_size_with_maximum_potential_occupancy(const function_attributes_t &attributes, + const device_properties_t &properties, + UnaryFunction block_size_to_dynamic_smem_size); + + +/*! Returns the maximum amount of dynamic shared memory each block + * can utilize without reducing thread occupancy. + * + * \param properties CUDA device properties + * \param attributes CUDA function attributes + * \param blocks_per_processor Number of blocks per streaming multiprocessor + */ +inline __host__ __device__ +size_t proportional_smem_allocation(const device_properties_t &properties, + const function_attributes_t &attributes, + size_t blocks_per_processor); + + +template +inline __host__ __device__ +size_t max_blocksize_subject_to_smem_usage(const device_properties_t &properties, + const function_attributes_t &attributes, + UnaryFunction blocksize_to_dynamic_smem_usage); + + + +namespace cuda_launch_config_detail +{ + +using std::size_t; + +namespace util +{ + + +template +inline __host__ __device__ +T min_(const T &lhs, const T &rhs) +{ + return rhs < lhs ? rhs : lhs; +} + + +template +struct zero_function +{ + inline __host__ __device__ + T operator()(T) + { + return 0; + } +}; + + +// x/y rounding towards +infinity for integers, used to determine # of blocks/warps etc. +template + inline __host__ __device__ L divide_ri(const L x, const R y) +{ + return (x + (y - 1)) / y; +} + +// x/y rounding towards zero for integers, used to determine # of blocks/warps etc. 
+template + inline __host__ __device__ L divide_rz(const L x, const R y) +{ + return x / y; +} + +// round x towards infinity to the next multiple of y +template + inline __host__ __device__ L round_i(const L x, const R y){ return y * divide_ri(x, y); } + +// round x towards zero to the next multiple of y +template + inline __host__ __device__ L round_z(const L x, const R y){ return y * divide_rz(x, y); } + +} // end namespace util + + + +// granularity of shared memory allocation +inline __host__ __device__ +size_t smem_allocation_unit(const device_properties_t &properties) +{ + switch(properties.major) + { + case 1: return 512; + case 2: return 128; + case 3: return 256; + default: return 256; // unknown GPU; have to guess + } +} + + +// granularity of register allocation +inline __host__ __device__ +size_t reg_allocation_unit(const device_properties_t &properties, const size_t regsPerThread) +{ + switch(properties.major) + { + case 1: return (properties.minor <= 1) ? 256 : 512; + case 2: switch(regsPerThread) + { + case 21: + case 22: + case 29: + case 30: + case 37: + case 38: + case 45: + case 46: + return 128; + default: + return 64; + } + case 3: return 256; + default: return 256; // unknown GPU; have to guess + } +} + + +// granularity of warp allocation +inline __host__ __device__ +size_t warp_allocation_multiple(const device_properties_t &properties) +{ + return (properties.major <= 1) ? 2 : 1; +} + +// number of "sides" into which the multiprocessor is partitioned +inline __host__ __device__ +size_t num_sides_per_multiprocessor(const device_properties_t &properties) +{ + switch(properties.major) + { + case 1: return 1; + case 2: return 2; + case 3: return 4; + default: return 4; // unknown GPU; have to guess + } +} + + +inline __host__ __device__ +size_t max_blocks_per_multiprocessor(const device_properties_t &properties) +{ + return (properties.major <= 2) ? 
8 : 16; +} + + +inline __host__ __device__ +size_t max_active_blocks_per_multiprocessor(const device_properties_t &properties, + const function_attributes_t &attributes, + int CTA_SIZE, + size_t dynamic_smem_bytes) +{ + // Determine the maximum number of CTAs that can be run simultaneously per SM + // This is equivalent to the calculation done in the CUDA Occupancy Calculator spreadsheet + + ////////////////////////////////////////// + // Limits due to threads/SM or blocks/SM + ////////////////////////////////////////// + const size_t maxThreadsPerSM = properties.maxThreadsPerMultiProcessor; // 768, 1024, 1536, etc. + const size_t maxBlocksPerSM = max_blocks_per_multiprocessor(properties); + + // Calc limits + const size_t ctaLimitThreads = (CTA_SIZE <= properties.maxThreadsPerBlock) ? maxThreadsPerSM / CTA_SIZE : 0; + const size_t ctaLimitBlocks = maxBlocksPerSM; + + ////////////////////////////////////////// + // Limits due to shared memory/SM + ////////////////////////////////////////// + const size_t smemAllocationUnit = smem_allocation_unit(properties); + const size_t smemBytes = attributes.sharedSizeBytes + dynamic_smem_bytes; + const size_t smemPerCTA = util::round_i(smemBytes, smemAllocationUnit); + + // Calc limit + const size_t ctaLimitSMem = smemPerCTA > 0 ? 
properties.sharedMemPerBlock / smemPerCTA : maxBlocksPerSM; + + ////////////////////////////////////////// + // Limits due to registers/SM + ////////////////////////////////////////// + const size_t regAllocationUnit = reg_allocation_unit(properties, attributes.numRegs); + const size_t warpAllocationMultiple = warp_allocation_multiple(properties); + const size_t numWarps = util::round_i(util::divide_ri(CTA_SIZE, properties.warpSize), warpAllocationMultiple); + + // Calc limit + size_t ctaLimitRegs; + if(properties.major <= 1) + { + // GPUs of compute capability 1.x allocate registers to CTAs + // Number of regs per block is regs per thread times number of warps times warp size, rounded up to allocation unit + const size_t regsPerCTA = util::round_i(attributes.numRegs * properties.warpSize * numWarps, regAllocationUnit); + ctaLimitRegs = regsPerCTA > 0 ? properties.regsPerBlock / regsPerCTA : maxBlocksPerSM; + } + else + { + // GPUs of compute capability 2.x and higher allocate registers to warps + // Number of regs per warp is regs per thread times times warp size, rounded up to allocation unit + const size_t regsPerWarp = util::round_i(attributes.numRegs * properties.warpSize, regAllocationUnit); + const size_t numSides = num_sides_per_multiprocessor(properties); + const size_t numRegsPerSide = properties.regsPerBlock / numSides; + ctaLimitRegs = regsPerWarp > 0 ? 
((numRegsPerSide / regsPerWarp) * numSides) / numWarps : maxBlocksPerSM;
+  }
+
+  //////////////////////////////////////////
+  // Overall limit is min() of limits due to above reasons
+  //////////////////////////////////////////
+  return util::min_(ctaLimitRegs, util::min_(ctaLimitSMem, util::min_(ctaLimitThreads, ctaLimitBlocks)));
+}
+
+
+} // end namespace cuda_launch_config_detail
+
+
+// Walks candidate block sizes downwards from the largest one permitted by both
+// the device and the kernel, one warp at a time, and returns the first
+// candidate that attains the highest occupancy (threads per block times
+// resident blocks per multiprocessor). Returns 0 if the block-size limit or
+// the warp size is not positive, or if no candidate yields non-zero occupancy.
+template<typename UnaryFunction>
+inline __host__ __device__
+std::size_t block_size_with_maximum_potential_occupancy(const function_attributes_t &attributes,
+                                                        const device_properties_t &properties,
+                                                        UnaryFunction block_size_to_dynamic_smem_size)
+{
+  const int block_limit = cuda_launch_config_detail::util::min_(properties.maxThreadsPerBlock, attributes.maxThreadsPerBlock);
+
+  // nothing can be launched with a non-positive limit or warp size; rejecting
+  // them here also keeps the unsigned loop arithmetic below well defined
+  if(block_limit <= 0 || properties.warpSize <= 0)
+    return 0;
+
+  size_t max_occupancy     = properties.maxThreadsPerMultiProcessor;
+  size_t largest_blocksize = block_limit;
+  size_t granularity       = properties.warpSize;
+  size_t max_blocksize     = 0;
+  size_t highest_occupancy = 0;
+
+  // the decrement saturates at zero: a limit that is not a multiple of the
+  // warp size would otherwise wrap around instead of terminating the loop
+  for(size_t blocksize = largest_blocksize;
+      blocksize != 0;
+      blocksize = (blocksize > granularity) ? blocksize - granularity : 0)
+  {
+    size_t occupancy = blocksize * cuda_launch_config_detail::max_active_blocks_per_multiprocessor(properties, attributes, blocksize, block_size_to_dynamic_smem_size(blocksize));
+
+    // strict comparison: on a tie the larger (earlier) block size is kept
+    if(occupancy > highest_occupancy)
+    {
+      max_blocksize = blocksize;
+      highest_occupancy = occupancy;
+    }
+
+    // early out, can't do better
+    if(highest_occupancy == max_occupancy)
+      break;
+  }
+
+  return max_blocksize;
+}
+
+
+// Convenience overload for kernels that use no dynamically-allocated
+// __shared__ memory: every block size maps to 0 bytes.
+inline __host__ __device__
+std::size_t block_size_with_maximum_potential_occupancy(const function_attributes_t &attributes,
+                                                        const device_properties_t &properties)
+{
+  return block_size_with_maximum_potential_occupancy(attributes, properties, cuda_launch_config_detail::util::zero_function<int>());
+}
+
+
+inline __host__ __device__
+size_t proportional_smem_allocation(const device_properties_t &properties,
+                                    const function_attributes_t &attributes,
+                                    size_t blocks_per_processor)
+{
+  size_t smem_per_processor = properties.sharedMemPerBlock;
+  size_t smem_allocation_unit =
cuda_launch_config_detail::smem_allocation_unit(properties); + + size_t total_smem_per_block = cuda_launch_config_detail::util::round_z(smem_per_processor / blocks_per_processor, smem_allocation_unit); + size_t static_smem_per_block = attributes.sharedSizeBytes; + + return total_smem_per_block - static_smem_per_block; +} + + +template +inline __host__ __device__ +size_t max_blocksize_subject_to_smem_usage(const device_properties_t &properties, + const function_attributes_t &attributes, + UnaryFunction blocksize_to_dynamic_smem_usage) +{ + size_t largest_blocksize = (thrust::min)(properties.maxThreadsPerBlock, attributes.maxThreadsPerBlock); + size_t granularity = properties.warpSize; + + for(int blocksize = largest_blocksize; blocksize > 0; blocksize -= granularity) + { + size_t total_smem_usage = blocksize_to_dynamic_smem_usage(blocksize) + attributes.sharedSizeBytes; + + if(total_smem_usage <= properties.sharedMemPerBlock) + { + return blocksize; + } + } + + return 0; +} + + +} // end detail +} // end cuda +} // end system +} // end thrust + diff --git a/compat/thrust/system/cuda/detail/default_decomposition.h b/compat/thrust/system/cuda/detail/default_decomposition.h new file mode 100644 index 0000000..1ed6bcf --- /dev/null +++ b/compat/thrust/system/cuda/detail/default_decomposition.h @@ -0,0 +1,45 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! 
\file default_decomposition.h + * \brief Return a decomposition that is appropriate for the CUDA backend. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + +template +thrust::system::detail::internal::uniform_decomposition default_decomposition(IndexType n); + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/default_decomposition.inl b/compat/thrust/system/cuda/detail/default_decomposition.inl new file mode 100644 index 0000000..3f0879a --- /dev/null +++ b/compat/thrust/system/cuda/detail/default_decomposition.inl @@ -0,0 +1,41 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + +template +thrust::system::detail::internal::uniform_decomposition default_decomposition(IndexType n) +{ + // TODO eliminate magical constant + device_properties_t properties = device_properties(); + return thrust::system::detail::internal::uniform_decomposition(n, properties.maxThreadsPerBlock, 10 * properties.multiProcessorCount); +} + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/detail/alignment.h b/compat/thrust/system/cuda/detail/detail/alignment.h new file mode 100644 index 0000000..31fdaaf --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/alignment.h @@ -0,0 +1,223 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ +namespace alignment_of_detail +{ + + +template class alignment_of_impl; + +template + struct helper +{ + static const std::size_t value = size_diff; +}; + +template + class helper +{ + public: + static const std::size_t value = alignment_of_impl::value; +}; + +template + class alignment_of_impl +{ + private: + struct big { T x; char c; }; + + public: + static const std::size_t value = helper::value; +}; + + +} // end alignment_of_detail + + +template + struct alignment_of + : alignment_of_detail::alignment_of_impl +{}; + + +template struct aligned_type; + +// __align__ is CUDA-specific, so guard it +#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC + +// implementing aligned_type portably is tricky: + +# if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC +// implement aligned_type with specialization because MSVC +// requires literals as arguments to declspec(align(n)) +template<> struct aligned_type<1> +{ + struct __align__(1) type { }; +}; + +template<> struct aligned_type<2> +{ + struct __align__(2) type { }; +}; + +template<> struct aligned_type<4> +{ + struct __align__(4) type { }; +}; + +template<> struct aligned_type<8> +{ + struct __align__(8) type { }; +}; + +template<> struct aligned_type<16> +{ + struct __align__(16) type { }; +}; + +template<> struct aligned_type<32> +{ + struct __align__(32) type { }; +}; + +template<> struct aligned_type<64> +{ + struct __align__(64) type { }; +}; + +template<> struct aligned_type<128> +{ + struct __align__(128) type { }; +}; + +template<> struct aligned_type<256> +{ + struct __align__(256) type { }; +}; + +template<> struct aligned_type<512> +{ + struct __align__(512) type { }; +}; + +template<> struct aligned_type<1024> +{ + struct __align__(1024) type { }; +}; + +template<> struct aligned_type<2048> +{ + struct __align__(2048) type { }; +}; + +template<> struct aligned_type<4096> 
+{ + struct __align__(4096) type { }; +}; + +template<> struct aligned_type<8192> +{ + struct __align__(8192) type { }; +}; +# elif (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC) && (THRUST_GCC_VERSION < 40300) +// implement aligned_type with specialization because gcc 4.2 +// requires literals as arguments to __attribute__(aligned(n)) +template<> struct aligned_type<1> +{ + struct __align__(1) type { }; +}; + +template<> struct aligned_type<2> +{ + struct __align__(2) type { }; +}; + +template<> struct aligned_type<4> +{ + struct __align__(4) type { }; +}; + +template<> struct aligned_type<8> +{ + struct __align__(8) type { }; +}; + +template<> struct aligned_type<16> +{ + struct __align__(16) type { }; +}; + +template<> struct aligned_type<32> +{ + struct __align__(32) type { }; +}; + +template<> struct aligned_type<64> +{ + struct __align__(64) type { }; +}; + +template<> struct aligned_type<128> +{ + struct __align__(128) type { }; +}; + +# else +// assume the compiler allows template parameters as +// arguments to __align__ +template struct aligned_type +{ + struct __align__(Align) type { }; +}; +# endif // THRUST_HOST_COMPILER +#else +template struct aligned_type +{ + struct type { }; +}; +#endif // THRUST_DEVICE_COMPILER + + +template + struct aligned_storage +{ + union type + { + unsigned char data[Len]; + + typename aligned_type::type align; + }; +}; + + +} // end detail +} // end detail +} // end cuda +} // end system +} // end thrust + diff --git a/compat/thrust/system/cuda/detail/detail/b40c/kernel_utils.h b/compat/thrust/system/cuda/detail/detail/b40c/kernel_utils.h new file mode 100644 index 0000000..e2c5a44 --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/b40c/kernel_utils.h @@ -0,0 +1,284 @@ +/** + * Copyright 2010 Duane Merrill + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For more information, see our Google Code project site: + * http://code.google.com/p/back40computing/ + * + * Thanks! + */ + + +//------------------------------------------------------------------------------ +// Common B40C Defines, Properties, and Routines +//------------------------------------------------------------------------------ + + +#pragma once + +#include +#include + +namespace thrust { +namespace system { +namespace cuda { +namespace detail { +namespace detail { +namespace b40c_thrust { + +//------------------------------------------------------------------------------ +// Device properties +//------------------------------------------------------------------------------ + + +#ifndef __CUDA_ARCH__ + #define __CUDA_ARCH__ 0 +#endif + +#define B40C_FERMI(version) (version >= 200) +#define B40C_LOG_WARP_THREADS 5 // 32 threads in a warp +#define B40C_WARP_THREADS (1 << B40C_LOG_WARP_THREADS) +#define B40C_LOG_MEM_BANKS(version) ((version >= 200) ? 
5 : 4) // 32 banks on fermi, 16 on tesla +#define B40C_MEM_BANKS(version) (1 << B40C_LOG_MEM_BANKS(version)) + +// TODO refactor these +#if __CUDA_ARCH__ >= 200 + #define FastMul(a, b) (a * b) +#else + #define FastMul(a, b) (__umul24(a, b)) +#endif + +#if __CUDA_ARCH__ >= 120 + #define WarpVoteAll(active_threads, predicate) (__all(predicate)) +#else + #define WarpVoteAll(active_threads, predicate) (EmulatedWarpVoteAll(predicate)) +#endif + +#if __CUDA_ARCH__ >= 200 + #define TallyWarpVote(active_threads, predicate, storage) (__popc(__ballot(predicate))) +#else + #define TallyWarpVote(active_threads, predicate, storage) (TallyWarpVoteSm10(predicate, storage)) +#endif + +#ifdef __LP64__ + #define _B40C_LP64_ true +#else + #define _B40C_LP64_ false +#endif + +#define _B40C_REG_MISER_QUALIFIER_ __shared__ + + +//------------------------------------------------------------------------------ +// Handy routines +//------------------------------------------------------------------------------ + + +/** + * Select maximum + */ +#define B40C_MAX(a, b) ((a > b) ? a : b) + + +/** + * MagnitudeShift(). Allows you to shift left for positive magnitude values, + * right for negative. + * + * N.B. This code is a little strange; we are using this meta-programming + * pattern of partial template specialization for structures in order to + * decide whether to shift left or right. Normally we would just use a + * conditional to decide if something was negative or not and then shift + * accordingly, knowing that the compiler will elide the untaken branch, + * i.e., the out-of-bounds shift during dead code elimination. However, + * the pass for bounds-checking shifts seems to happen before the DCE + * phase, which results in a an unsightly number of compiler warnings, so + * we force the issue earlier using structural template specialization. 
+ */ + +template struct MagnitudeShiftOp; + +template +struct MagnitudeShiftOp { + __device__ __forceinline__ static K Shift(K key) { + return key << magnitude; + } +}; + +template +struct MagnitudeShiftOp { + __device__ __forceinline__ static K Shift(K key) { + return key >> magnitude; + } +}; + +template +__device__ __forceinline__ K MagnitudeShift(K key) { + return MagnitudeShiftOp 0) ? magnitude : magnitude * -1, (magnitude > 0)>::Shift(key); +} + + +/** + * Suppress warnings for unused constants + */ +template +__device__ __forceinline__ void SuppressUnusedConstantWarning(const T) {} + + + + +//------------------------------------------------------------------------------ +// Common device routines +//------------------------------------------------------------------------------ + + +/** + * Perform a warp-synchronous prefix scan. Allows for diverting a warp's + * threads into separate scan problems (multi-scan). + */ +template +__device__ __forceinline__ int WarpScan( + volatile int warpscan[][NUM_ELEMENTS], + int partial_reduction, + int copy_section) { + + int warpscan_idx; + if (MULTI_SCAN) { + warpscan_idx = threadIdx.x & (NUM_ELEMENTS - 1); + } else { + warpscan_idx = threadIdx.x; + } + + warpscan[1][warpscan_idx] = partial_reduction; + + if (NUM_ELEMENTS > 1) warpscan[1][warpscan_idx] = partial_reduction = + partial_reduction + warpscan[1][warpscan_idx - 1]; + if (NUM_ELEMENTS > 2) warpscan[1][warpscan_idx] = partial_reduction = + partial_reduction + warpscan[1][warpscan_idx - 2]; + if (NUM_ELEMENTS > 4) warpscan[1][warpscan_idx] = partial_reduction = + partial_reduction + warpscan[1][warpscan_idx - 4]; + if (NUM_ELEMENTS > 8) warpscan[1][warpscan_idx] = partial_reduction = + partial_reduction + warpscan[1][warpscan_idx - 8]; + if (NUM_ELEMENTS > 16) warpscan[1][warpscan_idx] = partial_reduction = + partial_reduction + warpscan[1][warpscan_idx - 16]; + + if (copy_section > 0) { + warpscan[1 + copy_section][warpscan_idx] = partial_reduction; + } + +
return warpscan[1][warpscan_idx - 1]; +} + +/** + * Perform a warp-synchronous reduction + */ +template +__device__ __forceinline__ void WarpReduce( + int idx, + volatile int *storage, + int partial_reduction) +{ + storage[idx] = partial_reduction; + + if (NUM_ELEMENTS > 16) storage[idx] = partial_reduction = partial_reduction + storage[idx + 16]; + if (NUM_ELEMENTS > 8) storage[idx] = partial_reduction = partial_reduction + storage[idx + 8]; + if (NUM_ELEMENTS > 4) storage[idx] = partial_reduction = partial_reduction + storage[idx + 4]; + if (NUM_ELEMENTS > 2) storage[idx] = partial_reduction = partial_reduction + storage[idx + 2]; + if (NUM_ELEMENTS > 1) storage[idx] = partial_reduction = partial_reduction + storage[idx + 1]; +} + + +/** + * Tally a warp-vote regarding the given predicate using the supplied storage + */ +template +__device__ __forceinline__ int TallyWarpVoteSm10(int predicate, int storage[]) { + WarpReduce(threadIdx.x, storage, predicate); + return storage[0]; +} + + +/** + * Tally a warp-vote regarding the given predicate + */ +template +__device__ __forceinline__ int TallyWarpVoteSm10(int predicate) { + __shared__ int vote_reduction[B40C_WARP_THREADS]; + return TallyWarpVoteSm10(predicate, vote_reduction); +} + +/** + * Emulate the __all() warp vote instruction + */ +template +__device__ __forceinline__ int EmulatedWarpVoteAll(int predicate) { + return (TallyWarpVoteSm10(predicate) == ACTIVE_THREADS); +} + + +/** + * Have each thread concurrently perform a serial reduction over its specified segment + */ +template +__device__ __forceinline__ int +SerialReduce(int segment[]) { + + int reduce = segment[0]; + + #pragma unroll + for (int i = 1; i < (int) LENGTH; i++) { + reduce += segment[i]; + } + + return reduce; +} + + +/** + * Have each thread concurrently perform a serial scan over its specified segment + */ +template +__device__ __forceinline__ +void SerialScan(int segment[], int seed0) { + + int seed1; + + #pragma unroll + for (int i = 0; i 
< (int) LENGTH; i += 2) { + seed1 = segment[i] + seed0; + segment[i] = seed0; + seed0 = seed1 + segment[i + 1]; + segment[i + 1] = seed1; + } +} + + + + +//------------------------------------------------------------------------------ +// Empty Kernels +//------------------------------------------------------------------------------ + +template +__global__ void FlushKernel(void) +{ +} + + +} // end namespace b40c_thrust +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_api.h b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_api.h new file mode 100644 index 0000000..2b199bb --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_api.h @@ -0,0 +1,807 @@ +/****************************************************************************** + * Copyright 2010 Duane Merrill + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * + * + * + * AUTHORS' REQUEST: + * + * If you use|reference|benchmark this code, please cite our Technical + * Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf): + * + * @TechReport{ Merrill:Sorting:2010, + * author = "Duane Merrill and Andrew Grimshaw", + * title = "Revisiting Sorting for GPGPU Stream Architectures", + * year = "2010", + * institution = "University of Virginia, Department of Computer Science", + * address = "Charlottesville, VA, USA", + * number = "CS2010-03" + * } + * + * For more information, see our Google Code project site: + * http://code.google.com/p/back40computing/ + * + * Thanks! + ******************************************************************************/ + + + +/****************************************************************************** + * Radix Sorting API + * + * USAGE: + * + * Using the B40C radix sorting implementation is easy. Just #include this API + * file and its kernel include dependencies within your source. Below are two + * examples for using: + * + * (1) A keys-only example for sorting floats: + * + * // Create storage-management structure + * RadixSortStorage device_storage(d_float_keys); + * + * // Create and enact sorter + * RadixSortingEnactor sorter(d_float_keys_len); + * sorter.EnactSort(device_storage); + * + * // Re-acquire pointer to sorted keys, free unused/temp storage + * d_float_keys = device_storage.d_keys; + * device_storage.CleanupTempStorage(); + * + * (2) And a key-value example for sorting ints paired with doubles: + * + * // Create storage-management structure + * RadixSortStorage device_storage(d_int_keys, d_double_values); + * + * // Create and enact sorter + * RadixSortingEnactor sorter(d_int_keys_len); + * sorter.EnactSort(device_storage); + * + * // Re-acquire pointer to sorted keys and values, free unused/temp storage + * d_int_keys = device_storage.d_keys; + * d_double_values = device_storage.d_values; + * device_storage.CleanupTempStorage(); + * + * + 
******************************************************************************/ + +#pragma once + +#include +#include +#include +#include +#include + +#include "radixsort_reduction_kernel.h" +#include "radixsort_spine_kernel.h" +#include "radixsort_scanscatter_kernel.h" + +#include + +namespace thrust { +namespace system { +namespace cuda { +namespace detail { +namespace detail { +namespace b40c_thrust { + + +/****************************************************************************** + * Debugging options + ******************************************************************************/ + +static bool RADIXSORT_DEBUG = false; + + + +/****************************************************************************** + * Structures for managing device-side sorting state + ******************************************************************************/ + +/** + * Sorting storage-management structure for device vectors + */ +template +struct RadixSortStorage { + + // Device vector of keys to sort + K* d_keys; + + // Device vector of values to sort + V* d_values; + + // Ancillary device vector for key storage + K* d_alt_keys; + + // Ancillary device vector for value storage + V* d_alt_values; + + // Temporary device storage needed for radix sorting histograms + int *d_spine; + + // Flip-flopping temporary device storage denoting which digit place + // pass should read from which input source (i.e., false if reading from + // keys, true if reading from alternate_keys) + bool *d_from_alt_storage; + + // Host-side boolean whether or not an odd number of sorting passes left the + // results in alternate storage. If so, the d_keys (and d_values) pointers + // will have been swapped with the d_alt_keys (and d_alt_values) pointers in order to + // point to the final results. 
+ bool using_alternate_storage; + + // Constructor + RadixSortStorage(K* keys = NULL, V* values = NULL) + { + d_keys = keys; + d_values = values; + d_alt_keys = NULL; + d_alt_values = NULL; + d_spine = NULL; + d_from_alt_storage = NULL; + + using_alternate_storage = false; + } + + // Clean up non-results storage (may include freeing original storage if + // primary pointers were swizzled as per using_alternate_storage) + cudaError_t CleanupTempStorage() + { + if (d_alt_keys) cudaFree(d_alt_keys); + if (d_alt_values) cudaFree(d_alt_values); + if (d_spine) cudaFree(d_spine); + if (d_from_alt_storage) cudaFree(d_from_alt_storage); + + return cudaSuccess; + } +}; + + + +/****************************************************************************** + * Base class for sorting enactors + ******************************************************************************/ + + +/** + * Base class for SRTS radix sorting enactors. + */ +template +class BaseRadixSortingEnactor +{ +public: + + // Unsigned integer type suitable for radix sorting of keys + typedef typename KeyConversion::UnsignedBits ConvertedKeyType; + +protected: + + // + // Information about our problem configuration + // + + bool _keys_only; + unsigned int _num_elements; + int _cycle_elements; + int _spine_elements; + int _grid_size; + CtaDecomposition _work_decomposition; + int _passes; + bool _swizzle_pointers_for_odd_passes; + + // Information about our target device + cudaDeviceProp _device_props; + int _device_sm_version; + + // Information about our kernel assembly + int _kernel_ptx_version; + cudaFuncAttributes _spine_scan_kernel_attrs; + +protected: + + /** + * Constructor. + */ + BaseRadixSortingEnactor(int passes, int radix_bits, unsigned int num_elements, int max_grid_size, bool swizzle_pointers_for_odd_passes = true); + + /** + * Heuristic for determining the number of CTAs to launch. + * + * @param[in] max_grid_size + * Maximum allowable number of CTAs to launch. 
A value of 0 indicates + * that the default value should be used. + * + * @return The actual number of CTAs that should be launched + */ + int GridSize(int max_grid_size); + + /** + * Performs a distribution sorting pass over a single digit place + */ + template + cudaError_t DigitPlacePass(const RadixSortStorage &converted_storage); + + /** + * Enacts a sorting operation by performing the appropriate + * digit-place passes. To be overloaded by specialized subclasses. + */ + virtual cudaError_t EnactDigitPlacePasses(const RadixSortStorage &converted_storage) = 0; + +public: + + /** + * Returns the length (in unsigned ints) of the device vector needed for + * temporary storage of the reduction spine. Useful if pre-allocating + * your own device storage (as opposed to letting EnactSort() allocate it + * for you). + */ + int SpineElements() { return _spine_elements; } + + /** + * Returns whether or not the problem will fit on the device. + */ + bool CanFit(); + + /** + * Enacts a radix sorting operation on the specified device data. + * + * IMPORTANT NOTES: The device storage backing the specified input vectors of + * keys (and data) will be modified. (I.e., treat this as an in-place sort.) + * + * Additionally, the pointers in the problem_storage structure may be updated + * (a) depending upon the number of digit-place sorting passes needed, and (b) + * whether or not the caller has already allocated temporary storage. + * + * The sorted results will always be referenced by problem_storage.d_keys (and + * problem_storage.d_values). However, for an odd number of sorting passes (uncommon) + * these results will actually be backed by the storage initially allocated for + * problem_storage.d_alt_keys (and problem_storage.d_alt_values). If so, + * problem_storage.d_alt_keys and problem_storage.d_alt_values will be updated to + * reference the original problem_storage.d_keys and problem_storage.d_values in order + * to facilitate cleanup. 
+ * + * This means it is important to avoid keeping stale copies of device pointers + * to keys/data; you will want to re-reference the pointers in problem_storage. + * + * @param[in/out] problem_storage + * Device vectors of keys and values to sort, and ancillary storage + * needed by the sorting kernels. See the IMPORTANT NOTES above. + * + * The problem_storage.[alternate_keys|alternate_values|d_spine] fields are + * temporary storage needed by the sorting kernels. To facilitate + * speed, callers are welcome to re-use this storage for same-sized + * (or smaller) sortign problems. If NULL, these storage vectors will be + * allocated by this routine (and must be subsequently cuda-freed by + * the caller). + * + * @return cudaSuccess on success, error enumeration otherwise + */ + cudaError_t EnactSort(RadixSortStorage &problem_storage); + + /* + * Destructor + */ + virtual ~BaseRadixSortingEnactor() {} +}; + + + +template +BaseRadixSortingEnactor::BaseRadixSortingEnactor( + int passes, + int max_radix_bits, + unsigned int num_elements, + int max_grid_size, + bool swizzle_pointers_for_odd_passes) +{ + // + // Get current device properties + // + + int current_device; + cudaGetDevice(¤t_device); + cudaGetDeviceProperties(&_device_props, current_device); + _device_sm_version = _device_props.major * 100 + _device_props.minor * 10; + + + // + // Get SM version of compiled kernel assembly + // + cudaFuncGetAttributes(&_spine_scan_kernel_attrs, SrtsScanSpine); + _kernel_ptx_version = _spine_scan_kernel_attrs.ptxVersion * 10; + + + // + // Determine number of CTAs to launch, shared memory, cycle elements, etc. 
+ // + + _passes = passes; + _num_elements = num_elements; + _keys_only = IsKeysOnly(); + _cycle_elements = B40C_RADIXSORT_CYCLE_ELEMENTS(_kernel_ptx_version , ConvertedKeyType, V); + _grid_size = GridSize(max_grid_size); + _swizzle_pointers_for_odd_passes = swizzle_pointers_for_odd_passes; + + int total_cycles = _num_elements / _cycle_elements; + unsigned int cycles_per_block = total_cycles / _grid_size; + unsigned int extra_cycles = total_cycles - (cycles_per_block * _grid_size); + + CtaDecomposition work_decomposition = { + extra_cycles, // num_big_blocks + (cycles_per_block + 1) * _cycle_elements, // big_block_elements + cycles_per_block * _cycle_elements, // normal_block_elements + _num_elements - (total_cycles * _cycle_elements), // extra_elements_last_block + _num_elements}; // num_elements + + _work_decomposition = work_decomposition; + + int spine_cycles = ((_grid_size * (1 << max_radix_bits)) + B40C_RADIXSORT_SPINE_CYCLE_ELEMENTS - 1) / B40C_RADIXSORT_SPINE_CYCLE_ELEMENTS; + _spine_elements = spine_cycles * B40C_RADIXSORT_SPINE_CYCLE_ELEMENTS; +} + + + +template +int BaseRadixSortingEnactor::GridSize(int max_grid_size) +{ + const int SINGLE_CTA_CUTOFF = 0; // right now zero; we have no single-cta sorting + + // find maximum number of threadblocks if "use-default" + if (max_grid_size == 0) { + + if (_num_elements <= static_cast(SINGLE_CTA_CUTOFF)) { + + // The problem size is too small to warrant a two-level reduction: + // use only one stream-processor + max_grid_size = 1; + + } else { + + if (_device_sm_version <= 120) { + + // G80/G90 + max_grid_size = _device_props.multiProcessorCount * 4; + + } else if (_device_sm_version < 200) { + + // GT200 (has some kind of TLB or icache drama) + int orig_max_grid_size = _device_props.multiProcessorCount * B40C_RADIXSORT_SCAN_SCATTER_CTA_OCCUPANCY(_kernel_ptx_version); + if (_keys_only) { + orig_max_grid_size *= (_num_elements + (1024 * 1024 * 96) - 1) / (1024 * 1024 * 96); + } else { + orig_max_grid_size *= 
(_num_elements + (1024 * 1024 * 64) - 1) / (1024 * 1024 * 64); + } + max_grid_size = orig_max_grid_size; + + if (_num_elements / _cycle_elements > static_cast(max_grid_size)) { + + double multiplier1 = 4.0; + double multiplier2 = 16.0; + + double delta1 = 0.068; + double delta2 = 0.127; + + int dividend = (_num_elements + _cycle_elements - 1) / _cycle_elements; + + while(true) { + + double quotient = ((double) dividend) / (multiplier1 * max_grid_size); + quotient -= (int) quotient; + + if ((quotient > delta1) && (quotient < 1 - delta1)) { + + quotient = ((double) dividend) / (multiplier2 * max_grid_size / 3.0); + quotient -= (int) quotient; + + if ((quotient > delta2) && (quotient < 1 - delta2)) { + break; + } + } + + if (max_grid_size == orig_max_grid_size - 2) { + max_grid_size = orig_max_grid_size - 30; + } else { + max_grid_size -= 1; + } + } + } + } else { + + // GF100 + max_grid_size = 418; + } + } + } + + // Calculate the actual number of threadblocks to launch. Initially + // assume that each threadblock will do only one cycle_elements worth + // of work, but then clamp it by the "max" restriction derived above + // in order to accomodate the "single-sp" and "saturated" cases. 
+ + int grid_size = _num_elements / _cycle_elements; + if (grid_size == 0) { + grid_size = 1; + } + if (grid_size > max_grid_size) { + grid_size = max_grid_size; + } + + return grid_size; +} + + + +template +bool BaseRadixSortingEnactor:: +CanFit() +{ + long long bytes = (_num_elements * sizeof(K) * 2) + (_spine_elements * sizeof(int)); + if (!_keys_only) bytes += _num_elements * sizeof(V) * 2; + + if (_device_props.totalGlobalMem < 1024 * 1024 * 513) { + return (bytes < ((double) _device_props.totalGlobalMem) * 0.81); // allow up to 81% capacity for 512MB + } + + return (bytes < ((double) _device_props.totalGlobalMem) * 0.89); // allow up to 90% capacity +} + + + +template +template +cudaError_t BaseRadixSortingEnactor:: +DigitPlacePass(const RadixSortStorage &converted_storage) +{ + int threads = B40C_RADIXSORT_THREADS; + int dynamic_smem; + + cudaFuncAttributes reduce_kernel_attrs, scan_scatter_attrs; + cudaFuncGetAttributes(&reduce_kernel_attrs, RakingReduction); + cudaFuncGetAttributes(&scan_scatter_attrs, ScanScatterDigits); + + // + // Counting Reduction + // + + // Run tesla flush kernel if we have two or more threadblocks for each of the SMs + if ((_device_sm_version == 130) && (_work_decomposition.num_elements > static_cast(_device_props.multiProcessorCount * _cycle_elements * 2))) { + FlushKernel<<<_grid_size, B40C_RADIXSORT_THREADS, scan_scatter_attrs.sharedSizeBytes>>>(); + synchronize_if_enabled("FlushKernel"); + } + + // GF100 and GT200 get the same smem allocation for every kernel launch (pad the reduction/top-level-scan kernels) + dynamic_smem = (_kernel_ptx_version >= 130) ? 
scan_scatter_attrs.sharedSizeBytes - reduce_kernel_attrs.sharedSizeBytes : 0; + + RakingReduction <<<_grid_size, threads, dynamic_smem>>>( + converted_storage.d_from_alt_storage, + converted_storage.d_spine, + converted_storage.d_keys, + converted_storage.d_alt_keys, + _work_decomposition); + synchronize_if_enabled("RakingReduction"); + + + // + // Spine + // + + // GF100 and GT200 get the same smem allocation for every kernel launch (pad the reduction/top-level-scan kernels) + dynamic_smem = (_kernel_ptx_version >= 130) ? scan_scatter_attrs.sharedSizeBytes - _spine_scan_kernel_attrs.sharedSizeBytes : 0; + + SrtsScanSpine<<<_grid_size, B40C_RADIXSORT_SPINE_THREADS, dynamic_smem>>>( + converted_storage.d_spine, + converted_storage.d_spine, + _spine_elements); + synchronize_if_enabled("SrtsScanSpine"); + + + // + // Scanning Scatter + // + + // Run tesla flush kernel if we have two or more threadblocks for each of the SMs + if ((_device_sm_version == 130) && (_work_decomposition.num_elements > static_cast(_device_props.multiProcessorCount * _cycle_elements * 2))) { + FlushKernel<<<_grid_size, B40C_RADIXSORT_THREADS, scan_scatter_attrs.sharedSizeBytes>>>(); + synchronize_if_enabled("FlushKernel"); + } + + ScanScatterDigits <<<_grid_size, threads, 0>>>( + converted_storage.d_from_alt_storage, + converted_storage.d_spine, + converted_storage.d_keys, + converted_storage.d_alt_keys, + converted_storage.d_values, + converted_storage.d_alt_values, + _work_decomposition); + synchronize_if_enabled("ScanScatterDigits"); + + return cudaSuccess; +} + + + +template +cudaError_t BaseRadixSortingEnactor:: +EnactSort(RadixSortStorage &problem_storage) +{ + // + // Allocate device memory for temporary storage (if necessary) + // + + if (problem_storage.d_alt_keys == NULL) { + cudaMalloc((void**) &problem_storage.d_alt_keys, _num_elements * sizeof(K)); + } + if (!_keys_only && (problem_storage.d_alt_values == NULL)) { + cudaMalloc((void**) &problem_storage.d_alt_values, _num_elements 
* sizeof(V)); + } + if (problem_storage.d_spine == NULL) { + cudaMalloc((void**) &problem_storage.d_spine, _spine_elements * sizeof(int)); + } + if (problem_storage.d_from_alt_storage == NULL) { + cudaMalloc((void**) &problem_storage.d_from_alt_storage, 2 * sizeof(bool)); + } + + // Determine suitable type of unsigned byte storage to use for keys + typedef typename KeyConversion::UnsignedBits ConvertedKeyType; + + // Copy storage pointers to an appropriately typed stucture + RadixSortStorage converted_storage; + memcpy(&converted_storage, &problem_storage, sizeof(RadixSortStorage)); + + // + // Enact the sorting operation + // + + if (RADIXSORT_DEBUG) { + + printf("_device_sm_version: %d, _kernel_ptx_version: %d\n", _device_sm_version, _kernel_ptx_version); + printf("Bottom-level reduction & scan kernels:\n\tgrid_size: %d, \n\tthreads: %d, \n\tcycle_elements: %d, \n\tnum_big_blocks: %d, \n\tbig_block_elements: %d, \n\tnormal_block_elements: %d\n\textra_elements_last_block: %d\n\n", + _grid_size, B40C_RADIXSORT_THREADS, _cycle_elements, _work_decomposition.num_big_blocks, _work_decomposition.big_block_elements, _work_decomposition.normal_block_elements, _work_decomposition.extra_elements_last_block); + printf("Top-level spine scan:\n\tgrid_size: %d, \n\tthreads: %d, \n\tspine_block_elements: %d\n\n", + _grid_size, B40C_RADIXSORT_SPINE_THREADS, _spine_elements); + } + + cudaError_t retval = EnactDigitPlacePasses(converted_storage); + + + // + // Swizzle pointers if we left our sorted output in temp storage + // + + if (_swizzle_pointers_for_odd_passes) { + + cudaMemcpy( + &problem_storage.using_alternate_storage, + &problem_storage.d_from_alt_storage[_passes & 0x1], + sizeof(bool), + cudaMemcpyDeviceToHost); + + if (problem_storage.using_alternate_storage) { + thrust::swap(problem_storage.d_keys, problem_storage.d_alt_keys); + if (!_keys_only) { + thrust::swap(problem_storage.d_values, problem_storage.d_alt_values); + } + } + } + + return retval; +} + + + + + 
+/****************************************************************************** + * Sorting enactor classes + ******************************************************************************/ + +/** + * Generic sorting enactor class. Simply create an instance of this class + * with your key-type K (and optionally value-type V if sorting with satellite + * values). + * + * Template specialization provides the appropriate enactor instance to handle + * the specified data types. + * + * @template-param K + * Type of keys to be sorted + * + * @template-param V + * Type of values to be sorted. + * + * @template-param ConvertedKeyType + * Leave as default to effect necessary enactor specialization. + */ +template ::UnsignedBits> +class RadixSortingEnactor; + + + +/** + * Sorting enactor that is specialized for 8-bit key types + */ +template +class RadixSortingEnactor : public BaseRadixSortingEnactor +{ +protected: + + typedef BaseRadixSortingEnactor Base; + typedef typename Base::ConvertedKeyType ConvertedKeyType; + + cudaError_t EnactDigitPlacePasses(const RadixSortStorage &converted_storage) + { + Base::template DigitPlacePass<0, 4, 0, PreprocessKeyFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<1, 4, 4, NopFunctor, PostprocessKeyFunctor > (converted_storage); + + return cudaSuccess; + } + +public: + + /** + * Constructor. + * + * @param[in] num_elements + * Length (in elements) of the input to a sorting operation + * + * @param[in] max_grid_size + * Maximum allowable number of CTAs to launch. The default value of 0 indicates + * that the dispatch logic should select an appropriate value for the target device. 
+ */ + RadixSortingEnactor(unsigned int num_elements, int max_grid_size = 0) : Base::BaseRadixSortingEnactor(2, 4, num_elements, max_grid_size) {} + +}; + + + +/** + * Sorting enactor that is specialized for 16-bit key types + */ +template +class RadixSortingEnactor : public BaseRadixSortingEnactor +{ +protected: + + typedef BaseRadixSortingEnactor Base; + typedef typename Base::ConvertedKeyType ConvertedKeyType; + + cudaError_t EnactDigitPlacePasses(const RadixSortStorage &converted_storage) + { + Base::template DigitPlacePass<0, 4, 0, PreprocessKeyFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<1, 4, 4, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<2, 4, 8, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<3, 4, 12, NopFunctor, PostprocessKeyFunctor > (converted_storage); + + return cudaSuccess; + } + +public: + + /** + * Constructor. + * + * @param[in] num_elements + * Length (in elements) of the input to a sorting operation + * + * @param[in] max_grid_size + * Maximum allowable number of CTAs to launch. The default value of 0 indicates + * that the dispatch logic should select an appropriate value for the target device. 
+ */ + RadixSortingEnactor(unsigned int num_elements, int max_grid_size = 0) : Base::BaseRadixSortingEnactor(4, 4, num_elements, max_grid_size) {} + +}; + + +/** + * Sorting enactor that is specialized for 32-bit key types + */ +template +class RadixSortingEnactor : public BaseRadixSortingEnactor +{ +protected: + + typedef BaseRadixSortingEnactor Base; + typedef typename Base::ConvertedKeyType ConvertedKeyType; + + cudaError_t EnactDigitPlacePasses(const RadixSortStorage &converted_storage) + { + Base::template DigitPlacePass<0, 4, 0, PreprocessKeyFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<1, 4, 4, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<2, 4, 8, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<3, 4, 12, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<4, 4, 16, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<5, 4, 20, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<6, 4, 24, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<7, 4, 28, NopFunctor, PostprocessKeyFunctor > (converted_storage); + + return cudaSuccess; + } + +public: + + /** + * Constructor. + * + * @param[in] num_elements + * Length (in elements) of the input to a sorting operation + * + * @param[in] max_grid_size + * Maximum allowable number of CTAs to launch. The default value of 0 indicates + * that the dispatch logic should select an appropriate value for the target device. 
+ */ + RadixSortingEnactor(unsigned int num_elements, int max_grid_size = 0) : Base::BaseRadixSortingEnactor(8, 4, num_elements, max_grid_size) {} + +}; + + + +/** + * Sorting enactor that is specialized for 64-bit key types + */ +template +class RadixSortingEnactor : public BaseRadixSortingEnactor +{ +protected: + + typedef BaseRadixSortingEnactor Base; + typedef typename Base::ConvertedKeyType ConvertedKeyType; + + cudaError_t EnactDigitPlacePasses(const RadixSortStorage &converted_storage) + { + Base::template DigitPlacePass<0, 4, 0, PreprocessKeyFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<1, 4, 4, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<2, 4, 8, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<3, 4, 12, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<4, 4, 16, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<5, 4, 20, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<6, 4, 24, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<7, 4, 28, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<8, 4, 32, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<9, 4, 36, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<10, 4, 40, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<11, 4, 44, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<12, 4, 48, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<13, 4, 52, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<14, 4, 56, NopFunctor, NopFunctor >(converted_storage); + Base::template DigitPlacePass<15, 4, 60, NopFunctor, PostprocessKeyFunctor > (converted_storage); + + return cudaSuccess; + } + +public: + + /** + * Constructor. 
+ * + * @param[in] num_elements + * Length (in elements) of the input to a sorting operation + * + * @param[in] max_grid_size + * Maximum allowable number of CTAs to launch. The default value of 0 indicates + * that the dispatch logic should select an appropriate value for the target device. + */ + RadixSortingEnactor(unsigned int num_elements, int max_grid_size = 0) : Base::BaseRadixSortingEnactor(16, 4, num_elements, max_grid_size) {} + +}; + + +} // end namespace b40c_thrust +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_kernel_common.h b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_kernel_common.h new file mode 100644 index 0000000..7899dc3 --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_kernel_common.h @@ -0,0 +1,173 @@ +/****************************************************************************** + * + * Copyright 2010 Duane Merrill + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * + * + * + * AUTHORS' REQUEST: + * + * If you use|reference|benchmark this code, please cite our Technical + * Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf): + * + * @TechReport{ Merrill:Sorting:2010, + * author = "Duane Merrill and Andrew Grimshaw", + * title = "Revisiting Sorting for GPGPU Stream Architectures", + * year = "2010", + * institution = "University of Virginia, Department of Computer Science", + * address = "Charlottesville, VA, USA", + * number = "CS2010-03" + * } + * + * For more information, see our Google Code project site: + * http://code.google.com/p/back40computing/ + * + * Thanks! + * + ******************************************************************************/ + + +/****************************************************************************** + * Configuration management for B40C radix sorting kernels + ******************************************************************************/ + +#pragma once + +#include "kernel_utils.h" +#include "vector_types.h" +#include "radixsort_key_conversion.h" + +namespace thrust { +namespace system { +namespace cuda { +namespace detail { +namespace detail { +namespace b40c_thrust { + + +/****************************************************************************** + * Radix sorting configuration + ******************************************************************************/ + +// 128 threads +#define B40C_RADIXSORT_LOG_THREADS 7 +#define B40C_RADIXSORT_THREADS (1 << B40C_RADIXSORT_LOG_THREADS) + +// Target threadblock occupancy for counting/reduction kernel +#define B40C_SM20_REDUCE_CTA_OCCUPANCY() (8) // 8 threadblocks on GF100 +#define B40C_SM12_REDUCE_CTA_OCCUPANCY() (5) // 5 threadblocks on GT200 +#define B40C_SM10_REDUCE_CTA_OCCUPANCY() (3) // 4 threadblocks on G80 +#define B40C_RADIXSORT_REDUCE_CTA_OCCUPANCY(version) ((version >= 200) ? B40C_SM20_REDUCE_CTA_OCCUPANCY() : \ + (version >= 120) ? 
B40C_SM12_REDUCE_CTA_OCCUPANCY() : \ + B40C_SM10_REDUCE_CTA_OCCUPANCY()) + +// Target threadblock occupancy for bulk scan/scatter kernel +#define B40C_SM20_SCAN_SCATTER_CTA_OCCUPANCY() (7) // 7 threadblocks on GF100 +#define B40C_SM12_SCAN_SCATTER_CTA_OCCUPANCY() (5) // 5 threadblocks on GT200 +#define B40C_SM10_SCAN_SCATTER_CTA_OCCUPANCY() (2) // 2 threadblocks on G80 +#define B40C_RADIXSORT_SCAN_SCATTER_CTA_OCCUPANCY(version) ((version >= 200) ? B40C_SM20_SCAN_SCATTER_CTA_OCCUPANCY() : \ + (version >= 120) ? B40C_SM12_SCAN_SCATTER_CTA_OCCUPANCY() : \ + B40C_SM10_SCAN_SCATTER_CTA_OCCUPANCY()) + +// Number of 256-element sets to rake per raking pass +#define B40C_SM20_LOG_SETS_PER_PASS() (1) // 2 sets on GF100 +#define B40C_SM12_LOG_SETS_PER_PASS() (0) // 1 set on GT200 +#define B40C_SM10_LOG_SETS_PER_PASS() (1) // 2 sets on G80 +#define B40C_RADIXSORT_LOG_SETS_PER_PASS(version) ((version >= 200) ? B40C_SM20_LOG_SETS_PER_PASS() : \ + (version >= 120) ? B40C_SM12_LOG_SETS_PER_PASS() : \ + B40C_SM10_LOG_SETS_PER_PASS()) + +// Number of raking passes per cycle +#define B40C_SM20_LOG_PASSES_PER_CYCLE(K, V) (((B40C_MAX(sizeof(K), sizeof(V)) > 4) || _B40C_LP64_) ? 0 : 1) // 2 passes on GF100 (only one for large keys/values, or for 64-bit device pointers) +#define B40C_SM12_LOG_PASSES_PER_CYCLE(K, V) (B40C_MAX(sizeof(K), sizeof(V)) > 4 ? 0 : 1) // 2 passes on GT200 (only for large keys/values) +#define B40C_SM10_LOG_PASSES_PER_CYCLE(K, V) (0) // 1 pass on G80 +#define B40C_RADIXSORT_LOG_PASSES_PER_CYCLE(version, K, V) ((version >= 200) ? B40C_SM20_LOG_PASSES_PER_CYCLE(K, V) : \ + (version >= 120) ? 
B40C_SM12_LOG_PASSES_PER_CYCLE(K, V) : \ + B40C_SM10_LOG_PASSES_PER_CYCLE(K, V)) + + +// Number of raking threads per raking pass +#define B40C_SM20_LOG_RAKING_THREADS_PER_PASS() (B40C_LOG_WARP_THREADS + 1) // 2 raking warps on GF100 +#define B40C_SM12_LOG_RAKING_THREADS_PER_PASS() (B40C_LOG_WARP_THREADS) // 1 raking warp on GT200 +#define B40C_SM10_LOG_RAKING_THREADS_PER_PASS() (B40C_LOG_WARP_THREADS + 2) // 4 raking warps on G80 +#define B40C_RADIXSORT_LOG_RAKING_THREADS_PER_PASS(version) ((version >= 200) ? B40C_SM20_LOG_RAKING_THREADS_PER_PASS() : \ + (version >= 120) ? B40C_SM12_LOG_RAKING_THREADS_PER_PASS() : \ + B40C_SM10_LOG_RAKING_THREADS_PER_PASS()) + + +// Number of elements per cycle +#define B40C_RADIXSORT_LOG_CYCLE_ELEMENTS(version, K, V) (B40C_RADIXSORT_LOG_SETS_PER_PASS(version) + B40C_RADIXSORT_LOG_PASSES_PER_CYCLE(version, K, V) + B40C_RADIXSORT_LOG_THREADS + 1) +#define B40C_RADIXSORT_CYCLE_ELEMENTS(version, K, V) (1 << B40C_RADIXSORT_LOG_CYCLE_ELEMENTS(version, K, V)) + +// Number of warps per CTA +#define B40C_RADIXSORT_LOG_WARPS (B40C_RADIXSORT_LOG_THREADS - B40C_LOG_WARP_THREADS) +#define B40C_RADIXSORT_WARPS (1 << B40C_RADIXSORT_LOG_WARPS) + +// Number of threads for spine-scanning kernel +#define B40C_RADIXSORT_LOG_SPINE_THREADS 7 // 128 threads +#define B40C_RADIXSORT_SPINE_THREADS (1 << B40C_RADIXSORT_LOG_SPINE_THREADS) + +// Number of elements per spine-scanning cycle +#define B40C_RADIXSORT_LOG_SPINE_CYCLE_ELEMENTS 9 // 512 elements +#define B40C_RADIXSORT_SPINE_CYCLE_ELEMENTS (1 << B40C_RADIXSORT_LOG_SPINE_CYCLE_ELEMENTS) + + + +/****************************************************************************** + * SRTS Control Structures + ******************************************************************************/ + + +/** + * Value-type structure denoting keys-only sorting + */ +struct KeysOnlyType {}; + +/** + * Returns whether or not the templated type indicates keys-only sorting + */ +template +inline __host__ __device__ bool 
IsKeysOnly() {return false;} + + +/** + * Returns whether or not the templated type indicates keys-only sorting + */ +template <> +inline __host__ __device__ bool IsKeysOnly() {return true;} + + +/** + * A given threadblock may receive one of three different amounts of + * work: "big", "normal", and "last". The big workloads are one + * cycle_elements greater than the normal, and the last workload + * does the extra (problem-size % cycle_elements) work. + */ +struct CtaDecomposition { + unsigned int num_big_blocks; + unsigned int big_block_elements; + unsigned int normal_block_elements; + unsigned int extra_elements_last_block; + unsigned int num_elements; +}; + + +} // end namespace b40c_thrust +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_key_conversion.h b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_key_conversion.h new file mode 100644 index 0000000..a170f95 --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_key_conversion.h @@ -0,0 +1,352 @@ +/****************************************************************************** + * + * Copyright 2010 Duane Merrill + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * + * + * AUTHORS' REQUEST: + * + * If you use|reference|benchmark this code, please cite our Technical + * Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf): + * + * @TechReport{ Merrill:Sorting:2010, + * author = "Duane Merrill and Andrew Grimshaw", + * title = "Revisiting Sorting for GPGPU Stream Architectures", + * year = "2010", + * institution = "University of Virginia, Department of Computer Science", + * address = "Charlottesville, VA, USA", + * number = "CS2010-03" + * } + * + * For more information, see our Google Code project site: + * http://code.google.com/p/back40computing/ + * + * Thanks! + * + ******************************************************************************/ + + +/****************************************************************************** + * Functors for converting signed and floating point types to unsigned types + * suitable for radix sorting + ******************************************************************************/ + +#pragma once + +namespace thrust { +namespace system { +namespace cuda { +namespace detail { +namespace detail { +namespace b40c_thrust { + + +// +// Do-nothing functors +// + +template +struct NopFunctor{ + template + __device__ __host__ __forceinline__ void operator()(ConvertedKeyType &converted_key) {} + __device__ __host__ __forceinline__ static bool MustApply(){ return false;} +}; + +// +// Do-nothing functors that indicate a mandatory pass (NOTE(review): MustApply() below returns false, same as NopFunctor -- confirm this is intended) +// + +template +struct MandatoryPassNopFunctor{ + template + __device__ __host__ __forceinline__ void operator()(ConvertedKeyType &converted_key) {} + __device__ __host__ __forceinline__ static bool MustApply(){ return false;} +}; + + +// +// Conversion for generic unsigned types +// + +template struct KeyConversion { + typedef T UnsignedBits; +}; + +template +struct PreprocessKeyFunctor{ + template + __device__ __host__ __forceinline__ void operator()(ConvertedKeyType &converted_key) {} + __device__ __host__ __forceinline__ static bool 
MustApply(){ return false;} +}; + +template +struct PostprocessKeyFunctor { + template + __device__ __host__ __forceinline__ void operator()(ConvertedKeyType &converted_key) {} + __device__ __host__ __forceinline__ static bool MustApply(){ return false;} +}; + + + +// +// Conversion for floats +// + +template <> struct KeyConversion { + typedef unsigned int UnsignedBits; +}; + +template <> +struct PreprocessKeyFunctor { + __device__ __host__ __forceinline__ void operator()(unsigned int &converted_key) { + + unsigned int mask = (converted_key & 0x80000000) ? 0xffffffff : 0x80000000; + converted_key ^= mask; + } + __device__ __host__ __forceinline__ static bool MustApply(){ return true;} +}; + +template <> +struct PostprocessKeyFunctor { + __device__ __host__ __forceinline__ void operator()(unsigned int &converted_key) { + + unsigned int mask = (converted_key & 0x80000000) ? 0x80000000 : 0xffffffff; + converted_key ^= mask; + } + __device__ __host__ __forceinline__ static bool MustApply(){ return true;} +}; + + + +// +// Conversion for doubles +// + +template <> struct KeyConversion { + typedef unsigned long long UnsignedBits; +}; + +template <> +struct PreprocessKeyFunctor { + __device__ __host__ __forceinline__ void operator()(unsigned long long &converted_key) { + + unsigned long long mask = (converted_key & 0x8000000000000000) ? 0xffffffffffffffff : 0x8000000000000000; + converted_key ^= mask; + } + __device__ __host__ __forceinline__ static bool MustApply(){ return true;} +}; + +template <> +struct PostprocessKeyFunctor { + __device__ __host__ __forceinline__ void operator()(unsigned long long &converted_key) { + unsigned long long mask = (converted_key & 0x8000000000000000) ? 
0x8000000000000000 : 0xffffffffffffffff; + converted_key ^= mask; + } + __device__ __host__ __forceinline__ static bool MustApply(){ return true;} +}; + + +// +// Conversion for signed chars +// + +template <> struct KeyConversion { + typedef unsigned char UnsignedBits; +}; + +template <> +struct PreprocessKeyFunctor { + __device__ __host__ __forceinline__ void operator()(unsigned char &converted_key) { + // char is unsigned on some platforms, so we have to check + if(std::numeric_limits::is_signed) + { + const unsigned int SIGN_MASK = 1u << ((sizeof(char) * 8) - 1); + converted_key ^= SIGN_MASK; + } + } + __device__ __host__ __forceinline__ static bool MustApply(){ return std::numeric_limits::is_signed;} +}; + +template <> +struct PostprocessKeyFunctor { + __device__ __host__ __forceinline__ void operator()(unsigned char &converted_key) { + // char is unsigned on some platforms, so we have to check + if(std::numeric_limits::is_signed) + { + const unsigned int SIGN_MASK = 1u << ((sizeof(char) * 8) - 1); + converted_key ^= SIGN_MASK; + } + } + __device__ __host__ __forceinline__ static bool MustApply(){ return std::numeric_limits::is_signed;} +}; + + +// TODO handle this more gracefully +template <> struct KeyConversion { + typedef unsigned char UnsignedBits; +}; + +template <> +struct PreprocessKeyFunctor { + __device__ __host__ __forceinline__ void operator()(unsigned char &converted_key) { + const unsigned int SIGN_MASK = 1u << ((sizeof(char) * 8) - 1); + converted_key ^= SIGN_MASK; + } + __device__ __host__ __forceinline__ static bool MustApply(){ return true;} +}; + +template <> +struct PostprocessKeyFunctor { + __device__ __host__ __forceinline__ void operator()(unsigned char &converted_key) { + const unsigned int SIGN_MASK = 1u << ((sizeof(char) * 8) - 1); + converted_key ^= SIGN_MASK; + } + __device__ __host__ __forceinline__ static bool MustApply(){ return true;} +}; + + +// +// Conversion for signed shorts +// + +template <> struct KeyConversion { + 
typedef unsigned short UnsignedBits; +}; + +template <> +struct PreprocessKeyFunctor { + __device__ __host__ __forceinline__ void operator()(unsigned short &converted_key) { + const unsigned int SIGN_MASK = 1u << ((sizeof(short) * 8) - 1); + converted_key ^= SIGN_MASK; + } + __device__ __host__ __forceinline__ static bool MustApply(){ return true;} +}; + +template <> +struct PostprocessKeyFunctor { + __device__ __host__ __forceinline__ void operator()(unsigned short &converted_key) { + const unsigned int SIGN_MASK = 1u << ((sizeof(short) * 8) - 1); + converted_key ^= SIGN_MASK; + } + __device__ __host__ __forceinline__ static bool MustApply(){ return true;} +}; + + + +// +// Conversion for signed ints +// + +template <> struct KeyConversion { + typedef unsigned int UnsignedBits; +}; + +template <> +struct PreprocessKeyFunctor { + __device__ __host__ __forceinline__ void operator()(unsigned int &converted_key) { + const unsigned int SIGN_MASK = 1u << ((sizeof(int) * 8) - 1); + converted_key ^= SIGN_MASK; + } + __device__ __host__ __forceinline__ static bool MustApply(){ return true;} +}; + +template <> +struct PostprocessKeyFunctor { + __device__ __host__ __forceinline__ void operator()(unsigned int &converted_key) { + const unsigned int SIGN_MASK = 1u << ((sizeof(int) * 8) - 1); + converted_key ^= SIGN_MASK; + } + __device__ __host__ __forceinline__ static bool MustApply(){ return true;} +}; + + + +// +// Conversion for signed longs +// + +// TODO rework this with metaprogramming +template <> struct KeyConversion { +#if ULONG_MAX == UINT_MAX + typedef unsigned int UnsignedBits; +#else + typedef unsigned long long UnsignedBits; +#endif +}; + +// TODO rework this with metaprogramming +template <> struct KeyConversion { +#if ULONG_MAX == UINT_MAX + typedef unsigned int UnsignedBits; +#else + typedef unsigned long long UnsignedBits; +#endif +}; + +template <> +struct PreprocessKeyFunctor { + __device__ __host__ __forceinline__ void operator()(typename 
KeyConversion::UnsignedBits& converted_key) { + const typename KeyConversion::UnsignedBits SIGN_MASK = 1ul << ((sizeof(long) * 8) - 1); + converted_key ^= SIGN_MASK; + } + __device__ __host__ __forceinline__ static bool MustApply(){ return true;} +}; + +template <> +struct PostprocessKeyFunctor { + __device__ __host__ __forceinline__ void operator()(typename KeyConversion::UnsignedBits& converted_key) { + const typename KeyConversion::UnsignedBits SIGN_MASK = 1ul << ((sizeof(long) * 8) - 1); + converted_key ^= SIGN_MASK; + } + __device__ __host__ __forceinline__ static bool MustApply(){ return true;} +}; + + + +// +// Conversion for signed long longs +// + +template <> struct KeyConversion { + typedef unsigned long long UnsignedBits; +}; + +template <> +struct PreprocessKeyFunctor { + __device__ __host__ __forceinline__ void operator()(unsigned long long &converted_key) { + const unsigned long long SIGN_MASK = 1ull << ((sizeof(long long) * 8) - 1); + converted_key ^= SIGN_MASK; + } + __device__ __host__ __forceinline__ static bool MustApply(){ return true;} +}; + +template <> +struct PostprocessKeyFunctor { + __device__ __host__ __forceinline__ void operator()(unsigned long long &converted_key) { + const unsigned long long SIGN_MASK = 1ull << ((sizeof(long long) * 8) - 1); + converted_key ^= SIGN_MASK; + } + __device__ __host__ __forceinline__ static bool MustApply(){ return true;} +}; + + +} // end namespace b40c_thrust +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_reduction_kernel.h b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_reduction_kernel.h new file mode 100644 index 0000000..a8f91d3 --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_reduction_kernel.h @@ -0,0 +1,439 @@ +/****************************************************************************** + * + * Copyright 
2010 Duane Merrill + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * + * + * + * AUTHORS' REQUEST: + * + * If you use|reference|benchmark this code, please cite our Technical + * Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf): + * + * @TechReport{ Merrill:Sorting:2010, + * author = "Duane Merrill and Andrew Grimshaw", + * title = "Revisiting Sorting for GPGPU Stream Architectures", + * year = "2010", + * institution = "University of Virginia, Department of Computer Science", + * address = "Charlottesville, VA, USA", + * number = "CS2010-03" + * } + * + * For more information, see our Google Code project site: + * http://code.google.com/p/back40computing/ + * + * Thanks! 
+ * + ******************************************************************************/ + + +/****************************************************************************** + * Bottom-level digit-reduction/counting kernel + ******************************************************************************/ + +#pragma once + +#include "radixsort_kernel_common.h" + +namespace thrust { +namespace system { +namespace cuda { +namespace detail { +namespace detail { +namespace b40c_thrust { + +/****************************************************************************** + * Defines + ******************************************************************************/ + +const int BYTE_ENCODE_SHIFT = 0x3; + + +/****************************************************************************** + * Cycle-processing Routines + ******************************************************************************/ + +__device__ __forceinline__ int DecodeInt(int encoded, int quad_byte){ + return (encoded >> quad_byte) & 0xff; // shift right 8 bits per digit and return rightmost 8 bits +} + + +__device__ __forceinline__ int EncodeInt(int count, int quad_byte) { + return count << quad_byte; // shift left 8 bits per digit +} + + +template +__device__ __forceinline__ void DecodeDigit( + K key, + int &lane, + int &quad_shift) +{ + const K DIGIT_MASK = RADIX_DIGITS - 1; + lane = (key & (DIGIT_MASK << BIT)) >> (BIT + 2); + + const K QUAD_MASK = (RADIX_DIGITS < 4) ? 0x1 : 0x3; + if (BIT == 32) { + // N.B.: This takes one more instruction than the code below it, but + // otherwise the compiler goes nuts and shoves hundreds of bytes + // to lmem when bit = 32 on 64-bit keys. 
+ quad_shift = ((key >> BIT) & QUAD_MASK) << BYTE_ENCODE_SHIFT; + } else { + quad_shift = MagnitudeShift(key & (QUAD_MASK << BIT)); + } +} + + +template +__device__ __forceinline__ void ReduceEncodedCounts( + int local_counts[LANES_PER_WARP][4], + int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) +{ + const int LOG_PARTIALS_PER_THREAD = B40C_RADIXSORT_LOG_THREADS - B40C_LOG_WARP_THREADS; + const int PARTIALS_PER_THREAD = 1 << LOG_PARTIALS_PER_THREAD; + + int encoded; + int idx = threadIdx.x & (B40C_WARP_THREADS - 1); + + + __syncthreads(); + + #pragma unroll + for (int j = 0; j < (int) LANES_PER_WARP; j++) { + + int warp_id = (threadIdx.x >> B40C_LOG_WARP_THREADS) + (j * B40C_RADIXSORT_WARPS); + if (warp_id < SCAN_LANES) { + + // rest of my elements + #pragma unroll + for (int i = 0; i < (int) PARTIALS_PER_THREAD; i++) { + encoded = encoded_carry[warp_id][idx + (i * B40C_WARP_THREADS)]; + local_counts[j][0] += DecodeInt(encoded, 0u << BYTE_ENCODE_SHIFT); + local_counts[j][1] += DecodeInt(encoded, 1u << BYTE_ENCODE_SHIFT); + local_counts[j][2] += DecodeInt(encoded, 2u << BYTE_ENCODE_SHIFT); + local_counts[j][3] += DecodeInt(encoded, 3u << BYTE_ENCODE_SHIFT); + } + + if (FINAL_REDUCE) { + // reduce all four packed fields, leaving them in the first four elements of our row + WarpReduce(idx, &encoded_carry[warp_id][0], local_counts[j][0]); + WarpReduce(idx, &encoded_carry[warp_id][1], local_counts[j][1]); + WarpReduce(idx, &encoded_carry[warp_id][2], local_counts[j][2]); + WarpReduce(idx, &encoded_carry[warp_id][3], local_counts[j][3]); + } + } + } + + __syncthreads(); + +} + + +template +__device__ __forceinline__ void Bucket( + K input, + int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS], + PreprocessFunctor preprocess = PreprocessFunctor()) +{ + int lane, quad_shift; + preprocess(input); + DecodeDigit(input, lane, quad_shift); + encoded_carry[lane][threadIdx.x] += EncodeInt(1, quad_shift); +} + + +template +struct LoadOp; + +template +struct LoadOp +{ + 
static __device__ __forceinline__ void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) + { + K key = d_in_keys[offset + threadIdx.x]; + Bucket(key, encoded_carry); + } +}; + +template +struct LoadOp +{ + static __device__ __forceinline__ void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) + { + LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry); + LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 1), encoded_carry); + } +}; + +template +struct LoadOp +{ + static __device__ __forceinline__ void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) + { + LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry); + LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 2), encoded_carry); + } +}; + +template +struct LoadOp +{ + static __device__ __forceinline__ void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) + { + K keys[8]; + + keys[0] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 0) + threadIdx.x]; + keys[1] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 1) + threadIdx.x]; + keys[2] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 2) + threadIdx.x]; + keys[3] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 3) + threadIdx.x]; + + if (B40C_FERMI(__CUDA_ARCH__)) __syncthreads(); + + keys[4] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 4) + threadIdx.x]; + keys[5] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 5) + threadIdx.x]; + keys[6] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 6) + threadIdx.x]; + keys[7] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 7) + threadIdx.x]; + + Bucket(keys[0], encoded_carry); + Bucket(keys[1], encoded_carry); + Bucket(keys[2], encoded_carry); + Bucket(keys[3], encoded_carry); + Bucket(keys[4], encoded_carry); + Bucket(keys[5], encoded_carry); + Bucket(keys[6], 
encoded_carry); + Bucket(keys[7], encoded_carry); + } +}; + +template +struct LoadOp { + + static __device__ __forceinline__ void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) + { + LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry); + LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 8), encoded_carry); + } +}; + +template +struct LoadOp { + + static __device__ __forceinline__ void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) + { + LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry); + LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 16), encoded_carry); + } +}; + +template +struct LoadOp { + + static __device__ __forceinline__ void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) + { + LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry); + LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 32), encoded_carry); + } +}; + +template +struct LoadOp { + + static __device__ __forceinline__ void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) + { + LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry); + LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 64), encoded_carry); + } +}; + +template +struct LoadOp { + + static __device__ __forceinline__ void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) + { + LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry); + LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 128), encoded_carry); + LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 192), encoded_carry); + LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 
224), encoded_carry); + LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 240), encoded_carry); + LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 248), encoded_carry); + } +}; + + +template +__device__ __forceinline__ void ResetEncodedCarry( + int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) +{ + #pragma unroll + for (int SCAN_LANE = 0; SCAN_LANE < (int) SCAN_LANES; SCAN_LANE++) { + encoded_carry[SCAN_LANE][threadIdx.x] = 0; + } +} + + +template +__device__ __forceinline__ int ProcessLoads( + K *d_in_keys, + int loads, + int &offset, + int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS], + int local_counts[LANES_PER_WARP][4]) +{ + // Unroll batches of loads with occasional reduction to avoid overflow + while (loads >= 32) { + + LoadOp::BlockOfLoads(d_in_keys, offset, encoded_carry); + offset += B40C_RADIXSORT_THREADS * 32; + loads -= 32; + + // Reduce int local count registers to prevent overflow + ReduceEncodedCounts( + local_counts, + encoded_carry); + + // Reset encoded counters + ResetEncodedCarry(encoded_carry); + } + + int retval = loads; + + // Wind down loads in decreasing batch sizes + + while (loads >= 4) { + LoadOp::BlockOfLoads(d_in_keys, offset, encoded_carry); + offset += B40C_RADIXSORT_THREADS * 4; + loads -= 4; + } + + while (loads) { + LoadOp::BlockOfLoads(d_in_keys, offset, encoded_carry); + offset += B40C_RADIXSORT_THREADS * 1; + loads--; + } + + return retval; +} + + +/****************************************************************************** + * Reduction/counting Kernel Entry Point + ******************************************************************************/ + +template +__launch_bounds__ (B40C_RADIXSORT_THREADS, B40C_RADIXSORT_REDUCE_CTA_OCCUPANCY(__CUDA_ARCH__)) +__global__ +void RakingReduction( + bool *d_from_alt_storage, + int *d_spine, + K *d_in_keys, + K *d_out_keys, + CtaDecomposition work_decomposition) +{ + const int RADIX_DIGITS = 1 << RADIX_BITS; + + const int LOG_SCAN_LANES = 
(RADIX_BITS >= 2) ? RADIX_BITS - 2 : 0; // Always at least one fours group + const int SCAN_LANES = 1 << LOG_SCAN_LANES; + + const int LOG_LANES_PER_WARP = (SCAN_LANES > B40C_RADIXSORT_WARPS) ? LOG_SCAN_LANES - B40C_RADIXSORT_LOG_WARPS : 0; // Always at least one fours group per warp + const int LANES_PER_WARP = 1 << LOG_LANES_PER_WARP; + + + // Each thread gets its own column of fours-groups (for conflict-free updates) + __shared__ int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]; + + // Each thread is also responsible for aggregating an unencoded segment of a fours-group + int local_counts[LANES_PER_WARP][4]; + + // Determine where to read our input + bool from_alt_storage = (PASS == 0) ? false : d_from_alt_storage[PASS & 0x1]; + if (from_alt_storage) d_in_keys = d_out_keys; + + // Calculate our threadblock's range + int offset, block_elements; + if (blockIdx.x < work_decomposition.num_big_blocks) { + offset = work_decomposition.big_block_elements * blockIdx.x; + block_elements = work_decomposition.big_block_elements; + } else { + offset = (work_decomposition.normal_block_elements * blockIdx.x) + (work_decomposition.num_big_blocks * B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V)); + block_elements = work_decomposition.normal_block_elements; + } + + // Initialize local counts + #pragma unroll + for (int LANE = 0; LANE < (int) LANES_PER_WARP; LANE++) { + local_counts[LANE][0] = 0; + local_counts[LANE][1] = 0; + local_counts[LANE][2] = 0; + local_counts[LANE][3] = 0; + } + + // Reset encoded counters + ResetEncodedCarry(encoded_carry); + + // Process loads + int loads = block_elements >> B40C_RADIXSORT_LOG_THREADS; + int unreduced_loads = ProcessLoads( + d_in_keys, + loads, + offset, + encoded_carry, + local_counts); + + // Cleanup if we're the last block + if ((blockIdx.x == gridDim.x - 1) && (work_decomposition.extra_elements_last_block)) { + + const int LOADS_PER_CYCLE = B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V) / B40C_RADIXSORT_THREADS; + + // 
If extra guarded loads may cause overflow, reduce now and reset counters + if (unreduced_loads + LOADS_PER_CYCLE > 255) { + + ReduceEncodedCounts( + local_counts, + encoded_carry); + + ResetEncodedCarry(encoded_carry); + } + + // perform up to LOADS_PER_CYCLE extra guarded loads + #pragma unroll + for (int EXTRA_LOAD = 0; EXTRA_LOAD < (int) LOADS_PER_CYCLE; EXTRA_LOAD++) { + if (threadIdx.x + (B40C_RADIXSORT_THREADS * EXTRA_LOAD) < work_decomposition.extra_elements_last_block) { + K key = d_in_keys[offset + (B40C_RADIXSORT_THREADS * EXTRA_LOAD) + threadIdx.x]; + Bucket(key, encoded_carry); + } + } + } + + // Aggregate + ReduceEncodedCounts( + local_counts, + encoded_carry); + + // Write carry in parallel (carries per row are in the first four elements of each row) + if (threadIdx.x < RADIX_DIGITS) { + + int row = threadIdx.x >> 2; + int col = threadIdx.x & 3; + d_spine[(gridDim.x * threadIdx.x) + blockIdx.x] = encoded_carry[row][col]; + } +} + +} // end namespace b40c_thrust +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_scanscatter_kernel.h b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_scanscatter_kernel.h new file mode 100644 index 0000000..1377999 --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_scanscatter_kernel.h @@ -0,0 +1,1207 @@ +/****************************************************************************** + * + * Copyright 2010 Duane Merrill + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * + * + * + * AUTHORS' REQUEST: + * + * If you use|reference|benchmark this code, please cite our Technical + * Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf): + * + * @TechReport{ Merrill:Sorting:2010, + * author = "Duane Merrill and Andrew Grimshaw", + * title = "Revisiting Sorting for GPGPU Stream Architectures", + * year = "2010", + * institution = "University of Virginia, Department of Computer Science", + * address = "Charlottesville, VA, USA", + * number = "CS2010-03" + * } + * + * For more information, see our Google Code project site: + * http://code.google.com/p/back40computing/ + * + * Thanks! 
+ * + ******************************************************************************/ + + +/****************************************************************************** +// Bottom-level digit scanning/scattering kernel + ******************************************************************************/ + +#pragma once + +#include "radixsort_kernel_common.h" + +namespace thrust { +namespace system { +namespace cuda { +namespace detail { +namespace detail { +namespace b40c_thrust { + +/****************************************************************************** + * Appropriate substitutes to use for out-of-bounds key (and value) offsets + ******************************************************************************/ + +template +__device__ __forceinline__ T DefaultextraValue() { + return T(); +} + +template <> +__device__ __forceinline__ unsigned char DefaultextraValue() { + return (unsigned char) -1; +} + +template <> +__device__ __forceinline__ unsigned short DefaultextraValue() { + return (unsigned short) -1; +} + +template <> +__device__ __forceinline__ unsigned int DefaultextraValue() { + return (unsigned int) -1u; +} + +template <> +__device__ __forceinline__ unsigned long DefaultextraValue() { + return (unsigned long) -1ul; +} + +template <> +__device__ __forceinline__ unsigned long long DefaultextraValue() { + return (unsigned long long) -1ull; +} + + +/****************************************************************************** + * Cycle-processing Routines + ******************************************************************************/ + +template +__device__ __forceinline__ int DecodeDigit(K key) +{ + const K DIGIT_MASK = RADIX_DIGITS - 1; + return (key >> BIT) & DIGIT_MASK; +} + + +template +__device__ __forceinline__ void DecodeDigit( + K key, + int &digit, + int &flag_offset, // in bytes + const int SET_OFFSET) +{ + const int PADDED_BYTES_PER_LANE = PADDED_PARTIALS_PER_LANE * 4; + const int SET_OFFSET_BYTES = SET_OFFSET * 4; + const K QUAD_MASK = 
(RADIX_DIGITS < 4) ? 0x1 : 0x3; + + digit = DecodeDigit(key); + int lane = digit >> 2; + int quad_byte = digit & QUAD_MASK; + + flag_offset = SET_OFFSET_BYTES + FastMul(lane, PADDED_BYTES_PER_LANE) + quad_byte; +} + + +template +__device__ __forceinline__ void DecodeDigits( + typename VecType::Type keypairs[SETS_PER_PASS], + int2 digits[SETS_PER_PASS], + int2 flag_offsets[SETS_PER_PASS]) // in bytes +{ + + #pragma unroll + for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { + + const int SET_OFFSET = SET * SCAN_LANES_PER_SET * PADDED_PARTIALS_PER_LANE; + + DecodeDigit( + keypairs[SET].x, digits[SET].x, flag_offsets[SET].x, SET_OFFSET); + + DecodeDigit( + keypairs[SET].y, digits[SET].y, flag_offsets[SET].y, SET_OFFSET); + } +} + + +template +__device__ __forceinline__ void GuardedReadSet( + T *in, + typename VecType::Type &pair, + int offset, + int extra[1], + PreprocessFunctor preprocess = PreprocessFunctor()) +{ + if (offset - extra[0] < 0) { + pair.x = in[offset]; + preprocess(pair.x); + } else { + pair.x = DefaultextraValue(); + } + + if (offset + 1 - extra[0] < 0) { + pair.y = in[offset + 1]; + preprocess(pair.y); + } else { + pair.y = DefaultextraValue(); + } +} + + +template +__device__ __forceinline__ void ReadSets( + typename VecType::Type *d_in, + typename VecType::Type pairs[SETS_PER_PASS], + const int BASE2, + int extra[1], + PreprocessFunctor preprocess = PreprocessFunctor()) +{ + if (UNGUARDED_IO) { + + // N.B. 
-- I wish we could do some pragma unrolling here too, but the compiler makes it 1% slower + if (SETS_PER_PASS > 0) pairs[0] = d_in[threadIdx.x + BASE2 + (B40C_RADIXSORT_THREADS * 0)]; + if (SETS_PER_PASS > 1) pairs[1] = d_in[threadIdx.x + BASE2 + (B40C_RADIXSORT_THREADS * 1)]; + if (SETS_PER_PASS > 2) pairs[2] = d_in[threadIdx.x + BASE2 + (B40C_RADIXSORT_THREADS * 2)]; + if (SETS_PER_PASS > 3) pairs[3] = d_in[threadIdx.x + BASE2 + (B40C_RADIXSORT_THREADS * 3)]; + + #pragma unroll + for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { + preprocess(pairs[SET].x); + preprocess(pairs[SET].y); + } + + } else { + + T* in = (T*) d_in; + + // N.B. -- I wish we could do some pragma unrolling here, but the compiler won't let + // us with user-defined value types (e.g., Fribbitz): "Advisory: Loop was not unrolled, cannot deduce loop trip count" + + if (SETS_PER_PASS > 0) GuardedReadSet(in, pairs[0], (threadIdx.x << 1) + (BASE2 << 1) + (B40C_RADIXSORT_THREADS * 2 * 0), extra); + if (SETS_PER_PASS > 1) GuardedReadSet(in, pairs[1], (threadIdx.x << 1) + (BASE2 << 1) + (B40C_RADIXSORT_THREADS * 2 * 1), extra); + if (SETS_PER_PASS > 2) GuardedReadSet(in, pairs[2], (threadIdx.x << 1) + (BASE2 << 1) + (B40C_RADIXSORT_THREADS * 2 * 2), extra); + if (SETS_PER_PASS > 3) GuardedReadSet(in, pairs[3], (threadIdx.x << 1) + (BASE2 << 1) + (B40C_RADIXSORT_THREADS * 2 * 3), extra); + } +} + + +template +__device__ __forceinline__ void PlacePartials( + unsigned char * base_partial, + int2 digits[SETS_PER_PASS], + int2 flag_offsets[SETS_PER_PASS]) +{ + #pragma unroll + for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { + base_partial[flag_offsets[SET].x] = 1; + base_partial[flag_offsets[SET].y] = 1 + (digits[SET].x == digits[SET].y); + } +} + + +template +__device__ __forceinline__ void ExtractRanks( + unsigned char * base_partial, + int2 digits[SETS_PER_PASS], + int2 flag_offsets[SETS_PER_PASS], + int2 ranks[SETS_PER_PASS]) +{ + #pragma unroll + for (int SET = 0; SET < (int) SETS_PER_PASS; 
SET++) { + ranks[SET].x = base_partial[flag_offsets[SET].x]; + ranks[SET].y = base_partial[flag_offsets[SET].y] + (digits[SET].x == digits[SET].y); + } +} + + +template +__device__ __forceinline__ void UpdateRanks( + int2 digits[SETS_PER_PASS], + int2 ranks[SETS_PER_PASS], + int digit_counts[SETS_PER_PASS][RADIX_DIGITS]) +{ + // N.B.: I wish we could pragma unroll here, but doing so currently + // results in the 3.1 compiler on 64-bit platforms generating bad + // code for SM1.3, resulting in incorrect sorting (e.g., problem size 16) + + if (SETS_PER_PASS > 0) { + ranks[0].x += digit_counts[0][digits[0].x]; + ranks[0].y += digit_counts[0][digits[0].y]; + } + if (SETS_PER_PASS > 1) { + ranks[1].x += digit_counts[1][digits[1].x]; + ranks[1].y += digit_counts[1][digits[1].y]; + } + if (SETS_PER_PASS > 2) { + ranks[2].x += digit_counts[2][digits[2].x]; + ranks[2].y += digit_counts[2][digits[2].y]; + } + if (SETS_PER_PASS > 3) { + ranks[3].x += digit_counts[3][digits[3].x]; + ranks[3].y += digit_counts[3][digits[3].y]; + } +} + +template +__device__ __forceinline__ void UpdateRanks( + int2 digits[PASSES_PER_CYCLE][SETS_PER_PASS], + int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS], + int digit_counts[PASSES_PER_CYCLE][SETS_PER_PASS][RADIX_DIGITS]) +{ + // N.B.: I wish we could pragma unroll here, but doing so currently + // results in the 3.1 compiler on 64-bit platforms generating bad + // code for SM1.3, resulting in incorrect sorting (e.g., problem size 16) + + if (PASSES_PER_CYCLE > 0) UpdateRanks(digits[0], ranks[0], digit_counts[0]); + if (PASSES_PER_CYCLE > 1) UpdateRanks(digits[1], ranks[1], digit_counts[1]); + if (PASSES_PER_CYCLE > 2) UpdateRanks(digits[2], ranks[2], digit_counts[2]); + if (PASSES_PER_CYCLE > 3) UpdateRanks(digits[3], ranks[3], digit_counts[3]); +} + + + +template +__device__ __forceinline__ void PrefixScanOverLanes( + int raking_segment[], + int warpscan[SCAN_LANES_PER_PASS][3][RAKING_THREADS_PER_LANE], + int copy_section) +{ + // Upsweep rake + 
int partial_reduction = SerialReduce(raking_segment); + + // Warpscan reduction in digit warpscan_lane + int warpscan_lane = threadIdx.x >> LOG_RAKING_THREADS_PER_LANE; + int group_prefix = WarpScan( + warpscan[warpscan_lane], + partial_reduction, + copy_section); + + // Downsweep rake + SerialScan(raking_segment, group_prefix); + +} + + +template +__device__ __forceinline__ void RecoverDigitCounts( + int warpscan[SCAN_LANES_PER_PASS][3][RAKING_THREADS_PER_LANE], + int counts[SETS_PER_PASS], + int copy_section) +{ + int my_lane = threadIdx.x >> 2; + int my_quad_byte = threadIdx.x & 3; + + #pragma unroll + for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { + unsigned char *warpscan_count = (unsigned char *) &warpscan[my_lane + (SCAN_LANES_PER_SET * SET)][1 + copy_section][RAKING_THREADS_PER_LANE - 1]; + counts[SET] = warpscan_count[my_quad_byte]; + } +} + +template +__device__ __forceinline__ void CorrectUnguardedSetOverflow( + int2 set_digits, + int &set_count) +{ + if (WarpVoteAll(RADIX_DIGITS, set_count <= 1)) { + // All first-pass, first set keys have same digit. + set_count = (threadIdx.x == set_digits.x) ? 256 : 0; + } +} + +template +__device__ __forceinline__ void CorrectUnguardedPassOverflow( + int2 pass_digits[SETS_PER_PASS], + int pass_counts[SETS_PER_PASS]) +{ + // N.B. 
-- I wish we could do some pragma unrolling here too, but the compiler won't comply, + // telling me "Advisory: Loop was not unrolled, unexpected call OPs" + + if (SETS_PER_PASS > 0) CorrectUnguardedSetOverflow(pass_digits[0], pass_counts[0]); + if (SETS_PER_PASS > 1) CorrectUnguardedSetOverflow(pass_digits[1], pass_counts[1]); + if (SETS_PER_PASS > 2) CorrectUnguardedSetOverflow(pass_digits[2], pass_counts[2]); + if (SETS_PER_PASS > 3) CorrectUnguardedSetOverflow(pass_digits[3], pass_counts[3]); +} + + +template +__device__ __forceinline__ void CorrectUnguardedCycleOverflow( + int2 cycle_digits[PASSES_PER_CYCLE][SETS_PER_PASS], + int cycle_counts[PASSES_PER_CYCLE][SETS_PER_PASS]) +{ + // N.B. -- I wish we could do some pragma unrolling here too, but the compiler won't comply, + // telling me "Advisory: Loop was not unrolled, unexpected call OPs" + + if (PASSES_PER_CYCLE > 0) CorrectUnguardedPassOverflow(cycle_digits[0], cycle_counts[0]); + if (PASSES_PER_CYCLE > 1) CorrectUnguardedPassOverflow(cycle_digits[1], cycle_counts[1]); +} + + +template +__device__ __forceinline__ void CorrectLastLaneOverflow(int &count, int extra[1]) +{ + if (WarpVoteAll(RADIX_DIGITS, count == 0) && (threadIdx.x == RADIX_DIGITS - 1)) { + // We're 'f' and we overflowed b/c of invalid 'f' placemarkers; the number of valid items in this set is the count of valid f's + count = extra[0] & 255; + } +} + + +template +__device__ __forceinline__ void CorrectForOverflows( + int2 digits[PASSES_PER_CYCLE][SETS_PER_PASS], + int counts[PASSES_PER_CYCLE][SETS_PER_PASS], + int extra[1]) +{ + if (!UNGUARDED_IO) { + + // Correct any overflow in the partially-filled last lane + int *linear_counts = (int *) counts; + CorrectLastLaneOverflow(linear_counts[SETS_PER_CYCLE - 1], extra); + } + + CorrectUnguardedCycleOverflow(digits, counts); +} + + +template < + typename K, + int BIT, + int RADIX_DIGITS, + int SCAN_LANES_PER_SET, + int SETS_PER_PASS, + int RAKING_THREADS_PER_PASS, + int SCAN_LANES_PER_PASS, + int 
LOG_RAKING_THREADS_PER_LANE, + int RAKING_THREADS_PER_LANE, + int PARTIALS_PER_SEG, + int PADDED_PARTIALS_PER_LANE, + int PASSES_PER_CYCLE> +__device__ __forceinline__ void ScanPass( + int *base_partial, + int *raking_partial, + int warpscan[SCAN_LANES_PER_PASS][3][RAKING_THREADS_PER_LANE], + typename VecType::Type keypairs[SETS_PER_PASS], + int2 digits[SETS_PER_PASS], + int2 flag_offsets[SETS_PER_PASS], + int2 ranks[SETS_PER_PASS], + int copy_section) +{ + // Reset smem + #pragma unroll + for (int SCAN_LANE = 0; SCAN_LANE < (int) SCAN_LANES_PER_PASS; SCAN_LANE++) { + base_partial[SCAN_LANE * PADDED_PARTIALS_PER_LANE] = 0; + } + + // Decode digits for first pass + DecodeDigits( + keypairs, digits, flag_offsets); + + // Encode counts into smem for first pass + PlacePartials( + (unsigned char *) base_partial, + digits, + flag_offsets); + + __syncthreads(); + + // Intra-group prefix scans for first pass + if (threadIdx.x < RAKING_THREADS_PER_PASS) { + + PrefixScanOverLanes( // first pass is offset right by one + raking_partial, + warpscan, + copy_section); + } + + __syncthreads(); + + // Extract ranks + ExtractRanks( + (unsigned char *) base_partial, + digits, + flag_offsets, + ranks); +} + + +/****************************************************************************** + * SM1.3 Local Exchange Routines + * + * Routines for exchanging keys (and values) in shared memory (i.e., local + * scattering) in order to facilitate coalesced global scattering + ******************************************************************************/ + +template +__device__ __forceinline__ void ScatterSets( + T *d_out, + typename VecType::Type pairs[SETS_PER_PASS], + int2 offsets[SETS_PER_PASS], + const int BASE4, + int extra[1], + PostprocessFunctor postprocess = PostprocessFunctor()) +{ + #pragma unroll + for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { + postprocess(pairs[SET].x); + postprocess(pairs[SET].y); + } + + // N.B. 
-- I wish we could do some pragma unrolling here too, but the compiler makes it 1% slower + + if (SETS_PER_PASS > 0) { + if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 0) < extra[0])) + d_out[offsets[0].x] = pairs[0].x; + if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 1) < extra[0])) + d_out[offsets[0].y] = pairs[0].y; + } + + if (SETS_PER_PASS > 1) { + if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 2) < extra[0])) + d_out[offsets[1].x] = pairs[1].x; + if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 3) < extra[0])) + d_out[offsets[1].y] = pairs[1].y; + } + + if (SETS_PER_PASS > 2) { + if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 4) < extra[0])) + d_out[offsets[2].x] = pairs[2].x; + if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 5) < extra[0])) + d_out[offsets[2].y] = pairs[2].y; + } + + if (SETS_PER_PASS > 3) { + if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 6) < extra[0])) + d_out[offsets[3].x] = pairs[3].x; + if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 7) < extra[0])) + d_out[offsets[3].y] = pairs[3].y; + } +} + +template +__device__ __forceinline__ void PushPairs( + T *swap, + typename VecType::Type pairs[PASSES_PER_CYCLE][SETS_PER_PASS], + int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS]) +{ + #pragma unroll + for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) { + + #pragma unroll + for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { + swap[ranks[PASS][SET].x] = pairs[PASS][SET].x; + swap[ranks[PASS][SET].y] = pairs[PASS][SET].y; + } + } +} + +template +__device__ __forceinline__ void ExchangePairs( + T *swap, + typename VecType::Type pairs[PASSES_PER_CYCLE][SETS_PER_PASS], + int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS]) +{ + // Push in Pairs + PushPairs(swap, pairs, ranks); + + __syncthreads(); + + // Extract pairs + #pragma unroll + for (int PASS = 0; PASS < (int) 
PASSES_PER_CYCLE; PASS++) { + + #pragma unroll + for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { + const int BLOCK = ((PASS * SETS_PER_PASS) + SET) * 2; + pairs[PASS][SET].x = swap[threadIdx.x + (B40C_RADIXSORT_THREADS * (BLOCK + 0))]; + pairs[PASS][SET].y = swap[threadIdx.x + (B40C_RADIXSORT_THREADS * (BLOCK + 1))]; + } + } +} + + +template < + typename K, + typename V, + int RADIX_DIGITS, + int BIT, + int PASSES_PER_CYCLE, + int SETS_PER_PASS, + bool UNGUARDED_IO, + typename PostprocessFunctor> +__device__ __forceinline__ void SwapAndScatterSm13( + typename VecType::Type keypairs[PASSES_PER_CYCLE][SETS_PER_PASS], + int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS], + int4 *exchange, + typename VecType::Type *d_in_values, + K *d_out_keys, + V *d_out_values, + int carry[RADIX_DIGITS], + int extra[1]) +{ + int2 offsets[PASSES_PER_CYCLE][SETS_PER_PASS]; + + // Swap keys according to ranks + ExchangePairs((K*) exchange, keypairs, ranks); + + // Calculate scatter offsets (re-decode digits from keys: it's less work than making a second exchange of digits) + #pragma unroll + for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) { + + #pragma unroll + for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { + const int BLOCK = ((PASS * SETS_PER_PASS) + SET) * 2; + offsets[PASS][SET].x = threadIdx.x + (B40C_RADIXSORT_THREADS * (BLOCK + 0)) + carry[DecodeDigit(keypairs[PASS][SET].x)]; + offsets[PASS][SET].y = threadIdx.x + (B40C_RADIXSORT_THREADS * (BLOCK + 1)) + carry[DecodeDigit(keypairs[PASS][SET].y)]; + } + } + + // Scatter keys + #pragma unroll + for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) { + const int BLOCK = PASS * SETS_PER_PASS * 2; + ScatterSets(d_out_keys, keypairs[PASS], offsets[PASS], B40C_RADIXSORT_THREADS * BLOCK, extra); + } + + if (!IsKeysOnly()) { + + __syncthreads(); + + // Read input data + typename VecType::Type datapairs[PASSES_PER_CYCLE][SETS_PER_PASS]; + + // N.B. 
-- I wish we could do some pragma unrolling here too, but the compiler won't comply, + // telling me "Advisory: Loop was not unrolled, unexpected control flow" + + if (PASSES_PER_CYCLE > 0) ReadSets >(d_in_values, datapairs[0], B40C_RADIXSORT_THREADS * SETS_PER_PASS * 0, extra); + if (PASSES_PER_CYCLE > 1) ReadSets >(d_in_values, datapairs[1], B40C_RADIXSORT_THREADS * SETS_PER_PASS * 1, extra); + + // Swap data according to ranks + ExchangePairs((V*) exchange, datapairs, ranks); + + // Scatter data + #pragma unroll + for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) { + const int BLOCK = PASS * SETS_PER_PASS * 2; + ScatterSets >(d_out_values, datapairs[PASS], offsets[PASS], B40C_RADIXSORT_THREADS * BLOCK, extra); + } + } +} + + +/****************************************************************************** + * SM1.0 Local Exchange Routines + * + * Routines for exchanging keys (and values) in shared memory (i.e., local + * scattering) in order to facilitate coalesced global scattering + ******************************************************************************/ + +template < + typename T, + int RADIX_DIGITS, + bool UNGUARDED_IO, + typename PostprocessFunctor> +__device__ __forceinline__ void ScatterPass( + T *swapmem, + T *d_out, + int digit_scan[2][RADIX_DIGITS], + int carry[RADIX_DIGITS], + int extra[1], + int base_digit, + PostprocessFunctor postprocess = PostprocessFunctor()) +{ + const int LOG_STORE_TXN_THREADS = B40C_LOG_MEM_BANKS(__CUDA_ARCH__); + const int STORE_TXN_THREADS = 1 << LOG_STORE_TXN_THREADS; + + int store_txn_idx = threadIdx.x & (STORE_TXN_THREADS - 1); + int store_txn_digit = threadIdx.x >> LOG_STORE_TXN_THREADS; + + int my_digit = base_digit + store_txn_digit; + if (my_digit < RADIX_DIGITS) { + + int my_exclusive_scan = digit_scan[1][my_digit - 1]; + int my_inclusive_scan = digit_scan[1][my_digit]; + int my_digit_count = my_inclusive_scan - my_exclusive_scan; + + int my_carry = carry[my_digit] + my_exclusive_scan; + int 
my_aligned_offset = store_txn_idx - (my_carry & (STORE_TXN_THREADS - 1)); + + while (my_aligned_offset < my_digit_count) { + + if ((my_aligned_offset >= 0) && (UNGUARDED_IO || (my_exclusive_scan + my_aligned_offset < extra[0]))) { + + T datum = swapmem[my_exclusive_scan + my_aligned_offset]; + postprocess(datum); + d_out[my_carry + my_aligned_offset] = datum; + } + my_aligned_offset += STORE_TXN_THREADS; + } + } +} + +template < + typename T, + int RADIX_DIGITS, + int PASSES_PER_CYCLE, + int SETS_PER_PASS, + bool UNGUARDED_IO, + typename PostprocessFunctor> +__device__ __forceinline__ void SwapAndScatterPairs( + typename VecType::Type pairs[PASSES_PER_CYCLE][SETS_PER_PASS], + int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS], + T *exchange, + T *d_out, + int carry[RADIX_DIGITS], + int digit_scan[2][RADIX_DIGITS], + int extra[1]) +{ + const int SCATTER_PASS_DIGITS = B40C_RADIXSORT_WARPS * (B40C_WARP_THREADS / B40C_MEM_BANKS(__CUDA_ARCH__)); + const int SCATTER_PASSES = RADIX_DIGITS / SCATTER_PASS_DIGITS; + + // Push in pairs + PushPairs(exchange, pairs, ranks); + + __syncthreads(); + + // N.B. 
-- I wish we could do some pragma unrolling here too, but the compiler won't comply, + // telling me "Advisory: Loop was not unrolled, not an innermost loop" + + if (SCATTER_PASSES > 0) ScatterPass(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 0); + if (SCATTER_PASSES > 1) ScatterPass(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 1); + if (SCATTER_PASSES > 2) ScatterPass(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 2); + if (SCATTER_PASSES > 3) ScatterPass(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 3); + if (SCATTER_PASSES > 4) ScatterPass(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 4); + if (SCATTER_PASSES > 5) ScatterPass(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 5); + if (SCATTER_PASSES > 6) ScatterPass(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 6); + if (SCATTER_PASSES > 7) ScatterPass(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 7); +} + + +template < + typename K, + typename V, + int RADIX_DIGITS, + int PASSES_PER_CYCLE, + int SETS_PER_PASS, + bool UNGUARDED_IO, + typename PostprocessFunctor> +__device__ __forceinline__ void SwapAndScatterSm10( + typename VecType::Type keypairs[PASSES_PER_CYCLE][SETS_PER_PASS], + int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS], + int4 *exchange, + typename VecType::Type *d_in_values, + K *d_out_keys, + V *d_out_values, + int carry[RADIX_DIGITS], + int digit_scan[2][RADIX_DIGITS], + int extra[1]) +{ + // Swap and scatter keys + SwapAndScatterPairs( + keypairs, ranks, (K*) exchange, d_out_keys, carry, digit_scan, extra); + + if (!IsKeysOnly()) { + + __syncthreads(); + + // N.B. 
-- I wish we could do some pragma unrolling here too, but the compiler won't comply, + // telling me "Advisory: Loop was not unrolled, unexpected control flow" + + // Read input data + typename VecType::Type datapairs[PASSES_PER_CYCLE][SETS_PER_PASS]; + if (PASSES_PER_CYCLE > 0) ReadSets >(d_in_values, datapairs[0], B40C_RADIXSORT_THREADS * SETS_PER_PASS * 0, extra); + if (PASSES_PER_CYCLE > 1) ReadSets >(d_in_values, datapairs[1], B40C_RADIXSORT_THREADS * SETS_PER_PASS * 1, extra); + + // Swap and scatter data + SwapAndScatterPairs >( + datapairs, ranks, (V*) exchange, d_out_values, carry, digit_scan, extra); + } +} + + +/****************************************************************************** + * Cycle of RADIXSORT_CYCLE_ELEMENTS keys (and values) + ******************************************************************************/ + +template < + typename K, + typename V, + int BIT, + bool UNGUARDED_IO, + int RADIX_DIGITS, + int LOG_SCAN_LANES_PER_SET, + int SCAN_LANES_PER_SET, + int SETS_PER_PASS, + int PASSES_PER_CYCLE, + int LOG_SCAN_LANES_PER_PASS, + int SCAN_LANES_PER_PASS, + int LOG_PARTIALS_PER_LANE, + int LOG_PARTIALS_PER_PASS, + int LOG_RAKING_THREADS_PER_PASS, + int RAKING_THREADS_PER_PASS, + int LOG_RAKING_THREADS_PER_LANE, + int RAKING_THREADS_PER_LANE, + int LOG_PARTIALS_PER_SEG, + int PARTIALS_PER_SEG, + int LOG_PARTIALS_PER_ROW, + int PARTIALS_PER_ROW, + int LOG_SEGS_PER_ROW, + int SEGS_PER_ROW, + int LOG_ROWS_PER_SET, + int LOG_ROWS_PER_LANE, + int ROWS_PER_LANE, + int LOG_ROWS_PER_PASS, + int ROWS_PER_PASS, + int MAX_EXCHANGE_BYTES, + typename PreprocessFunctor, + typename PostprocessFunctor> + +__device__ __forceinline__ void SrtsScanDigitCycle( + typename VecType::Type *d_in_keys, + typename VecType::Type *d_in_values, + K *d_out_keys, + V *d_out_values, + int4 *exchange, + int warpscan[SCAN_LANES_PER_PASS][3][RAKING_THREADS_PER_LANE], + int carry[RADIX_DIGITS], + int digit_scan[2][RADIX_DIGITS], + int 
digit_counts[PASSES_PER_CYCLE][SETS_PER_PASS][RADIX_DIGITS], + int extra[1], + int *base_partial, + int *raking_partial) +{ + + const int PADDED_PARTIALS_PER_LANE = ROWS_PER_LANE * (PARTIALS_PER_ROW + 1); + const int SETS_PER_CYCLE = PASSES_PER_CYCLE * SETS_PER_PASS; + + // N.B.: We use the following voodoo incantations to elide the compiler's miserable + // "declared but never referenced" warnings for these (which are actually used for + // template instantiation) + SuppressUnusedConstantWarning(PADDED_PARTIALS_PER_LANE); + SuppressUnusedConstantWarning(SETS_PER_CYCLE); + + typename VecType::Type keypairs[PASSES_PER_CYCLE][SETS_PER_PASS]; + int2 digits[PASSES_PER_CYCLE][SETS_PER_PASS]; + int2 flag_offsets[PASSES_PER_CYCLE][SETS_PER_PASS]; // a byte offset + int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS]; + + + //------------------------------------------------------------------------- + // Read keys + //------------------------------------------------------------------------- + + // N.B. -- I wish we could do some pragma unrolling here too, but the compiler won't comply, + // telling me "Advisory: Loop was not unrolled, unexpected control flow construct" + + // Read Keys + if (PASSES_PER_CYCLE > 0) ReadSets(d_in_keys, keypairs[0], B40C_RADIXSORT_THREADS * SETS_PER_PASS * 0, extra); + if (PASSES_PER_CYCLE > 1) ReadSets(d_in_keys, keypairs[1], B40C_RADIXSORT_THREADS * SETS_PER_PASS * 1, extra); + + //------------------------------------------------------------------------- + // Lane-scanning Passes + //------------------------------------------------------------------------- + + #pragma unroll + for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) { + + // First Pass + ScanPass( + base_partial, + raking_partial, + warpscan, + keypairs[PASS], + digits[PASS], + flag_offsets[PASS], + ranks[PASS], + PASSES_PER_CYCLE - PASS - 1); // lower passes get copied right + } + + //------------------------------------------------------------------------- + // Digit-scanning + 
//------------------------------------------------------------------------- + + // Recover second-half digit-counts, scan across all digit-counts + if (threadIdx.x < RADIX_DIGITS) { + + int counts[PASSES_PER_CYCLE][SETS_PER_PASS]; + + // Recover digit-counts + + #pragma unroll + for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) { + RecoverDigitCounts( // first pass, offset by 1 + warpscan, + counts[PASS], + PASSES_PER_CYCLE - PASS - 1); // lower passes get copied right + } + + // Check for overflows + CorrectForOverflows( + digits, counts, extra); + + // Scan across my digit counts for each set + int exclusive_total = 0; + int inclusive_total = 0; + + #pragma unroll + for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) { + + #pragma unroll + for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { + inclusive_total += counts[PASS][SET]; + counts[PASS][SET] = exclusive_total; + exclusive_total = inclusive_total; + } + } + + // second half of carry update + int my_carry = carry[threadIdx.x] + digit_scan[1][threadIdx.x]; + + // Perform overflow-free SIMD Kogge-Stone across digits + int digit_prefix = WarpScan( + digit_scan, + inclusive_total, + 0); + + // first-half of carry update + carry[threadIdx.x] = my_carry - digit_prefix; + + #pragma unroll + for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) { + + #pragma unroll + for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { + digit_counts[PASS][SET][threadIdx.x] = counts[PASS][SET] + digit_prefix; + } + } + } + + __syncthreads(); + + //------------------------------------------------------------------------- + // Update Ranks + //------------------------------------------------------------------------- + + UpdateRanks(digits, ranks, digit_counts); + + + //------------------------------------------------------------------------- + // Scatter + //------------------------------------------------------------------------- + +#if ((__CUDA_ARCH__ < 130) || FERMI_ECC) + + SwapAndScatterSm10( + keypairs, + 
ranks, + exchange, + d_in_values, + d_out_keys, + d_out_values, + carry, + digit_scan, + extra); + +#else + + SwapAndScatterSm13( + keypairs, + ranks, + exchange, + d_in_values, + d_out_keys, + d_out_values, + carry, + extra); + +#endif + + __syncthreads(); + +} + + + +/****************************************************************************** + * Scan/Scatter Kernel Entry Point + ******************************************************************************/ + +template < + typename K, + typename V, + int PASS, + int RADIX_BITS, + int BIT, + typename PreprocessFunctor, + typename PostprocessFunctor> +__launch_bounds__ (B40C_RADIXSORT_THREADS, B40C_RADIXSORT_SCAN_SCATTER_CTA_OCCUPANCY(__CUDA_ARCH__)) +__global__ +void ScanScatterDigits( + bool *d_from_alt_storage, + int* d_spine, + K* d_in_keys, + K* d_out_keys, + V* d_in_values, + V* d_out_values, + CtaDecomposition work_decomposition) +{ + + const int RADIX_DIGITS = 1 << RADIX_BITS; + + const int LOG_SCAN_LANES_PER_SET = (RADIX_BITS > 2) ? 
RADIX_BITS - 2 : 0; // Always at least one lane per set + const int SCAN_LANES_PER_SET = 1 << LOG_SCAN_LANES_PER_SET; // N.B.: we have "declared but never referenced" warnings for these, but they're actually used for template instantiation + + const int LOG_SETS_PER_PASS = B40C_RADIXSORT_LOG_SETS_PER_PASS(__CUDA_ARCH__); + const int SETS_PER_PASS = 1 << LOG_SETS_PER_PASS; + + const int LOG_PASSES_PER_CYCLE = B40C_RADIXSORT_LOG_PASSES_PER_CYCLE(__CUDA_ARCH__, K, V); + const int PASSES_PER_CYCLE = 1 << LOG_PASSES_PER_CYCLE; + + const int LOG_SCAN_LANES_PER_PASS = LOG_SETS_PER_PASS + LOG_SCAN_LANES_PER_SET; + const int SCAN_LANES_PER_PASS = 1 << LOG_SCAN_LANES_PER_PASS; + + const int LOG_PARTIALS_PER_LANE = B40C_RADIXSORT_LOG_THREADS; + + const int LOG_PARTIALS_PER_PASS = LOG_SCAN_LANES_PER_PASS + LOG_PARTIALS_PER_LANE; + + const int LOG_RAKING_THREADS_PER_PASS = B40C_RADIXSORT_LOG_RAKING_THREADS_PER_PASS(__CUDA_ARCH__); + const int RAKING_THREADS_PER_PASS = 1 << LOG_RAKING_THREADS_PER_PASS; + + const int LOG_RAKING_THREADS_PER_LANE = LOG_RAKING_THREADS_PER_PASS - LOG_SCAN_LANES_PER_PASS; + const int RAKING_THREADS_PER_LANE = 1 << LOG_RAKING_THREADS_PER_LANE; + + const int LOG_PARTIALS_PER_SEG = LOG_PARTIALS_PER_LANE - LOG_RAKING_THREADS_PER_LANE; + const int PARTIALS_PER_SEG = 1 << LOG_PARTIALS_PER_SEG; + + const int LOG_PARTIALS_PER_ROW = (LOG_PARTIALS_PER_SEG < B40C_LOG_MEM_BANKS(__CUDA_ARCH__)) ? 
B40C_LOG_MEM_BANKS(__CUDA_ARCH__) : LOG_PARTIALS_PER_SEG; // floor of MEM_BANKS partials per row + const int PARTIALS_PER_ROW = 1 << LOG_PARTIALS_PER_ROW; + const int PADDED_PARTIALS_PER_ROW = PARTIALS_PER_ROW + 1; + + const int LOG_SEGS_PER_ROW = LOG_PARTIALS_PER_ROW - LOG_PARTIALS_PER_SEG; + const int SEGS_PER_ROW = 1 << LOG_SEGS_PER_ROW; + + const int LOG_ROWS_PER_SET = LOG_PARTIALS_PER_PASS - LOG_PARTIALS_PER_ROW; + + const int LOG_ROWS_PER_LANE = LOG_PARTIALS_PER_LANE - LOG_PARTIALS_PER_ROW; + const int ROWS_PER_LANE = 1 << LOG_ROWS_PER_LANE; + + const int LOG_ROWS_PER_PASS = LOG_SCAN_LANES_PER_PASS + LOG_ROWS_PER_LANE; + const int ROWS_PER_PASS = 1 << LOG_ROWS_PER_PASS; + + const int SCAN_LANE_BYTES = ROWS_PER_PASS * PADDED_PARTIALS_PER_ROW * sizeof(int); + const int MAX_EXCHANGE_BYTES = (sizeof(K) > sizeof(V)) ? + B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V) * sizeof(K) : + B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V) * sizeof(V); + const int SCAN_LANE_INT4S = (B40C_MAX(MAX_EXCHANGE_BYTES, SCAN_LANE_BYTES) + sizeof(int4) - 1) / sizeof(int4); + + + // N.B.: We use the following voodoo incantations to elide the compiler's miserable + // "declared but never referenced" warnings for these (which are actually used for + // template instantiation) + SuppressUnusedConstantWarning(SCAN_LANES_PER_SET); + SuppressUnusedConstantWarning(PARTIALS_PER_SEG); + SuppressUnusedConstantWarning(LOG_ROWS_PER_SET); + SuppressUnusedConstantWarning(ROWS_PER_LANE); + + // scan_lanes is a int4[] to avoid alignment issues when casting to (K *) and/or (V *) + __shared__ int4 scan_lanes[SCAN_LANE_INT4S]; + __shared__ int warpscan[SCAN_LANES_PER_PASS][3][RAKING_THREADS_PER_LANE]; // One warpscan per fours-group + __shared__ int carry[RADIX_DIGITS]; + __shared__ int digit_scan[2][RADIX_DIGITS]; + __shared__ int digit_counts[PASSES_PER_CYCLE][SETS_PER_PASS][RADIX_DIGITS]; + __shared__ bool non_trivial_digit_pass; + __shared__ bool from_alt_storage; + + 
_B40C_REG_MISER_QUALIFIER_ int extra[1]; + _B40C_REG_MISER_QUALIFIER_ int oob[1]; + + extra[0] = (blockIdx.x == gridDim.x - 1) ? work_decomposition.extra_elements_last_block : 0; + + // calculate our threadblock's range + int block_elements, block_offset; + if (blockIdx.x < work_decomposition.num_big_blocks) { + block_offset = work_decomposition.big_block_elements * blockIdx.x; + block_elements = work_decomposition.big_block_elements; + } else { + block_offset = (work_decomposition.normal_block_elements * blockIdx.x) + (work_decomposition.num_big_blocks * B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V)); + block_elements = work_decomposition.normal_block_elements; + } + oob[0] = block_offset + block_elements; // out-of-bounds + + + // location for placing 2-element partial reductions in the first lane of a pass + int row = threadIdx.x >> LOG_PARTIALS_PER_ROW; + int col = threadIdx.x & (PARTIALS_PER_ROW - 1); + int *base_partial = reinterpret_cast(scan_lanes) + (row * PADDED_PARTIALS_PER_ROW) + col; + + // location for raking across all sets within a pass + int *raking_partial = 0; + + if (threadIdx.x < RAKING_THREADS_PER_PASS) { + + // initialize lane warpscans + if (threadIdx.x < RAKING_THREADS_PER_LANE) { + + #pragma unroll + for (int SCAN_LANE = 0; SCAN_LANE < (int) SCAN_LANES_PER_PASS; SCAN_LANE++) { + warpscan[SCAN_LANE][0][threadIdx.x] = 0; + } + } + + // initialize digit warpscans + if (threadIdx.x < RADIX_DIGITS) { + + // Initialize digit_scan + digit_scan[0][threadIdx.x] = 0; + digit_scan[1][threadIdx.x] = 0; + + // Determine where to read our input + from_alt_storage = (PASS == 0) ? false : d_from_alt_storage[PASS & 0x1]; + + // Read carry in parallel + int spine_digit_offset = FastMul(gridDim.x, threadIdx.x); + int my_digit_carry = d_spine[spine_digit_offset + blockIdx.x]; + carry[threadIdx.x] = my_digit_carry; + + // Determine whether or not we have work to do and setup the next round + // accordingly. 
Everybody but the first threadblock can determine this + // from the number of non-zero-and-non-oob digit carries. First block + // needs someone else's because he always writes the zero offset. + + int predicate; + if (PreprocessFunctor::MustApply() || PostprocessFunctor::MustApply()) { + + non_trivial_digit_pass = true; + + } else { + + if (blockIdx.x > 0) { + // Non-first CTA : use digit-carry from first block + my_digit_carry = d_spine[spine_digit_offset]; + } + + predicate = ((my_digit_carry > 0) && (my_digit_carry < work_decomposition.num_elements)); + non_trivial_digit_pass = (TallyWarpVote(RADIX_DIGITS, predicate, reinterpret_cast(scan_lanes)) > 0); + } + + // Let the next round know which set of buffers to use + if (blockIdx.x == 0) d_from_alt_storage[(PASS + 1) & 0x1] = from_alt_storage ^ non_trivial_digit_pass; + } + + // initialize raking segment + row = threadIdx.x >> LOG_SEGS_PER_ROW; + col = (threadIdx.x & (SEGS_PER_ROW - 1)) << LOG_PARTIALS_PER_SEG; + raking_partial = reinterpret_cast(scan_lanes) + (row * PADDED_PARTIALS_PER_ROW) + col; + } + + // Sync to acquire non_trivial_digit_pass and from_alt_storage + __syncthreads(); + + // Short-circuit this entire pass + if (!non_trivial_digit_pass) return; + + if (!from_alt_storage) { + + // Scan in tiles of cycle_elements + while (block_offset < oob[0]) { + + SrtsScanDigitCycle( + reinterpret_cast::Type *>((void *) &d_in_keys[block_offset]), + reinterpret_cast::Type *>((void *) &d_in_values[block_offset]), + d_out_keys, + d_out_values, + scan_lanes, + warpscan, + carry, + digit_scan, + digit_counts, + extra, + base_partial, + raking_partial); + + block_offset += B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V); + } + + if (extra[0]) { + + SrtsScanDigitCycle( + reinterpret_cast::Type *>((void *) &d_in_keys[block_offset]), + reinterpret_cast::Type *>((void *) &d_in_values[block_offset]), + d_out_keys, + d_out_values, + scan_lanes, + warpscan, + carry, + digit_scan, + digit_counts, + extra, + 
base_partial, + raking_partial); + } + + } else { + + // Scan in tiles of cycle_elements + while (block_offset < oob[0]) { + + SrtsScanDigitCycle( + reinterpret_cast::Type *>((void *) &d_out_keys[block_offset]), + reinterpret_cast::Type *>((void *) &d_out_values[block_offset]), + d_in_keys, + d_in_values, + scan_lanes, + warpscan, + carry, + digit_scan, + digit_counts, + extra, + base_partial, + raking_partial); + + block_offset += B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V); + } + + if (extra[0]) { + + SrtsScanDigitCycle( + reinterpret_cast::Type *>((void *) &d_out_keys[block_offset]), + reinterpret_cast::Type *>((void *) &d_out_values[block_offset]), + d_in_keys, + d_in_values, + scan_lanes, + warpscan, + carry, + digit_scan, + digit_counts, + extra, + base_partial, + raking_partial); + } + + } +} + +} // end namespace b40c_thrust +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_spine_kernel.h b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_spine_kernel.h new file mode 100644 index 0000000..3d20f4a --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_spine_kernel.h @@ -0,0 +1,187 @@ +/****************************************************************************** + * + * Copyright 2010 Duane Merrill + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * + * + * + * AUTHORS' REQUEST: + * + * If you use|reference|benchmark this code, please cite our Technical + * Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf): + * + * @TechReport{ Merrill:Sorting:2010, + * author = "Duane Merrill and Andrew Grimshaw", + * title = "Revisiting Sorting for GPGPU Stream Architectures", + * year = "2010", + * institution = "University of Virginia, Department of Computer Science", + * address = "Charlottesville, VA, USA", + * number = "CS2010-03" + * } + * + * For more information, see our Google Code project site: + * http://code.google.com/p/back40computing/ + * + * Thanks! + * + ******************************************************************************/ + + +/****************************************************************************** + * Top-level histogram/spine scanning kernel + ******************************************************************************/ + +#pragma once + +#include "radixsort_kernel_common.h" + +namespace thrust { +namespace system { +namespace cuda { +namespace detail { +namespace detail { +namespace b40c_thrust { + +/****************************************************************************** + * Scans a cycle of RADIXSORT_CYCLE_ELEMENTS elements + ******************************************************************************/ + +template +__device__ __forceinline__ void SrtsScanCycle( + int *smem_offset, + int *smem_segment, + int warpscan[2][B40C_WARP_THREADS], + int4 *in, + int4 *out, + int &carry) +{ + int4 datum; + + // read input data + datum = in[threadIdx.x]; + + smem_offset[0] = datum.x + datum.y + datum.z + datum.w; + + __syncthreads(); + + if (threadIdx.x < B40C_WARP_THREADS) { + + int partial_reduction = SerialReduce(smem_segment); + + int seed = WarpScan(warpscan, partial_reduction, 0); + seed += carry; + + SerialScan(smem_segment, seed); + + carry += warpscan[1][B40C_WARP_THREADS - 1]; + } + + __syncthreads(); + + int part0 = smem_offset[0]; + int part1; + + 
part1 = datum.x + part0; + datum.x = part0; + part0 = part1 + datum.y; + datum.y = part1; + + part1 = datum.z + part0; + datum.z = part0; + part0 = part1 + datum.w; + datum.w = part1; + + out[threadIdx.x] = datum; +} + + +/****************************************************************************** + * Spine/histogram Scan Kernel Entry Point + ******************************************************************************/ + +template +__global__ void SrtsScanSpine( + int *d_ispine, + int *d_ospine, + int normal_block_elements) +{ + const int LOG_PARTIALS = B40C_RADIXSORT_LOG_THREADS; + const int PARTIALS = 1 << LOG_PARTIALS; + + const int LOG_PARTIALS_PER_SEG = LOG_PARTIALS - B40C_LOG_WARP_THREADS; + const int PARTIALS_PER_SEG = 1 << LOG_PARTIALS_PER_SEG; + + const int LOG_PARTIALS_PER_ROW = (LOG_PARTIALS_PER_SEG < B40C_LOG_MEM_BANKS(__CUDA_ARCH__)) ? B40C_LOG_MEM_BANKS(__CUDA_ARCH__) : LOG_PARTIALS_PER_SEG; // floor of 32 elts per row + const int PARTIALS_PER_ROW = 1 << LOG_PARTIALS_PER_ROW; + + const int LOG_SEGS_PER_ROW = LOG_PARTIALS_PER_ROW - LOG_PARTIALS_PER_SEG; + const int SEGS_PER_ROW = 1 << LOG_SEGS_PER_ROW; + + const int SMEM_ROWS = PARTIALS / PARTIALS_PER_ROW; + + __shared__ int smem[SMEM_ROWS][PARTIALS_PER_ROW + 1]; + __shared__ int warpscan[2][B40C_WARP_THREADS]; + + // WAR spurious unused constant warning + SuppressUnusedConstantWarning(PARTIALS_PER_SEG); + + int *smem_segment = 0; + int carry = 0; + + int row = threadIdx.x >> LOG_PARTIALS_PER_ROW; + int col = threadIdx.x & (PARTIALS_PER_ROW - 1); + int *smem_offset = &smem[row][col]; + + if (blockIdx.x > 0) { + return; + } + + if (threadIdx.x < B40C_WARP_THREADS) { + + // two segs per row, odd segs are offset by 8 + row = threadIdx.x >> LOG_SEGS_PER_ROW; + col = (threadIdx.x & (SEGS_PER_ROW - 1)) << LOG_PARTIALS_PER_SEG; + smem_segment = &smem[row][col]; + + if (threadIdx.x < B40C_WARP_THREADS) { + carry = 0; + warpscan[0][threadIdx.x] = 0; + } + } + + // scan the spine in blocks of cycle_elements 
+ int block_offset = 0; + while (block_offset < normal_block_elements) { + + SrtsScanCycle( + smem_offset, + smem_segment, + warpscan, + reinterpret_cast((void *) &d_ispine[block_offset]), + reinterpret_cast((void *) &d_ospine[block_offset]), + carry); + + block_offset += B40C_RADIXSORT_SPINE_CYCLE_ELEMENTS; + } +} + + +} // end namespace b40c_thrust +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/detail/b40c/vector_types.h b/compat/thrust/system/cuda/detail/detail/b40c/vector_types.h new file mode 100644 index 0000000..6db7931 --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/b40c/vector_types.h @@ -0,0 +1,96 @@ +/** + * Copyright 2010 Duane Merrill + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For more information, see our Google Code project site: + * http://code.google.com/p/back40computing/ + * + * Thanks! 
+ */ + +#pragma once + +#include + +namespace thrust { +namespace system { +namespace cuda { +namespace detail { +namespace detail { +namespace b40c_thrust { + +//------------------------------------------------------------------------------ +// Vector types +//------------------------------------------------------------------------------ + +template struct VecType; + + +// +// Define general vector types +// + +template +struct VecType { + K x; + typedef K Type; +}; + +template +struct VecType { + K x; + K y; + typedef VecType Type; +}; + +template +struct VecType { + K x; + K y; + K z; + K w; + typedef VecType Type; +}; + +// +// Specialize certain built-in vector types +// + +#define B40C_DEFINE_VECTOR_TYPE(base_type,short_type) \ + template<> struct VecType { typedef short_type##1 Type; }; \ + template<> struct VecType { typedef short_type##2 Type; }; \ + template<> struct VecType { typedef short_type##4 Type; }; + +B40C_DEFINE_VECTOR_TYPE(char, char) +B40C_DEFINE_VECTOR_TYPE(short, short) +B40C_DEFINE_VECTOR_TYPE(int, int) +B40C_DEFINE_VECTOR_TYPE(long, long) +B40C_DEFINE_VECTOR_TYPE(long long, longlong) +B40C_DEFINE_VECTOR_TYPE(unsigned char, uchar) +B40C_DEFINE_VECTOR_TYPE(unsigned short, ushort) +B40C_DEFINE_VECTOR_TYPE(unsigned int, uint) +B40C_DEFINE_VECTOR_TYPE(unsigned long, ulong) +B40C_DEFINE_VECTOR_TYPE(unsigned long long, ulonglong) +B40C_DEFINE_VECTOR_TYPE(float, float) +B40C_DEFINE_VECTOR_TYPE(double, double) + +#undef B40C_DEFINE_VECTOR_TYPE + +} // end namespace b40c_thrust +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/detail/balanced_path.h b/compat/thrust/system/cuda/detail/detail/balanced_path.h new file mode 100644 index 0000000..51e4f5b --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/balanced_path.h @@ -0,0 +1,156 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache 
License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ +namespace balanced_path_detail +{ + +template +__host__ __device__ void BinarySearchIteration(It data, int& begin, int& end, + T key, int shift, Comp comp) { + + IntT scale = (1<< shift) - 1; + int mid = (int)((begin + scale * end)>> shift); + + T key2 = data[mid]; + bool pred = UpperBound ? !comp(key, key2) : comp(key2, key); + if(pred) begin = (int)mid + 1; + else end = mid; +} + +template +__host__ __device__ int BinarySearch(It data, int count, T key, Comp comp) { + int begin = 0; + int end = count; + while(begin < end) + BinarySearchIteration(data, begin, end, key, 1, comp); + return begin; +} + +template +__host__ __device__ int BiasedBinarySearch(It data, int count, T key, + IntT levels, Comp comp) { + int begin = 0; + int end = count; + + if(levels >= 4 && begin < end) + BinarySearchIteration(data, begin, end, key, 9, comp); + if(levels >= 3 && begin < end) + BinarySearchIteration(data, begin, end, key, 7, comp); + if(levels >= 2 && begin < end) + BinarySearchIteration(data, begin, end, key, 5, comp); + if(levels >= 1 && begin < end) + BinarySearchIteration(data, begin, end, key, 4, comp); + + while(begin < end) + BinarySearchIteration(data, begin, end, key, 1, comp); + return begin; +} + +template +__host__ __device__ int MergePath(It1 a, int aCount, It2 b, int bCount, 
int diag, Comp comp) +{ + typedef typename thrust::iterator_traits::value_type T; + + int begin = thrust::max(0, diag - bCount); + int end = thrust::min(diag, aCount); + + while(begin < end) + { + int mid = (begin + end)>> 1; + T aKey = a[mid]; + T bKey = b[diag - 1 - mid]; + bool pred = UpperBound ? comp(aKey, bKey) : !comp(bKey, aKey); + if(pred) begin = mid + 1; + else end = mid; + } + return begin; +} + + +} // end namespace balanced_path_detail + + +template +__host__ __device__ +thrust::pair + balanced_path(RandomAccessIterator1 first1, Size1 n1, + RandomAccessIterator2 first2, Size1 n2, + Size1 diag, + Size2 levels, + Compare comp) +{ + typedef typename thrust::iterator_traits::value_type T; + + Size1 aIndex = balanced_path_detail::MergePath(first1, n1, first2, n2, diag, comp); + Size1 bIndex = diag - aIndex; + + bool star = false; + if(bIndex < n2) + { + T x = first2[bIndex]; + + // Search for the beginning of the duplicate run in both A and B. + Size1 aStart = balanced_path_detail::BiasedBinarySearch(first1, aIndex, x, levels, comp); + Size1 bStart = balanced_path_detail::BiasedBinarySearch(first2, bIndex, x, levels, comp); + + // The distance between x's merge path and its lower_bound is its rank. + // We add up the a and b ranks and evenly distribute them to + // get a stairstep path. + Size1 aRun = aIndex - aStart; + Size1 bRun = bIndex - bStart; + Size1 xCount = aRun + bRun; + + // Attempt to advance b and regress a. 
+ Size1 bAdvance = thrust::max(xCount >> 1, xCount - aRun); + Size1 bEnd = thrust::min(n2, bStart + bAdvance + 1); + Size1 bRunEnd = balanced_path_detail::BinarySearch(first2 + bIndex, bEnd - bIndex, x, comp) + bIndex; + bRun = bRunEnd - bStart; + + bAdvance = thrust::min(bAdvance, bRun); + Size1 aAdvance = xCount - bAdvance; + + bool roundUp = (aAdvance == bAdvance + 1) && (bAdvance < bRun); + aIndex = aStart + aAdvance; + + if(roundUp) star = true; + } + + return thrust::make_pair(aIndex, (diag - aIndex) + star); +} + + +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/detail/cached_temporary_allocator.h b/compat/thrust/system/cuda/detail/detail/cached_temporary_allocator.h new file mode 100644 index 0000000..2bbd658 --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/cached_temporary_allocator.h @@ -0,0 +1,156 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ + + +template class BasePolicy> + class cached_temporary_allocator + : public BasePolicy > +{ + private: + typedef thrust::detail::temporary_allocator base_allocator_type; + typedef thrust::detail::allocator_traits traits; + typedef typename traits::pointer allocator_pointer; + typedef std::multimap free_blocks_type; + typedef std::map allocated_blocks_type; + + base_allocator_type m_base_allocator; + free_blocks_type free_blocks; + allocated_blocks_type allocated_blocks; + + void free_all() + { + // deallocate all outstanding blocks in both lists + for(free_blocks_type::iterator i = free_blocks.begin(); + i != free_blocks.end(); + ++i) + { + // transform the pointer to allocator_pointer before calling deallocate + traits::deallocate(m_base_allocator, allocator_pointer(reinterpret_cast(i->second)), i->first); + } + + for(allocated_blocks_type::iterator i = allocated_blocks.begin(); + i != allocated_blocks.end(); + ++i) + { + // transform the pointer to allocator_pointer before calling deallocate + traits::deallocate(m_base_allocator, allocator_pointer(reinterpret_cast(i->first)), i->second); + } + } + + public: + cached_temporary_allocator(thrust::execution_policy &system) + : m_base_allocator(system) + {} + + ~cached_temporary_allocator() + { + // free all allocations when cached_allocator goes out of scope + free_all(); + } + + void *allocate(std::ptrdiff_t num_bytes) + { + void *result = 0; + + // search the cache for a free block + free_blocks_type::iterator free_block = free_blocks.find(num_bytes); + + if(free_block != free_blocks.end()) + { + // get the pointer + result = free_block->second; + + // erase from the free_blocks map + free_blocks.erase(free_block); + } + else + { + // no allocation of the right size exists + // create a new one with m_base_allocator + // 
allocate memory and convert to raw pointer + result = thrust::raw_pointer_cast(traits::allocate(m_base_allocator, num_bytes)); + } + + // insert the allocated pointer into the allocated_blocks map + allocated_blocks.insert(std::make_pair(result, num_bytes)); + + return result; + } + + void deallocate(void *ptr) + { + // erase the allocated block from the allocated blocks map + allocated_blocks_type::iterator iter = allocated_blocks.find(ptr); + std::ptrdiff_t num_bytes = iter->second; + allocated_blocks.erase(iter); + + // insert the block into the free blocks map + free_blocks.insert(std::make_pair(num_bytes, ptr)); + } +}; + + +// overload get_temporary_buffer on cached_temporary_allocator +// note that we take a reference to cached_temporary_allocator +template class BasePolicy> + thrust::pair + get_temporary_buffer(cached_temporary_allocator &alloc, std::ptrdiff_t n) +{ + // ask the allocator for sizeof(T) * n bytes + T* result = reinterpret_cast(alloc.allocate(sizeof(T) * n)); + + // return the pointer and the number of elements allocated + return thrust::make_pair(result,n); +} + + +// overload return_temporary_buffer on cached_temporary_allocator +// an overloaded return_temporary_buffer should always accompany +// an overloaded get_temporary_buffer +template class BasePolicy> + void return_temporary_buffer(cached_temporary_allocator &alloc, Pointer p) +{ + // return the pointer to the allocator + alloc.deallocate(thrust::raw_pointer_cast(p)); +} + + +} // end detail +} // end detail +} // end cuda +} // end system +} // end thrust + diff --git a/compat/thrust/system/cuda/detail/detail/fast_scan.h b/compat/thrust/system/cuda/detail/detail/fast_scan.h new file mode 100644 index 0000000..d095a4a --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/fast_scan.h @@ -0,0 +1,67 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the 
License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file fast_scan.h + * \brief A fast scan for primitive types. + */ + +#pragma once + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ +namespace fast_scan +{ + +template +OutputIterator inclusive_scan(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator output, + BinaryFunction binary_op); + +template +OutputIterator exclusive_scan(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator output, + const T init, + BinaryFunction binary_op); + +} // end namespace fast_scan +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include "fast_scan.inl" + diff --git a/compat/thrust/system/cuda/detail/detail/fast_scan.inl b/compat/thrust/system/cuda/detail/detail/fast_scan.inl new file mode 100644 index 0000000..b02763d --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/fast_scan.inl @@ -0,0 +1,753 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN + + +namespace thrust +{ +namespace detail +{ + +// forward declaration of temporary_array +template class temporary_array; + +} // end detail + +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ +namespace fast_scan +{ +namespace fast_scan_detail +{ + + +// TODO tune this +template +struct inclusive_scan_block_size +{ + private: + static const unsigned int max_memory = 16384 - 256 - 2 * sizeof(ValueType); + static const unsigned int max_block_size = max_memory / sizeof(ValueType); + static const unsigned int default_block_size = 7 * 32; + static const unsigned int block_size = (max_block_size < default_block_size) ? max_block_size : default_block_size; + + public: + static const unsigned int pass1 = block_size; + static const unsigned int pass2 = block_size; + static const unsigned int pass3 = block_size; +}; + +// TODO tune this +template +struct exclusive_scan_block_size +{ + private: + static const unsigned int max_memory = 16384 - 256 - 2 * sizeof(ValueType); + static const unsigned int max_block_size = max_memory / sizeof(ValueType); + static const unsigned int default_block_size = 5 * 32; + static const unsigned int block_size = (max_block_size < default_block_size) ? 
max_block_size : default_block_size; + + public: + static const unsigned int pass1 = block_size; + static const unsigned int pass2 = block_size; + static const unsigned int pass3 = block_size; +}; + + +template +__device__ __thrust_forceinline__ +void scan_block(Context context, SharedArray array, BinaryFunction binary_op) +{ + typedef typename thrust::iterator_value::type T; + + T val = array[context.thread_index()]; + + if (CTA_SIZE > 1) { if(context.thread_index() >= 1) { T tmp = array[context.thread_index() - 1]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 2) { if(context.thread_index() >= 2) { T tmp = array[context.thread_index() - 2]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 4) { if(context.thread_index() >= 4) { T tmp = array[context.thread_index() - 4]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 8) { if(context.thread_index() >= 8) { T tmp = array[context.thread_index() - 8]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 16) { if(context.thread_index() >= 16) { T tmp = array[context.thread_index() - 16]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 32) { if(context.thread_index() >= 32) { T tmp = array[context.thread_index() - 32]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 64) { if(context.thread_index() >= 64) { T tmp = array[context.thread_index() - 64]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 128) { if(context.thread_index() >= 128) { T tmp = array[context.thread_index() - 128]; val = binary_op(tmp, val); 
} context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 256) { if(context.thread_index() >= 256) { T tmp = array[context.thread_index() - 256]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 512) { if(context.thread_index() >= 512) { T tmp = array[context.thread_index() - 512]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 1024) { if(context.thread_index() >= 1024) { T tmp = array[context.thread_index() - 1024]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } +} + +template +__device__ __thrust_forceinline__ +void scan_block_n(Context context, SharedArray array, const unsigned int n, BinaryFunction binary_op) +{ + typedef typename thrust::iterator_value::type T; + + T val = array[context.thread_index()]; + + if (CTA_SIZE > 1) { if(context.thread_index() < n && context.thread_index() >= 1) { T tmp = array[context.thread_index() - 1]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 2) { if(context.thread_index() < n && context.thread_index() >= 2) { T tmp = array[context.thread_index() - 2]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 4) { if(context.thread_index() < n && context.thread_index() >= 4) { T tmp = array[context.thread_index() - 4]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 8) { if(context.thread_index() < n && context.thread_index() >= 8) { T tmp = array[context.thread_index() - 8]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 16) { if(context.thread_index() < n && context.thread_index() >= 16) { T 
tmp = array[context.thread_index() - 16]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 32) { if(context.thread_index() < n && context.thread_index() >= 32) { T tmp = array[context.thread_index() - 32]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 64) { if(context.thread_index() < n && context.thread_index() >= 64) { T tmp = array[context.thread_index() - 64]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 128) { if(context.thread_index() < n && context.thread_index() >= 128) { T tmp = array[context.thread_index() - 128]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 256) { if(context.thread_index() < n && context.thread_index() >= 256) { T tmp = array[context.thread_index() - 256]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 512) { if(context.thread_index() < n && context.thread_index() >= 512) { T tmp = array[context.thread_index() - 512]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } + if (CTA_SIZE > 1024) { if(context.thread_index() < n && context.thread_index() >= 1024) { T tmp = array[context.thread_index() - 1024]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } +} + +template +__device__ __thrust_forceinline__ +void load_block(Context context, + const unsigned int n, + InputIterator input, + ValueType (&sdata)[K][CTA_SIZE + 1]) +{ + for(unsigned int k = 0; k < K; k++) + { + const unsigned int offset = k*CTA_SIZE + context.thread_index(); + + if (FullBlock || offset < n) + { + InputIterator temp = input + offset; + sdata[offset % K][offset / K] = *temp; + } + } + 
+ context.barrier(); +} + +template +__device__ __thrust_forceinline__ +void store_block(Context context, + const unsigned int n, + OutputIterator output, + ValueType (&sdata)[K][CTA_SIZE + 1], + ValueType& carry) +{ + if (Inclusive) + { + for(unsigned int k = 0; k < K; k++) + { + const unsigned int offset = k*CTA_SIZE + context.thread_index(); + + if (FullBlock || offset < n) + { + OutputIterator temp = output + offset; + *temp = sdata[offset % K][offset / K]; + } + } + } + else + { + for(unsigned int k = 0; k < K; k++) + { + const unsigned int offset = k*CTA_SIZE + context.thread_index(); + + if (FullBlock || offset < n) + { + OutputIterator temp = output + offset; + *temp = (offset == 0) ? carry : sdata[(offset - 1) % K][(offset - 1) / K]; + } + } + } +} + +template +__device__ __thrust_forceinline__ +void upsweep_body(Context context, + const unsigned int n, + const bool carry_in, + InputIterator input, + BinaryFunction binary_op, + ValueType (&sdata)[K][CTA_SIZE + 1], + ValueType& carry) +{ + // read data + load_block(context, n, input, sdata); + + // copy into local array + ValueType ldata[K]; + for (unsigned int k = 0; k < K; k++) + ldata[k] = sdata[k][context.thread_index()]; + + // carry in + if (context.thread_index() == 0 && carry_in) + { + // XXX WAR sm_10 issue + ValueType tmp = carry; + ldata[0] = binary_op(tmp, ldata[0]); + } + + // scan local values + for(unsigned int k = 1; k < K; k++) + { + const unsigned int offset = K * context.thread_index() + k; + + if (FullBlock || offset < n) + ldata[k] = binary_op(ldata[k-1],ldata[k]); + } + + sdata[K - 1][context.thread_index()] = ldata[K - 1]; + + context.barrier(); + + // second level scan + if (FullBlock && sizeof(ValueType) > 1) // TODO investigate why this WAR is necessary + scan_block(context, sdata[K - 1], binary_op); + else + scan_block_n(context, sdata[K - 1], n / K, binary_op); + + // store carry out + if (FullBlock) + { + if (context.thread_index() == CTA_SIZE - 1) + carry = sdata[K - 
1][context.thread_index()]; + } + else + { + if (context.thread_index() == (n - 1) / K) + { + ValueType sum; + + for (unsigned int k = 0; k < K; k++) + if ((n - 1) % K == k) + sum = ldata[k]; + + if (context.thread_index() > 0) + { + // WAR sm_10 issue + ValueType tmp = sdata[K - 1][context.thread_index() - 1]; + sum = binary_op(tmp, sum); + } + + carry = sum; + } + } + + context.barrier(); +} + +template +__device__ __thrust_forceinline__ +void scan_body(Context context, + const unsigned int n, + const bool carry_in, + InputIterator input, + OutputIterator output, + BinaryFunction binary_op, + ValueType (&sdata)[K][CTA_SIZE + 1], + ValueType& carry) +{ + // read data + load_block(context, n, input, sdata); + + // copy into local array + ValueType ldata[K]; + for (unsigned int k = 0; k < K; k++) + ldata[k] = sdata[k][context.thread_index()]; + + // carry in + if (context.thread_index() == 0 && carry_in) + { + // XXX WAR sm_10 issue + ValueType tmp = carry; + ldata[0] = binary_op(tmp, ldata[0]); + } + + // scan local values + for(unsigned int k = 1; k < K; k++) + { + const unsigned int offset = K * context.thread_index() + k; + + if (FullBlock || offset < n) + ldata[k] = binary_op(ldata[k-1],ldata[k]); + } + + sdata[K - 1][context.thread_index()] = ldata[K - 1]; + + context.barrier(); + + // second level scan + if (FullBlock) + scan_block(context, sdata[K - 1], binary_op); + else + scan_block_n(context, sdata[K - 1], n / K, binary_op); + + // update local values + if (context.thread_index() > 0) + { + ValueType left = sdata[K - 1][context.thread_index() - 1]; + + for(unsigned int k = 0; k < K; k++) + { + const unsigned int offset = K * context.thread_index() + k; + + if (FullBlock || offset < n) + ldata[k] = binary_op(left, ldata[k]); + } + } + + for (unsigned int k = 0; k < K; k++) + sdata[k][context.thread_index()] = ldata[k]; + + context.barrier(); + + // write data + store_block(context, n, output, sdata, carry); + + // store carry out + if 
(context.thread_index() == 0) + { + if (FullBlock) + carry = sdata[K - 1][CTA_SIZE - 1]; + else + carry = sdata[(n - 1) % K][(n - 1) / K]; // note: this must come after the local update + } + + context.barrier(); +} + +template +struct upsweep_intervals_closure +{ + InputIterator input; + ValueType * block_results; // TODO change this to ValueIterator + BinaryFunction binary_op; + Decomposition decomp; + Context context; + + typedef Context context_type; + + upsweep_intervals_closure(InputIterator input, + ValueType * block_results, + BinaryFunction binary_op, + Decomposition decomp, + Context context = Context()) + : input(input), block_results(block_results), binary_op(binary_op), decomp(decomp), context(context) {} + + __device__ __thrust_forceinline__ + void operator()(void) + { + typedef typename Decomposition::index_type IndexType; + + const unsigned int CTA_SIZE = context_type::ThreadsPerBlock::value; + +#if __CUDA_ARCH__ >= 200 + const unsigned int SMEM = (48 * 1024); +#else + const unsigned int SMEM = (16 * 1024) - 256; +#endif + const unsigned int MAX_K = ((SMEM - 1 * sizeof(ValueType)) / (sizeof(ValueType) * (CTA_SIZE + 1))); + const unsigned int K = (MAX_K < 6) ? 
MAX_K : 6; + + __shared__ uninitialized sdata; // padded to avoid bank conflicts + + __shared__ uninitialized carry; // storage for carry out + if(context.thread_index() == 0) carry.construct(); + + context.barrier(); + + thrust::system::detail::internal::index_range interval = decomp[context.block_index()]; + + IndexType base = interval.begin(); + + input += base; + + const unsigned int unit_size = K * CTA_SIZE; + + bool carry_in = false; + + // process full units + while (base + unit_size <= interval.end()) + { + const unsigned int n = unit_size; + upsweep_body(context, n, carry_in, input, binary_op, sdata.get(), carry.get()); + base += unit_size; + input += unit_size; + carry_in = true; + } + + // process partially full unit at end of input (if necessary) + if (base < interval.end()) + { + const unsigned int n = interval.end() - base; + upsweep_body(context, n, carry_in, input, binary_op, sdata.get(), carry.get()); + } + + // write interval sum + if (context.thread_index() == 0) + block_results[context.block_index()] = carry; + } +}; + + +template +struct downsweep_intervals_closure +{ + InputIterator input; + OutputIterator output; + ValueType * block_results; + BinaryFunction binary_op; + Decomposition decomp; + Context context; + + typedef Context context_type; + + downsweep_intervals_closure(InputIterator input, + OutputIterator output, + ValueType * block_results, + BinaryFunction binary_op, + Decomposition decomp, + Context context = Context()) + : input(input), output(output), block_results(block_results), binary_op(binary_op), decomp(decomp), context(context) {} + + __device__ __thrust_forceinline__ + void operator()(void) + { + typedef typename Decomposition::index_type IndexType; + + const unsigned int CTA_SIZE = context_type::ThreadsPerBlock::value; + +#if __CUDA_ARCH__ >= 200 + const unsigned int SMEM = (48 * 1024); +#else + const unsigned int SMEM = (16 * 1024) - 256; +#endif + const unsigned int MAX_K = ((SMEM - 1 * sizeof(ValueType))/ 
(sizeof(ValueType) * (CTA_SIZE + 1))); + const unsigned int K = (MAX_K < 6) ? MAX_K : 6; + + __shared__ uninitialized sdata; // padded to avoid bank conflicts + + __shared__ uninitialized carry; // storage for carry in and carry out + if(context.thread_index() == 0) carry.construct(); + + context.barrier(); + + thrust::system::detail::internal::index_range interval = decomp[context.block_index()]; + + IndexType base = interval.begin(); + + input += base; + output += base; + + const unsigned int unit_size = K * CTA_SIZE; + + bool carry_in = (Inclusive && context.block_index() == 0) ? false : true; + + if (carry_in) + { + if (context.thread_index() == 0) + carry = block_results[context.block_index()]; + context.barrier(); + } + + // process full units + while (base + unit_size <= interval.end()) + { + const unsigned int n = unit_size; + scan_body(context, n, carry_in, input, output, binary_op, sdata.get(), carry.get()); + base += K * CTA_SIZE; + input += K * CTA_SIZE; + output += K * CTA_SIZE; + carry_in = true; + } + + // process partially full unit at end of input (if necessary) + if (base < interval.end()) + { + const unsigned int n = interval.end() - base; + scan_body(context, n, carry_in, input, output, binary_op, sdata.get(), carry.get()); + } + } +}; + + +} // end namespace fast_scan_detail + + +template +OutputIterator inclusive_scan(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator output, + BinaryFunction binary_op) +{ + using namespace fast_scan_detail; + + // the pseudocode for deducing the type of the temporary used below: + // + // if BinaryFunction is AdaptableBinaryFunction + // TemporaryType = AdaptableBinaryFunction::result_type + // else if OutputIterator is a "pure" output iterator + // TemporaryType = InputIterator::value_type + // else + // TemporaryType = OutputIterator::value_type + // + // XXX upon c++0x, TemporaryType needs to be: + // result_of::type + + typedef typename thrust::detail::eval_if< + 
thrust::detail::has_result_type::value, + thrust::detail::result_type, + thrust::detail::eval_if< + thrust::detail::is_output_iterator::value, + thrust::iterator_value, + thrust::iterator_value + > + >::type ValueType; + + typedef unsigned int IndexType; + typedef thrust::system::detail::internal::uniform_decomposition Decomposition; + typedef thrust::detail::temporary_array ValueArray; + + if (first == last) + return output; + + Decomposition decomp = thrust::system::cuda::detail::default_decomposition(last - first); + + ValueArray block_results(exec, decomp.size()); + + // compute sum over each interval + if (thrust::detail::is_commutative::value) + { + // use reduce_intervals for commutative operators + thrust::system::cuda::detail::reduce_intervals(exec, first, block_results.begin(), binary_op, decomp); + } + else + { + const static unsigned int ThreadsPerBlock = inclusive_scan_block_size::pass1; + typedef detail::statically_blocked_thread_array Context; + + typedef upsweep_intervals_closure Closure; + Closure closure(first, + thrust::raw_pointer_cast(&block_results[0]), + binary_op, + decomp); + detail::launch_closure(closure, decomp.size(), ThreadsPerBlock); + } + + // second level inclusive scan of per-block results + { + const static unsigned int ThreadsPerBlock = inclusive_scan_block_size::pass2; + typedef detail::statically_blocked_thread_array Context; + + typedef downsweep_intervals_closure Closure; + Closure closure(thrust::raw_pointer_cast(&block_results[0]), + thrust::raw_pointer_cast(&block_results[0]), + thrust::raw_pointer_cast(&block_results[0]), // not used + binary_op, + Decomposition(decomp.size(), 1, 1)); + detail::launch_closure(closure, 1, ThreadsPerBlock); + } + + // update intervals with result of second level scan + { + const static unsigned int ThreadsPerBlock = inclusive_scan_block_size::pass3; + typedef detail::statically_blocked_thread_array Context; + + typedef downsweep_intervals_closure Closure; + Closure closure(first, + output, 
+ thrust::raw_pointer_cast(&block_results[0]) - 1, // shift block results + binary_op, + decomp); + detail::launch_closure(closure, decomp.size(), ThreadsPerBlock); + } + + return output + (last - first); +} + + +template +OutputIterator exclusive_scan(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator output, + const T init, + BinaryFunction binary_op) +{ + using namespace fast_scan_detail; + + // the pseudocode for deducing the type of the temporary used below: + // + // if BinaryFunction is AdaptableBinaryFunction + // TemporaryType = AdaptableBinaryFunction::result_type + // else if OutputIterator is a "pure" output iterator + // TemporaryType = InputIterator::value_type + // else + // TemporaryType = OutputIterator::value_type + // + // XXX upon c++0x, TemporaryType needs to be: + // result_of::type + + typedef typename thrust::detail::eval_if< + thrust::detail::has_result_type::value, + thrust::detail::result_type, + thrust::detail::eval_if< + thrust::detail::is_output_iterator::value, + thrust::iterator_value, + thrust::iterator_value + > + >::type ValueType; + + typedef unsigned int IndexType; + typedef thrust::system::detail::internal::uniform_decomposition Decomposition; + typedef thrust::detail::temporary_array ValueArray; + + if (first == last) + return output; + + Decomposition decomp = thrust::system::cuda::detail::default_decomposition(last - first); + + ValueArray block_results(exec, decomp.size() + 1); + + // compute sum over each interval + if (thrust::detail::is_commutative::value) + { + // use reduce_intervals for commutative operators + thrust::system::cuda::detail::reduce_intervals(exec, first, block_results.begin() + 1, binary_op, decomp); + } + else + { + const static unsigned int ThreadsPerBlock = exclusive_scan_block_size::pass1; + typedef detail::statically_blocked_thread_array Context; + + typedef upsweep_intervals_closure Closure; + Closure closure(first, + thrust::raw_pointer_cast(&block_results[0]) + 
1, + binary_op, + decomp); + detail::launch_closure(closure, decomp.size(), ThreadsPerBlock); + } + + // place init before per-block results + block_results[0] = init; + + // second level inclusive scan of per-block results + { + const static unsigned int ThreadsPerBlock = exclusive_scan_block_size::pass2; + typedef detail::statically_blocked_thread_array Context; + + typedef downsweep_intervals_closure Closure; + Closure closure(thrust::raw_pointer_cast(&block_results[0]), + thrust::raw_pointer_cast(&block_results[0]), + thrust::raw_pointer_cast(&block_results[0]), // not used + binary_op, + Decomposition(decomp.size() + 1, 1, 1)); + detail::launch_closure(closure, 1, ThreadsPerBlock); + } + + // update intervals with result of second level scan + { + const static unsigned int ThreadsPerBlock = exclusive_scan_block_size::pass3; + typedef detail::statically_blocked_thread_array Context; + + typedef downsweep_intervals_closure Closure; + Closure closure(first, + output, + thrust::raw_pointer_cast(&block_results[0]), // shift block results + binary_op, + decomp); + detail::launch_closure(closure, decomp.size(), ThreadsPerBlock); + } + + return output + (last - first); +} + + +} // end namespace fast_scan +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END + diff --git a/compat/thrust/system/cuda/detail/detail/launch_calculator.h b/compat/thrust/system/cuda/detail/detail/launch_calculator.h new file mode 100644 index 0000000..5126aa6 --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/launch_calculator.h @@ -0,0 +1,82 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ + +template +class launch_calculator +{ + device_properties_t properties; + function_attributes_t attributes; + + public: + + launch_calculator(void); + + launch_calculator(const device_properties_t& properties, const function_attributes_t& attributes); + + thrust::tuple with_variable_block_size(void) const; + + template + thrust::tuple with_variable_block_size(UnaryFunction block_size_to_smem_size) const; + + thrust::tuple with_variable_block_size_available_smem(void) const; + + private: + + /*! Returns a pair (num_threads_per_block, num_blocks_per_multiprocessor) + * where num_threads_per_block is a valid block size for an instance of Closure + * chosen by a heuristic and num_blocks_per_multiprocessor is the maximum + * number of such blocks that can execute on a streaming multiprocessor at once. + */ + thrust::pair default_block_configuration() const; + + /*! Returns a pair (num_threads_per_block, num_blocks_per_multiprocessor) + * where num_threads_per_block is a valid block size for an instance of Closure + * chosen by a heuristic and num_blocks_per_multiprocessor is the maximum + * number of such blocks that can execute on a streaming multiprocessor at once. 
+ * + * \param block_size_to_smem_size Mapping from num_threads_per_block to number of + * dynamically-allocated bytes of shared memory + */ + template + thrust::pair default_block_configuration(UnaryFunction block_size_to_smem_size) const; +}; + +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/detail/launch_calculator.inl b/compat/thrust/system/cuda/detail/detail/launch_calculator.inl new file mode 100644 index 0000000..b851d5f --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/launch_calculator.inl @@ -0,0 +1,103 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// do not attempt to compile this file with any other compiler +#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ + +template +launch_calculator::launch_calculator(void) + : properties(device_properties()), + attributes(closure_attributes()) +{} + +template +launch_calculator::launch_calculator(const device_properties_t& properties, const function_attributes_t& attributes) + : properties(properties), + attributes(attributes) +{} + +template + template +thrust::pair launch_calculator::default_block_configuration(UnaryFunction block_size_to_smem_size) const +{ + // choose a block size + std::size_t num_threads_per_block = block_size_with_maximum_potential_occupancy(attributes, properties, block_size_to_smem_size); + + // choose a subscription rate + std::size_t num_blocks_per_multiprocessor = properties.maxThreadsPerMultiProcessor / num_threads_per_block; + + return thrust::make_pair(num_threads_per_block, num_blocks_per_multiprocessor); +} + + +template +thrust::pair launch_calculator::default_block_configuration(void) const +{ + // choose a block size + std::size_t num_threads_per_block = block_size_with_maximum_potential_occupancy(attributes, properties); + + // choose a subscription rate + std::size_t num_blocks_per_multiprocessor = properties.maxThreadsPerMultiProcessor / num_threads_per_block; + + return thrust::make_pair(num_threads_per_block, num_blocks_per_multiprocessor); +} + +template +thrust::tuple launch_calculator::with_variable_block_size(void) const +{ + thrust::pair config = default_block_configuration(); + return thrust::tuple(config.second * properties.multiProcessorCount, config.first, 0); +} + +template + template +thrust::tuple launch_calculator::with_variable_block_size(UnaryFunction block_size_to_smem_size) const +{ + thrust::pair config = default_block_configuration(block_size_to_smem_size); 
+ return thrust::tuple(config.second * properties.multiProcessorCount, config.first, block_size_to_smem_size(config.first)); +} + +template +thrust::tuple launch_calculator::with_variable_block_size_available_smem(void) const +{ + thrust::pair config = default_block_configuration(); + size_t smem_per_block = proportional_smem_allocation(properties, attributes, config.second); + return thrust::tuple(config.second * properties.multiProcessorCount, config.first, smem_per_block); +} + +} // end detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#endif // THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC + diff --git a/compat/thrust/system/cuda/detail/detail/launch_closure.h b/compat/thrust/system/cuda/detail/detail/launch_closure.h new file mode 100644 index 0000000..c2e6c43 --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/launch_closure.h @@ -0,0 +1,114 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ + +template +struct launch_bounds +{ + typedef thrust::detail::integral_constant ThreadsPerBlock; + typedef thrust::detail::integral_constant BlocksPerMultiprocessor; +}; + +struct thread_array : public launch_bounds<> +{ +// CUDA built-in variables require nvcc +#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC + __device__ __thrust_forceinline__ unsigned int thread_index(void) const { return threadIdx.x; } + __device__ __thrust_forceinline__ unsigned int thread_count(void) const { return blockDim.x * gridDim.x; } +#else + __device__ __thrust_forceinline__ unsigned int thread_index(void) const { return 0; } + __device__ __thrust_forceinline__ unsigned int thread_count(void) const { return 0; } +#endif // THRUST_DEVICE_COMPILER_NVCC +}; + +struct blocked_thread_array : public launch_bounds<> +{ +// CUDA built-in variables require nvcc +#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC + __device__ __thrust_forceinline__ unsigned int thread_index(void) const { return threadIdx.x; } + __device__ __thrust_forceinline__ unsigned int block_dimension(void) const { return blockDim.x; } + __device__ __thrust_forceinline__ unsigned int block_index(void) const { return blockIdx.x; } + __device__ __thrust_forceinline__ unsigned int grid_dimension(void) const { return gridDim.x; } + __device__ __thrust_forceinline__ unsigned int linear_index(void) const { return block_dimension() * block_index() + thread_index(); } + __device__ __thrust_forceinline__ void barrier(void) { __syncthreads(); } +#else + __device__ __thrust_forceinline__ unsigned int thread_index(void) const { return 0; } + __device__ __thrust_forceinline__ unsigned int block_dimension(void) const { return 0; } + __device__ __thrust_forceinline__ unsigned int block_index(void) const { return 0; } + __device__ __thrust_forceinline__ unsigned int 
grid_dimension(void) const { return 0; } + __device__ __thrust_forceinline__ unsigned int linear_index(void) const { return 0; } + __device__ __thrust_forceinline__ void barrier(void) { } +#endif // THRUST_DEVICE_COMPILER_NVCC +}; + +template +struct statically_blocked_thread_array : public launch_bounds<_ThreadsPerBlock,1> +{ +// CUDA built-in variables require nvcc +#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC + __device__ __thrust_forceinline__ unsigned int thread_index(void) const { return threadIdx.x; } + __device__ __thrust_forceinline__ unsigned int block_dimension(void) const { return _ThreadsPerBlock; } // minor optimization + __device__ __thrust_forceinline__ unsigned int block_index(void) const { return blockIdx.x; } + __device__ __thrust_forceinline__ unsigned int grid_dimension(void) const { return gridDim.x; } + __device__ __thrust_forceinline__ unsigned int linear_index(void) const { return block_dimension() * block_index() + thread_index(); } + __device__ __thrust_forceinline__ void barrier(void) { __syncthreads(); } +#else + __device__ __thrust_forceinline__ unsigned int thread_index(void) const { return 0; } + __device__ __thrust_forceinline__ unsigned int block_dimension(void) const { return 0; } + __device__ __thrust_forceinline__ unsigned int block_index(void) const { return 0; } + __device__ __thrust_forceinline__ unsigned int grid_dimension(void) const { return 0; } + __device__ __thrust_forceinline__ unsigned int linear_index(void) const { return 0; } + __device__ __thrust_forceinline__ void barrier(void) { } +#endif // THRUST_DEVICE_COMPILER_NVCC +}; + +template + void launch_closure(Closure f, Size1 num_blocks, Size2 block_size); + +template + void launch_closure(Closure f, Size1 num_blocks, Size2 block_size, Size3 smem_size); + +/*! 
Returns a copy of the cudaFuncAttributes structure + * that is associated with a given Closure + */ +template +function_attributes_t closure_attributes(void); + +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/detail/launch_closure.inl b/compat/thrust/system/cuda/detail/detail/launch_closure.inl new file mode 100644 index 0000000..ce39cfc --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/launch_closure.inl @@ -0,0 +1,207 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ + +// XXX WAR circular inclusion problems with this forward declaration +template class temporary_array; + +} // end detail + +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ + +#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC +template +__global__ __launch_bounds__(Closure::context_type::ThreadsPerBlock::value, Closure::context_type::BlocksPerMultiprocessor::value) +void launch_closure_by_value(Closure f) +{ + f(); +} + +template +__global__ __launch_bounds__(Closure::context_type::ThreadsPerBlock::value, Closure::context_type::BlocksPerMultiprocessor::value) +void launch_closure_by_pointer(const Closure *f) +{ + // copy to registers + Closure f_reg = *f; + f_reg(); +} +#else +template +void launch_closure_by_value(Closure) {} + +template +void launch_closure_by_pointer(const Closure *) {} + +#endif // THRUST_DEVICE_COMPILER_NVCC + +template + struct closure_launcher_base +{ + typedef void (*launch_function_t)(Closure); + + static launch_function_t get_launch_function(void) + { + return launch_closure_by_value; + } + + template + static void launch(Closure f, Size1 num_blocks, Size2 block_size, Size3 smem_size) + { +#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC + if(num_blocks > 0) + { + launch_closure_by_value<<<(unsigned int) num_blocks, (unsigned int) block_size, (unsigned int) smem_size>>>(f); + synchronize_if_enabled("launch_closure_by_value"); + } +#endif // THRUST_DEVICE_COMPILER_NVCC + } +}; // end closure_launcher_base + + +template + struct closure_launcher_base +{ + typedef void (*launch_function_t)(const Closure *); + + static launch_function_t get_launch_function(void) + { + return launch_closure_by_pointer; + } + + template + static void launch(Closure f, Size1 num_blocks, Size2 block_size, Size3 smem_size) + { +#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC + 
if(num_blocks > 0) + { + // use temporary storage for the closure + // XXX use of cuda::tag is too specific here + thrust::cuda::tag cuda_tag; + thrust::host_system_tag host_tag; + thrust::detail::temporary_array closure_storage(cuda_tag, host_tag, &f, &f + 1); + + // launch + detail::launch_closure_by_pointer<<<(unsigned int) num_blocks, (unsigned int) block_size, (unsigned int) smem_size>>>((&closure_storage[0]).get()); + synchronize_if_enabled("launch_closure_by_pointer"); + } +#endif // THRUST_DEVICE_COMPILER_NVCC + } +}; + + +template + struct closure_launcher + : public closure_launcher_base +{ + typedef closure_launcher_base super_t; + + static inline const device_properties_t& device_properties(void) + { + return device_properties(); + } + + static inline function_attributes_t function_attributes(void) + { + return thrust::system::cuda::detail::function_attributes(super_t::get_launch_function()); + } + + template + static void launch(Closure f, Size1 num_blocks, Size2 block_size, Size3 smem_size) + { + super_t::launch(f,num_blocks,block_size,smem_size); + } +}; + +template + void launch_closure(Closure f, Size num_blocks) +{ + launch_calculator calculator; + launch_closure(f, num_blocks, thrust::get<1>(calculator.with_variable_block_size())); +} // end launch_closure() + +template + void launch_closure(Closure f, Size1 num_blocks, Size2 block_size) +{ + launch_closure(f, num_blocks, block_size, 0u); +} // end launch_closure() + +template + void launch_closure(Closure f, Size1 num_blocks, Size2 block_size, Size3 smem_size) +{ + closure_launcher::launch(f, num_blocks, block_size, smem_size); +} // end launch_closure() + + +template +function_attributes_t closure_attributes(void) +{ + typedef closure_launcher Launcher; + + // cache the result of function_attributes(), because it is slow + // only cache the first few devices + static const int max_num_devices = 16; + + static bool attributes_exist[max_num_devices] = {0}; + static function_attributes_t 
function_attributes[max_num_devices] = {}; + + // XXX device_id ought to be an argument to this function + int device_id = current_device(); + + if(device_id >= max_num_devices) + { + return thrust::system::cuda::detail::function_attributes(Launcher::get_launch_function()); + } + + if(!attributes_exist[device_id]) + { + function_attributes[device_id] = thrust::system::cuda::detail::function_attributes(Launcher::get_launch_function()); + + // disallow the compiler to move the write to attributes_exist[device_id] + // before the initialization of function_attributes[device_id] + __thrust_compiler_fence(); + + attributes_exist[device_id] = true; + } + + return function_attributes[device_id]; +} + +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/detail/set_operation.h b/compat/thrust/system/cuda/detail/detail/set_operation.h new file mode 100644 index 0000000..5475731 --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/set_operation.h @@ -0,0 +1,56 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ + +template + RandomAccessIterator3 set_operation(execution_policy &exec, + RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + RandomAccessIterator2 last2, + RandomAccessIterator3 result, + Compare comp, + SetOperation set_op); + + +} // end detail +} // end detail +} // end cuda +} // end system +} // end thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/detail/set_operation.inl b/compat/thrust/system/cuda/detail/detail/set_operation.inl new file mode 100644 index 0000000..3f14379 --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/set_operation.inl @@ -0,0 +1,639 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ +namespace set_operation_detail +{ + + +using thrust::system::cuda::detail::detail::statically_blocked_thread_array; +using thrust::detail::uint16_t; +using thrust::detail::uint32_t; + + +// empirically determined on sm_20 +// value_types larger than this will fail to launch if placed in smem +template + struct stage_through_smem +{ + static const bool value = sizeof(T) <= 6 * sizeof(uint32_t); +}; + + +// max_input_size <= 32 +template +inline __device__ + OutputIterator serial_bounded_copy_if(Size max_input_size, + InputIterator first, + uint32_t mask, + OutputIterator result) +{ + for(Size i = 0; i < max_input_size; ++i, ++first) + { + if((1< + struct find_partition_offsets_functor +{ + Size partition_size; + InputIterator1 first1; + InputIterator2 first2; + Size n1, n2; + Compare comp; + + find_partition_offsets_functor(Size partition_size, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + Compare comp) + : partition_size(partition_size), + first1(first1), first2(first2), + n1(last1 - first1), n2(last2 - first2), + comp(comp) + {} + + inline __host__ __device__ + thrust::pair operator()(Size i) const + { + Size diag = thrust::min(n1 + n2, i * partition_size); + + // XXX the correctness of balanced_path depends critically on the ll suffix below + // why??? 
+ return balanced_path(first1, n1, first2, n2, diag, 4ll, comp); + } +}; + + +template + OutputIterator find_partition_offsets(thrust::cuda::execution_policy &exec, + Size num_partitions, + Size partition_size, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result, + Compare comp) +{ + find_partition_offsets_functor f(partition_size, first1, last1, first2, last2, comp); + + return thrust::transform(exec, + thrust::counting_iterator(0), + thrust::counting_iterator(num_partitions), + result, + f); +} + + +namespace block +{ + + +template +inline __device__ +T right_neighbor(statically_blocked_thread_array &ctx, const T &x, const T &boundary) +{ + // stage this shift to conserve smem + const unsigned int storage_size = block_size / 2; + __shared__ uninitialized_array shared; + + T result = x; + + unsigned int tid = ctx.thread_index(); + + if(0 < tid && tid <= storage_size) + { + shared[tid - 1] = x; + } + + ctx.barrier(); + + if(tid < storage_size) + { + result = shared[tid]; + } + + ctx.barrier(); + + tid -= storage_size; + if(0 < tid && tid <= storage_size) + { + shared[tid - 1] = x; + } + else if(tid == 0) + { + shared[storage_size-1] = boundary; + } + + ctx.barrier(); + + if(tid < storage_size) + { + result = shared[tid]; + } + + ctx.barrier(); + + return result; +} + + +template +inline __device__ + unsigned int bounded_count_set_operation_n(statically_blocked_thread_array &ctx, + InputIterator1 first1, uint16_t n1, + InputIterator2 first2, uint16_t n2, + Compare comp, + SetOperation set_op) +{ + unsigned int thread_idx = ctx.thread_index(); + + // find partition offsets + uint16_t diag = thrust::min(n1 + n2, thread_idx * work_per_thread); + thrust::pair thread_input_begin = balanced_path(first1, n1, first2, n2, diag, 2, comp); + thrust::pair thread_input_end = block::right_neighbor(ctx, thread_input_begin, thrust::make_pair(n1,n2)); + + __shared__ uint16_t s_thread_output_size[block_size]; + + // 
work_per_thread + 1 to accommodate a "starred" partition returned from balanced_path above + s_thread_output_size[thread_idx] = + set_op.count(work_per_thread + 1, + first1 + thread_input_begin.first, first1 + thread_input_end.first, + first2 + thread_input_begin.second, first2 + thread_input_end.second, + comp); + + ctx.barrier(); + + // reduce per-thread counts + thrust::system::cuda::detail::block::inplace_inclusive_scan(ctx, s_thread_output_size); + return s_thread_output_size[ctx.block_dimension() - 1]; +} + + +inline __device__ int pop_count(unsigned int x) +{ +// returns the number of set bits in x; __popc is guarded from other compilers, which use a portable bit count +#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC + return __popc(x); +#else + int count = 0; for(; x != 0; x &= x - 1) ++count; return count; +#endif +} + + + +template +inline __device__ + OutputIterator bounded_set_operation_n(statically_blocked_thread_array &ctx, + InputIterator1 first1, uint16_t n1, + InputIterator2 first2, uint16_t n2, + OutputIterator result, + Compare comp, + SetOperation set_op) +{ + unsigned int thread_idx = ctx.thread_index(); + + // find partition offsets + uint16_t diag = thrust::min(n1 + n2, thread_idx * work_per_thread); + thrust::pair thread_input_begin = balanced_path(first1, n1, first2, n2, diag, 2, comp); + thrust::pair thread_input_end = block::right_neighbor(ctx, thread_input_begin, thrust::make_pair(n1,n2)); + + typedef typename thrust::iterator_value::type value_type; + // +1 to accommodate a "starred" partition returned from balanced_path above + uninitialized_array sparse_result; + uint32_t active_mask = + set_op(work_per_thread + 1, + first1 + thread_input_begin.first, first1 + thread_input_end.first, + first2 + thread_input_begin.second, first2 + thread_input_end.second, + sparse_result.begin(), + comp); + + __shared__ uint16_t s_thread_output_size[block_size]; + s_thread_output_size[thread_idx] = pop_count(active_mask); + + ctx.barrier(); + + // scan to turn per-thread counts into output indices + uint16_t block_output_size = 
thrust::system::cuda::detail::block::inplace_exclusive_scan(ctx, s_thread_output_size, 0u); + + serial_bounded_copy_if(work_per_thread + 1, sparse_result.begin(), active_mask, result + s_thread_output_size[thread_idx]); + + ctx.barrier(); + + return result + block_output_size; +} + + +template +inline __device__ + typename thrust::iterator_difference::type + count_set_operation(statically_blocked_thread_array &ctx, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + Compare comp, + SetOperation set_op) +{ + typedef typename thrust::iterator_difference::type difference; + + difference result = 0; + + thrust::pair remaining_input_size = thrust::make_pair(last1 - first1, last2 - first2); + + // iterate until the input is consumed + while(remaining_input_size.first + remaining_input_size.second > 0) + { + // find the end of this subpartition's input + // -1 to accommodate "starred" partitions + uint16_t max_subpartition_size = block_size * work_per_thread - 1; + difference diag = thrust::min(remaining_input_size.first + remaining_input_size.second, max_subpartition_size); + thrust::pair subpartition_size = balanced_path(first1, remaining_input_size.first, first2, remaining_input_size.second, diag, 4ll, comp); + + typedef typename thrust::iterator_value::type value_type; + if(stage_through_smem::value) + { + // load the input into __shared__ storage + __shared__ uninitialized_array s_input; + + value_type *s_input_end1 = thrust::system::cuda::detail::block::copy_n(ctx, first1, subpartition_size.first, s_input.begin()); + value_type *s_input_end2 = thrust::system::cuda::detail::block::copy_n(ctx, first2, subpartition_size.second, s_input_end1); + + result += block::bounded_count_set_operation_n(ctx, + s_input.begin(), subpartition_size.first, + s_input_end1, subpartition_size.second, + comp, + set_op); + } + else + { + result += block::bounded_count_set_operation_n(ctx, + first1, subpartition_size.first, + first2, 
subpartition_size.second, + comp, + set_op); + } + + // advance input + first1 += subpartition_size.first; + first2 += subpartition_size.second; + + // decrement remaining size + remaining_input_size.first -= subpartition_size.first; + remaining_input_size.second -= subpartition_size.second; + } + + return result; +} + + +template +inline __device__ +OutputIterator set_operation(statically_blocked_thread_array &ctx, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result, + Compare comp, + SetOperation set_op) +{ + typedef typename thrust::iterator_difference::type difference; + + thrust::pair remaining_input_size = thrust::make_pair(last1 - first1, last2 - first2); + + // iterate until the input is consumed + while(remaining_input_size.first + remaining_input_size.second > 0) + { + // find the end of this subpartition's input + // -1 to accommodate "starred" partitions + uint16_t max_subpartition_size = block_size * work_per_thread - 1; + difference diag = thrust::min(remaining_input_size.first + remaining_input_size.second, max_subpartition_size); + thrust::pair subpartition_size = balanced_path(first1, remaining_input_size.first, first2, remaining_input_size.second, diag, 4ll, comp); + + typedef typename thrust::iterator_value::type value_type; + if(stage_through_smem::value) + { + // load the input into __shared__ storage + __shared__ uninitialized_array s_input; + + value_type *s_input_end1 = thrust::system::cuda::detail::block::copy_n(ctx, first1, subpartition_size.first, s_input.begin()); + value_type *s_input_end2 = thrust::system::cuda::detail::block::copy_n(ctx, first2, subpartition_size.second, s_input_end1); + + result = block::bounded_set_operation_n(ctx, + s_input.begin(), subpartition_size.first, + s_input_end1, subpartition_size.second, + result, + comp, + set_op); + } + else + { + result = block::bounded_set_operation_n(ctx, + first1, subpartition_size.first, + first2, 
subpartition_size.second, + result, + comp, + set_op); + } + + // advance input + first1 += subpartition_size.first; + first2 += subpartition_size.second; + + // decrement remaining size + remaining_input_size.first -= subpartition_size.first; + remaining_input_size.second -= subpartition_size.second; + } + + return result; +} + + +} // end namespace block + + +template + inline __device__ void count_set_operation(statically_blocked_thread_array &ctx, + InputIterator1 input_partition_offsets, + Size num_partitions, + InputIterator2 first1, + InputIterator3 first2, + OutputIterator result, + Compare comp, + SetOperation set_op) +{ + // consume partitions + for(Size partition_idx = ctx.block_index(); + partition_idx < num_partitions; + partition_idx += ctx.grid_dimension()) + { + typedef typename thrust::iterator_difference::type difference; + + // find the partition + thrust::pair block_input_begin = input_partition_offsets[partition_idx]; + thrust::pair block_input_end = input_partition_offsets[partition_idx + 1]; + + // count the size of the set operation + difference count = block::count_set_operation(ctx, + first1 + block_input_begin.first, first1 + block_input_end.first, + first2 + block_input_begin.second, first2 + block_input_end.second, + comp, + set_op); + + if(ctx.thread_index() == 0) + { + result[partition_idx] = count; + } + } +} + + +template + struct count_set_operation_closure +{ + typedef statically_blocked_thread_array context_type; + + InputIterator1 input_partition_offsets; + Size num_partitions; + InputIterator2 first1; + InputIterator3 first2; + OutputIterator result; + Compare comp; + SetOperation set_op; + + count_set_operation_closure(InputIterator1 input_partition_offsets, + Size num_partitions, + InputIterator2 first1, + InputIterator3 first2, + OutputIterator result, + Compare comp, + SetOperation set_op) + : input_partition_offsets(input_partition_offsets), + num_partitions(num_partitions), + first1(first1), + first2(first2), + 
result(result), + comp(comp), + set_op(set_op) + {} + + inline __device__ void operator()() const + { + context_type ctx; + count_set_operation(ctx, input_partition_offsets, num_partitions, first1, first2, result, comp, set_op); + } +}; + + +template + count_set_operation_closure + make_count_set_operation_closure(InputIterator1 input_partition_offsets, + Size num_partitions, + InputIterator2 first1, + InputIterator3 first2, + OutputIterator result, + Compare comp, + SetOperation set_op) +{ + typedef count_set_operation_closure result_type; + return result_type(input_partition_offsets,num_partitions,first1,first2,result,comp,set_op); +} + + +template +inline __device__ + void set_operation(statically_blocked_thread_array &ctx, + InputIterator1 input_partition_offsets, + Size num_partitions, + InputIterator2 first1, + InputIterator3 first2, + InputIterator4 output_partition_offsets, + OutputIterator result, + Compare comp, + SetOperation set_op) +{ + // consume partitions + for(Size partition_idx = ctx.block_index(); + partition_idx < num_partitions; + partition_idx += ctx.grid_dimension()) + { + typedef typename thrust::iterator_difference::type difference; + + // find the partition + thrust::pair block_input_begin = input_partition_offsets[partition_idx]; + thrust::pair block_input_end = input_partition_offsets[partition_idx + 1]; + + // do the set operation across the partition + block::set_operation(ctx, + first1 + block_input_begin.first, first1 + block_input_end.first, + first2 + block_input_begin.second, first2 + block_input_end.second, + result + output_partition_offsets[partition_idx], + comp, + set_op); + } +} + + +template + struct set_operation_closure +{ + typedef statically_blocked_thread_array context_type; + + InputIterator1 input_partition_offsets; + Size num_partitions; + InputIterator2 first1; + InputIterator3 first2; + InputIterator4 output_partition_offsets; + OutputIterator result; + Compare comp; + SetOperation set_op; + + 
set_operation_closure(InputIterator1 input_partition_offsets, + Size num_partitions, + InputIterator2 first1, + InputIterator3 first2, + InputIterator4 output_partition_offsets, + OutputIterator result, + Compare comp, + SetOperation set_op) + : input_partition_offsets(input_partition_offsets), + num_partitions(num_partitions), + first1(first1), + first2(first2), + output_partition_offsets(output_partition_offsets), + result(result), + comp(comp), + set_op(set_op) + {} + + inline __device__ void operator()() const + { + context_type ctx; + set_operation(ctx, input_partition_offsets, num_partitions, first1, first2, output_partition_offsets, result, comp, set_op); + } +}; + + +template + set_operation_closure + make_set_operation_closure(InputIterator1 input_partition_offsets, + Size num_partitions, + InputIterator2 first1, + InputIterator3 first2, + InputIterator4 output_partition_offsets, + OutputIterator result, + Compare comp, + SetOperation set_op) +{ + typedef set_operation_closure result_type; + return result_type(input_partition_offsets,num_partitions,first1,first2,output_partition_offsets,result,comp,set_op); +} + + +} // end namespace set_operation_detail + + +template + OutputIterator set_operation(thrust::cuda::execution_policy &exec, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result, + Compare comp, + SetOperation set_op) +{ + using thrust::system::cuda::detail::device_properties; + using thrust::system::cuda::detail::detail::launch_closure; + namespace d = thrust::system::cuda::detail::detail::set_operation_detail; + + typedef typename thrust::iterator_difference::type difference; + + const difference n1 = last1 - first1; + const difference n2 = last2 - first2; + + // handle empty input + if(n1 == 0 && n2 == 0) + { + return result; + } + + const thrust::detail::uint16_t work_per_thread = 15; + const thrust::detail::uint16_t threads_per_block = 128; + const thrust::detail::uint16_t 
work_per_block = threads_per_block * work_per_thread; + + // -1 because balanced_path adds a single element to the end of a "starred" partition, increasing its size by one + const thrust::detail::uint16_t maximum_partition_size = work_per_block - 1; + const difference num_partitions = thrust::detail::util::divide_ri(n1 + n2, maximum_partition_size); + + // find input partition offsets + // +1 to handle the end of the input elegantly + thrust::detail::temporary_array, DerivedPolicy> input_partition_offsets(0, exec, num_partitions + 1); + d::find_partition_offsets(exec, input_partition_offsets.size(), maximum_partition_size, first1, last1, first2, last2, input_partition_offsets.begin(), comp); + + const difference num_blocks = thrust::min(device_properties().maxGridSize[0], num_partitions); + + // find output partition offsets + // +1 to store the total size of the output + thrust::detail::temporary_array output_partition_offsets(0, exec, num_partitions + 1); + launch_closure(d::make_count_set_operation_closure(input_partition_offsets.begin(), num_partitions, first1, first2, output_partition_offsets.begin(), comp, set_op), + num_blocks, + threads_per_block); + + // turn the output partition counts into offsets to output partitions + thrust::exclusive_scan(exec, output_partition_offsets.begin(), output_partition_offsets.end(), output_partition_offsets.begin()); + + // run the set op kernel + launch_closure(d::make_set_operation_closure(input_partition_offsets.begin(), num_partitions, first1, first2, output_partition_offsets.begin(), result, comp, set_op), + num_blocks, + threads_per_block); + + return result + output_partition_offsets[num_partitions]; +} + + +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/detail/stable_merge_sort.h b/compat/thrust/system/cuda/detail/detail/stable_merge_sort.h new file mode 100644 index 0000000..23f3254 
--- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/stable_merge_sort.h @@ -0,0 +1,63 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file stable_merge_sort.h + * \brief Defines the interface for a stable merge sort implementation on CUDA + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ + +template +void stable_merge_sort(execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp); + +template +void stable_merge_sort_by_key(execution_policy &exec, + RandomAccessIterator1 keys_begin, + RandomAccessIterator1 keys_end, + RandomAccessIterator2 values_begin, + StrictWeakOrdering comp); + +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/detail/stable_merge_sort.inl b/compat/thrust/system/cuda/detail/detail/stable_merge_sort.inl new file mode 100644 index 0000000..0c69803 --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/stable_merge_sort.inl @@ -0,0 +1,1103 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file stable_merge_sort.inl + * \brief Inline file for stable_merge_sort.h. + * \note This algorithm is based on the one described + * in "Designing Efficient Sorting Algorithms for + * Manycore GPUs", by Satish, Harris, and Garland. + */ + +#include + +#include +#include + +#include + +#include // for log2 +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN + + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ +namespace stable_merge_sort_detail +{ + + +template + struct is_block_size_valid +{ + // assume sm_10 limits + static const unsigned int max_num_smem_bytes = 16384; + + // CUDA steals 256 for itself for kernel parms + static const unsigned int num_reserved_smem_bytes = 256; + + // the number of bytes available to our kernels + static const unsigned int num_available_smem_bytes = max_num_smem_bytes - num_reserved_smem_bytes; + + // merge_small_tiles_by_key_closure is the hungriest kernel + // the block_size it uses is 2x the size of all the other kernels + // this merge_small_tiles_by_key_closure's smem requirements: + // 2 * block_size_x2 * sizeof(Key) + // + 2 * block_size_x2 * sizeof(Key) + // + 2 * block_size_x2 * sizeof(Value) + // ================================ + // 4 * (block_size) * (2 * sizeof(Key) + sizeof(Value)) + static const unsigned int num_needed_smem_bytes = 4 * (1 << log_block_size) * (2 
* sizeof(Key) + sizeof(Value)); + + static const bool value = num_needed_smem_bytes <= num_available_smem_bytes; +}; + + + +// choose a (log) block_size to use for our kernels +template + struct select_log_block_size + : thrust::detail::eval_if< + is_block_size_valid::value, + thrust::detail::integral_constant, + select_log_block_size + >::type +{}; + + +// don't recurse lower than block_size < 128 +template + struct select_log_block_size<6, Key, Value> +{ + // no block size exists which can satisfy the storage demands +}; + + +template + struct block_size +{ + // prefer block_size == 256 (1 << 8), go lower if we need to + static const unsigned int value = 1 << select_log_block_size<8, Key, Value>::value; +}; + + +template +inline unsigned int max_grid_size(Size block_size) +{ + const device_properties_t& properties = device_properties(); + + const unsigned int max_threads = properties.maxThreadsPerMultiProcessor * properties.multiProcessorCount; + const unsigned int max_blocks = properties.maxGridSize[0]; + + return std::min(max_blocks, 3 * max_threads / block_size); +} // end max_grid_size() + + +// Base case for the merge algorithm: merges data where tile_size <= block_size. +// Works by loading two or more tiles into shared memory and doing a binary search. 
+template +struct merge_small_tiles_by_key_closure +{ + typedef Context context_type; + + RandomAccessIterator1 keys_first; + RandomAccessIterator2 values_first; + const unsigned int n; + const unsigned int log_tile_size; + RandomAccessIterator3 keys_result; + RandomAccessIterator4 values_result; + StrictWeakOrdering comp; + context_type context; + + // these members are derivable from block_size, n, and log_tile_size + unsigned int index_of_last_block; + unsigned int index_of_last_tile_in_last_block; + unsigned int size_of_last_tile; + + merge_small_tiles_by_key_closure + (RandomAccessIterator1 keys_first, + RandomAccessIterator2 values_first, + const unsigned int n, + const unsigned int log_tile_size, + RandomAccessIterator3 keys_result, + RandomAccessIterator4 values_result, + StrictWeakOrdering comp, + Context context = Context()) + : keys_first(keys_first), values_first(values_first), + n(n), + log_tile_size(log_tile_size), + keys_result(keys_result), values_result(values_result), + comp(comp), + context(context) + { + // compute the number of tiles, including a possible partial tile + unsigned int tile_size = 1 << log_tile_size; + unsigned int num_tiles = thrust::detail::util::divide_ri(n, tile_size); + unsigned int partial_tile_size = n % tile_size; + + // compute the number of logical thread blocks, including a possible partial block + unsigned int tiles_per_block = block_size / tile_size; + unsigned int num_blocks = thrust::detail::util::divide_ri(num_tiles, tiles_per_block); + unsigned int partial_block_size = num_tiles % tiles_per_block; + + // compute the number of tiles in the last block, which might be of partial size + unsigned int number_of_tiles_in_last_block = partial_block_size ? partial_block_size : tiles_per_block; + + size_of_last_tile = partial_tile_size ? 
partial_tile_size : tile_size; + index_of_last_tile_in_last_block = number_of_tiles_in_last_block - 1; + index_of_last_block = num_blocks - 1; + } + + unsigned int grid_size() const + { + const unsigned int max_num_blocks = max_grid_size(block_size); + const unsigned int num_logical_blocks = index_of_last_block + 1; + return thrust::min(num_logical_blocks, max_num_blocks); + } + + __device__ __thrust_forceinline__ + void operator()(void) + { + typedef typename iterator_value::type KeyType; + typedef typename iterator_value::type ValueType; + + // load (2*block_size) elements into shared memory. These (2*block_size) elements belong to (2*block_size)/tile_size different tiles. + __shared__ uninitialized_array key; + __shared__ uninitialized_array outkey; + __shared__ uninitialized_array outvalue; + + const unsigned int grid_size = context.grid_dimension() * context.block_dimension(); + + unsigned int block_idx = context.block_index(); + + // the global index of this task + unsigned int i = context.thread_index() + context.block_index() * context.block_dimension(); + + // advance iterators + keys_first += i; + values_first += i; + keys_result += i; + values_result += i; + + for(; + block_idx <= index_of_last_block; + block_idx += context.grid_dimension(), i += grid_size, keys_first += grid_size, values_first += grid_size, keys_result += grid_size, values_result += grid_size) + { + KeyType my_key; + + // copy over inputs to shared memory + if(i < n) + { + key[context.thread_index()] = my_key = *keys_first; + } // end if + + // the tile to which the element belongs + unsigned int tile_index = context.thread_index()>>log_tile_size; + + // figure out the index and size of the other tile + unsigned int other_tile_index = tile_index^1; + unsigned int other_tile_size = (1< + class static_strided_integer_range +{ + // XXX cudafe doesn't like this private for some reason + //private: + public: + typedef typename thrust::counting_iterator counting_iterator; + + struct 
stride_functor + : public thrust::unary_function + { + inline __host__ __device__ + unsigned int operator()(unsigned int i) const + { + return stride * i; + } + }; + + public: + typedef typename thrust::transform_iterator iterator; + + static_strided_integer_range(unsigned int num_strides) + : m_begin(iterator(counting_iterator(0), stride_functor())), + m_end(iterator(counting_iterator(num_strides), stride_functor())) + {} + + iterator begin() const + { + return m_begin; + } + + iterator end() const + { + return m_end; + } + + private: + iterator m_begin, m_end; +}; + + +///////////////////// Find the rank of each extracted element in both arrays //////////////////////////////////////// +///////////////////// This breaks up the array into independent segments to merge //////////////////////////////////////// +// Inputs: d_splitters, d_splitters_pos: the merged array of splitters with corresponding positions. +// d_srcData: input data, datasize: number of entries in d_srcData +// N_SPLITTERS the number of splitters, log_blocksize: log of the size of each block of sorted data +// log_num_merged_splitters_per_tile = log of the number of merged splitters. ( = log_blocksize - 7). +// Output: d_rank1, d_rank2: ranks of each splitter in d_splitters in the block to which it belongs +// (say i) and its corresponding block (block i+1). 
+template +struct rank_splitters_closure +{ + typedef Context context_type; + + static const unsigned int log_block_size = thrust::detail::mpl::math::log2::value; + + RandomAccessIterator1 splitters_first; + RandomAccessIterator2 splitters_pos_first; + RandomAccessIterator3 keys_first; + RandomAccessIterator4 ranks_result1; + RandomAccessIterator4 ranks_result2; + unsigned int num_splitters; + unsigned int num_keys; + unsigned int log_tile_size; + thrust::detail::device_function< + StrictWeakOrdering, + bool + > comp; + context_type context; + + // this member is derivable from those received in the constructor + unsigned int log_num_merged_splitters_per_tile; + + rank_splitters_closure(RandomAccessIterator1 splitters_first, + RandomAccessIterator2 splitters_pos_first, + RandomAccessIterator3 keys_first, + unsigned int num_splitters, + unsigned int num_keys, + unsigned int log_tile_size, + RandomAccessIterator4 ranks_result1, + RandomAccessIterator4 ranks_result2, + StrictWeakOrdering comp, + context_type context = context_type()) + : splitters_first(splitters_first), splitters_pos_first(splitters_pos_first), + keys_first(keys_first), + ranks_result1(ranks_result1), ranks_result2(ranks_result2), + num_splitters(num_splitters), num_keys(num_keys), + log_tile_size(log_tile_size), + comp(comp), context(context) + { + // the number of splitters in each tile before merging + const unsigned int log_num_splitters_per_tile = log_tile_size - log_block_size; + + // the number of splitters in each merged tile + log_num_merged_splitters_per_tile = log_num_splitters_per_tile + 1; + } + + inline unsigned int grid_size() const + { + unsigned int num_blocks = num_splitters / block_size; + if(num_splitters % block_size) ++num_blocks; + + // compute the maximum number of block_size we can launch on this arch + const unsigned int max_num_blocks = max_grid_size(block_size); + + return min(num_blocks, max_num_blocks); + } + + /*! 
this member function returns the index of the (odd,even) block pair + * that the splitter of interest belongs to + * \param splitter_idx The index of the splitter in the splitters list + * \return The global index of the (odd,even) block pair + */ + __device__ __thrust_forceinline__ + unsigned int block_pair_idx(unsigned int splitter_idx) const + { + return splitter_idx >> log_num_merged_splitters_per_tile; + } + + /*! This member function returns the end of the search range in the other tile in + * which the splitter of interest needs to be ranked. + * \param splitter_idx The index of the splitter in the splitters array + * \param splitter_global_idx The index of the splitter in the global array of elements + * \param tile_idx The index of the tile to which the splitter belongs. + * \return The half-open interval in the other tile in which the splitter needs to be ranked. + * [first_index_to_search, size_of_interval) + */ + __device__ __thrust_forceinline__ + thrust::pair search_interval(unsigned int splitter_idx, unsigned int splitter_global_idx, unsigned int tile_idx) const + { + // We want to compute the ranks of the splitter in d_srcData1 and d_srcData2 + // for instance, if the splitter belongs to d_srcData1, then + // (1) the rank in d_srcData1 is simply given by its splitter_global_idx + // (2) to find the rank in d_srcData2, we first find the block in d_srcData2 where inp appears. + // We do this by noting that we have already merged/sorted splitters, and thus the rank + // of inp in the elements of d_srcData2 that are present in splitters is given by + // position of inp in d_splitters - rank of inp in elements of d_srcData1 in splitters + // = i - splitter_global_idx + // This also gives us the block of d_srcData2 that the splitter belongs in, since we have one + // element in splitters per block of d_srcData2. + + // We now perform a binary search over this block of d_srcData2 to find the rank of inp in d_srcData2. 
+ // start and end are the start and end indices of this block in d_srcData2, forming the bounds of the binary search. + // Note that this binary search is in global memory with uncoalesced loads. However, we only find the ranks + // of a small set of elements, one per splitter: thus it is not the performance bottleneck. + + // the local index of the splitter within the (odd, even) block pair. + const unsigned int splitter_block_pair_idx = splitter_idx - (block_pair_idx(splitter_idx)<> log_block_size; + + // find the end of the search range in the other tile + unsigned int end = (( splitter_block_pair_idx - block_tile_idx) << log_block_size); + + // begin by assuming the search range is the size of a full block + unsigned int other_block_size = block_size; + + // the index of the other tile can be found with + const unsigned int other_tile_idx = tile_idx ^ 1; + + // the size of the other tile can be less than tile_size if it is the last tile. + unsigned int other_tile_size = min(1 << log_tile_size, num_keys - (other_tile_idx< other_tile_size) + { + // the other block has partial size + end = other_tile_size; + other_block_size = num_keys % block_size; + } + else if(end == 0) + { + // when the search range is empty + // the other_block_size is 0 + other_block_size = 0; + } + + // the search range begins other_block_size elements before the end + unsigned int start = end - other_block_size; + + return thrust::make_pair(start,other_block_size); + } + + __device__ __thrust_forceinline__ + void operator()(void) + { + typedef typename iterator_value::type KeyType; + typedef typename iterator_value::type IndexType; + + const unsigned int grid_size = context.grid_dimension() * context.block_dimension(); + + unsigned int splitter_idx = context.thread_index() + context.block_index() * context.block_dimension(); + + // advance iterators + splitters_first += splitter_idx; + splitters_pos_first += splitter_idx; + ranks_result1 += splitter_idx; + ranks_result2 += 
splitter_idx; + + for(; + splitter_idx < num_splitters; + splitter_idx += grid_size, splitters_first += grid_size, splitters_pos_first += grid_size, ranks_result1 += grid_size, ranks_result2 += grid_size) + { + // the index of the splitter within the global array of elements + IndexType splitter_global_idx = *splitters_pos_first; + + // the tile to which the splitter belongs. + unsigned int tile_idx = (splitter_global_idx >> log_tile_size); + + // the index of the "other" tile with which tile_idx must be merged. + unsigned int other_tile_idx = tile_idx^1; + + // compute the interval in the other tile to search + unsigned int start, n; + thrust::tie(start,n) = search_interval(splitter_idx, splitter_global_idx, tile_idx); + + // point to the beginning of the other tile + RandomAccessIterator3 other_tile_begin = keys_first + (other_tile_idx< + void rank_splitters(RandomAccessIterator1 splitters_first, + RandomAccessIterator1 splitters_last, + RandomAccessIterator2 splitter_positions_first, + RandomAccessIterator3 keys_first, + RandomAccessIterator3 keys_last, + size_t log_tile_size, + RandomAccessIterator4 ranks_result1, + RandomAccessIterator4 ranks_result2, + StrictWeakOrdering comp) +{ + typedef rank_splitters_closure< + block_size, + RandomAccessIterator1, + RandomAccessIterator2, + RandomAccessIterator3, + RandomAccessIterator4, + StrictWeakOrdering, + detail::statically_blocked_thread_array + > Closure; + + Closure closure(splitters_first, + splitter_positions_first, + keys_first, + splitters_last - splitters_first, + keys_last - keys_first, + log_tile_size, + ranks_result1, + ranks_result2, + comp); + + detail::launch_closure(closure, closure.grid_size(), block_size); +} + + +template +__device__ + void copy_n(Context context, + RandomAccessIterator1 first1, + RandomAccessIterator2 first2, + Size n, + RandomAccessIterator3 result1, + RandomAccessIterator4 result2) +{ + for(Size i = context.thread_index(); + i < n; + i += context.block_dimension()) + { + 
result1[i] = first1[i]; + result2[i] = first2[i]; + } +} + + +///////////////////// MERGE TWO INDEPENDENT SEGMENTS USING BINARY SEARCH IN SHARED MEMORY //////////////////////////////////////// +// NOTE: This is the most compute-intensive part of the algorithm. +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Thread block i merges entries between rank[i] and rank[i+1]. These can be independently +// merged and concatenated, as noted above. +// Each thread in the thread block i does a binary search of one element between rank[i] -> rank[i+1] in the +// other array. + +// Inputs: srcdatakey, value: inputs +// log_blocksize, log_num_merged_splitters_per_tile: as in previous functions +// Outputs: resultdatakey, resultdatavalue: output merged arrays are written here. +template +struct merge_subtiles_by_key_closure +{ + typedef Context context_type; + static const unsigned int log_block_size = thrust::detail::mpl::math::log2::value; + + RandomAccessIterator1 keys_first; + RandomAccessIterator2 values_first; + unsigned int n; + RandomAccessIterator3 ranks_first1; + RandomAccessIterator4 ranks_first2; + const unsigned int tile_size; + const unsigned int num_splitters; + RandomAccessIterator5 keys_result; + RandomAccessIterator6 values_result; + StrictWeakOrdering comp; + Context context; + + // this member is derivable from the constructor parameters + unsigned int log_num_merged_splitters_per_tile; + + merge_subtiles_by_key_closure + (RandomAccessIterator1 keys_first, + RandomAccessIterator2 values_first, + unsigned int n, + RandomAccessIterator3 ranks_first1, + RandomAccessIterator4 ranks_first2, + const unsigned int log_tile_size, + const unsigned int num_splitters, + RandomAccessIterator5 keys_result, + RandomAccessIterator6 values_result, + StrictWeakOrdering comp, + Context context = Context()) + : keys_first(keys_first), values_first(values_first), n(n), + ranks_first1(ranks_first1), 
ranks_first2(ranks_first2), + tile_size(1 << log_tile_size), + num_splitters(num_splitters), + keys_result(keys_result), values_result(values_result), + comp(comp), context(context) + { + const unsigned int log_num_splitters_per_tile = log_tile_size - log_block_size; + log_num_merged_splitters_per_tile = log_num_splitters_per_tile + 1; + } + + unsigned int grid_size() const + { + const unsigned int max_num_blocks = max_grid_size(block_size); + return thrust::min(num_splitters, max_num_blocks); + } + + __device__ __thrust_forceinline__ + unsigned int even_offset(unsigned int oddeven_blockid) const + { + return oddeven_blockid << (log_num_merged_splitters_per_tile + log_block_size); + } + + __device__ __thrust_forceinline__ + void get_partition(unsigned int partition_idx, unsigned int oddeven_blockid, + unsigned int &rank1, unsigned int &size1, + unsigned int &rank2, unsigned int &size2) const + { + // XXX this logic would be much improved if we were guaranteed that there was + // an element at ranks_first[1] + // XXX we could eliminate the need for local_blockIdx, log_num_merged_splitters_per_block, tile_size, and n + + // the index of the merged splitter within the splitters for the odd-even block pair. + unsigned int local_blockIdx = partition_idx - (oddeven_blockid< n) + { + size2 = n - tile_size - even_offset(oddeven_blockid); + } // end if + + // measure each array relative to its beginning + size1 -= rank1; + size2 -= rank2; + } + + template + __device__ __thrust_forceinline__ + void do_it(KeyType *s_keys, ValueType *s_values) + { + // advance iterators + unsigned int i = context.block_index(); + ranks_first1 += i; + ranks_first2 += i; + + // Thread Block i merges the sub-block associated with splitter i: rank[i] -> rank[i+1] in a particular odd-even block pair. 
+ for(; + i < num_splitters; + i += context.grid_dimension(), ranks_first1 += context.grid_dimension(), ranks_first2 += context.grid_dimension()) + { + // the (odd, even) block pair that the splitter belongs to. + unsigned int oddeven_blockid = i >> log_num_merged_splitters_per_tile; + + // rank1 & rank2 store rank[i] and rank[i+1] indices in arrays 1 and 2. + // size1 & size2 store the number of elements between rank[i] & rank[i+1] in arrays 1 & 2. + unsigned int rank1, rank2, size1, size2; + get_partition(i, oddeven_blockid, rank1, size1, rank2, size2); + + // find where the odd,even arrays begin + RandomAccessIterator1 even_keys_first = keys_first + even_offset(oddeven_blockid); + RandomAccessIterator1 odd_keys_first = even_keys_first + tile_size; + + RandomAccessIterator2 even_values_first = values_first + even_offset(oddeven_blockid); + RandomAccessIterator2 odd_values_first = even_values_first + tile_size; + + // load tiles into smem + copy_n(context, even_keys_first + rank1, even_values_first + rank1, size1, s_keys, s_values); + copy_n(context, odd_keys_first + rank2, odd_values_first + rank2, size2, s_keys + size1, s_values + size1); + + context.barrier(); + + // merge the arrays in-place + block::inplace_merge_by_key_n(context, s_keys, s_values, size1, size2, comp); + + context.barrier(); + + // write tiles to gmem + unsigned int dst_offset = even_offset(oddeven_blockid) + rank1 + rank2; + copy_n(context, s_keys, s_values, size1 + size2, keys_result + dst_offset, values_result + dst_offset); + + context.barrier(); + } // end for i + } + + __device__ __thrust_forceinline__ + void operator()(void) + { + typedef typename iterator_value::type KeyType; + typedef typename iterator_value::type ValueType; + + __shared__ uninitialized_array s_keys; + __shared__ uninitialized_array s_values; + + do_it(s_keys.data(), s_values.data()); + } +}; // merge_subtiles_by_key_closure + +// merge_subtiles_by_key() merges each sub-tile independently.
As explained in rank_splitters(), +// the sub-tiles are defined by the ranks of the splitter elements d_rank1 and d_rank2 in the odd and even tiles resp. +// It can be easily shown that each sub-tile cannot contain more than block_size elements of either the odd or even tile. + +// the function calls merge_subblocks_binarysearch_kernel() for the remaining N_splitterS sub-tiles +// We use 1 thread block per splitter: For instance, thread block 0 will merge rank1[0] -> rank1[1] of array i with +// rank2[0] -> rank2[1] of array i^1, with i being the thread block to which the splitter belongs. + +// We implement each sub-tile merge using a binary search. We compute the rank of each element belonging to a sub-tile +// of an odd numbered tile in the corresponding sub-tile of its even numbered pair. It then adds this rank to +// the index of the element in its own sub-tile to find the output index of the element in the merged sub-tile. + +template + void merge_subtiles_by_key(RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + RandomAccessIterator3 splitters_pos_first, + RandomAccessIterator3 splitters_pos_last, + RandomAccessIterator4 ranks_first1, + RandomAccessIterator5 ranks_first2, + RandomAccessIterator6 keys_result, + RandomAccessIterator7 values_result, + unsigned int log_tile_size, + StrictWeakOrdering comp) +{ + typedef typename iterator_value::type KeyType; + typedef typename iterator_value::type ValueType; + + const unsigned int block_size = stable_merge_sort_detail::block_size::value; + + typedef merge_subtiles_by_key_closure< + block_size, + RandomAccessIterator1, + RandomAccessIterator2, + RandomAccessIterator4, + RandomAccessIterator5, + RandomAccessIterator6, + RandomAccessIterator7, + StrictWeakOrdering, + detail::statically_blocked_thread_array + > Closure; + + Closure closure(keys_first, + values_first, + keys_last - keys_first, + ranks_first1, + ranks_first2, + log_tile_size, + 
splitters_pos_last - splitters_pos_first, + keys_result, + values_result, + comp); + + detail::launch_closure(closure, closure.grid_size(), block_size); +} + + +template + void merge_small_tiles_by_key(execution_policy &, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + size_t log_tile_size, + RandomAccessIterator3 keys_result, + RandomAccessIterator4 values_result, + StrictWeakOrdering comp) +{ + typedef merge_small_tiles_by_key_closure< + block_size, + RandomAccessIterator1, + RandomAccessIterator2, + RandomAccessIterator3, + RandomAccessIterator4, + StrictWeakOrdering, + detail::statically_blocked_thread_array + > Closure; + + Closure closure(keys_first, values_first, keys_last - keys_first, log_tile_size, keys_result, values_result, comp); + + detail::launch_closure(closure, closure.grid_size(), block_size); +} // end merge_small_tiles_by_key() + + +template + void merge_tiles_by_key_recursive(execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + RandomAccessIterator3 keys_result, + RandomAccessIterator4 values_result, + size_t log_tile_size, + StrictWeakOrdering comp) +{ + typedef typename iterator_value::type KeyType; + typedef typename iterator_value::type ValueType; + + const size_t tile_size = 1<::value; + + // Case (a): tile_size <= block_size + if(tile_size <= block_size) + { + return merge_small_tiles_by_key<2*block_size>(exec, keys_first, keys_last, values_first, log_tile_size, keys_result, values_result, comp); + } // end if + + // Case (b): tile_size > block_size + + // step 1 of the recursive case: gather one splitter per block_size entries in each odd-even tile pair.
+ thrust::detail::temporary_array splitters(exec, thrust::detail::util::divide_ri(keys_last - keys_first, block_size)); + static_strided_integer_range splitters_pos(splitters.size()); + thrust::gather(exec, splitters_pos.begin(), splitters_pos.end(), keys_first, splitters.begin()); + + // step 2 of the recursive case: merge the splitters & their positions + thrust::detail::temporary_array merged_splitters(exec, splitters.size()); + thrust::detail::temporary_array merged_splitters_pos(exec, splitters.size()); + + const unsigned int log_block_size = thrust::detail::mpl::math::log2::value; + size_t log_num_splitters_per_tile = log_tile_size - log_block_size; + merge_tiles_by_key_recursive(exec, + splitters.begin(), + splitters.end(), + splitters_pos.begin(), + merged_splitters.begin(), + merged_splitters_pos.begin(), + log_num_splitters_per_tile, + comp); + + // step 3 of the recursive case: find the ranks of each splitter in the respective two tiles. + // reuse the merged_splitters_pos storage + thrust::detail::temporary_array &rank1 = merged_splitters_pos; + thrust::detail::temporary_array rank2(exec, rank1.size()); + + rank_splitters(merged_splitters.begin(), + merged_splitters.end(), + merged_splitters_pos.begin(), + keys_first, + keys_last, + log_tile_size, + rank1.begin(), + rank2.begin(), + comp); + + // step 4 of the recursive case: merge each sub-tile independently in parallel. 
+ merge_subtiles_by_key(keys_first, + keys_last, + values_first, + merged_splitters_pos.begin(), + merged_splitters_pos.end(), + rank1.begin(), + rank2.begin(), + keys_result, + values_result, + log_tile_size, + comp); +} + + +template + void merge_tiles_by_key(execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator2 values_first, + size_t n, + RandomAccessIterator3 keys_result, + RandomAccessIterator4 values_result, + unsigned int log_tile_size, + StrictWeakOrdering comp) +{ + const unsigned int tile_size = 1 << log_tile_size; + const size_t num_tiles = thrust::detail::util::divide_ri(n, tile_size); + + // if there is an odd number of tiles, we should exclude the last one + // without a twin in merge_recursive + const size_t last_tile_offset = (num_tiles%2)?((num_tiles-1)*tile_size):n; + + merge_tiles_by_key_recursive(exec, + keys_first, + keys_first + last_tile_offset, + values_first, + keys_result, + values_result, + log_tile_size, + comp); + + // copy the last tile without a twin, should it exist + if(last_tile_offset < n) + { + thrust::copy(exec, keys_first + last_tile_offset, keys_first + n, keys_result + last_tile_offset); + thrust::copy(exec, values_first + last_tile_offset, values_first + n, values_result + last_tile_offset); + } // end if +} // end merge_tiles_by_key() + + +} // end stable_merge_sort_detail + + + +template +void stable_merge_sort(execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp) +{ + // XXX it's potentially unsafe to pass the same array for keys & values + thrust::system::cuda::detail::detail::stable_merge_sort_by_key(exec, first, last, first, comp); +} + + +template + void stable_merge_sort_by_key(execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp) +{ + typedef typename thrust::iterator_traits::value_type KeyType; + typedef typename 
thrust::iterator_traits::value_type ValueType; + + // compute the block_size based on the types we're sorting + const unsigned int block_size = stable_merge_sort_detail::block_size::value; + + // XXX WAR unused variable warning issued by nvcc + (void) block_size; + + // first, sort each tile of block_size elements + stable_sort_by_count(exec, keys_first, keys_last, values_first, comp); + + // merge tiles if there is more than one + const size_t n = keys_last - keys_first; + if(n > block_size) + { + // allocate scratch space + using namespace thrust::detail; + using namespace stable_merge_sort_detail; + temporary_array temp_keys(exec, n); + temporary_array temp_values(exec, n); + + // use a caching allocator for the calls to merge_tiles_by_key + // XXX unfortunately g++-4.2 can't deal with this special execution policy +#if defined(THRUST_GCC_VERSION) && THRUST_GCC_VERSION < 40300 + execution_policy &merge_exec = exec; +#else + cached_temporary_allocator merge_exec(exec); +#endif + + // The log(n) iterations start here. 
Each call to 'merge' merges an odd-even pair of tiles + unsigned int log_tile_size = thrust::detail::mpl::math::log2::value; + bool ping = true; + for(; (1u << log_tile_size) < n; ++log_tile_size, ping = !ping) + { + // we ping-pong back and forth + if(ping) + { + merge_tiles_by_key(merge_exec, keys_first, values_first, n, temp_keys.begin(), temp_values.begin(), log_tile_size, comp); + } // end if + else + { + merge_tiles_by_key(merge_exec, temp_keys.begin(), temp_values.begin(), n, keys_first, values_first, log_tile_size, comp); + } // end else + } // end for + + // this is to make sure that our data is finally in the data and keys arrays + // and not in the temporary arrays + if(!ping) + { + thrust::copy(exec, temp_keys.begin(), temp_keys.end(), keys_first); + thrust::copy(exec, temp_values.begin(), temp_values.end(), values_first); + } // end if + } // end if +} // end stable_merge_sort_by_key() + + +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END + diff --git a/compat/thrust/system/cuda/detail/detail/stable_primitive_sort.h b/compat/thrust/system/cuda/detail/detail/stable_primitive_sort.h new file mode 100644 index 0000000..8449a17 --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/stable_primitive_sort.h @@ -0,0 +1,54 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ + +template +void stable_primitive_sort(execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last); + +template +void stable_primitive_sort_by_key(execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first); + +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/detail/stable_primitive_sort.inl b/compat/thrust/system/cuda/detail/detail/stable_primitive_sort.inl new file mode 100644 index 0000000..d6f4c77 --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/stable_primitive_sort.inl @@ -0,0 +1,159 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ +namespace stable_primitive_sort_detail +{ + + +template + struct enable_if_bool_sort + : thrust::detail::enable_if< + thrust::detail::is_same< + bool, + typename thrust::iterator_value::type + >::value + > +{}; + + +template + struct disable_if_bool_sort + : thrust::detail::disable_if< + thrust::detail::is_same< + bool, + typename thrust::iterator_value::type + >::value + > +{}; + + +template + typename enable_if_bool_sort::type + stable_primitive_sort(execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last) +{ + // use stable_partition if we're sorting bool + // stable_partition puts true values first, so we need to logical_not + thrust::stable_partition(exec, first, last, thrust::logical_not()); +} + + +template + typename disable_if_bool_sort::type + stable_primitive_sort(execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last) +{ + // call stable_radix_sort + thrust::system::cuda::detail::detail::stable_radix_sort(exec,first,last); +} + + +struct logical_not_first +{ + template + __host__ __device__ + bool operator()(Tuple t) + { + return !thrust::get<0>(t); + } +}; + + +template + typename enable_if_bool_sort::type + stable_primitive_sort_by_key(execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first) +{ + // use stable_partition if we're sorting bool + // stable_partition puts true values first, so we need to logical_not + thrust::stable_partition(exec, + thrust::make_zip_iterator(thrust::make_tuple(keys_first, values_first)), + thrust::make_zip_iterator(thrust::make_tuple(keys_last, values_first)), + logical_not_first()); +} + + +template + typename disable_if_bool_sort::type + stable_primitive_sort_by_key(execution_policy &exec, + 
RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first) +{ + // call stable_radix_sort_by_key + thrust::system::cuda::detail::detail::stable_radix_sort_by_key(exec, keys_first, keys_last, values_first); +} + + + +} + +template +void stable_primitive_sort(execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last) +{ + thrust::system::cuda::detail::detail::stable_primitive_sort_detail::stable_primitive_sort(exec,first,last); +} + +template +void stable_primitive_sort_by_key(execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first) +{ + thrust::system::cuda::detail::detail::stable_primitive_sort_detail::stable_primitive_sort_by_key(exec, keys_first, keys_last, values_first); +} + +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/detail/stable_radix_sort.h b/compat/thrust/system/cuda/detail/detail/stable_radix_sort.h new file mode 100644 index 0000000..7a8b996 --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/stable_radix_sort.h @@ -0,0 +1,59 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! 
\file stable_radix_sort.h + * \brief Defines the interface for a stable radix sort implementation on CUDA + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ + +template +void stable_radix_sort(execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last); + +template +void stable_radix_sort_by_key(execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first); + +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/detail/stable_radix_sort.inl b/compat/thrust/system/cuda/detail/detail/stable_radix_sort.inl new file mode 100644 index 0000000..9ea1977 --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/stable_radix_sort.inl @@ -0,0 +1,220 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include + +// do not attempt to compile this file with any other compiler +#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC + +#include +#include +#include +#include + +#include +#include +#include +#include + + +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN + + +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ + +template +void stable_radix_sort(execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last) +{ + typedef typename thrust::iterator_value::type K; + + unsigned int num_elements = last - first; + + // ensure data is properly aligned + if (!thrust::detail::util::is_aligned(thrust::raw_pointer_cast(&*first), 2*sizeof(K))) + { + thrust::detail::temporary_array aligned_keys(exec, first, last); + stable_radix_sort(exec, aligned_keys.begin(), aligned_keys.end()); + thrust::copy(exec, aligned_keys.begin(), aligned_keys.end(), first); + return; + } + + thrust::system::cuda::detail::detail::b40c_thrust::RadixSortingEnactor sorter(num_elements); + thrust::system::cuda::detail::detail::b40c_thrust::RadixSortStorage storage; + + // allocate temporary buffers + thrust::detail::temporary_array temp_keys(exec, num_elements); + thrust::detail::temporary_array temp_spine(exec, sorter.SpineElements()); + thrust::detail::temporary_array temp_from_alt(exec, 2); + + // define storage + storage.d_keys = thrust::raw_pointer_cast(&*first); + storage.d_alt_keys = thrust::raw_pointer_cast(&temp_keys[0]); + storage.d_spine = thrust::raw_pointer_cast(&temp_spine[0]); + storage.d_from_alt_storage = thrust::raw_pointer_cast(&temp_from_alt[0]); + + // perform the sort + sorter.EnactSort(storage); + + // radix sort sometimes leaves results in the alternate buffers + if (storage.using_alternate_storage) + { + thrust::copy(exec, temp_keys.begin(), temp_keys.end(), first); + } +} + +/////////////////////// +// Key-Value Sorting // +/////////////////////// + +// sort values 
directly +template +void stable_radix_sort_by_key(execution_policy &exec, + RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + thrust::detail::true_type) +{ + typedef typename thrust::iterator_value::type K; + typedef typename thrust::iterator_value::type V; + + unsigned int num_elements = last1 - first1; + + // ensure data is properly aligned + if (!thrust::detail::util::is_aligned(thrust::raw_pointer_cast(&*first1), 2*sizeof(K))) + { + thrust::detail::temporary_array aligned_keys(exec, first1, last1); + stable_radix_sort_by_key(exec, aligned_keys.begin(), aligned_keys.end(), first2); + thrust::copy(exec, aligned_keys.begin(), aligned_keys.end(), first1); + return; + } + if (!thrust::detail::util::is_aligned(thrust::raw_pointer_cast(&*first2), 2*sizeof(V))) + { + thrust::detail::temporary_array aligned_values(exec, first2, first2 + num_elements); + stable_radix_sort_by_key(exec, first1, last1, aligned_values.begin()); + thrust::copy(exec, aligned_values.begin(), aligned_values.end(), first2); + return; + } + + thrust::system::cuda::detail::detail::b40c_thrust::RadixSortingEnactor sorter(num_elements); + thrust::system::cuda::detail::detail::b40c_thrust::RadixSortStorage storage; + + // allocate temporary buffers + thrust::detail::temporary_array temp_keys(exec, num_elements); + thrust::detail::temporary_array temp_values(exec, num_elements); + thrust::detail::temporary_array temp_spine(exec, sorter.SpineElements()); + thrust::detail::temporary_array temp_from_alt(exec, 2); + + // define storage + storage.d_keys = thrust::raw_pointer_cast(&*first1); + storage.d_values = thrust::raw_pointer_cast(&*first2); + storage.d_alt_keys = thrust::raw_pointer_cast(&temp_keys[0]); + storage.d_alt_values = thrust::raw_pointer_cast(&temp_values[0]); + storage.d_spine = thrust::raw_pointer_cast(&temp_spine[0]); + storage.d_from_alt_storage = thrust::raw_pointer_cast(&temp_from_alt[0]); + + // perform the sort + sorter.EnactSort(storage); 
+ + // radix sort sometimes leaves results in the alternate buffers + if (storage.using_alternate_storage) + { + thrust::copy(exec, temp_keys.begin(), temp_keys.end(), first1); + thrust::copy(exec, temp_values.begin(), temp_values.end(), first2); + } +} + + +// sort values indirectly +template +void stable_radix_sort_by_key(execution_policy &exec, + RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + thrust::detail::false_type) +{ + typedef typename thrust::iterator_value::type V; + + unsigned int num_elements = last1 - first1; + + // sort with integer values and then permute the real values accordingly + thrust::detail::temporary_array permutation(exec, num_elements); + thrust::sequence(exec, permutation.begin(), permutation.end()); + + stable_radix_sort_by_key(exec, first1, last1, permutation.begin()); + + // copy values into temp vector and then permute + thrust::detail::temporary_array temp_values(exec, first2, first2 + num_elements); + + // permute values + thrust::gather(exec, + permutation.begin(), permutation.end(), + temp_values.begin(), + first2); +} + + +template +void stable_radix_sort_by_key(execution_policy &exec, + RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2) +{ + typedef typename thrust::iterator_value::type V; + + // decide how to handle values + static const bool sort_values_directly = thrust::detail::is_trivial_iterator::value && + thrust::detail::is_arithmetic::value && + sizeof(V) <= 8; // TODO profile this + + // XXX WAR unused variable warning + (void) sort_values_directly; + + stable_radix_sort_by_key(exec, first1, last1, first2, + thrust::detail::integral_constant()); +} + +} // end namespace detail +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + + +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END + + +#endif // THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC + diff --git 
a/compat/thrust/system/cuda/detail/detail/stable_sort_by_count.h b/compat/thrust/system/cuda/detail/detail/stable_sort_by_count.h new file mode 100644 index 0000000..b563654 --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/stable_sort_by_count.h @@ -0,0 +1,53 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ + + +template +void stable_sort_by_count(execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + Compare comp); + + +} // end detail +} // end detail +} // end cuda +} // end system +} // end thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/detail/stable_sort_by_count.inl b/compat/thrust/system/cuda/detail/detail/stable_sort_by_count.inl new file mode 100644 index 0000000..5efb36b --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/stable_sort_by_count.inl @@ -0,0 +1,179 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include +#include +#include +#include +#include +#include +#include + + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ +namespace stable_sort_by_count_detail +{ + + +template +struct stable_sort_by_count_closure +{ + typedef Context context_type; + + RandomAccessIterator1 keys_first; + RandomAccessIterator2 values_first; + StrictWeakOrdering comp; // XXX this should probably be thrust::detail::device_function + const unsigned int n; + context_type context; + + stable_sort_by_count_closure(RandomAccessIterator1 keys_first, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp, + const unsigned int n, + context_type context = context_type()) + : keys_first(keys_first), + values_first(values_first), + comp(comp), + n(n), + context(context) + {} + + __device__ __thrust_forceinline__ + void operator()(void) + { + typedef typename iterator_value::type KeyType; + typedef typename iterator_value::type ValueType; + + __shared__ uninitialized_array s_keys; + __shared__ uninitialized_array s_data; + + const unsigned int grid_size = context.grid_dimension() * context.block_dimension(); + + // block_offset records the global index of this block's 0th thread + unsigned int block_offset = context.block_index() * block_size; + unsigned int i = context.thread_index() + block_offset; + + // advance iterators + keys_first += i; + values_first += i; + + for(; + block_offset < n; + block_offset += grid_size, i += grid_size, keys_first += grid_size, values_first += grid_size) + { + 
context.barrier(); + // copy input to shared + if(i < n) + { + s_keys[context.thread_index()] = *keys_first; + s_data[context.thread_index()] = *values_first; + } // end if + context.barrier(); + + // this block could be partially full + unsigned int length = block_size; + if(block_offset + block_size > n) + { + length = n - block_offset; + } // end if + + // run merge_sort over the block + block::merging_sort(context, s_keys.begin(), s_data.begin(), length, comp); + + // write result + if(i < n) + { + *keys_first = s_keys[context.thread_index()]; + *values_first = s_data[context.thread_index()]; + } // end if + } // end for i + } + + + static size_t max_grid_size() + { + const device_properties_t& properties = device_properties(); + + const unsigned int max_threads = properties.maxThreadsPerMultiProcessor * properties.multiProcessorCount; + const unsigned int max_blocks = properties.maxGridSize[0]; + + return thrust::min(max_blocks, 3 * max_threads / block_size); + } // end max_grid_size() + + + size_t grid_size() const + { + // compute the maximum number of blocks we can launch on this arch + const unsigned int max_num_blocks = max_grid_size(); + + // first, sort within each block + size_t num_blocks = n / block_size; + if(n % block_size) ++num_blocks; + + return thrust::min(num_blocks, max_num_blocks); + } // end grid_size() +}; // stable_sort_by_count_closure + + +} // end stable_sort_by_count_detail + + +template +void stable_sort_by_count(execution_policy &, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + Compare comp) +{ + typedef stable_sort_by_count_detail::stable_sort_by_count_closure< + count, + RandomAccessIterator1, + RandomAccessIterator2, + Compare, + detail::statically_blocked_thread_array + > Closure; + + Closure closure(keys_first, values_first, comp, keys_last - keys_first); + + // do an odd-even sort per block of data + detail::launch_closure(closure, closure.grid_size(), count); +} 
// end stable_sort_by_count() + + +} // end detail +} // end detail +} // end cuda +} // end system +} // end thrust + diff --git a/compat/thrust/system/cuda/detail/detail/uninitialized.h b/compat/thrust/system/cuda/detail/detail/uninitialized.h new file mode 100644 index 0000000..a3e3dd2 --- /dev/null +++ b/compat/thrust/system/cuda/detail/detail/uninitialized.h @@ -0,0 +1,261 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ + +/* Raw storage sized and aligned for one T; construct() placement-news a T into it and destroy() runs ~T(). */ +template + class uninitialized +{ + private: + typename aligned_storage< + sizeof(T), + alignment_of::value + >::type storage; + + __device__ __thrust_forceinline__ const T* ptr() const + { + return reinterpret_cast(storage.data); + } + + __device__ __thrust_forceinline__ T* ptr() + { + return reinterpret_cast(storage.data); + } + + public: + // assign a T value: converts *this to T& and then uses T's own operator= + __device__ __thrust_forceinline__ uninitialized &operator=(const T &other) + { + T& self = *this; + self = other; + return *this; + } + + __device__ __thrust_forceinline__ T& get() + { + return *ptr(); + } + + __device__ __thrust_forceinline__ const T& get() const + { + return *ptr(); + } + + __device__ __thrust_forceinline__ operator T& () + { + return get(); + } + + __device__ __thrust_forceinline__ operator const T&() const + { + return get(); + } + /* construct() overloads: placement-new a T at ptr(), passing zero to ten arguments taken by const reference */ +
__thrust_forceinline__ __device__ void construct() + { + ::new(ptr()) T(); + } + + template + __thrust_forceinline__ __device__ void construct(const Arg &a) + { + ::new(ptr()) T(a); + } + + template + __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2) + { + ::new(ptr()) T(a1,a2); + } + + template + __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3) + { + ::new(ptr()) T(a1,a2,a3); + } + + template + __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4) + { + ::new(ptr()) T(a1,a2,a3,a4); + } + + template + __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4, const Arg5 &a5) + { + ::new(ptr()) T(a1,a2,a3,a4,a5); + } + + template + __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4, const Arg5 &a5, const Arg6 &a6) + { + ::new(ptr()) T(a1,a2,a3,a4,a5,a6); + } + + template + __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4, const Arg5 &a5, const Arg6 &a6, const Arg7 &a7) + { + ::new(ptr()) T(a1,a2,a3,a4,a5,a6,a7); + } + + template + __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4, const Arg5 &a5, const Arg6 &a6, const Arg7 &a7, const Arg8 &a8) + { + ::new(ptr()) T(a1,a2,a3,a4,a5,a6,a7,a8); + } + + template + __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4, const Arg5 &a5, const Arg6 &a6, const Arg7 &a7, const Arg8 &a8, const Arg9 &a9) + { + ::new(ptr()) T(a1,a2,a3,a4,a5,a6,a7,a8,a9); + } + + template + __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4, const Arg5 &a5, const Arg6 &a6, const Arg7 &a7, const Arg8 &a8, const Arg9 &a9, const Arg10 &a10) + { + ::new(ptr()) 
T(a1,a2,a3,a4,a5,a6,a7,a8,a9,a10); + } + + __thrust_forceinline__ __device__ void destroy() + { + T& self = *this; + self.~T(); + } +}; + +/* Fixed-size array interface over an uninitialized member: size() is N, empty() is false, data() returns impl.get(). */ +template + class uninitialized_array +{ + public: + typedef T value_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + typedef pointer iterator; + typedef const_pointer const_iterator; + typedef std::size_t size_type; + + __thrust_forceinline__ __device__ iterator begin() + { + return data(); + } + + __thrust_forceinline__ __device__ const_iterator begin() const + { + return data(); + } + + __thrust_forceinline__ __device__ iterator end() + { + return begin() + size(); + } + + __thrust_forceinline__ __device__ const_iterator end() const + { + return begin() + size(); + } + + __thrust_forceinline__ __device__ const_iterator cbegin() const + { + return begin(); + } + + __thrust_forceinline__ __device__ const_iterator cend() const + { + return end(); + } + + __thrust_forceinline__ __device__ size_type size() const + { + return N; + } + + __thrust_forceinline__ __device__ bool empty() const + { + return false; + } + + __thrust_forceinline__ __device__ T* data() + { + return impl.get(); + } + + __thrust_forceinline__ __device__ const T* data() const + { + return impl.get(); + } + + // element access + __thrust_forceinline__ __device__ reference operator[](size_type n) + { + return data()[n]; + } + + __thrust_forceinline__ __device__ const_reference operator[](size_type n) const + { + return data()[n]; + } + + __thrust_forceinline__ __device__ reference front() + { + return *data(); + } + + __thrust_forceinline__ __device__ const_reference front() const + { + return *data(); + } + + __thrust_forceinline__ __device__ reference back() + { + return data()[size() - size_type(1)]; + } + + __thrust_forceinline__ __device__ const_reference back() const + { + return data()[size() - size_type(1)]; + } + + private: + uninitialized impl; +}; + + +} // end detail +} // end 
detail +} // end cuda +} // end system +} // end thrust + diff --git a/compat/thrust/system/cuda/detail/equal.h b/compat/thrust/system/cuda/detail/equal.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/equal.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/error.inl b/compat/thrust/system/cuda/detail/error.inl new file mode 100644 index 0000000..41b928f --- /dev/null +++ b/compat/thrust/system/cuda/detail/error.inl @@ -0,0 +1,95 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include +#include + +namespace thrust +{ + +namespace system +{ + +/* Wraps a CUDA errc value in an error_code bound to cuda_category(). */ +error_code make_error_code(cuda::errc::errc_t e) +{ + return error_code(static_cast(e), cuda_category()); +} // end make_error_code() + +/* Same as make_error_code(), but produces an error_condition. */ +error_condition make_error_condition(cuda::errc::errc_t e) +{ + return error_condition(static_cast(e), cuda_category()); +} // end make_error_condition() + + +namespace cuda +{ + +namespace detail +{ + +/* error_category whose name() is "cuda"; message() uses cudaGetErrorString() and falls back to "Unknown error" when that returns null. */ +class cuda_error_category + : public error_category +{ + public: + inline cuda_error_category(void) {} + + inline virtual const char *name(void) const + { + return "cuda"; + } + + inline virtual std::string message(int ev) const + { + static const std::string unknown_err("Unknown error"); + const char *c_str = ::cudaGetErrorString(static_cast(ev)); + return c_str ? std::string(c_str) : unknown_err; + } + /* codes below cudaErrorApiFailureBase become a cuda error_condition; anything else is delegated to system_category() */ + inline virtual error_condition default_error_condition(int ev) const + { + using namespace cuda::errc; + + if(ev < ::cudaErrorApiFailureBase) + { + return make_error_condition(static_cast(ev)); + } + + return system_category().default_error_condition(ev); + } +}; // end cuda_error_category + +} // end detail + +} // end namespace cuda + +/* Returns a reference to one function-local static cuda_error_category. */ +const error_category &cuda_category(void) +{ + static const cuda::detail::cuda_error_category result; + return result; +} + + +} // end namespace system + +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/execution_policy.h b/compat/thrust/system/cuda/detail/execution_policy.h new file mode 100644 index 0000000..7dae04c --- /dev/null +++ b/compat/thrust/system/cuda/detail/execution_policy.h @@ -0,0 +1,131 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +// put the canonical tag in the same ns as the backend's entry points +namespace detail +{ + +// this awkward sequence of definitions arises +// from the desire both for tag to derive +// from execution_policy and for execution_policy +// to convert to tag (when execution_policy is not +// an ancestor of tag) + +// forward declaration of tag +struct tag; + +// forward declaration of execution_policy +template struct execution_policy; + +// specialize execution_policy for tag +template<> + struct execution_policy + : thrust::execution_policy +{}; + +// tag's definition comes before the +// generic definition of execution_policy +struct tag : execution_policy {}; + +// allow conversion to tag when it is not a successor +template + struct execution_policy + : thrust::execution_policy +{ + // allow conversion to tag + inline operator tag () const + { + return tag(); + } +}; + +/* Execution policy holding references to two systems; rotate() returns the same pair with the systems swapped. */ +template + struct cross_system + : thrust::execution_policy > +{ + inline __host__ __device__ + cross_system(thrust::execution_policy &system1, + thrust::execution_policy &system2) + : system1(system1), system2(system2) + {} + + thrust::execution_policy &system1; + thrust::execution_policy &system2; + + inline __host__ __device__ + cross_system rotate() const + { + return cross_system(system2,system1); + } +}; + + +// overloads of select_system + +// cpp interop +template +inline __host__ __device__ +cross_system select_system(const execution_policy 
&system1, const thrust::cpp::execution_policy &system2) +{ + thrust::execution_policy &non_const_system1 = const_cast&>(system1); + thrust::cpp::execution_policy &non_const_system2 = const_cast&>(system2); + return cross_system(non_const_system1,non_const_system2); +} + +/* cpp-to-cuda direction; const_cast is applied because cross_system stores non-const references */ +template +inline __host__ __device__ +cross_system select_system(const thrust::cpp::execution_policy &system1, execution_policy &system2) +{ + thrust::cpp::execution_policy &non_const_system1 = const_cast&>(system1); + thrust::execution_policy &non_const_system2 = const_cast&>(system2); + return cross_system(non_const_system1,non_const_system2); +} + + +} // end detail + +// alias execution_policy and tag here +using thrust::system::cuda::detail::execution_policy; +using thrust::system::cuda::detail::tag; + +} // end cuda +} // end system + +// alias items at top-level +namespace cuda +{ + +using thrust::system::cuda::execution_policy; +using thrust::system::cuda::tag; + +} // end cuda +} // end thrust + diff --git a/compat/thrust/system/cuda/detail/extern_shared_ptr.h b/compat/thrust/system/cuda/detail/extern_shared_ptr.h new file mode 100644 index 0000000..5f34cc8 --- /dev/null +++ b/compat/thrust/system/cuda/detail/extern_shared_ptr.h @@ -0,0 +1,58 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +/* Stateless class; converting an instance to T* or const T* yields the extern __shared__ int4 array smem reinterpreted as that pointer type. */ +template + class extern_shared_ptr +{ +// don't attempt to compile with any compiler other than nvcc +// due to use of __shared__ below +#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC + public: + __device__ + inline operator T * (void) + { + extern __shared__ int4 smem[]; + return reinterpret_cast(smem); + } + + __device__ + inline operator const T * (void) const + { + extern __shared__ int4 smem[]; + return reinterpret_cast(smem); + } +#endif // THRUST_DEVICE_COMPILER_NVCC +}; // end extern_shared_ptr + +} // end detail +} // end cuda +} // end system +} // end thrust + diff --git a/compat/thrust/system/cuda/detail/extrema.h b/compat/thrust/system/cuda/detail/extrema.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/extrema.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/fill.h b/compat/thrust/system/cuda/detail/fill.h new file mode 100644 index 0000000..9c753bb --- /dev/null +++ b/compat/thrust/system/cuda/detail/fill.h @@ -0,0 +1,54 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file fill.h + * \brief Device implementation of fill. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +/* Declarations only: fill takes the range [first, last), fill_n takes a start iterator and a count n. */ +template + void fill(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + const T &value); + +template + OutputIterator fill_n(execution_policy &exec, + OutputIterator first, + Size n, + const T &value); + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include + + diff --git a/compat/thrust/system/cuda/detail/fill.inl b/compat/thrust/system/cuda/detail/fill.inl new file mode 100644 index 0000000..3c1feb8 --- /dev/null +++ b/compat/thrust/system/cuda/detail/fill.inl @@ -0,0 +1,178 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file fill.inl + * \brief Inline file for fill.h. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace detail +{ + + +template + WidePtr widen_raw_ptr(T *ptr) +{ + typedef thrust::detail::pointer_traits WideTraits; + typedef typename WideTraits::element_type WideT; + + // carefully widen the pointer to avoid warnings about conversions between differently aligned types on ARM + WideT *wide_raw_ptr = static_cast(static_cast(ptr)); + + return WideTraits::pointer_to(*wide_raw_ptr); +} + +/* Fills [first, first+n) with three generate() passes: a narrow head up to the first 128-byte boundary, a middle run written as WideType values, and a narrow tail. */ +template + Pointer wide_fill_n(execution_policy &exec, + Pointer first, + Size n, + const T &value) +{ + typedef typename thrust::iterator_value::type OutputType; + + size_t ALIGNMENT_BOUNDARY = 128; // begin copying blocks at this byte boundary + + WideType wide_exemplar; + OutputType narrow_exemplars[sizeof(WideType) / sizeof(OutputType)]; + + for (size_t i = 0; i < sizeof(WideType) / sizeof(OutputType); i++) + narrow_exemplars[i] = static_cast(value); + + // cast through char * to avoid type punning warnings + for (size_t i = 0; i < sizeof(WideType); i++) + reinterpret_cast(&wide_exemplar)[i] = reinterpret_cast(narrow_exemplars)[i]; + + OutputType *first_raw = thrust::raw_pointer_cast(first); + OutputType *last_raw = first_raw + n; + + OutputType *block_first_raw = (thrust::min)(first_raw + n, thrust::detail::util::align_up(first_raw, ALIGNMENT_BOUNDARY)); + OutputType *block_last_raw = 
(thrust::max)(block_first_raw, thrust::detail::util::align_down(last_raw, sizeof(WideType))); + + // rebind Pointer to WideType + typedef typename thrust::detail::rebind_pointer::type WidePtr; + + // point to the widened range + // XXX since we've got an execution policy, we probably don't even need to deal with rebinding pointers + WidePtr block_first_wide = widen_raw_ptr(block_first_raw); + WidePtr block_last_wide = widen_raw_ptr(block_last_raw); + + thrust::generate(exec, first, Pointer(block_first_raw), thrust::detail::fill_functor(value)); + thrust::generate(exec, block_first_wide, block_last_wide, thrust::detail::fill_functor(wide_exemplar)); + thrust::generate(exec, Pointer(block_last_raw), first + n, thrust::detail::fill_functor(value)); + + return first + n; +} + +template + OutputIterator fill_n(execution_policy &exec, + OutputIterator first, + Size n, + const T &value, + thrust::detail::false_type) +{ + thrust::detail::fill_functor func(value); + return thrust::generate_n(exec, first, n, func); +} +/* Wide path: taken when the raw pointer passes is_aligned(); otherwise it falls back to the generate_n overload above. */ +template + OutputIterator fill_n(execution_policy &exec, + OutputIterator first, + Size n, + const T &value, + thrust::detail::true_type) +{ + typedef typename thrust::iterator_traits::value_type OutputType; + + if ( thrust::detail::util::is_aligned(thrust::raw_pointer_cast(&*first)) ) + { + if (compute_capability() < 20) + { + // 32-bit writes are faster on G80 and GT200 + typedef unsigned int WideType; + wide_fill_n(exec, &*first, n, value); + } + else + { + // 64-bit writes are faster on Fermi + typedef unsigned long long WideType; + wide_fill_n(exec, &*first, n, value); + } + + return first + n; + } + else + { + return fill_n(exec, first, n, value, thrust::detail::false_type()); + } +} + +} // end detail + +template + OutputIterator fill_n(execution_policy &exec, + OutputIterator first, + Size n, + const T &value) +{ + typedef typename thrust::iterator_traits::value_type OutputType; + + // we're compiling with nvcc, launch a kernel + const bool 
use_wide_fill = thrust::detail::is_trivial_iterator::value + && thrust::detail::has_trivial_assign::value + && (sizeof(OutputType) == 1 || sizeof(OutputType) == 2 || sizeof(OutputType) == 4); + + // XXX WAR unused variable warning + (void)use_wide_fill; + + return detail::fill_n(exec, first, n, value, thrust::detail::integral_constant()); +} + +template + void fill(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + const T &value) +{ + thrust::system::cuda::detail::fill_n(exec, first, thrust::distance(first,last), value); +} // end fill() + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/find.h b/compat/thrust/system/cuda/detail/find.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/find.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/for_each.h b/compat/thrust/system/cuda/detail/for_each.h new file mode 100644 index 0000000..56be13b --- /dev/null +++ b/compat/thrust/system/cuda/detail/for_each.h @@ -0,0 +1,60 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file for_each.h + * \brief Defines the interface for a function that executes a + * function or functional for each value in a given range. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +/* Declarations only: for_each takes the range [first, last), for_each_n takes a start iterator and a count n; both return an iterator. */ +template + RandomAccessIterator for_each(execution_policy &s, + RandomAccessIterator first, + RandomAccessIterator last, + UnaryFunction f); + +template + RandomAccessIterator for_each_n(execution_policy &s, + RandomAccessIterator first, + Size n, + UnaryFunction f); + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include + + diff --git a/compat/thrust/system/cuda/detail/for_each.inl b/compat/thrust/system/cuda/detail/for_each.inl new file mode 100644 index 0000000..be6e561 --- /dev/null +++ b/compat/thrust/system/cuda/detail/for_each.inl @@ -0,0 +1,199 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file for_each.inl + * \brief Inline file for for_each.h. 
+ */ + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace for_each_n_detail +{ + +/* Kernel body: each thread starts at its linear index and applies f to every grid_size-th element until the index reaches n. */ +template +struct for_each_n_closure +{ + typedef void result_type; + typedef Context context_type; + + RandomAccessIterator first; + Size n; + thrust::detail::device_function f; + Context context; + + for_each_n_closure(RandomAccessIterator first, + Size n, + UnaryFunction f, + Context context = Context()) + : first(first), n(n), f(f), context(context) + {} + + __device__ __thrust_forceinline__ + result_type operator()(void) + { + const Size grid_size = context.block_dimension() * context.grid_dimension(); + + Size i = context.linear_index(); + + // advance iterator + first += i; + + while(i < n) + { + f(*first); + i += grid_size; + first += grid_size; + } + } +}; // end for_each_n_closure + +/* Launch shape from launch_calculator::with_variable_block_size(): its block size, and its block count capped at divide_ri(n, block_size). 
*/ +template +thrust::tuple configure_launch(Size n) +{ + // calculate launch configuration + detail::launch_calculator calculator; + + thrust::tuple config = calculator.with_variable_block_size(); + size_t max_blocks = thrust::get<0>(config); + size_t block_size = thrust::get<1>(config); + size_t num_blocks = thrust::min(max_blocks, thrust::detail::util::divide_ri(n, block_size)); + + return thrust::make_tuple(num_blocks, block_size); +} + + +template +bool use_big_closure(Size n, unsigned int little_grid_size) +{ + // use the big closure when n will not fit within an unsigned int + // or if incrementing an unsigned int by little_grid_size would overflow + // the counter + + Size threshold = std::numeric_limits::max(); + + bool result = (sizeof(Size) > sizeof(unsigned int)) && (n > threshold); + + if(!result) + { + // check if we'd overflow the little closure's counter + unsigned int little_n = static_cast(n); + + if((little_n - 1u) + little_grid_size < little_n) + { + result = true; + } + } + + return result; +} + + +} // end 
for_each_n_detail + + +template +RandomAccessIterator for_each_n(execution_policy &, + RandomAccessIterator first, + Size n, + UnaryFunction f) +{ + // we're attempting to launch a kernel, assert we're compiling with nvcc + // ======================================================================== + // X Note to the user: If you've found this line due to a compiler error, X + // X you need to compile your code using nvcc, rather than g++ or cl.exe X + // ======================================================================== + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + + if(n <= 0) return first; // empty range + + // create two candidate closures to implement the for_each + // choose between them based on the whether we can fit n into a smaller integer + // and whether or not we'll overflow the closure's counter + + typedef detail::blocked_thread_array Context; + typedef for_each_n_detail::for_each_n_closure BigClosure; + typedef for_each_n_detail::for_each_n_closure LittleClosure; + + BigClosure big_closure(first, n, f); + LittleClosure little_closure(first, static_cast(n), f); + + thrust::tuple little_config = for_each_n_detail::configure_launch(n); + + unsigned int little_grid_size = thrust::get<0>(little_config) * thrust::get<1>(little_config); + + if(for_each_n_detail::use_big_closure(n, little_grid_size)) + { + // launch the big closure + thrust::tuple big_config = for_each_n_detail::configure_launch(n); + detail::launch_closure(big_closure, thrust::get<0>(big_config), thrust::get<1>(big_config)); + } + else + { + // launch the little closure + detail::launch_closure(little_closure, thrust::get<0>(little_config), thrust::get<1>(little_config)); + } + + return first + n; +} + +/* Range form: forwards to for_each_n with n = thrust::distance(first, last). 
*/ +template + InputIterator for_each(execution_policy &exec, + InputIterator first, + InputIterator last, + UnaryFunction f) +{ + return cuda::detail::for_each_n(exec, first, thrust::distance(first,last), f); +} // end for_each() + + +} // end namespace 
detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/gather.h b/compat/thrust/system/cuda/detail/gather.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/gather.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/generate.h b/compat/thrust/system/cuda/detail/generate.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/generate.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/get_value.h b/compat/thrust/system/cuda/detail/get_value.h new file mode 100644 index 0000000..273023f --- /dev/null +++ b/compat/thrust/system/cuda/detail/get_value.h @@ -0,0 +1,93 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + + +namespace +{ + +/* Reads the value at ptr: host code copies it out through assign_value(), device code dereferences the raw pointer; __CUDA_ARCH__ selects the path. */ +template +inline __host__ __device__ + typename thrust::iterator_value::type + get_value_msvc2005_war(execution_policy &exec, Pointer ptr) +{ + typedef typename thrust::iterator_value::type result_type; + + // XXX war nvbugs/881631 + struct war_nvbugs_881631 + { + __host__ inline static result_type host_path(execution_policy &exec, Pointer ptr) + { + // when called from host code, implement with assign_value + // note that this requires a type with default constructor + result_type result; + + thrust::host_system_tag host_tag; + cross_system systems(host_tag, exec); + assign_value(systems, &result, ptr); + + return result; + } + + __device__ inline static result_type device_path(execution_policy &, Pointer ptr) + { + // when called from device code, just do simple deref + return *thrust::raw_pointer_cast(ptr); + } + }; + +#ifndef __CUDA_ARCH__ + return 
war_nvbugs_881631::host_path(exec, ptr); +#else + return war_nvbugs_881631::device_path(exec, ptr); +#endif // __CUDA_ARCH__ +} // end get_value_msvc2005_war() + + +} // end anon namespace + +/* Entry point in this namespace; forwards to get_value_msvc2005_war(). */ +template +inline __host__ __device__ + typename thrust::iterator_value::type + get_value(execution_policy &exec, Pointer ptr) +{ + return get_value_msvc2005_war(exec,ptr); +} // end get_value() + + +} // end detail +} // end cuda +} // end system +} // end thrust + diff --git a/compat/thrust/system/cuda/detail/guarded_cuda_runtime_api.h b/compat/thrust/system/cuda/detail/guarded_cuda_runtime_api.h new file mode 100644 index 0000000..e6c0d28 --- /dev/null +++ b/compat/thrust/system/cuda/detail/guarded_cuda_runtime_api.h @@ -0,0 +1,39 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include + +// the purpose of this header is to check for the existence of macros +// such as __host__ and __device__, which may already be defined by thrust +// and to undefine them before entering cuda_runtime_api.h (which will redefine them) + +// we only try to do this stuff if cuda/include/host_defines.h has not been included +#if !defined(__HOST_DEFINES_H__) + +#ifdef __host__ +#undef __host__ +#endif // __host__ + +#ifdef __device__ +#undef __device__ +#endif // __device__ + +#endif // __HOST_DEFINES_H__ + +#include + diff --git a/compat/thrust/system/cuda/detail/inner_product.h b/compat/thrust/system/cuda/detail/inner_product.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/inner_product.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/iter_swap.h b/compat/thrust/system/cuda/detail/iter_swap.h new file mode 100644 index 0000000..9b2bcf0 --- /dev/null +++ b/compat/thrust/system/cuda/detail/iter_swap.h @@ -0,0 +1,65 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + + +template +inline __host__ __device__ +void iter_swap(tag, Pointer1 a, Pointer2 b) +{ + // XXX war nvbugs/881631 + struct war_nvbugs_881631 + { + __host__ inline static void host_path(Pointer1 a, Pointer2 b) + { + thrust::swap_ranges(a, a + 1, b); + } + + __device__ inline static void device_path(Pointer1 a, Pointer2 b) + { + using thrust::swap; + swap(*thrust::raw_pointer_cast(a), + *thrust::raw_pointer_cast(b)); + } + }; + +#ifndef __CUDA_ARCH__ + return war_nvbugs_881631::host_path(a,b); +#else + return war_nvbugs_881631::device_path(a,b); +#endif // __CUDA_ARCH__ +} // end iter_swap() + + +} // end detail +} // end cuda +} // end system +} // end thrust + diff --git a/compat/thrust/system/cuda/detail/logical.h b/compat/thrust/system/cuda/detail/logical.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/logical.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/malloc_and_free.h b/compat/thrust/system/cuda/detail/malloc_and_free.h new file mode 100644 index 0000000..676dd7c --- /dev/null +++ b/compat/thrust/system/cuda/detail/malloc_and_free.h @@ -0,0 +1,71 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + + +// note that malloc returns a raw pointer to avoid +// depending on the heavyweight thrust/system/cuda/memory.h header +template + void *malloc(execution_policy &, std::size_t n) +{ + void *result = 0; + + cudaError_t error = cudaMalloc(reinterpret_cast(&result), n); + + if(error) + { + throw thrust::system::detail::bad_alloc(thrust::cuda_category().message(error).c_str()); + } // end if + + return result; +} // end malloc() + + +template + void free(execution_policy &, Pointer ptr) +{ + cudaError_t error = cudaFree(thrust::raw_pointer_cast(ptr)); + + if(error) + { + throw thrust::system_error(error, thrust::cuda_category()); + } // end error +} // end free() + + +} // end detail +} // end cuda +} // end system +} // end thrust + diff --git a/compat/thrust/system/cuda/detail/memory.inl b/compat/thrust/system/cuda/detail/memory.inl new file mode 100644 index 0000000..998b54e --- /dev/null +++ b/compat/thrust/system/cuda/detail/memory.inl @@ -0,0 +1,94 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +namespace thrust +{ + +// XXX WAR an issue with MSVC 2005 (cl v14.00) incorrectly implementing +// pointer_raw_pointer for pointer by specializing it here +// note that we specialize it here, before the use of raw_pointer_cast +// below, which causes pointer_raw_pointer's instantiation +#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && (_MSC_VER <= 1400) +namespace detail +{ + +template + struct pointer_raw_pointer< thrust::cuda::pointer > +{ + typedef typename thrust::cuda::pointer::raw_pointer type; +}; // end pointer_raw_pointer + +} // end detail +#endif + +namespace system +{ +namespace cuda +{ + + +template + template + reference & + reference + ::operator=(const reference &other) +{ + return super_t::operator=(other); +} // end reference::operator=() + +template + reference & + reference + ::operator=(const value_type &x) +{ + return super_t::operator=(x); +} // end reference::operator=() + +template +__host__ __device__ +void swap(reference a, reference b) +{ + a.swap(b); +} // end swap() + +pointer malloc(std::size_t n) +{ + tag cuda_tag; + return pointer(thrust::system::cuda::detail::malloc(cuda_tag, n)); +} // end malloc() + +template +pointer malloc(std::size_t n) +{ + pointer raw_ptr = thrust::system::cuda::malloc(sizeof(T) * n); + return pointer(reinterpret_cast(raw_ptr.get())); +} // end malloc() + +void free(pointer ptr) +{ + tag cuda_tag; + return thrust::system::cuda::detail::free(cuda_tag, ptr.get()); +} // end free() + +} // end cuda +} // end system +} // end thrust + diff --git a/compat/thrust/system/cuda/detail/merge.h b/compat/thrust/system/cuda/detail/merge.h new file mode 100644 index 0000000..e01b705 --- /dev/null +++ b/compat/thrust/system/cuda/detail/merge.h @@ -0,0 +1,50 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + +template + RandomAccessIterator3 merge(execution_policy &exec, + RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + RandomAccessIterator2 last2, + RandomAccessIterator3 result, + StrictWeakOrdering comp); + +} // end detail +} // end cuda +} // end system +} // end thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/merge.inl b/compat/thrust/system/cuda/detail/merge.inl new file mode 100644 index 0000000..bf7516f --- /dev/null +++ b/compat/thrust/system/cuda/detail/merge.inl @@ -0,0 +1,285 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace merge_detail +{ + + +template +__device__ __thrust_forceinline__ +thrust::pair + partition_search(RandomAccessIterator1 first1, + RandomAccessIterator2 first2, + Size diag, + Size lower_bound1, + Size upper_bound1, + Size lower_bound2, + Size upper_bound2, + Compare comp) +{ + Size begin = thrust::max(lower_bound1, diag - upper_bound2); + Size end = thrust::min(diag - lower_bound2, upper_bound1); + + while(begin < end) + { + Size mid = (begin + end) / 2; + Size index1 = mid; + Size index2 = diag - mid - 1; + + if(comp(first2[index2], first1[index1])) + { + end = mid; + } + else + { + begin = mid + 1; + } + } + + return thrust::make_pair(begin, diag - begin); +} + + +template +__device__ __thrust_forceinline__ +void merge_n(Context &ctx, + RandomAccessIterator1 first1, + Size n1, + RandomAccessIterator2 first2, + Size n2, + RandomAccessIterator3 result, + Compare comp_, + unsigned int work_per_thread) +{ + const unsigned int block_size = ctx.block_dimension(); + thrust::detail::device_function comp(comp_); + typedef typename thrust::iterator_value::type value_type1; + typedef typename thrust::iterator_value::type value_type2; + + Size result_size = n1 + n2; + + // this is just oversubscription_rate * block_size * work_per_thread + // but it makes no sense to send oversubscription_rate as an extra parameter + Size work_per_block = thrust::detail::util::divide_ri(result_size, ctx.grid_dimension()); + + using thrust::system::cuda::detail::detail::uninitialized; + __shared__ uninitialized > s_block_input_begin; + + Size result_block_offset = ctx.block_index() * work_per_block; + + // find where this block's input begins in both input sequences + if(ctx.thread_index() == 0) + { + s_block_input_begin = (ctx.block_index() == 0) ? 
+ thrust::pair(0,0) : + partition_search(first1, first2, + result_block_offset, + Size(0), n1, + Size(0), n2, + comp); + } + + ctx.barrier(); + + // iterate to consume this block's input + Size work_per_iteration = block_size * work_per_thread; + thrust::pair block_input_end = s_block_input_begin; + block_input_end.first += work_per_iteration; + block_input_end.second += work_per_iteration; + Size result_block_offset_last = result_block_offset + thrust::min(work_per_block, result_size - result_block_offset); + + for(; + result_block_offset < result_block_offset_last; + result_block_offset += work_per_iteration, + block_input_end.first += work_per_iteration, + block_input_end.second += work_per_iteration + ) + { + // find where this thread's input begins in both input sequences for this iteration + thrust::pair thread_input_begin = + partition_search(first1, first2, + Size(result_block_offset + ctx.thread_index() * work_per_thread), + s_block_input_begin.get().first, thrust::min(block_input_end.first , n1), + s_block_input_begin.get().second, thrust::min(block_input_end.second, n2), + comp); + + ctx.barrier(); + + // XXX the performance impact of not keeping x1 & x2 + // in registers is about 10% for int32 + uninitialized x1; + uninitialized x2; + + // XXX this is just a serial merge -- try to simplify or abstract this loop + Size i = result_block_offset + ctx.thread_index() * work_per_thread; + Size last_i = i + thrust::min(work_per_thread, result_size - thread_input_begin.first - thread_input_begin.second); + for(; + i < last_i; + ++i) + { + // optionally load x1 & x2 + bool output_x2 = true; + if(thread_input_begin.second < n2) + { + x2 = first2[thread_input_begin.second]; + } + else + { + output_x2 = false; + } + + if(thread_input_begin.first < n1) + { + x1 = first1[thread_input_begin.first]; + + if(output_x2) + { + output_x2 = comp(x2.get(), x1.get()); + } + } + + result[i] = output_x2 ? 
x2.get() : x1.get(); + + if(output_x2) + { + ++thread_input_begin.second; + } + else + { + ++thread_input_begin.first; + } + } // end for + + // the block's last thread has conveniently located the + // beginning of the next iteration's input + if(ctx.thread_index() == block_size-1) + { + s_block_input_begin = thread_input_begin; + } + ctx.barrier(); + } // end for +} // end merge_n + + +template + struct merge_n_closure +{ + typedef thrust::system::cuda::detail::detail::blocked_thread_array context_type; + + RandomAccessIterator1 first1; + Size n1; + RandomAccessIterator2 first2; + Size n2; + RandomAccessIterator3 result; + Compare comp; + Size work_per_thread; + + merge_n_closure(RandomAccessIterator1 first1, Size n1, RandomAccessIterator2 first2, Size n2, RandomAccessIterator3 result, Compare comp, Size work_per_thread) + : first1(first1), n1(n1), first2(first2), n2(n2), result(result), comp(comp), work_per_thread(work_per_thread) + {} + + __device__ __forceinline__ + void operator()() + { + context_type ctx; + merge_n(ctx, first1, n1, first2, n2, result, comp, work_per_thread); + } +}; + + +// returns (work_per_thread, threads_per_block, oversubscription_factor) +template + thrust::tuple + tunables(RandomAccessIterator1, RandomAccessIterator1, RandomAccessIterator2, RandomAccessIterator2, RandomAccessIterator3, Compare comp) +{ + // determined by empirical testing on GTX 480 + // ~4500 Mkeys/s on GTX 480 + const unsigned int work_per_thread = 5; + const unsigned int threads_per_block = 128; + const unsigned int oversubscription_factor = 30; + + return thrust::make_tuple(work_per_thread, threads_per_block, oversubscription_factor); +} + + +} // end merge_detail + + +template +RandomAccessIterator3 merge(execution_policy &exec, + RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + RandomAccessIterator2 last2, + RandomAccessIterator3 result, + Compare comp) +{ + typedef typename thrust::iterator_difference::type Size; + 
Size n1 = last1 - first1; + Size n2 = last2 - first2; + typename thrust::iterator_difference::type n = n1 + n2; + + // empty result + if(n <= 0) return result; + + unsigned int work_per_thread = 0, threads_per_block = 0, oversubscription_factor = 0; + thrust::tie(work_per_thread,threads_per_block,oversubscription_factor) + = merge_detail::tunables(first1, last1, first2, last2, result, comp); + + const unsigned int work_per_block = work_per_thread * threads_per_block; + + const unsigned int num_processors = device_properties().multiProcessorCount; + const unsigned int num_blocks = thrust::min(oversubscription_factor * num_processors, thrust::detail::util::divide_ri(n, work_per_block)); + + typedef merge_detail::merge_n_closure closure_type; + closure_type closure(first1, n1, first2, n2, result, comp, work_per_thread); + + detail::launch_closure(closure, num_blocks, threads_per_block); + + return result + n1 + n2; +} // end merge() + + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/mismatch.h b/compat/thrust/system/cuda/detail/mismatch.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/mismatch.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/par.h b/compat/thrust/system/cuda/detail/par.h new file mode 100644 index 0000000..e56128c --- /dev/null +++ b/compat/thrust/system/cuda/detail/par.h @@ -0,0 +1,66 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + + +struct par_t : thrust::system::cuda::detail::execution_policy +{ + par_t() : thrust::system::cuda::detail::execution_policy() {} + + template + thrust::detail::execute_with_allocator + operator()(Allocator &alloc) const + { + return thrust::detail::execute_with_allocator(alloc); + } +}; + + +} // end detail + + +static const detail::par_t par; + + +} // end cuda +} // end system + + +// alias par here +namespace cuda +{ + + +using thrust::system::cuda::par; + + +} // end cuda +} // end thrust + diff --git a/compat/thrust/system/cuda/detail/partition.h b/compat/thrust/system/cuda/detail/partition.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/partition.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/reduce.h b/compat/thrust/system/cuda/detail/reduce.h new file mode 100644 index 0000000..d188f60 --- /dev/null +++ b/compat/thrust/system/cuda/detail/reduce.h @@ -0,0 +1,54 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file reduce.h + * \brief Reduce a sequence of elements with a given length. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + + +template + OutputType reduce(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputType init, + BinaryFunction binary_op); + + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/reduce.inl b/compat/thrust/system/cuda/detail/reduce.inl new file mode 100644 index 0000000..66b4ac7 --- /dev/null +++ b/compat/thrust/system/cuda/detail/reduce.inl @@ -0,0 +1,275 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file reduce.inl + * \brief Inline file for reduce.h + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + +namespace reduce_detail +{ + +/* + * Reduce a vector of n elements using binary_op() + * + * The order of reduction is not defined, so binary_op() should + * be a commutative (and associative) operator such as + * (integer) addition. Since floating point operations + * do not completely satisfy these criteria, the result is + * generally not the same as a consecutive reduction of + * the elements. 
+ * + * Uses the same pattern as reduce6() in the CUDA SDK + * + */ +template +struct unordered_reduce_closure +{ + InputIterator input; + Size n; + T init; + OutputIterator output; + BinaryFunction binary_op; + unsigned int shared_array_size; + + typedef Context context_type; + context_type context; + + unordered_reduce_closure(InputIterator input, Size n, T init, OutputIterator output, BinaryFunction binary_op, unsigned int shared_array_size, Context context = Context()) + : input(input), n(n), init(init), output(output), binary_op(binary_op), shared_array_size(shared_array_size), context(context) {} + + __device__ __thrust_forceinline__ + void operator()(void) + { + typedef typename thrust::iterator_value::type OutputType; + extern_shared_ptr shared_array; + + Size grid_size = context.block_dimension() * context.grid_dimension(); + + Size i = context.linear_index(); + + input += i; + + // compute reduction with all blockDim.x threads + OutputType sum = thrust::raw_reference_cast(*input); + + i += grid_size; + input += grid_size; + + while (i < n) + { + OutputType val = thrust::raw_reference_cast(*input); + + sum = binary_op(sum, val); + + i += grid_size; + input += grid_size; + } + + // write first shared_array_size values into shared memory + if (context.thread_index() < shared_array_size) + shared_array[context.thread_index()] = sum; + + // accumulate remaining values (if any) to shared memory in stages + if (context.block_dimension() > shared_array_size) + { + unsigned int lb = shared_array_size; + unsigned int ub = shared_array_size + lb; + + while (lb < context.block_dimension()) + { + context.barrier(); + + if (lb <= context.thread_index() && context.thread_index() < ub) + { + OutputType tmp = shared_array[context.thread_index() - lb]; + shared_array[context.thread_index() - lb] = binary_op(tmp, sum); + } + + lb += shared_array_size; + ub += shared_array_size; + } + } + + context.barrier(); + + block::reduce_n(context, shared_array, 
thrust::min(context.block_dimension(), shared_array_size), binary_op); + + if (context.thread_index() == 0) + { + OutputType tmp = shared_array[0]; + + if (context.grid_dimension() == 1) + tmp = binary_op(init, tmp); + + output += context.block_index(); + *output = tmp; + } + } +}; + + +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN + +template + OutputType reduce(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputType init, + BinaryFunction binary_op) +{ + // we're attempting to launch a kernel, assert we're compiling with nvcc + // ======================================================================== + // X Note to the user: If you've found this line due to a compiler error, X + // X you need to compile your code using nvcc, rather than g++ or cl.exe X + // ======================================================================== + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + + typedef typename thrust::iterator_difference::type difference_type; + + difference_type n = thrust::distance(first,last); + + if (n == 0) + return init; + + typedef thrust::detail::temporary_array OutputArray; + typedef typename OutputArray::iterator OutputIterator; + + typedef detail::blocked_thread_array Context; + typedef unordered_reduce_closure Closure; + + function_attributes_t attributes = detail::closure_attributes(); + + // TODO chose this in a more principled manner + size_t threshold = thrust::max(2 * attributes.maxThreadsPerBlock, 1024); + + device_properties_t properties = device_properties(); + + // launch configuration + size_t num_blocks; + size_t block_size; + size_t array_size; + size_t smem_bytes; + + // first level reduction + if (static_cast(n) < threshold) + { + num_blocks = 1; + block_size = thrust::min(static_cast(n), static_cast(attributes.maxThreadsPerBlock)); + array_size = thrust::min(block_size, (properties.sharedMemPerBlock - attributes.sharedSizeBytes) / sizeof(OutputType)); + smem_bytes = 
sizeof(OutputType) * array_size; + } + else + { + detail::launch_calculator calculator; + + thrust::tuple config = calculator.with_variable_block_size_available_smem(); + + num_blocks = thrust::min(thrust::get<0>(config), static_cast(n) / thrust::get<1>(config)); + block_size = thrust::get<1>(config); + array_size = thrust::min(block_size, thrust::get<2>(config) / sizeof(OutputType)); + smem_bytes = sizeof(OutputType) * array_size; + } + + // TODO assert(n <= num_blocks * block_size); + // TODO if (shared_array_size < 1) throw cuda exception "insufficient shared memory" + + OutputArray output(exec, num_blocks); + + Closure closure(first, n, init, output.begin(), binary_op, array_size); + + //std::cout << "Launching " << num_blocks << " blocks of kernel with " << block_size << " threads and " << smem_bytes << " shared memory per block " << std::endl; + + detail::launch_closure(closure, num_blocks, block_size, smem_bytes); + + // second level reduction + if (num_blocks > 1) + { + typedef detail::blocked_thread_array Context; + typedef unordered_reduce_closure Closure; + + function_attributes_t attributes = detail::closure_attributes(); + + num_blocks = 1; + block_size = thrust::min(output.size(), static_cast(attributes.maxThreadsPerBlock)); + array_size = thrust::min(block_size, (properties.sharedMemPerBlock - attributes.sharedSizeBytes) / sizeof(OutputType)); + smem_bytes = sizeof(OutputType) * array_size; + + // TODO if (shared_array_size < 1) throw cuda exception "insufficient shared memory" + + Closure closure(output.begin(), output.size(), init, output.begin(), binary_op, array_size); + + //std::cout << "Launching " << num_blocks << " blocks of kernel with " << block_size << " threads and " << smem_bytes << " shared memory per block " << std::endl; + + detail::launch_closure(closure, num_blocks, block_size, smem_bytes); + } + + return output[0]; +} // end reduce + +} // end reduce_detail + +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END + +template + 
OutputType reduce(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputType init, + BinaryFunction binary_op) +{ + return reduce_detail::reduce(exec, first, last, init, binary_op); +} // end reduce() + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/reduce_by_key.h b/compat/thrust/system/cuda/detail/reduce_by_key.h new file mode 100644 index 0000000..9b8ec10 --- /dev/null +++ b/compat/thrust/system/cuda/detail/reduce_by_key.h @@ -0,0 +1,59 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! 
\file reduce_by_key.h + * \brief CUDA implementation of reduce_by_key + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + +template + thrust::pair + reduce_by_key(execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred, + BinaryFunction binary_op); + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/reduce_by_key.inl b/compat/thrust/system/cuda/detail/reduce_by_key.inl new file mode 100644 index 0000000..18dc1e4 --- /dev/null +++ b/compat/thrust/system/cuda/detail/reduce_by_key.inl @@ -0,0 +1,705 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace reduce_by_key_detail +{ + +template +struct tail_flag_functor +{ + BinaryPredicate binary_pred; // NB: this must be the first member for performance reasons + IndexType n; + + typedef FlagType result_type; + + tail_flag_functor(IndexType n, BinaryPredicate binary_pred) + : n(n), binary_pred(binary_pred) + {} + + // XXX why is this noticably faster? (it may read past the end of input) + //FlagType operator()(const thrust::tuple& t) const + + template + __host__ __device__ __thrust_forceinline__ + FlagType operator()(const Tuple& t) + { + if (thrust::get<0>(t) == (n - 1) || !binary_pred(thrust::get<1>(t), thrust::get<2>(t))) + return 1; + else + return 0; + } +}; + + +template +__device__ __thrust_forceinline__ +FlagType load_flags(Context context, + const unsigned int n, + FlagIterator iflags, + FlagType (&sflag)[CTA_SIZE]) +{ + FlagType flag_bits = 0; + + // load flags in unordered fashion + for(unsigned int k = 0; k < K; k++) + { + const unsigned int offset = k*CTA_SIZE + context.thread_index(); + + if (FullBlock || offset < n) + { + FlagIterator temp = iflags + offset; + if (*temp) + flag_bits |= FlagType(1) << k; + } + } + + sflag[context.thread_index()] = flag_bits; + + context.barrier(); + + flag_bits = 0; + + // obtain flags for iflags[K * context.thread_index(), K * context.thread_index() + K) + for(unsigned int k = 0; k < K; k++) + { + const unsigned int offset = K * context.thread_index() + k; + + if (FullBlock || offset < n) + { + flag_bits |= ((sflag[offset % CTA_SIZE] >> (offset / CTA_SIZE)) & FlagType(1)) << k; + } + } + + context.barrier(); + + 
sflag[context.thread_index()] = flag_bits; + + context.barrier(); + + return flag_bits; +} + +template +__device__ __thrust_forceinline__ +void load_values(Context context, + const unsigned int n, + InputIterator2 ivals, + ValueType (&sdata)[K][CTA_SIZE + 1]) +{ + for(unsigned int k = 0; k < K; k++) + { + const unsigned int offset = k*CTA_SIZE + context.thread_index(); + + if (FullBlock || offset < n) + { + InputIterator2 temp = ivals + offset; + sdata[offset % K][offset / K] = *temp; + } + } + + context.barrier(); +} + + +template +__device__ __thrust_forceinline__ +void reduce_by_key_body(Context context, + const unsigned int n, + InputIterator1 ikeys, + InputIterator2 ivals, + OutputIterator1 okeys, + OutputIterator2 ovals, + BinaryPredicate binary_pred, + BinaryFunction binary_op, + FlagIterator iflags, + FlagType (&sflag)[CTA_SIZE], + ValueType (&sdata)[K][CTA_SIZE + 1], + bool& carry_in, + IndexType& carry_index, + ValueType& carry_value) +{ + // load flags + const FlagType flag_bits = load_flags(context, n, iflags, sflag); + const FlagType flag_count = __popc(flag_bits); // TODO hide this behind a template + const FlagType left_flag = (context.thread_index() == 0) ? 0 : sflag[context.thread_index() - 1]; + const FlagType head_flag = (context.thread_index() == 0 || flag_bits & ((1 << (K - 1)) - 1) || left_flag & (1 << (K - 1))) ? 1 : 0; + + context.barrier(); + + // scan flag counts + sflag[context.thread_index()] = flag_count; context.barrier(); + + block::inclusive_scan(context, sflag, thrust::plus()); + + const FlagType output_position = (context.thread_index() == 0) ? 
0 : sflag[context.thread_index() - 1]; + const FlagType num_outputs = sflag[CTA_SIZE - 1]; + + context.barrier(); + + // shuffle keys and write keys out + if (!thrust::detail::is_discard_iterator::value) + { + // XXX this could be improved + for (unsigned int i = 0; i < num_outputs; i += CTA_SIZE) + { + FlagType position = output_position; + + for(unsigned int k = 0; k < K; k++) + { + if (flag_bits & (FlagType(1) << k)) + { + if (i <= position && position < i + CTA_SIZE) + sflag[position - i] = K * context.thread_index() + k; + position++; + } + } + + context.barrier(); + + if (i + context.thread_index() < num_outputs) + { + InputIterator1 tmp1 = ikeys + sflag[context.thread_index()]; + OutputIterator1 tmp2 = okeys + (i + context.thread_index()); + *tmp2 = *tmp1; + } + + context.barrier(); + } + } + + // load values + load_values (context, n, ivals, sdata); + + ValueType ldata[K]; + for (unsigned int k = 0; k < K; k++) + ldata[k] = sdata[k][context.thread_index()]; + + // carry in (if necessary) + if (context.thread_index() == 0 && carry_in) + { + // XXX WAR sm_10 issue + ValueType tmp1 = carry_value; + ldata[0] = binary_op(tmp1, ldata[0]); + } + + context.barrier(); + + // sum local values + { + for(unsigned int k = 1; k < K; k++) + { + const unsigned int offset = K * context.thread_index() + k; + + if (FullBlock || offset < n) + { + if (!(flag_bits & (FlagType(1) << (k - 1)))) + ldata[k] = binary_op(ldata[k - 1], ldata[k]); + } + } + } + + // second level segmented scan + { + // use head flags for segmented scan + sflag[context.thread_index()] = head_flag; sdata[K - 1][context.thread_index()] = ldata[K - 1]; context.barrier(); + + if (FullBlock) + block::inclusive_scan_by_flag(context, sflag, sdata[K-1], binary_op); + else + block::inclusive_scan_by_flag_n(context, sflag, sdata[K-1], n, binary_op); + } + + // update local values + if (context.thread_index() > 0) + { + unsigned int update_bits = (flag_bits << 1) | (left_flag >> (K - 1)); +// TODO remove guard +#if 
THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC + unsigned int update_count = __ffs(update_bits) - 1u; // NB: this might wrap around to UINT_MAX +#else + unsigned int update_count = 0; +#endif // THRUST_DEVICE_COMPILER_NVCC + + if (!FullBlock && (K + 1) * context.thread_index() > n) + update_count = thrust::min(n - K * context.thread_index(), update_count); + + ValueType left = sdata[K - 1][context.thread_index() - 1]; + + for(unsigned int k = 0; k < K; k++) + { + if (k < update_count) + ldata[k] = binary_op(left, ldata[k]); + } + } + + context.barrier(); + + // store carry out + if (FullBlock) + { + if (context.thread_index() == CTA_SIZE - 1) + { + carry_value = ldata[K - 1]; + carry_in = (flag_bits & (FlagType(1) << (K - 1))) ? false : true; + carry_index = num_outputs; + } + } + else + { + if (context.thread_index() == (n - 1) / K) + { + for (unsigned int k = 0; k < K; k++) + if (k == (n - 1) % K) + carry_value = ldata[k]; + carry_in = (flag_bits & (FlagType(1) << ((n - 1) % K))) ? 
false : true; + carry_index = num_outputs; + } + } + + // shuffle values + { + FlagType position = output_position; + + for(unsigned int k = 0; k < K; k++) + { + const unsigned int offset = K * context.thread_index() + k; + + if (FullBlock || offset < n) + { + if (flag_bits & (FlagType(1) << k)) + { + sdata[position / CTA_SIZE][position % CTA_SIZE] = ldata[k]; + position++; + } + } + } + } + + context.barrier(); + + + // write values out + for(unsigned int k = 0; k < K; k++) + { + const unsigned int offset = CTA_SIZE * k + context.thread_index(); + + if (offset < num_outputs) + { + OutputIterator2 tmp = ovals + offset; + *tmp = sdata[k][context.thread_index()]; + } + } + + context.barrier(); +} + +template +struct reduce_by_key_closure +{ + InputIterator1 ikeys; + InputIterator2 ivals; + OutputIterator1 okeys; + OutputIterator2 ovals; + BinaryPredicate binary_pred; + BinaryFunction binary_op; + FlagIterator iflags; + IndexIterator interval_counts; + ValueIterator interval_values; + BoolIterator interval_carry; + Decomposition decomp; + Context context; + + typedef Context context_type; + + reduce_by_key_closure(InputIterator1 ikeys, + InputIterator2 ivals, + OutputIterator1 okeys, + OutputIterator2 ovals, + BinaryPredicate binary_pred, + BinaryFunction binary_op, + FlagIterator iflags, + IndexIterator interval_counts, + ValueIterator interval_values, + BoolIterator interval_carry, + Decomposition decomp, + Context context = Context()) + : ikeys(ikeys), ivals(ivals), okeys(okeys), ovals(ovals), binary_pred(binary_pred), binary_op(binary_op), + iflags(iflags), interval_counts(interval_counts), interval_values(interval_values), interval_carry(interval_carry), + decomp(decomp), context(context) {} + + __device__ __thrust_forceinline__ + void operator()(void) + { + typedef typename thrust::iterator_value::type KeyType; + typedef typename thrust::iterator_value::type ValueType; + typedef typename Decomposition::index_type IndexType; + typedef typename 
thrust::iterator_value::type FlagType; + + const unsigned int CTA_SIZE = context_type::ThreadsPerBlock::value; + +// TODO centralize this mapping (__CUDA_ARCH__ -> smem bytes) +#if __CUDA_ARCH__ >= 200 + const unsigned int SMEM = (48 * 1024); +#else + const unsigned int SMEM = (16 * 1024) - 256; +#endif + const unsigned int SMEM_FIXED = CTA_SIZE * sizeof(FlagType) + sizeof(ValueType) + sizeof(IndexType) + sizeof(bool); + const unsigned int BOUND_1 = (SMEM - SMEM_FIXED) / ((CTA_SIZE + 1) * sizeof(ValueType)); + const unsigned int BOUND_2 = 8 * sizeof(FlagType); + const unsigned int BOUND_3 = 6; + + // TODO replace this with a static_min::value + const unsigned int K = (BOUND_1 < BOUND_2) ? (BOUND_1 < BOUND_3 ? BOUND_1 : BOUND_3) : (BOUND_2 < BOUND_3 ? BOUND_2 : BOUND_3); + + __shared__ detail::uninitialized sflag; + __shared__ detail::uninitialized sdata; // padded to avoid bank conflicts + + __shared__ detail::uninitialized carry_value; // storage for carry in and carry out + __shared__ detail::uninitialized carry_index; + __shared__ detail::uninitialized carry_in; + + typename Decomposition::range_type interval = decomp[context.block_index()]; + //thrust::system::detail::internal::index_range interval = decomp[context.block_index()]; + + + if (context.thread_index() == 0) + { + carry_in = false; // act as though the previous segment terminated just before us + + if (context.block_index() == 0) + { + carry_index = 0; + } + else + { + interval_counts += (context.block_index() - 1); + carry_index = *interval_counts; + } + } + + context.barrier(); + + IndexType base = interval.begin(); + + // advance input and output iterators + ikeys += base; + ivals += base; + iflags += base; + okeys += carry_index; + ovals += carry_index; + + const unsigned int unit_size = K * CTA_SIZE; + + // process full units + while (base + unit_size <= interval.end()) + { + const unsigned int n = unit_size; + reduce_by_key_body(context, n, ikeys, ivals, okeys, ovals, binary_pred, binary_op, 
iflags, sflag.get(), sdata.get(), carry_in.get(), carry_index.get(), carry_value.get()); + base += unit_size; + ikeys += unit_size; + ivals += unit_size; + iflags += unit_size; + okeys += carry_index; + ovals += carry_index; + } + + // process partially full unit at end of input (if necessary) + if (base < interval.end()) + { + const unsigned int n = interval.end() - base; + reduce_by_key_body(context, n, ikeys, ivals, okeys, ovals, binary_pred, binary_op, iflags, sflag.get(), sdata.get(), carry_in.get(), carry_index.get(), carry_value.get()); + } + + if (context.thread_index() == 0) + { + interval_values += context.block_index(); + interval_carry += context.block_index(); + *interval_values = carry_value; + *interval_carry = carry_in; + } + } +}; // end reduce_by_key_closure + +template +struct DefaultPolicy +{ + // typedefs + typedef unsigned int FlagType; + typedef typename thrust::iterator_traits::difference_type IndexType; + typedef typename thrust::iterator_traits::value_type KeyType; + typedef thrust::system::detail::internal::uniform_decomposition Decomposition; + + // the pseudocode for deducing the type of the temporary used below: + // + // if BinaryFunction is AdaptableBinaryFunction + // TemporaryType = AdaptableBinaryFunction::result_type + // else if OutputIterator2 is a "pure" output iterator + // TemporaryType = InputIterator2::value_type + // else + // TemporaryType = OutputIterator2::value_type + // + // XXX upon c++0x, TemporaryType needs to be: + // result_of::type + + typedef typename thrust::detail::eval_if< + thrust::detail::has_result_type::value, + thrust::detail::result_type, + thrust::detail::eval_if< + thrust::detail::is_output_iterator::value, + thrust::iterator_value, + thrust::iterator_value + > + >::type ValueType; + + // XXX WAR problem on sm_11 + // TODO tune this + const static unsigned int ThreadsPerBlock = (thrust::detail::is_pod::value) ? 
256 : 192; + + DefaultPolicy(InputIterator1 first1, InputIterator1 last1) + : decomp(default_decomposition(last1 - first1)) + {} + + // member variables + Decomposition decomp; +}; + +template + thrust::pair + reduce_by_key(execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred, + BinaryFunction binary_op, + Policy policy) +{ + typedef typename Policy::FlagType FlagType; + typedef typename Policy::Decomposition Decomposition; + typedef typename Policy::IndexType IndexType; + typedef typename Policy::KeyType KeyType; + typedef typename Policy::ValueType ValueType; + + // temporary arrays + typedef thrust::detail::temporary_array IndexArray; + typedef thrust::detail::temporary_array KeyArray; + typedef thrust::detail::temporary_array ValueArray; + typedef thrust::detail::temporary_array BoolArray; + + Decomposition decomp = policy.decomp; + + // input size + IndexType n = keys_last - keys_first; + + if (n == 0) + return thrust::make_pair(keys_output, values_output); + + IndexArray interval_counts(exec, decomp.size()); + ValueArray interval_values(exec, decomp.size()); + BoolArray interval_carry(exec, decomp.size()); + + // an ode to c++11 auto + typedef thrust::counting_iterator CountingIterator; + typedef thrust::transform_iterator< + tail_flag_functor, + thrust::zip_iterator< + thrust::tuple + > + > FlagIterator; + + FlagIterator iflag= thrust::make_transform_iterator + (thrust::make_zip_iterator(thrust::make_tuple(thrust::counting_iterator(0), keys_first, keys_first + 1)), + tail_flag_functor(n, binary_pred)); + + // count number of tail flags per interval + thrust::system::cuda::detail::reduce_intervals(exec, iflag, interval_counts.begin(), thrust::plus(), decomp); + + thrust::inclusive_scan(exec, + interval_counts.begin(), interval_counts.end(), + interval_counts.begin(), + thrust::plus()); + + // determine output 
size + const IndexType N = interval_counts[interval_counts.size() - 1]; + + const static unsigned int ThreadsPerBlock = Policy::ThreadsPerBlock; + typedef typename IndexArray::iterator IndexIterator; + typedef typename ValueArray::iterator ValueIterator; + typedef typename BoolArray::iterator BoolIterator; + typedef detail::statically_blocked_thread_array Context; + typedef reduce_by_key_closure Closure; + Closure closure + (keys_first, values_first, + keys_output, values_output, + binary_pred, binary_op, + iflag, + interval_counts.begin(), + interval_values.begin(), + interval_carry.begin(), + decomp); + detail::launch_closure(closure, decomp.size(), ThreadsPerBlock); + + if (decomp.size() > 1) + { + ValueArray interval_values2(exec, decomp.size()); + IndexArray interval_counts2(exec, decomp.size()); + BoolArray interval_carry2(exec, decomp.size()); + + IndexType N2 = + thrust::reduce_by_key + (exec, + thrust::make_zip_iterator(thrust::make_tuple(interval_counts.begin(), interval_carry.begin())), + thrust::make_zip_iterator(thrust::make_tuple(interval_counts.end(), interval_carry.end())), + interval_values.begin(), + thrust::make_zip_iterator(thrust::make_tuple(interval_counts2.begin(), interval_carry2.begin())), + interval_values2.begin(), + thrust::equal_to< thrust::tuple >(), + binary_op).first + - + thrust::make_zip_iterator(thrust::make_tuple(interval_counts2.begin(), interval_carry2.begin())); + + thrust::transform_if + (exec, + interval_values2.begin(), interval_values2.begin() + N2, + thrust::make_permutation_iterator(values_output, interval_counts2.begin()), + interval_carry2.begin(), + thrust::make_permutation_iterator(values_output, interval_counts2.begin()), + binary_op, + thrust::identity()); + } + + return thrust::make_pair(keys_output + N, values_output + N); +} + +} // end namespace reduce_by_key_detail + + +template + thrust::pair + reduce_by_key(execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 
values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred, + BinaryFunction binary_op) +{ + return reduce_by_key_detail::reduce_by_key + (exec, + keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op, + reduce_by_key_detail::DefaultPolicy(keys_first, keys_last)); +} // end reduce_by_key() + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END + diff --git a/compat/thrust/system/cuda/detail/reduce_intervals.h b/compat/thrust/system/cuda/detail/reduce_intervals.h new file mode 100644 index 0000000..505d136 --- /dev/null +++ b/compat/thrust/system/cuda/detail/reduce_intervals.h @@ -0,0 +1,53 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file reduce_intervals.h + * \brief CUDA implementations of reduce_intervals algorithms. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + +template +void reduce_intervals(execution_policy &exec, + InputIterator input, + OutputIterator output, + BinaryFunction binary_op, + Decomposition decomp); + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/reduce_intervals.inl b/compat/thrust/system/cuda/detail/reduce_intervals.inl new file mode 100644 index 0000000..2381769 --- /dev/null +++ b/compat/thrust/system/cuda/detail/reduce_intervals.inl @@ -0,0 +1,203 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + +template +struct commutative_reduce_intervals_closure +{ + InputIterator input; + OutputIterator output; + BinaryFunction binary_op; + Decomposition decomposition; + unsigned int shared_array_size; + + typedef Context context_type; + context_type context; + + commutative_reduce_intervals_closure(InputIterator input, OutputIterator output, BinaryFunction binary_op, Decomposition decomposition, unsigned int shared_array_size, Context context = Context()) + : input(input), output(output), binary_op(binary_op), decomposition(decomposition), shared_array_size(shared_array_size), context(context) {} + + __device__ __thrust_forceinline__ + void operator()(void) + { + typedef typename thrust::iterator_value::type OutputType; + extern_shared_ptr shared_array; + + typedef typename Decomposition::index_type index_type; + + // this block processes results in [range.begin(), range.end()) + thrust::system::detail::internal::index_range range = decomposition[context.block_index()]; + + index_type i = range.begin() + context.thread_index(); + + input += i; + + if (range.size() < context.block_dimension()) + { + // compute reduction with the first shared_array_size threads + if (context.thread_index() < thrust::min(shared_array_size,range.size())) + { + OutputType sum = *input; + + i += shared_array_size; + input += shared_array_size; + + while (i < range.end()) + { + OutputType val = *input; + + sum = binary_op(sum, val); + + i += shared_array_size; + input += shared_array_size; + } + + shared_array[context.thread_index()] = sum; + } + } + else + { + // compute reduction with all blockDim.x threads + OutputType sum = *input; + + i += context.block_dimension(); + input += context.block_dimension(); + + while (i < range.end()) + { + OutputType val = *input; + + sum = binary_op(sum, val); + + i += 
context.block_dimension(); + input += context.block_dimension(); + } + + // write first shared_array_size values into shared memory + if (context.thread_index() < shared_array_size) + shared_array[context.thread_index()] = sum; + + // accumulate remaining values (if any) to shared memory in stages + if (context.block_dimension() > shared_array_size) + { + unsigned int lb = shared_array_size; + unsigned int ub = shared_array_size + lb; + + while (lb < context.block_dimension()) + { + context.barrier(); + + if (lb <= context.thread_index() && context.thread_index() < ub) + { + OutputType tmp = shared_array[context.thread_index() - lb]; + shared_array[context.thread_index() - lb] = binary_op(tmp, sum); + } + + lb += shared_array_size; + ub += shared_array_size; + } + } + } + + context.barrier(); + + block::reduce_n(context, shared_array, thrust::min(range.size(), shared_array_size), binary_op); + + if (context.thread_index() == 0) + { + output += context.block_index(); + *output = shared_array[0]; + } + } +}; + +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN + +template +void reduce_intervals(execution_policy &, + InputIterator input, + OutputIterator output, + BinaryFunction binary_op, + Decomposition decomp) +{ + // we're attempting to launch a kernel, assert we're compiling with nvcc + // ======================================================================== + // X Note to the user: If you've found this line due to a compiler error, X + // X you need to compile your code using nvcc, rather than g++ or cl.exe X + // ======================================================================== + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + + if (decomp.size() == 0) + return; + + // TODO if (decomp.size() > deviceProperties.maxGridSize[0]) throw cuda exception (or handle general case) + + typedef detail::blocked_thread_array Context; + typedef commutative_reduce_intervals_closure Closure; + typedef typename 
thrust::iterator_value::type OutputType; + + detail::launch_calculator calculator; + + thrust::tuple config = calculator.with_variable_block_size_available_smem(); + + //size_t max_blocks = thrust::get<0>(config); + size_t block_size = thrust::get<1>(config); + size_t max_memory = thrust::get<2>(config); + + // determine shared array size + size_t shared_array_size = thrust::min(max_memory / sizeof(OutputType), block_size); + size_t shared_array_bytes = sizeof(OutputType) * shared_array_size; + + // TODO if (shared_array_size < 1) throw cuda exception "insufficient shared memory" + + Closure closure(input, output, binary_op, decomp, shared_array_size); + detail::launch_closure(closure, decomp.size(), block_size, shared_array_bytes); +} + +__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/remove.h b/compat/thrust/system/cuda/detail/remove.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/remove.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/replace.h b/compat/thrust/system/cuda/detail/replace.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/replace.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/reverse.h b/compat/thrust/system/cuda/detail/reverse.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/reverse.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/runtime_introspection.h b/compat/thrust/system/cuda/detail/runtime_introspection.h new file mode 100644 index 0000000..39f6c9f --- /dev/null +++ b/compat/thrust/system/cuda/detail/runtime_introspection.h @@ -0,0 +1,78 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file runtime_introspection.h + * \brief Defines the interface to functions + * providing introspection into the architecture + * of CUDA devices. + */ + +#pragma once + +#include + +// #include this for device_properties_t and function_attributes_t +#include + +// #include this for size_t +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + + +/*! Returns the current device ordinal. + */ +inline int current_device(); + +/*! Returns a copy of the device_properties_t structure + * that is associated with a given device. + */ +inline device_properties_t device_properties(int device_id); + +/*! Returns a copy of the device_properties_t structure + * that is associated with the current device. + */ +inline device_properties_t device_properties(void); + +/*! 
Returns a copy of the function_attributes_t structure + * that is associated with a given __global__ function + */ +template +inline function_attributes_t function_attributes(KernelFunction kernel); + +/*! Returns the compute capability of a device in integer format. + * For example, returns 10 for sm_10 and 21 for sm_21 + * \return The compute capability as an integer + */ +inline size_t compute_capability(const device_properties_t &properties); +inline size_t compute_capability(void); + + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/runtime_introspection.inl b/compat/thrust/system/cuda/detail/runtime_introspection.inl new file mode 100644 index 0000000..a5cc382 --- /dev/null +++ b/compat/thrust/system/cuda/detail/runtime_introspection.inl @@ -0,0 +1,169 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace runtime_introspection_detail +{ + + +inline void get_device_properties(device_properties_t &p, int device_id) +{ + cudaDeviceProp properties; + + cudaError_t error = cudaGetDeviceProperties(&properties, device_id); + + if(error) + throw thrust::system_error(error, thrust::cuda_category()); + + // be careful about how this is initialized! + device_properties_t temp = { + properties.major, + { + properties.maxGridSize[0], + properties.maxGridSize[1], + properties.maxGridSize[2] + }, + properties.maxThreadsPerBlock, + properties.maxThreadsPerMultiProcessor, + properties.minor, + properties.multiProcessorCount, + properties.regsPerBlock, + properties.sharedMemPerBlock, + properties.warpSize + }; + + p = temp; +} // end get_device_properties() + + +} // end runtime_introspection_detail + + +inline device_properties_t device_properties(int device_id) +{ + // cache the result of get_device_properties, because it is slow + // only cache the first few devices + static const int max_num_devices = 16; + + static bool properties_exist[max_num_devices] = {0}; + static device_properties_t device_properties[max_num_devices] = {}; + + if(device_id >= max_num_devices) + { + device_properties_t result; + runtime_introspection_detail::get_device_properties(result, device_id); + return result; + } + + if(!properties_exist[device_id]) + { + runtime_introspection_detail::get_device_properties(device_properties[device_id], device_id); + + // disallow the compiler to move the write to properties_exist[device_id] + // before the initialization of device_properties[device_id] + __thrust_compiler_fence(); + + properties_exist[device_id] = true; + } + + return device_properties[device_id]; +} + +inline int current_device() +{ + int result = -1; + + cudaError_t error = cudaGetDevice(&result); + + if(error) + throw 
thrust::system_error(error, thrust::cuda_category()); + + if(result < 0) + throw thrust::system_error(cudaErrorNoDevice, thrust::cuda_category()); + + return result; +} + +inline device_properties_t device_properties(void) +{ + return device_properties(current_device()); +} + +template +inline function_attributes_t function_attributes(KernelFunction kernel) +{ +// cudaFuncGetAttributes(), used below, only exists when __CUDACC__ is defined +#ifdef __CUDACC__ + typedef void (*fun_ptr_type)(); + + fun_ptr_type fun_ptr = reinterpret_cast(kernel); + + cudaFuncAttributes attributes; + + cudaError_t error = cudaFuncGetAttributes(&attributes, fun_ptr); + + if(error) + { + throw thrust::system_error(error, thrust::cuda_category()); + } + + // be careful about how this is initialized! + function_attributes_t result = { + attributes.constSizeBytes, + attributes.localSizeBytes, + attributes.maxThreadsPerBlock, + attributes.numRegs, + attributes.sharedSizeBytes + }; + + return result; +#else + return function_attributes_t(); +#endif // __CUDACC__ +} + +inline size_t compute_capability(const device_properties_t &properties) +{ + return 10 * properties.major + properties.minor; +} + +inline size_t compute_capability(void) +{ + return compute_capability(device_properties()); +} + + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/scan.h b/compat/thrust/system/cuda/detail/scan.h new file mode 100644 index 0000000..036c89a --- /dev/null +++ b/compat/thrust/system/cuda/detail/scan.h @@ -0,0 +1,64 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file scan.h + * \brief Scan operations (parallel prefix-sum) [cuda] + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + +template + OutputIterator inclusive_scan(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + AssociativeOperator binary_op); + +template + OutputIterator exclusive_scan(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + T init, + AssociativeOperator binary_op); + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/scan.inl b/compat/thrust/system/cuda/detail/scan.inl new file mode 100644 index 0000000..9d9c6d2 --- /dev/null +++ b/compat/thrust/system/cuda/detail/scan.inl @@ -0,0 +1,82 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! 
\file scan.inl + * \brief Inline file for scan.h. + */ + +#include +#include + +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + +template + OutputIterator inclusive_scan(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + AssociativeOperator binary_op) +{ + // we're attempting to launch a kernel, assert we're compiling with nvcc + // ======================================================================== + // X Note to the user: If you've found this line due to a compiler error, X + // X you need to compile your code using nvcc, rather than g++ or cl.exe X + // ======================================================================== + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + + return thrust::system::cuda::detail::detail::fast_scan::inclusive_scan(exec, first, last, result, binary_op); +} + +template + OutputIterator exclusive_scan(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + T init, + AssociativeOperator binary_op) +{ + // we're attempting to launch a kernel, assert we're compiling with nvcc + // ======================================================================== + // X Note to the user: If you've found this line due to a compiler error, X + // X you need to compile your code using nvcc, rather than g++ or cl.exe X + // ======================================================================== + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + + return thrust::system::cuda::detail::detail::fast_scan::exclusive_scan(exec, first, last, result, init, binary_op); +} + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/scan_by_key.h b/compat/thrust/system/cuda/detail/scan_by_key.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ 
b/compat/thrust/system/cuda/detail/scan_by_key.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/scatter.h b/compat/thrust/system/cuda/detail/scatter.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/scatter.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/sequence.h b/compat/thrust/system/cuda/detail/sequence.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/sequence.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/set_difference.inl b/compat/thrust/system/cuda/detail/set_difference.inl new file mode 100644 index 0000000..33d9884 --- /dev/null +++ b/compat/thrust/system/cuda/detail/set_difference.inl @@ -0,0 +1,138 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace set_difference_detail +{ + + +struct serial_bounded_set_difference +{ + // max_input_size <= 32 + template + inline __device__ + thrust::detail::uint32_t operator()(Size max_input_size, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result, + Compare comp) + { + thrust::detail::uint32_t active_mask = 0; + thrust::detail::uint32_t active_bit = 1; + + while(first1 != last1 && first2 != last2) + { + if(comp(*first1,*first2)) + { + *result = *first1; + active_mask |= active_bit; + ++first1; + } // end if + else if(comp(*first2,*first1)) + { + ++first2; + } // end else if + else + { + ++first1; + ++first2; + } // end else + + ++result; + active_bit <<= 1; + } // end while + + while(first1 != last1) + { + *result = *first1; + ++first1; + ++result; + active_mask |= active_bit; + active_bit <<= 1; + } + + return active_mask; + } + + + template + inline __device__ + Size count(Size max_input_size, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + Compare comp) + { + Size result = 0; + + while(first1 != last1 && first2 != last2) + { + if(comp(*first1,*first2)) + { + ++first1; + ++result; + } // end if + else if(comp(*first2,*first1)) + { + ++first2; + } // end else if + else + { + ++first1; + ++first2; + } // end else + } // end while + + return result + last1 - first1; + } +}; // end serial_bounded_set_difference + + +} // end namespace set_difference_detail + + +template +RandomAccessIterator3 set_difference(execution_policy &exec, + RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + RandomAccessIterator2 last2, + RandomAccessIterator3 result, + Compare comp) +{ + return thrust::system::cuda::detail::detail::set_operation(exec, first1, last1, first2, last2, result, comp, 
set_difference_detail::serial_bounded_set_difference()); +} // end set_difference + + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/set_intersection.inl b/compat/thrust/system/cuda/detail/set_intersection.inl new file mode 100644 index 0000000..e4810b6 --- /dev/null +++ b/compat/thrust/system/cuda/detail/set_intersection.inl @@ -0,0 +1,129 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace set_intersection_detail +{ + + +struct serial_bounded_set_intersection +{ + // max_input_size <= 32 + template + inline __device__ + thrust::detail::uint32_t operator()(Size max_input_size, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result, + Compare comp) + { + thrust::detail::uint32_t active_mask = 0; + thrust::detail::uint32_t active_bit = 1; + + while(first1 != last1 && first2 != last2) + { + if(comp(*first1,*first2)) + { + ++first1; + } // end if + else if(comp(*first2,*first1)) + { + ++first2; + } // end else if + else + { + *result = *first1; + ++first1; + ++first2; + active_mask |= active_bit; + } // end else + + ++result; + active_bit <<= 1; + } // end while + + return active_mask; + } + + + template + inline __device__ + Size count(Size max_input_size, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + Compare comp) + { + Size result = 0; + + while(first1 != last1 && first2 != last2) + { + if(comp(*first1,*first2)) + { + ++first1; + } // end if + else if(comp(*first2,*first1)) + { + ++first2; + } // end else if + else + { + ++result; + ++first1; + ++first2; + } // end else + } // end while + + return result; + } +}; // end serial_bounded_set_intersection + + +} // end namespace set_intersection_detail + + +template +RandomAccessIterator3 set_intersection(execution_policy &exec, + RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + RandomAccessIterator2 last2, + RandomAccessIterator3 result, + Compare comp) +{ + return thrust::system::cuda::detail::detail::set_operation(exec, first1, last1, first2, last2, result, comp, set_intersection_detail::serial_bounded_set_intersection()); +} // end set_intersection + + +} // end namespace detail +} // end namespace cuda +} // end 
namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/set_operations.h b/compat/thrust/system/cuda/detail/set_operations.h new file mode 100644 index 0000000..040e341 --- /dev/null +++ b/compat/thrust/system/cuda/detail/set_operations.h @@ -0,0 +1,97 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + + +template +RandomAccessIterator3 set_difference(execution_policy &exec, + RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + RandomAccessIterator2 last2, + RandomAccessIterator3 result, + Compare comp); + + +template +RandomAccessIterator3 set_intersection(execution_policy &exec, + RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + RandomAccessIterator2 last2, + RandomAccessIterator3 result, + Compare comp); + + +template +RandomAccessIterator3 set_symmetric_difference(execution_policy &exec, + RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + RandomAccessIterator2 last2, + RandomAccessIterator3 result, + Compare comp); + + +template +RandomAccessIterator3 set_union(execution_policy &exec, + RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + RandomAccessIterator2 last2, + 
RandomAccessIterator3 result, + Compare comp); + + +} // end detail +} // end cuda +} // end system +} // end thrust + +#include +#include +#include +#include + diff --git a/compat/thrust/system/cuda/detail/set_symmetric_difference.inl b/compat/thrust/system/cuda/detail/set_symmetric_difference.inl new file mode 100644 index 0000000..112c955 --- /dev/null +++ b/compat/thrust/system/cuda/detail/set_symmetric_difference.inl @@ -0,0 +1,150 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace set_symmetric_difference_detail +{ + + +struct serial_bounded_set_symmetric_difference +{ + // max_input_size <= 32 + template + inline __device__ + thrust::detail::uint32_t operator()(Size max_input_size, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result, + Compare comp) + { + thrust::detail::uint32_t active_mask = 0; + thrust::detail::uint32_t active_bit = 1; + + while(first1 != last1 && first2 != last2) + { + if(comp(*first1,*first2)) + { + *result = *first1; + active_mask |= active_bit; + ++first1; + } // end if + else if(comp(*first2,*first1)) + { + *result = *first2; + active_mask |= active_bit; + ++first2; + } // end else if + else + { + ++first1; + ++first2; + } // end else + + ++result; + active_bit <<= 1; + } // end while + + while(first1 != last1) + { + *result = *first1; + ++first1; + ++result; + active_mask |= active_bit; + active_bit <<= 1; + } + + while(first2 != last2) + { + *result = *first2; + ++first2; + ++result; + active_mask |= active_bit; + active_bit <<= 1; + } + + return active_mask; + } + + + template + inline __device__ + Size count(Size max_input_size, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + Compare comp) + { + Size result = 0; + + while(first1 != last1 && first2 != last2) + { + if(comp(*first1,*first2)) + { + ++first1; + ++result; + } // end if + else if(comp(*first2,*first1)) + { + ++first2; + ++result; + } // end else if + else + { + ++first1; + ++first2; + } // end else + } // end while + + return result + thrust::max(last1 - first1,last2 - first2); + } +}; // end serial_bounded_set_symmetric_difference + + +} // end namespace set_symmetric_difference_detail + + +template +RandomAccessIterator3 set_symmetric_difference(execution_policy &exec, + RandomAccessIterator1 first1, + 
RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + RandomAccessIterator2 last2, + RandomAccessIterator3 result, + Compare comp) +{ + return thrust::system::cuda::detail::detail::set_operation(exec, first1, last1, first2, last2, result, comp, set_symmetric_difference_detail::serial_bounded_set_symmetric_difference()); +} // end set_symmetric_difference + + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/set_union.inl b/compat/thrust/system/cuda/detail/set_union.inl new file mode 100644 index 0000000..66cccab --- /dev/null +++ b/compat/thrust/system/cuda/detail/set_union.inl @@ -0,0 +1,150 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ +namespace set_union_detail +{ + + +struct serial_bounded_set_union +{ + // max_input_size <= 32 + template + inline __device__ + thrust::detail::uint32_t operator()(Size max_input_size, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result, + Compare comp) + { + thrust::detail::uint32_t active_mask = 0; + thrust::detail::uint32_t active_bit = 1; + + while(first1 != last1 && first2 != last2) + { + if(comp(*first1,*first2)) + { + *result = *first1; + ++first1; + } // end if + else if(comp(*first2,*first1)) + { + *result = *first2; + ++first2; + } // end else if + else + { + *result = *first1; + ++first1; + ++first2; + } // end else + + ++result; + active_mask |= active_bit; + active_bit <<= 1; + } // end while + + while(first1 != last1) + { + *result = *first1; + ++first1; + ++result; + active_mask |= active_bit; + active_bit <<= 1; + } + + while(first2 != last2) + { + *result = *first2; + ++first2; + ++result; + active_mask |= active_bit; + active_bit <<= 1; + } + + return active_mask; + } + + + template + inline __device__ + Size count(Size max_input_size, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + Compare comp) + { + Size result = 0; + + while(first1 != last1 && first2 != last2) + { + if(comp(*first1,*first2)) + { + ++first1; + } // end if + else if(comp(*first2,*first1)) + { + ++first2; + } // end else if + else + { + ++first1; + ++first2; + } // end else + + ++result; + } // end while + + return result + thrust::max(last1 - first1,last2 - first2); + } +}; // end serial_bounded_set_union + + +} // end namespace set_union_detail + + +template +RandomAccessIterator3 set_union(execution_policy &exec, + RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + RandomAccessIterator2 last2, + 
RandomAccessIterator3 result, + Compare comp) +{ + return thrust::system::cuda::detail::detail::set_operation(exec, first1, last1, first2, last2, result, comp, set_union_detail::serial_bounded_set_union()); +} // end set_union + + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/sort.h b/compat/thrust/system/cuda/detail/sort.h new file mode 100644 index 0000000..e78d36a --- /dev/null +++ b/compat/thrust/system/cuda/detail/sort.h @@ -0,0 +1,55 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + +template + void stable_sort(execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp); + +template + void stable_sort_by_key(execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp); + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/sort.inl b/compat/thrust/system/cuda/detail/sort.inl new file mode 100644 index 0000000..d7e0a60 --- /dev/null +++ b/compat/thrust/system/cuda/detail/sort.inl @@ -0,0 +1,287 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file sort.inl + * \brief Inline file for sort.h + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + + +/* + * This file implements the following dispatch procedure for cuda::stable_sort() + * and cuda::stable_sort_by_key(). The first level inspects the KeyType + * and StrictWeakOrdering to determine whether a sort assuming primitive-typed + * data may be applied. 
+ * + * If a sort assuming primitive-typed data can be applied (i.e., a radix sort), + * the input ranges are first trivialized (turned into simple contiguous ranges + * if they are not already). To implement descending orderings, an ascending + * sort will be reversed. + * + * If a sort assuming primitive-typed data cannot be applied, a comparison-based + * sort is used. Depending on the size of the key and value types, one level of + * indirection may be applied to their input ranges. This transformation + * may be applied to either range to convert an ill-suited problem (i.e. sorting with + * large keys or large value) into a problem more amenable to the underlying + * merge sort algorithm. + */ + + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + + +namespace stable_sort_detail +{ + + +template + struct can_use_primitive_sort + : thrust::detail::and_< + thrust::detail::is_arithmetic, + thrust::detail::or_< + thrust::detail::is_same >, + thrust::detail::is_same > + > + > +{}; + + +template + struct enable_if_primitive_sort + : thrust::detail::enable_if< + can_use_primitive_sort< + typename iterator_value::type, + StrictWeakCompare + >::value + > +{}; + + +template + struct enable_if_comparison_sort + : thrust::detail::disable_if< + can_use_primitive_sort< + typename iterator_value::type, + StrictWeakCompare + >::value + > +{}; + + +template + typename enable_if_primitive_sort::type + stable_sort(execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp) +{ + // ensure sequence has trivial iterators + thrust::detail::trivial_sequence keys(exec, first, last); + + // CUDA path for thrust::stable_sort with primitive keys + // (e.g. int, float, short, etc.) 
and a less or greater comparison + // method is implemented with a primitive sort + thrust::system::cuda::detail::detail::stable_primitive_sort(exec, keys.begin(), keys.end()); + + // copy results back, if necessary + if(!thrust::detail::is_trivial_iterator::value) + { + thrust::copy(exec, keys.begin(), keys.end(), first); + } + + // if comp is greater then reverse the keys + typedef typename thrust::iterator_traits::value_type KeyType; + const static bool reverse = thrust::detail::is_same >::value; + + if(reverse) + { + thrust::reverse(first, last); + } +} + +template + typename enable_if_comparison_sort::type + stable_sort(execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp) +{ + // decide whether to sort keys indirectly + typedef typename thrust::iterator_value::type KeyType; + typedef thrust::detail::integral_constant 8)> use_key_indirection; + + conditional_temporary_indirect_ordering potentially_indirect_keys(derived_cast(exec), first, last, comp); + + thrust::system::cuda::detail::detail::stable_merge_sort(exec, + potentially_indirect_keys.begin(), + potentially_indirect_keys.end(), + potentially_indirect_keys.comp()); +} + +template + typename enable_if_primitive_sort::type + stable_sort_by_key(execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp) +{ + // path for thrust::stable_sort_by_key with primitive keys + // (e.g. int, float, short, etc.) 
and a less or greater comparison + // method is implemented with stable_primitive_sort_by_key + + // if comp is greater then reverse the keys and values + typedef typename thrust::iterator_traits::value_type KeyType; + const static bool reverse = thrust::detail::is_same >::value; + + // note, we also have to reverse the (unordered) input to preserve stability + if (reverse) + { + thrust::reverse(exec, keys_first, keys_last); + thrust::reverse(exec, values_first, values_first + (keys_last - keys_first)); + } + + // ensure sequences have trivial iterators + thrust::detail::trivial_sequence keys(exec, keys_first, keys_last); + thrust::detail::trivial_sequence values(exec, values_first, values_first + (keys_last - keys_first)); + + thrust::system::cuda::detail::detail::stable_primitive_sort_by_key(exec, keys.begin(), keys.end(), values.begin()); + + // copy results back, if necessary + if(!thrust::detail::is_trivial_iterator::value) + thrust::copy(exec, keys.begin(), keys.end(), keys_first); + if(!thrust::detail::is_trivial_iterator::value) + thrust::copy(exec, values.begin(), values.end(), values_first); + + if (reverse) + { + thrust::reverse(exec, keys_first, keys_last); + thrust::reverse(exec, values_first, values_first + (keys_last - keys_first)); + } +} + + +template + typename enable_if_comparison_sort::type + stable_sort_by_key(execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp) +{ + // decide whether to apply indirection to either range + typedef typename thrust::iterator_value::type KeyType; + typedef typename thrust::iterator_value::type ValueType; + + typedef thrust::detail::integral_constant 8)> use_key_indirection; + typedef thrust::detail::integral_constant 4)> use_value_indirection; + + conditional_temporary_indirect_ordering< + use_key_indirection, + DerivedPolicy, + RandomAccessIterator1, + StrictWeakOrdering + > 
potentially_indirect_keys(derived_cast(exec), keys_first, keys_last, comp); + + conditional_temporary_indirect_permutation< + use_value_indirection, + DerivedPolicy, + RandomAccessIterator2 + > potentially_indirect_values(derived_cast(exec), values_first, values_first + (keys_last - keys_first)); + + thrust::system::cuda::detail::detail::stable_merge_sort_by_key(exec, + potentially_indirect_keys.begin(), + potentially_indirect_keys.end(), + potentially_indirect_values.begin(), + potentially_indirect_keys.comp()); +} + + +} // end namespace stable_sort_detail + + +template + void stable_sort(execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp) +{ + // we're attempting to launch a kernel, assert we're compiling with nvcc + // ======================================================================== + // X Note to the user: If you've found this line due to a compiler error, X + // X you need to compile your code using nvcc, rather than g++ or cl.exe X + // ======================================================================== + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + + stable_sort_detail::stable_sort(exec, first, last, comp); +} + + +template + void stable_sort_by_key(execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp) +{ + // we're attempting to launch a kernel, assert we're compiling with nvcc + // ======================================================================== + // X Note to the user: If you've found this line due to a compiler error, X + // X you need to compile your code using nvcc, rather than g++ or cl.exe X + // ======================================================================== + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + + stable_sort_detail::stable_sort_by_key(exec, keys_first, keys_last, values_first, comp); +} 
+ + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/swap_ranges.h b/compat/thrust/system/cuda/detail/swap_ranges.h new file mode 100644 index 0000000..9b1949e --- /dev/null +++ b/compat/thrust/system/cuda/detail/swap_ranges.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// cuda has no special swap_ranges + diff --git a/compat/thrust/system/cuda/detail/synchronize.h b/compat/thrust/system/cuda/detail/synchronize.h new file mode 100644 index 0000000..762f4a3 --- /dev/null +++ b/compat/thrust/system/cuda/detail/synchronize.h @@ -0,0 +1,41 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + +inline void synchronize(const char *message = ""); + +inline void synchronize_if_enabled(const char *message = ""); + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/synchronize.inl b/compat/thrust/system/cuda/detail/synchronize.inl new file mode 100644 index 0000000..5f70f79 --- /dev/null +++ b/compat/thrust/system/cuda/detail/synchronize.inl @@ -0,0 +1,56 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + +void synchronize(const char *message) +{ + cudaError_t error = cudaThreadSynchronize(); + if(error) + { + throw thrust::system_error(error, thrust::cuda_category(), std::string("synchronize: ") + message); + } // end if +} // end synchronize() + +void synchronize_if_enabled(const char *message) +{ +// XXX this could potentially be a runtime decision +#if __THRUST_SYNCHRONOUS + synchronize(message); +#else + // WAR "unused parameter" warning + (void) message; +#endif +} // end synchronize_if_enabled() + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/cuda/detail/tabulate.h b/compat/thrust/system/cuda/detail/tabulate.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/tabulate.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/temporary_buffer.h b/compat/thrust/system/cuda/detail/temporary_buffer.h new file mode 100644 index 0000000..628bd75 --- /dev/null +++ b/compat/thrust/system/cuda/detail/temporary_buffer.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special temporary buffer functions + diff --git a/compat/thrust/system/cuda/detail/temporary_indirect_permutation.h b/compat/thrust/system/cuda/detail/temporary_indirect_permutation.h new file mode 100644 index 0000000..3d05f44 --- /dev/null +++ b/compat/thrust/system/cuda/detail/temporary_indirect_permutation.h @@ -0,0 +1,217 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + + +template + struct temporary_indirect_permutation +{ + private: + typedef unsigned int size_type; + typedef thrust::detail::temporary_array array_type; + + public: + temporary_indirect_permutation(DerivedPolicy &exec, RandomAccessIterator first, RandomAccessIterator last) + : m_exec(exec), + m_src_first(first), + m_src_last(last), + m_permutation(0, m_exec, last - first) + { + // generate sorted index sequence + thrust::sequence(exec, m_permutation.begin(), m_permutation.end()); + } + + ~temporary_indirect_permutation() + { + // permute the source array using the indices + typedef typename thrust::iterator_value::type value_type; + thrust::detail::temporary_array temp(m_exec, m_src_first, m_src_last); + thrust::gather(m_exec, m_permutation.begin(), m_permutation.end(), temp.begin(), m_src_first); + } + + typedef typename array_type::iterator iterator; + + iterator begin() + { + return m_permutation.begin(); + } + + iterator end() + { + return m_permutation.end(); + } + + private: + DerivedPolicy &m_exec; + RandomAccessIterator m_src_first, m_src_last; + thrust::detail::temporary_array m_permutation; +}; + + +template + struct iterator_range_with_execution_policy +{ + iterator_range_with_execution_policy(DerivedPolicy &exec, RandomAccessIterator first, RandomAccessIterator last) + : m_exec(exec), m_first(first), m_last(last) + {} + + typedef RandomAccessIterator iterator; + + iterator begin() + { + return m_first; + } + + iterator end() + { + return m_last; + } + + DerivedPolicy &exec() + { + return m_exec; + } + + DerivedPolicy &m_exec; + RandomAccessIterator m_first, m_last; +}; + + +template + struct conditional_temporary_indirect_permutation + : thrust::detail::eval_if< + Condition::value, + thrust::detail::identity_ >, + thrust::detail::identity_ > + >::type +{ + typedef typename thrust::detail::eval_if< + 
Condition::value, + thrust::detail::identity_ >, + thrust::detail::identity_ > + >::type super_t; + + conditional_temporary_indirect_permutation(DerivedPolicy &exec, RandomAccessIterator first, RandomAccessIterator last) + : super_t(exec, first, last) + {} +}; + + +template + struct temporary_indirect_ordering + : temporary_indirect_permutation +{ + private: + typedef temporary_indirect_permutation super_t; + + public: + temporary_indirect_ordering(DerivedPolicy &exec, RandomAccessIterator first, RandomAccessIterator last, Compare comp) + : super_t(exec, first, last), + m_comp(first, comp) + {} + + struct compare + { + RandomAccessIterator first; + + thrust::detail::host_device_function< + Compare, + bool + > comp; + + compare(RandomAccessIterator first, Compare comp) + : first(first), comp(comp) + {} + + template + __host__ __device__ + bool operator()(Integral a, Integral b) + { + return comp(first[a], first[b]); + } + }; + + compare comp() const + { + return m_comp; + } + + private: + compare m_comp; +}; + + +template + struct iterator_range_with_execution_policy_and_compare + : iterator_range_with_execution_policy +{ + typedef iterator_range_with_execution_policy super_t; + + iterator_range_with_execution_policy_and_compare(DerivedPolicy &exec, RandomAccessIterator first, RandomAccessIterator last, Compare comp) + : super_t(exec, first, last), m_comp(comp) + {} + + typedef Compare compare; + + compare comp() + { + return m_comp; + } + + Compare m_comp; +}; + + +template + struct conditional_temporary_indirect_ordering + : thrust::detail::eval_if< + Condition::value, + thrust::detail::identity_ >, + thrust::detail::identity_ > + >::type +{ + typedef typename thrust::detail::eval_if< + Condition::value, + thrust::detail::identity_ >, + thrust::detail::identity_ > + >::type super_t; + + conditional_temporary_indirect_ordering(DerivedPolicy &exec, RandomAccessIterator first, RandomAccessIterator last, Compare comp) + : super_t(exec, first, last, comp) + {} +}; + + 
+} // end detail +} // end cuda +} // end system +} // end thrust + diff --git a/compat/thrust/system/cuda/detail/transform.h b/compat/thrust/system/cuda/detail/transform.h new file mode 100644 index 0000000..0af8705 --- /dev/null +++ b/compat/thrust/system/cuda/detail/transform.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// cuda has no special transform + diff --git a/compat/thrust/system/cuda/detail/transform_reduce.h b/compat/thrust/system/cuda/detail/transform_reduce.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/transform_reduce.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/transform_scan.h b/compat/thrust/system/cuda/detail/transform_scan.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/transform_scan.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/trivial_copy.h b/compat/thrust/system/cuda/detail/trivial_copy.h new file mode 100644 index 0000000..e0e898a --- /dev/null +++ b/compat/thrust/system/cuda/detail/trivial_copy.h @@ -0,0 +1,54 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + +template + void trivial_copy_n(execution_policy &exec, + RandomAccessIterator1 first, + Size n, + RandomAccessIterator2 result); + +template + void trivial_copy_n(cross_system &exec, + RandomAccessIterator1 first, + Size n, + RandomAccessIterator2 result); + +} // end detail +} // end cuda +} // end system +} // end thrust + +#include + diff --git a/compat/thrust/system/cuda/detail/trivial_copy.inl b/compat/thrust/system/cuda/detail/trivial_copy.inl new file mode 100644 index 0000000..d23a4ef --- /dev/null +++ b/compat/thrust/system/cuda/detail/trivial_copy.inl @@ -0,0 +1,114 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + +namespace trivial_copy_detail +{ + +inline void checked_cudaMemcpy(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind) +{ + cudaError_t error = cudaMemcpy(dst,src,count,kind); + if(error) + { + throw thrust::system_error(error, thrust::cuda_category()); + } // end error +} // end checked_cudaMemcpy() + + +template + cudaMemcpyKind cuda_memcpy_kind(const thrust::cuda::execution_policy &, + const thrust::cpp::execution_policy &) +{ + return cudaMemcpyDeviceToHost; +} // end cuda_memcpy_kind() + + +template + cudaMemcpyKind cuda_memcpy_kind(const thrust::cpp::execution_policy &, + const thrust::cuda::execution_policy &) +{ + return cudaMemcpyHostToDevice; +} // end cuda_memcpy_kind() + + +} // end namespace trivial_copy_detail + + +template + void trivial_copy_n(execution_policy &exec, + RandomAccessIterator1 first, + Size n, + RandomAccessIterator2 result) +{ + typedef typename thrust::iterator_value::type T; + + void *dst = thrust::raw_pointer_cast(&*result); + const void *src = thrust::raw_pointer_cast(&*first); + + trivial_copy_detail::checked_cudaMemcpy(dst, src, n * sizeof(T), cudaMemcpyDeviceToDevice); +} + + +template + void trivial_copy_n(cross_system &systems, + RandomAccessIterator1 first, + Size n, + RandomAccessIterator2 result) +{ + typedef typename thrust::iterator_value::type T; + + void *dst = thrust::raw_pointer_cast(&*result); + const void *src = thrust::raw_pointer_cast(&*first); + + cudaMemcpyKind kind = trivial_copy_detail::cuda_memcpy_kind(thrust::detail::derived_cast(systems.system1), thrust::detail::derived_cast(systems.system2)); + + trivial_copy_detail::checked_cudaMemcpy(dst, src, n * sizeof(T), kind); +} + + +} // end namespace detail +} // end namespace cuda +} // end namespace system +} // end namespace thrust + diff --git 
a/compat/thrust/system/cuda/detail/uninitialized_copy.h b/compat/thrust/system/cuda/detail/uninitialized_copy.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/uninitialized_copy.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/uninitialized_fill.h b/compat/thrust/system/cuda/detail/uninitialized_fill.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/uninitialized_fill.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/unique.h b/compat/thrust/system/cuda/detail/unique.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/unique.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/unique_by_key.h b/compat/thrust/system/cuda/detail/unique_by_key.h new file mode 100644 index 0000000..a307fc5 --- /dev/null +++ b/compat/thrust/system/cuda/detail/unique_by_key.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system has no special version of this algorithm + diff --git a/compat/thrust/system/cuda/detail/vector.inl b/compat/thrust/system/cuda/detail/vector.inl new file mode 100644 index 0000000..3659876 --- /dev/null +++ b/compat/thrust/system/cuda/detail/vector.inl @@ -0,0 +1,97 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ + +template + vector + ::vector() + : super_t() +{} + +template + vector + ::vector(size_type n) + : super_t(n) +{} + +template + vector + ::vector(size_type n, const value_type &value) + : super_t(n,value) +{} + +template + vector + ::vector(const vector &x) + : super_t(x) +{} + +template + template + vector + ::vector(const thrust::detail::vector_base &x) + : super_t(x) +{} + +template + template + vector + ::vector(const std::vector &x) + : super_t(x) +{} + +template + template + vector + ::vector(InputIterator first, InputIterator last) + : super_t(first,last) +{} + +template + template + vector & + vector + ::operator=(const std::vector &x) +{ + super_t::operator=(x); + return *this; +} + +template + template + vector & + vector + ::operator=(const thrust::detail::vector_base &x) +{ + super_t::operator=(x); + return *this; +} + +} // end cuda +} // end system +} // end thrust + diff --git a/compat/thrust/system/cuda/error.h
b/compat/thrust/system/cuda/error.h new file mode 100644 index 0000000..8d09853 --- /dev/null +++ b/compat/thrust/system/cuda/error.h @@ -0,0 +1,186 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file thrust/system/cuda/error.h + * \brief CUDA-specific error reporting + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +namespace system +{ + +namespace cuda +{ + +/*! \addtogroup system + * \{ + */ + +// To construct an error_code after a CUDA Runtime error: +// +// error_code(::cudaGetLastError(), cuda_category()) + +// XXX N3000 prefers enum class errc { ... } +namespace errc +{ + +/*! \p errc_t enumerates the kinds of CUDA Runtime errors. 
+ */ +enum errc_t +{ + // from cuda/include/driver_types.h + // mirror their order + success = cudaSuccess, + missing_configuration = cudaErrorMissingConfiguration, + memory_allocation = cudaErrorMemoryAllocation, + initialization_error = cudaErrorInitializationError, + launch_failure = cudaErrorLaunchFailure, + prior_launch_failure = cudaErrorPriorLaunchFailure, + launch_timeout = cudaErrorLaunchTimeout, + launch_out_of_resources = cudaErrorLaunchOutOfResources, + invalid_device_function = cudaErrorInvalidDeviceFunction, + invalid_configuration = cudaErrorInvalidConfiguration, + invalid_device = cudaErrorInvalidDevice, + invalid_value = cudaErrorInvalidValue, + invalid_pitch_value = cudaErrorInvalidPitchValue, + invalid_symbol = cudaErrorInvalidSymbol, + map_buffer_object_failed = cudaErrorMapBufferObjectFailed, + unmap_buffer_object_failed = cudaErrorUnmapBufferObjectFailed, + invalid_host_pointer = cudaErrorInvalidHostPointer, + invalid_device_pointer = cudaErrorInvalidDevicePointer, + invalid_texture = cudaErrorInvalidTexture, + invalid_texture_binding = cudaErrorInvalidTextureBinding, + invalid_channel_descriptor = cudaErrorInvalidChannelDescriptor, + invalid_memcpy_direction = cudaErrorInvalidMemcpyDirection, + address_of_constant_error = cudaErrorAddressOfConstant, + texture_fetch_failed = cudaErrorTextureFetchFailed, + texture_not_bound = cudaErrorTextureNotBound, + synchronization_error = cudaErrorSynchronizationError, + invalid_filter_setting = cudaErrorInvalidFilterSetting, + invalid_norm_setting = cudaErrorInvalidNormSetting, + mixed_device_execution = cudaErrorMixedDeviceExecution, + cuda_runtime_unloading = cudaErrorCudartUnloading, + unknown = cudaErrorUnknown, + not_yet_implemented = cudaErrorNotYetImplemented, + memory_value_too_large = cudaErrorMemoryValueTooLarge, + invalid_resource_handle = cudaErrorInvalidResourceHandle, + not_ready = cudaErrorNotReady, + insufficient_driver = cudaErrorInsufficientDriver, + set_on_active_process_error = 
cudaErrorSetOnActiveProcess, + no_device = cudaErrorNoDevice, + ecc_uncorrectable = cudaErrorECCUncorrectable, + +#if CUDART_VERSION >= 4020 + shared_object_symbol_not_found = cudaErrorSharedObjectSymbolNotFound, + shared_object_init_failed = cudaErrorSharedObjectInitFailed, + unsupported_limit = cudaErrorUnsupportedLimit, + duplicate_variable_name = cudaErrorDuplicateVariableName, + duplicate_texture_name = cudaErrorDuplicateTextureName, + duplicate_surface_name = cudaErrorDuplicateSurfaceName, + devices_unavailable = cudaErrorDevicesUnavailable, + invalid_kernel_image = cudaErrorInvalidKernelImage, + no_kernel_image_for_device = cudaErrorNoKernelImageForDevice, + incompatible_driver_context = cudaErrorIncompatibleDriverContext, + peer_access_already_enabled = cudaErrorPeerAccessAlreadyEnabled, + peer_access_not_enabled = cudaErrorPeerAccessNotEnabled, + device_already_in_use = cudaErrorDeviceAlreadyInUse, + profiler_disabled = cudaErrorProfilerDisabled, + assert_triggered = cudaErrorAssert, + too_many_peers = cudaErrorTooManyPeers, + host_memory_already_registered = cudaErrorHostMemoryAlreadyRegistered, + host_memory_not_registered = cudaErrorHostMemoryNotRegistered, + operating_system_error = cudaErrorOperatingSystem, +#endif + +#if CUDART_VERSION >= 5000 + peer_access_unsupported = cudaErrorPeerAccessUnsupported, + launch_max_depth_exceeded = cudaErrorLaunchMaxDepthExceeded, + launch_file_scoped_texture_used = cudaErrorLaunchFileScopedTex, + launch_file_scoped_surface_used = cudaErrorLaunchFileScopedSurf, + sync_depth_exceeded = cudaErrorSyncDepthExceeded, + attempted_operation_not_permitted = cudaErrorNotPermitted, + attempted_operation_not_supported = cudaErrorNotSupported, +#endif + + startup_failure = cudaErrorStartupFailure +}; // end errc_t + + +} // end namespace errc + +} // end namespace cuda + +/*! \return A reference to an object of a type derived from class \p thrust::error_category. 
+ * \note The object's \p equivalent virtual functions shall behave as specified + * for the class \p thrust::error_category. The object's \p name virtual function shall + * return a pointer to the string "cuda". The object's + * \p default_error_condition virtual function shall behave as follows: + * + * If the argument ev corresponds to a CUDA error value, the function + * shall return error_condition(ev,cuda_category()). + * Otherwise, the function shall return system_category.default_error_condition(ev). + */ +inline const error_category &cuda_category(void); + + +// XXX N3000 prefers is_error_code_enum + +/*! Specialization of \p is_error_code_enum for \p cuda::errc::errc_t + */ +template<> struct is_error_code_enum : thrust::detail::true_type {}; + + +// XXX replace cuda::errc::errc_t with cuda::errc upon c++0x +/*! \return error_code(static_cast(e), cuda::error_category()) + */ +inline error_code make_error_code(cuda::errc::errc_t e); + + +// XXX replace cuda::errc::errc_t with cuda::errc upon c++0x +/*! \return error_condition(static_cast(e), cuda::error_category()). + */ +inline error_condition make_error_condition(cuda::errc::errc_t e); + +/*! \} // end system + */ + + +} // end system + +namespace cuda +{ + +// XXX replace with using system::cuda_errc upon c++0x +namespace errc = system::cuda::errc; + +} // end cuda + +using system::cuda_category; + +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/cuda/execution_policy.h b/compat/thrust/system/cuda/execution_policy.h new file mode 100644 index 0000000..bbd33de --- /dev/null +++ b/compat/thrust/system/cuda/execution_policy.h @@ -0,0 +1,165 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +/*! \file thrust/system/cuda/execution_policy.h + * \brief Execution policies for Thrust's CUDA system. + */ + +#include + +// get the execution policies definitions first +#include + +// get the definition of par +#include + +// now get all the algorithm defintitions + +// the order of the following #includes seems to matter, unfortunately + +// primitives come first, in order of increasing sophistication +#include +#include +#include + +#include +#include +#include +#include +#include + +// these are alphabetical +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +// define these entities here for the purpose of Doxygenating them +// they are actually defined elsewhere +#if 0 +namespace thrust +{ +namespace system +{ +namespace cuda +{ + + +/*! \addtogroup execution_policies + * \{ + */ + + +/*! \p thrust::cuda::execution_policy is the base class for all Thrust parallel execution + * policies which are derived from Thrust's CUDA backend system. + */ +template +struct execution_policy : thrust::execution_policy +{}; + + +/*! \p cuda::tag is a type representing Thrust's CUDA backend system in C++'s type system. 
+ * Iterators "tagged" with a type which is convertible to \p cuda::tag assert that they may be + * "dispatched" to algorithm implementations in the \p cuda system. + */ +struct tag : thrust::system::cuda::execution_policy { unspecified }; + + +/*! \p thrust::cuda::par is the parallel execution policy associated with Thrust's CUDA + * backend system. + * + * Instead of relying on implicit algorithm dispatch through iterator system tags, users may + * directly target Thrust's CUDA backend system by providing \p thrust::cuda::par as an algorithm + * parameter. + * + * Explicit dispatch can be useful in avoiding the introduction of data copies into containers such + * as \p thrust::cuda::vector. + * + * The type of \p thrust::cuda::par is implementation-defined. + * + * The following code snippet demonstrates how to use \p thrust::cuda::par to explicitly dispatch an + * invocation of \p thrust::for_each to the CUDA backend system: + * + * \code + * #include + * #include + * #include + * + * struct printf_functor + * { + * __host__ __device__ + * void operator()(int x) + * { + * printf("%d\n", x); + * } + * }; + * ... + * int vec[3]; + * vec[0] = 0; vec[1] = 1; vec[2] = 2; + * + * thrust::for_each(thrust::cuda::par, vec.begin(), vec.end(), printf_functor()); + * + * // 0 1 2 is printed to standard output in some unspecified order + * \endcode + */ +static const unspecified par; + + +/*! \} + */ + + +} // end cuda +} // end system +} // end thrust +#endif + + diff --git a/compat/thrust/system/cuda/experimental/pinned_allocator.h b/compat/thrust/system/cuda/experimental/pinned_allocator.h new file mode 100644 index 0000000..5294659 --- /dev/null +++ b/compat/thrust/system/cuda/experimental/pinned_allocator.h @@ -0,0 +1,239 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/cuda/experimental/pinned_allocator.h + * \brief An allocator which creates new elements in "pinned" memory with \p cudaMallocHost + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ + +namespace system +{ + +namespace cuda +{ + +namespace experimental +{ + +/*! \addtogroup memory_management Memory Management + * \addtogroup memory_management_classes + * \ingroup memory_management + * \{ + */ + +/*! \p pinned_allocator is a CUDA-specific host memory allocator + * that employs \c cudaMallocHost for allocation. + * + * \see http://www.sgi.com/tech/stl/Allocators.html + */ +template class pinned_allocator; + +template<> + class pinned_allocator +{ + public: + typedef void value_type; + typedef void * pointer; + typedef const void * const_pointer; + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + // convert a pinned_allocator to pinned_allocator + template + struct rebind + { + typedef pinned_allocator other; + }; // end rebind +}; // end pinned_allocator + + +template + class pinned_allocator +{ + public: + typedef T value_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + // convert a pinned_allocator to pinned_allocator + template + struct rebind + { + typedef pinned_allocator other; + }; // end rebind + + /*! \p pinned_allocator's null constructor does nothing. 
+ */ + __host__ __device__ + inline pinned_allocator() {} + + /*! \p pinned_allocator's null destructor does nothing. + */ + __host__ __device__ + inline ~pinned_allocator() {} + + /*! \p pinned_allocator's copy constructor does nothing. + */ + __host__ __device__ + inline pinned_allocator(pinned_allocator const &) {} + + /*! This version of \p pinned_allocator's copy constructor + * is templated on the \c value_type of the \p pinned_allocator + * to copy from. It is provided merely for convenience; it + * does nothing. + */ + template + __host__ __device__ + inline pinned_allocator(pinned_allocator const &) {} + + /*! This method returns the address of a \c reference of + * interest. + * + * \p r The \c reference of interest. + * \return \c r's address. + */ + __host__ __device__ + inline pointer address(reference r) { return &r; } + + /*! This method returns the address of a \c const_reference + * of interest. + * + * \p r The \c const_reference of interest. + * \return \c r's address. + */ + __host__ __device__ + inline const_pointer address(const_reference r) { return &r; } + + /*! This method allocates storage for objects in pinned host + * memory. + * + * \p cnt The number of objects to allocate. + * \return a \c pointer to the newly allocated objects. + * \note This method does not invoke \p value_type's constructor. + * It is the responsibility of the caller to initialize the + * objects at the returned \c pointer. + */ + __host__ + inline pointer allocate(size_type cnt, + const_pointer = 0) + { + if(cnt > this->max_size()) + { + throw std::bad_alloc(); + } // end if + + pointer result(0); + cudaError_t error = cudaMallocHost(reinterpret_cast(&result), cnt * sizeof(value_type)); + + if(error) + { + throw std::bad_alloc(); + } // end if + + return result; + } // end allocate() + + /*! This method deallocates pinned host memory previously allocated + * with this \c pinned_allocator. + * + * \p p A \c pointer to the previously allocated memory. 
+ * \p cnt The number of objects previously allocated at + * \p p. + * \note This method does not invoke \p value_type's destructor. + * It is the responsibility of the caller to destroy + * the objects stored at \p p. + */ + __host__ + inline void deallocate(pointer p, size_type cnt) + { + cudaError_t error = cudaFreeHost(p); + + if(error) + { + throw thrust::system_error(error, thrust::cuda_category()); + } // end if + } // end deallocate() + + /*! This method returns the maximum size of the \c cnt parameter + * accepted by the \p allocate() method. + * + * \return The maximum number of objects that may be allocated + * by a single call to \p allocate(). + */ + inline size_type max_size() const + { + return (std::numeric_limits::max)() / sizeof(T); + } // end max_size() + + /*! This method tests this \p pinned_allocator for equality to + * another. + * + * \param x The other \p pinned_allocator of interest. + * \return This method always returns \c true. + */ + __host__ __device__ + inline bool operator==(pinned_allocator const& x) { return true; } + + /*! This method tests this \p pinned_allocator for inequality + * to another. + * + * \param x The other \p pinned_allocator of interest. + * \return This method always returns \c false. + */ + __host__ __device__ + inline bool operator!=(pinned_allocator const &x) { return !operator==(x); } +}; // end pinned_allocator + +/*! 
\} + */ + +} // end experimental + +} // end cuda + +} // end system + +// alias cuda's members at top-level +namespace cuda +{ + +namespace experimental +{ + +using thrust::system::cuda::experimental::pinned_allocator; + +} // end experimental + +} // end cuda + +} // end thrust + diff --git a/compat/thrust/system/cuda/memory.h b/compat/thrust/system/cuda/memory.h new file mode 100644 index 0000000..368eea2 --- /dev/null +++ b/compat/thrust/system/cuda/memory.h @@ -0,0 +1,421 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/cuda/memory.h + * \brief Managing memory associated with Thrust's CUDA system. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ + +template class pointer; + +} // end cuda +} // end system +} // end thrust + + +/*!
\cond + */ + +// specialize std::iterator_traits to avoid problems with the name of +// pointer's constructor shadowing its nested pointer type +// do this before pointer is defined so the specialization is correctly +// used inside the definition +namespace std +{ + +template + struct iterator_traits > +{ + private: + typedef thrust::system::cuda::pointer ptr; + + public: + typedef typename ptr::iterator_category iterator_category; + typedef typename ptr::value_type value_type; + typedef typename ptr::difference_type difference_type; + typedef ptr pointer; + typedef typename ptr::reference reference; +}; // end iterator_traits + +} // end std + +/*! \endcond + */ + + +namespace thrust +{ +namespace system +{ + +/*! \addtogroup system_backends Systems + * \ingroup system + * \{ + */ + +/*! \namespace thrust::system::cuda + * \brief \p thrust::system::cuda is the namespace containing functionality for allocating, manipulating, + * and deallocating memory available to Thrust's CUDA backend system. + * The identifiers are provided in a separate namespace underneath thrust::system + * for import convenience but are also aliased in the top-level thrust::cuda + * namespace for easy access. + * + */ +namespace cuda +{ + +// forward declaration of reference for pointer +template class reference; + +/*! \cond + */ + +// XXX nvcc + msvc have trouble instantiating reference below +// this is a workaround +namespace detail +{ + +template + struct reference_msvc_workaround +{ + typedef thrust::system::cuda::reference type; +}; // end reference_msvc_workaround + +} // end detail + +/*! \endcond + */ + +#if 0 +/*! \p cuda::tag is a type representing Thrust's CUDA backend system in C++'s type system. + * Iterators "tagged" with a type which is convertible to \p cuda::tag assert that they may be + * "dispatched" to algorithm implementations in the \p cuda system. + */ +struct tag { unspecified }; +#endif + +/*!
\p pointer stores a pointer to an object allocated in memory available to the cuda system. + * This type provides type safety when dispatching standard algorithms on ranges resident + * in cuda memory. + * + * \p pointer has pointer semantics: it may be dereferenced and manipulated with pointer arithmetic. + * + * \p pointer can be created with the function \p cuda::malloc, or by explicitly calling its constructor + * with a raw pointer. + * + * The raw pointer encapsulated by a \p pointer may be obtained by either its get member function + * or the \p raw_pointer_cast function. + * + * \note \p pointer is not a "smart" pointer; it is the programmer's responsibility to deallocate memory + * pointed to by \p pointer. + * + * \tparam T specifies the type of the pointee. + * + * \see cuda::malloc + * \see cuda::free + * \see raw_pointer_cast + */ +template + class pointer + : public thrust::pointer< + T, + thrust::system::cuda::tag, + thrust::system::cuda::reference, + thrust::system::cuda::pointer + > +{ + /*! \cond + */ + + private: + typedef thrust::pointer< + T, + thrust::system::cuda::tag, + //thrust::system::cuda::reference, + typename detail::reference_msvc_workaround::type, + thrust::system::cuda::pointer + > super_t; + + /*! \endcond + */ + + public: + + /*! \p pointer's no-argument constructor initializes its encapsulated pointer to \c 0. + */ + __host__ __device__ + pointer() : super_t() {} + + /*! This constructor allows construction of a pointer from a T*. + * + * \param ptr A raw pointer to copy from, presumed to point to a location in memory + * accessible by the \p cuda system. + * \tparam OtherT \p OtherT shall be convertible to \p T. + */ + template + __host__ __device__ + explicit pointer(OtherT *ptr) : super_t(ptr) {} + + /*! This constructor allows construction from another pointer-like object with related type. + * + * \param other The \p OtherPointer to copy.
+ * \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible + * to \p thrust::system::cuda::tag and its element type shall be convertible to \p T. + */ + template + __host__ __device__ + pointer(const OtherPointer &other, + typename thrust::detail::enable_if_pointer_is_convertible< + OtherPointer, + pointer + >::type * = 0) : super_t(other) {} + + /*! Assignment operator allows assigning from another pointer-like object with related type. + * + * \param other The other pointer-like object to assign from. + * \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible + * to \p thrust::system::cuda::tag and its element type shall be convertible to \p T. + */ + template + __host__ __device__ + typename thrust::detail::enable_if_pointer_is_convertible< + OtherPointer, + pointer, + pointer & + >::type + operator=(const OtherPointer &other) + { + return super_t::operator=(other); + } +}; // end pointer + + +/*! \p reference is a wrapped reference to an object stored in memory available to the \p cuda system. + * \p reference is the type of the result of dereferencing a \p cuda::pointer. + * + * \tparam T Specifies the type of the referenced object. + */ +template + class reference + : public thrust::reference< + T, + thrust::system::cuda::pointer, + thrust::system::cuda::reference + > +{ + /*! \cond + */ + + private: + typedef thrust::reference< + T, + thrust::system::cuda::pointer, + thrust::system::cuda::reference + > super_t; + + /*! \endcond + */ + + public: + /*! \cond + */ + + typedef typename super_t::value_type value_type; + typedef typename super_t::pointer pointer; + + /*! \endcond + */ + + /*! This constructor initializes this \p reference to refer to an object + * pointed to by the given \p pointer. After this \p reference is constructed, + * it shall refer to the object pointed to by \p ptr. + * + * \param ptr A \p pointer to copy from. 
+ */ + __host__ __device__ + explicit reference(const pointer &ptr) + : super_t(ptr) + {} + + /*! This constructor accepts a const reference to another \p reference of related type. + * After this \p reference is constructed, it shall refer to the same object as \p other. + * + * \param other A \p reference to copy from. + * \tparam OtherT The element type of the other \p reference. + * + * \note This constructor is templated primarily to allow initialization of reference + * from reference. + */ + template + __host__ __device__ + reference(const reference &other, + typename thrust::detail::enable_if_convertible< + typename reference::pointer, + pointer + >::type * = 0) + : super_t(other) + {} + + /*! Copy assignment operator copy assigns from another \p reference of related type. + * + * \param other The other \p reference to assign from. + * \return *this + * \tparam OtherT The element type of the other \p reference. + */ + template + __host__ __device__ + reference &operator=(const reference &other); + + /*! Assignment operator assigns from a \p value_type. + * + * \param x The \p value_type to assign from. + * \return *this + */ + __host__ __device__ + reference &operator=(const value_type &x); +}; // end reference + +/*! Exchanges the values of two objects referred to by \p reference. + * \p x The first \p reference of interest. + * \p y The second \p reference of interest. + */ +template +__host__ __device__ +void swap(reference x, reference y); + +/*! Allocates an area of memory available to Thrust's cuda system. + * \param n Number of bytes to allocate. + * \return A cuda::pointer pointing to the beginning of the newly + * allocated memory. A null cuda::pointer is returned if + * an error occurs. + * \note The cuda::pointer returned by this function must be + * deallocated with \p cuda::free. + * \see cuda::free + * \see std::malloc + */ +inline pointer malloc(std::size_t n); + +/*! Allocates a typed area of memory available to Thrust's cuda system.
+ * \param n Number of elements to allocate. + * \return A cuda::pointer pointing to the beginning of the newly + * allocated memory. A null cuda::pointer is returned if + * an error occurs. + * \note The cuda::pointer returned by this function must be + * deallocated with \p cuda::free. + * \see cuda::free + * \see std::malloc + */ +template +inline pointer malloc(std::size_t n); + +/*! Deallocates an area of memory previously allocated by cuda::malloc. + * \param ptr A cuda::pointer pointing to the beginning of an area + * of memory previously allocated with cuda::malloc. + * \see cuda::malloc + * \see std::free + */ +inline void free(pointer ptr); + +// XXX upon c++11 +// template using allocator = thrust::detail::malloc_allocator >; + +/*! \p cuda::allocator is the default allocator used by the \p cuda system's containers such as + * cuda::vector if no user-specified allocator is provided. \p cuda::allocator allocates + * (deallocates) storage with \p cuda::malloc (\p cuda::free). + */ +template + struct allocator + : thrust::detail::malloc_allocator< + T, + tag, + pointer + > +{ + /*! The \p rebind metafunction provides the type of an \p allocator + * instantiated with another type. + * + * \tparam U The other type to use for instantiation. + */ + template + struct rebind + { + /*! The typedef \p other gives the type of the rebound \p allocator. + */ + typedef allocator other; + }; + + /*! No-argument constructor has no effect. + */ + __host__ __device__ + inline allocator() {} + + /*! Copy constructor has no effect. + */ + __host__ __device__ + inline allocator(const allocator &) {} + + /*! Constructor from other \p allocator has no effect. + */ + template + __host__ __device__ + inline allocator(const allocator &) {} + + /*! Destructor has no effect. + */ + __host__ __device__ + inline ~allocator() {} +}; // end allocator + +} // end cuda + +/*! \} + */ + +} // end system + +/*! 
\namespace thrust::cuda + * \brief \p thrust::cuda is a top-level alias for thrust::system::cuda. + */ +namespace cuda +{ + +using thrust::system::cuda::pointer; +using thrust::system::cuda::reference; +using thrust::system::cuda::malloc; +using thrust::system::cuda::free; +using thrust::system::cuda::allocator; + +} // end cuda + +} // end thrust + +#include + diff --git a/compat/thrust/system/cuda/vector.h b/compat/thrust/system/cuda/vector.h new file mode 100644 index 0000000..ac47a84 --- /dev/null +++ b/compat/thrust/system/cuda/vector.h @@ -0,0 +1,148 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/cuda/vector.h + * \brief A dynamically-sizable array of elements which reside in memory available to + * Thrust's CUDA system. + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +// forward declaration of host_vector +template class host_vector; + +namespace system +{ +namespace cuda +{ + +// XXX upon c++11 +// template > using vector = thrust::detail::vector_base; + +/*! \p cuda::vector is a container that supports random access to elements, + * constant time removal of elements at the end, and linear time insertion + * and removal of elements at the beginning or in the middle. The number of + * elements in a \p cuda::vector may vary dynamically; memory management is + * automatic.
The elements contained in a \p cuda::vector reside in memory + * available to the \p cuda system. + * + * \tparam T The element type of the \p cuda::vector. + * \tparam Allocator The allocator type of the \p cuda::vector. Defaults to \p cuda::allocator. + * + * \see http://www.sgi.com/tech/stl/Vector.html + * \see host_vector For the documentation of the complete interface which is + * shared by \p cuda::vector + * \see device_vector + */ +template > + class vector + : public thrust::detail::vector_base +{ + /*! \cond + */ + private: + typedef thrust::detail::vector_base super_t; + /*! \endcond + */ + + public: + + /*! \cond + */ + typedef typename super_t::size_type size_type; + typedef typename super_t::value_type value_type; + /*! \endcond + */ + + /*! This constructor creates an empty \p cuda::vector. + */ + vector(); + + /*! This constructor creates a \p cuda::vector with \p n default-constructed elements. + * \param n The size of the \p cuda::vector to create. + */ + explicit vector(size_type n); + + /*! This constructor creates a \p cuda::vector with \p n copies of \p value. + * \param n The size of the \p cuda::vector to create. + * \param value An element to copy. + */ + explicit vector(size_type n, const value_type &value); + + /*! Copy constructor copies from another \p cuda::vector. + * \param x The other \p cuda::vector to copy. + */ + vector(const vector &x); + + /*! This constructor copies from another Thrust vector-like object. + * \param x The other object to copy from. + */ + template + vector(const thrust::detail::vector_base &x); + + /*! This constructor copies from a \c std::vector. + * \param x The \c std::vector to copy from. + */ + template + vector(const std::vector &x); + + /*! This constructor creates a \p cuda::vector by copying from a range. + * \param first The beginning of the range. + * \param last The end of the range. 
+ */ + template + vector(InputIterator first, InputIterator last); + + // XXX vector_base should take a Derived type so we don't have to define these superfluous assigns + // + /*! Assignment operator assigns from a \c std::vector. + * \param x The \c std::vector to assign from. + * \return *this + */ + template + vector &operator=(const std::vector &x); + + /*! Assignment operator assigns from another Thrust vector-like object. + * \param x The other object to assign from. + * \return *this + */ + template + vector &operator=(const thrust::detail::vector_base &x); +}; // end vector + +} // end cuda +} // end system + +// alias system::cuda names at top-level +namespace cuda +{ + +using thrust::system::cuda::vector; + +} // end cuda + +} // end thrust + +#include + diff --git a/compat/thrust/system/detail/adl/adjacent_difference.h b/compat/thrust/system/detail/adl/adjacent_difference.h new file mode 100644 index 0000000..246c116 --- /dev/null +++ b/compat/thrust/system/detail/adl/adjacent_difference.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the adjacent_difference.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch adjacent_difference + +#define __THRUST_HOST_SYSTEM_ADJACENT_DIFFERENCE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/adjacent_difference.h> +#include __THRUST_HOST_SYSTEM_ADJACENT_DIFFERENCE_HEADER +#undef __THRUST_HOST_SYSTEM_ADJACENT_DIFFERENCE_HEADER + +#define __THRUST_DEVICE_SYSTEM_ADJACENT_DIFFERENCE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/adjacent_difference.h> +#include __THRUST_DEVICE_SYSTEM_ADJACENT_DIFFERENCE_HEADER +#undef __THRUST_DEVICE_SYSTEM_ADJACENT_DIFFERENCE_HEADER + diff --git a/compat/thrust/system/detail/adl/assign_value.h b/compat/thrust/system/detail/adl/assign_value.h new file mode 100644 index 0000000..b5c588a --- /dev/null +++ b/compat/thrust/system/detail/adl/assign_value.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the assign_value.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch assign_value + +#define __THRUST_HOST_SYSTEM_ASSIGN_VALUE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/assign_value.h> +#include __THRUST_HOST_SYSTEM_ASSIGN_VALUE_HEADER +#undef __THRUST_HOST_SYSTEM_ASSIGN_VALUE_HEADER + +#define __THRUST_DEVICE_SYSTEM_ASSIGN_VALUE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/assign_value.h> +#include __THRUST_DEVICE_SYSTEM_ASSIGN_VALUE_HEADER +#undef __THRUST_DEVICE_SYSTEM_ASSIGN_VALUE_HEADER + diff --git a/compat/thrust/system/detail/adl/binary_search.h b/compat/thrust/system/detail/adl/binary_search.h new file mode 100644 index 0000000..7accfbc --- /dev/null +++ b/compat/thrust/system/detail/adl/binary_search.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the binary_search.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch binary_search + +#define __THRUST_HOST_SYSTEM_BINARY_SEARCH_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/binary_search.h> +#include __THRUST_HOST_SYSTEM_BINARY_SEARCH_HEADER +#undef __THRUST_HOST_SYSTEM_BINARY_SEARCH_HEADER + +#define __THRUST_DEVICE_SYSTEM_BINARY_SEARCH_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/binary_search.h> +#include __THRUST_DEVICE_SYSTEM_BINARY_SEARCH_HEADER +#undef __THRUST_DEVICE_SYSTEM_BINARY_SEARCH_HEADER + diff --git a/compat/thrust/system/detail/adl/copy.h b/compat/thrust/system/detail/adl/copy.h new file mode 100644 index 0000000..91a32cd --- /dev/null +++ b/compat/thrust/system/detail/adl/copy.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the copy.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch copy + +#define __THRUST_HOST_SYSTEM_COPY_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/copy.h> +#include __THRUST_HOST_SYSTEM_COPY_HEADER +#undef __THRUST_HOST_SYSTEM_COPY_HEADER + +#define __THRUST_DEVICE_SYSTEM_COPY_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/copy.h> +#include __THRUST_DEVICE_SYSTEM_COPY_HEADER +#undef __THRUST_DEVICE_SYSTEM_COPY_HEADER + diff --git a/compat/thrust/system/detail/adl/copy_if.h b/compat/thrust/system/detail/adl/copy_if.h new file mode 100644 index 0000000..fd1df97 --- /dev/null +++ b/compat/thrust/system/detail/adl/copy_if.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the copy_if.h header +// of the host and device systems.
It should be #included in any +// code which uses adl to dispatch copy_if + +#define __THRUST_HOST_SYSTEM_BINARY_SEARCH_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/copy_if.h> +#include __THRUST_HOST_SYSTEM_BINARY_SEARCH_HEADER +#undef __THRUST_HOST_SYSTEM_BINARY_SEARCH_HEADER + +#define __THRUST_DEVICE_SYSTEM_BINARY_SEARCH_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/copy_if.h> +#include __THRUST_DEVICE_SYSTEM_BINARY_SEARCH_HEADER +#undef __THRUST_DEVICE_SYSTEM_BINARY_SEARCH_HEADER + diff --git a/compat/thrust/system/detail/adl/count.h b/compat/thrust/system/detail/adl/count.h new file mode 100644 index 0000000..0dd9591 --- /dev/null +++ b/compat/thrust/system/detail/adl/count.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the count.h header +// of the host and device systems.
It should be #included in any +// code which uses adl to dispatch count + +#define __THRUST_HOST_SYSTEM_COUNT_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/count.h> +#include __THRUST_HOST_SYSTEM_COUNT_HEADER +#undef __THRUST_HOST_SYSTEM_COUNT_HEADER + +#define __THRUST_DEVICE_SYSTEM_COUNT_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/count.h> +#include __THRUST_DEVICE_SYSTEM_COUNT_HEADER +#undef __THRUST_DEVICE_SYSTEM_COUNT_HEADER + diff --git a/compat/thrust/system/detail/adl/equal.h b/compat/thrust/system/detail/adl/equal.h new file mode 100644 index 0000000..f933d4f --- /dev/null +++ b/compat/thrust/system/detail/adl/equal.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the equal.h header +// of the host and device systems.
It should be #included in any +// code which uses adl to dispatch equal + +#define __THRUST_HOST_SYSTEM_EQUAL_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/equal.h> +#include __THRUST_HOST_SYSTEM_EQUAL_HEADER +#undef __THRUST_HOST_SYSTEM_EQUAL_HEADER + +#define __THRUST_DEVICE_SYSTEM_EQUAL_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/equal.h> +#include __THRUST_DEVICE_SYSTEM_EQUAL_HEADER +#undef __THRUST_DEVICE_SYSTEM_EQUAL_HEADER + diff --git a/compat/thrust/system/detail/adl/extrema.h b/compat/thrust/system/detail/adl/extrema.h new file mode 100644 index 0000000..c766570 --- /dev/null +++ b/compat/thrust/system/detail/adl/extrema.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the extrema.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch extrema + +#define __THRUST_HOST_SYSTEM_EXTREMA_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/extrema.h> +#include __THRUST_HOST_SYSTEM_EXTREMA_HEADER +#undef __THRUST_HOST_SYSTEM_EXTREMA_HEADER + +#define __THRUST_DEVICE_SYSTEM_EXTREMA_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/extrema.h> +#include __THRUST_DEVICE_SYSTEM_EXTREMA_HEADER +#undef __THRUST_DEVICE_SYSTEM_EXTREMA_HEADER + diff --git a/compat/thrust/system/detail/adl/fill.h b/compat/thrust/system/detail/adl/fill.h new file mode 100644 index 0000000..b241b8a --- /dev/null +++ b/compat/thrust/system/detail/adl/fill.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the fill.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch fill + +#define __THRUST_HOST_SYSTEM_FILL_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/fill.h> +#include __THRUST_HOST_SYSTEM_FILL_HEADER +#undef __THRUST_HOST_SYSTEM_FILL_HEADER + +#define __THRUST_DEVICE_SYSTEM_FILL_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/fill.h> +#include __THRUST_DEVICE_SYSTEM_FILL_HEADER +#undef __THRUST_DEVICE_SYSTEM_FILL_HEADER + diff --git a/compat/thrust/system/detail/adl/find.h b/compat/thrust/system/detail/adl/find.h new file mode 100644 index 0000000..7c99f3e --- /dev/null +++ b/compat/thrust/system/detail/adl/find.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the find.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch find + +#define __THRUST_HOST_SYSTEM_FIND_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/find.h> +#include __THRUST_HOST_SYSTEM_FIND_HEADER +#undef __THRUST_HOST_SYSTEM_FIND_HEADER + +#define __THRUST_DEVICE_SYSTEM_FIND_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/find.h> +#include __THRUST_DEVICE_SYSTEM_FIND_HEADER +#undef __THRUST_DEVICE_SYSTEM_FIND_HEADER + diff --git a/compat/thrust/system/detail/adl/for_each.h b/compat/thrust/system/detail/adl/for_each.h new file mode 100644 index 0000000..0b2717f --- /dev/null +++ b/compat/thrust/system/detail/adl/for_each.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the for_each.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch for_each + +#define __THRUST_HOST_SYSTEM_FOR_EACH_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/for_each.h> +#include __THRUST_HOST_SYSTEM_FOR_EACH_HEADER +#undef __THRUST_HOST_SYSTEM_FOR_EACH_HEADER + +#define __THRUST_DEVICE_SYSTEM_FOR_EACH_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/for_each.h> +#include __THRUST_DEVICE_SYSTEM_FOR_EACH_HEADER +#undef __THRUST_DEVICE_SYSTEM_FOR_EACH_HEADER + diff --git a/compat/thrust/system/detail/adl/gather.h b/compat/thrust/system/detail/adl/gather.h new file mode 100644 index 0000000..da4c1d1 --- /dev/null +++ b/compat/thrust/system/detail/adl/gather.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the gather.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch gather + +#define __THRUST_HOST_SYSTEM_GATHER_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/gather.h> +#include __THRUST_HOST_SYSTEM_GATHER_HEADER +#undef __THRUST_HOST_SYSTEM_GATHER_HEADER + +#define __THRUST_DEVICE_SYSTEM_GATHER_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/gather.h> +#include __THRUST_DEVICE_SYSTEM_GATHER_HEADER +#undef __THRUST_DEVICE_SYSTEM_GATHER_HEADER + diff --git a/compat/thrust/system/detail/adl/generate.h b/compat/thrust/system/detail/adl/generate.h new file mode 100644 index 0000000..3a98847 --- /dev/null +++ b/compat/thrust/system/detail/adl/generate.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the generate.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch generate + +#define __THRUST_HOST_SYSTEM_GENERATE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/generate.h> +#include __THRUST_HOST_SYSTEM_GENERATE_HEADER +#undef __THRUST_HOST_SYSTEM_GENERATE_HEADER + +#define __THRUST_DEVICE_SYSTEM_GENERATE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/generate.h> +#include __THRUST_DEVICE_SYSTEM_GENERATE_HEADER +#undef __THRUST_DEVICE_SYSTEM_GENERATE_HEADER + diff --git a/compat/thrust/system/detail/adl/get_value.h b/compat/thrust/system/detail/adl/get_value.h new file mode 100644 index 0000000..ed4ef2c --- /dev/null +++ b/compat/thrust/system/detail/adl/get_value.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the get_value.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch get_value + +#define __THRUST_HOST_SYSTEM_GET_VALUE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/get_value.h> +#include __THRUST_HOST_SYSTEM_GET_VALUE_HEADER +#undef __THRUST_HOST_SYSTEM_GET_VALUE_HEADER + +#define __THRUST_DEVICE_SYSTEM_GET_VALUE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/get_value.h> +#include __THRUST_DEVICE_SYSTEM_GET_VALUE_HEADER +#undef __THRUST_DEVICE_SYSTEM_GET_VALUE_HEADER + diff --git a/compat/thrust/system/detail/adl/inner_product.h b/compat/thrust/system/detail/adl/inner_product.h new file mode 100644 index 0000000..18cc65b --- /dev/null +++ b/compat/thrust/system/detail/adl/inner_product.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the inner_product.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch inner_product + +#define __THRUST_HOST_SYSTEM_INNER_PRODUCT_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/inner_product.h> +#include __THRUST_HOST_SYSTEM_INNER_PRODUCT_HEADER +#undef __THRUST_HOST_SYSTEM_INNER_PRODUCT_HEADER + +#define __THRUST_DEVICE_SYSTEM_INNER_PRODUCT_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/inner_product.h> +#include __THRUST_DEVICE_SYSTEM_INNER_PRODUCT_HEADER +#undef __THRUST_DEVICE_SYSTEM_INNER_PRODUCT_HEADER + diff --git a/compat/thrust/system/detail/adl/iter_swap.h b/compat/thrust/system/detail/adl/iter_swap.h new file mode 100644 index 0000000..b302c25 --- /dev/null +++ b/compat/thrust/system/detail/adl/iter_swap.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the iter_swap.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch iter_swap + +#define __THRUST_HOST_SYSTEM_ITER_SWAP_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/iter_swap.h> +#include __THRUST_HOST_SYSTEM_ITER_SWAP_HEADER +#undef __THRUST_HOST_SYSTEM_ITER_SWAP_HEADER + +#define __THRUST_DEVICE_SYSTEM_ITER_SWAP_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/iter_swap.h> +#include __THRUST_DEVICE_SYSTEM_ITER_SWAP_HEADER +#undef __THRUST_DEVICE_SYSTEM_ITER_SWAP_HEADER + diff --git a/compat/thrust/system/detail/adl/logical.h b/compat/thrust/system/detail/adl/logical.h new file mode 100644 index 0000000..585f71a --- /dev/null +++ b/compat/thrust/system/detail/adl/logical.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the logical.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch logical + +#define __THRUST_HOST_SYSTEM_LOGICAL_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/logical.h> +#include __THRUST_HOST_SYSTEM_LOGICAL_HEADER +#undef __THRUST_HOST_SYSTEM_LOGICAL_HEADER + +#define __THRUST_DEVICE_SYSTEM_LOGICAL_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/logical.h> +#include __THRUST_DEVICE_SYSTEM_LOGICAL_HEADER +#undef __THRUST_DEVICE_SYSTEM_LOGICAL_HEADER + diff --git a/compat/thrust/system/detail/adl/malloc_and_free.h b/compat/thrust/system/detail/adl/malloc_and_free.h new file mode 100644 index 0000000..7d99a26 --- /dev/null +++ b/compat/thrust/system/detail/adl/malloc_and_free.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the malloc_and_free.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch malloc_and_free + +#define __THRUST_HOST_SYSTEM_MALLOC_AND_FREE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/malloc_and_free.h> +#include __THRUST_HOST_SYSTEM_MALLOC_AND_FREE_HEADER +#undef __THRUST_HOST_SYSTEM_MALLOC_AND_FREE_HEADER + +#define __THRUST_DEVICE_SYSTEM_MALLOC_AND_FREE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/malloc_and_free.h> +#include __THRUST_DEVICE_SYSTEM_MALLOC_AND_FREE_HEADER +#undef __THRUST_DEVICE_SYSTEM_MALLOC_AND_FREE_HEADER + diff --git a/compat/thrust/system/detail/adl/merge.h b/compat/thrust/system/detail/adl/merge.h new file mode 100644 index 0000000..59d8ace --- /dev/null +++ b/compat/thrust/system/detail/adl/merge.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the merge.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch merge + +#define __THRUST_HOST_SYSTEM_MERGE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/merge.h> +#include __THRUST_HOST_SYSTEM_MERGE_HEADER +#undef __THRUST_HOST_SYSTEM_MERGE_HEADER + +#define __THRUST_DEVICE_SYSTEM_MERGE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/merge.h> +#include __THRUST_DEVICE_SYSTEM_MERGE_HEADER +#undef __THRUST_DEVICE_SYSTEM_MERGE_HEADER + diff --git a/compat/thrust/system/detail/adl/mismatch.h b/compat/thrust/system/detail/adl/mismatch.h new file mode 100644 index 0000000..d2d1831 --- /dev/null +++ b/compat/thrust/system/detail/adl/mismatch.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the mismatch.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch mismatch + +#define __THRUST_HOST_SYSTEM_MISMATCH_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/mismatch.h> +#include __THRUST_HOST_SYSTEM_MISMATCH_HEADER +#undef __THRUST_HOST_SYSTEM_MISMATCH_HEADER + +#define __THRUST_DEVICE_SYSTEM_MISMATCH_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/mismatch.h> +#include __THRUST_DEVICE_SYSTEM_MISMATCH_HEADER +#undef __THRUST_DEVICE_SYSTEM_MISMATCH_HEADER + diff --git a/compat/thrust/system/detail/adl/partition.h b/compat/thrust/system/detail/adl/partition.h new file mode 100644 index 0000000..efdc605 --- /dev/null +++ b/compat/thrust/system/detail/adl/partition.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the partition.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch partition + +#define __THRUST_HOST_SYSTEM_PARTITION_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/partition.h> +#include __THRUST_HOST_SYSTEM_PARTITION_HEADER +#undef __THRUST_HOST_SYSTEM_PARTITION_HEADER + +#define __THRUST_DEVICE_SYSTEM_PARTITION_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/partition.h> +#include __THRUST_DEVICE_SYSTEM_PARTITION_HEADER +#undef __THRUST_DEVICE_SYSTEM_PARTITION_HEADER + diff --git a/compat/thrust/system/detail/adl/reduce.h b/compat/thrust/system/detail/adl/reduce.h new file mode 100644 index 0000000..afa00f9 --- /dev/null +++ b/compat/thrust/system/detail/adl/reduce.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the reduce.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch reduce + +#define __THRUST_HOST_SYSTEM_REDUCE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/reduce.h> +#include __THRUST_HOST_SYSTEM_REDUCE_HEADER +#undef __THRUST_HOST_SYSTEM_REDUCE_HEADER + +#define __THRUST_DEVICE_SYSTEM_REDUCE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/reduce.h> +#include __THRUST_DEVICE_SYSTEM_REDUCE_HEADER +#undef __THRUST_DEVICE_SYSTEM_REDUCE_HEADER + diff --git a/compat/thrust/system/detail/adl/reduce_by_key.h b/compat/thrust/system/detail/adl/reduce_by_key.h new file mode 100644 index 0000000..eac65b7 --- /dev/null +++ b/compat/thrust/system/detail/adl/reduce_by_key.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the reduce_by_key.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch reduce_by_key + +#define __THRUST_HOST_SYSTEM_REDUCE_BY_KEY_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/reduce_by_key.h> +#include __THRUST_HOST_SYSTEM_REDUCE_BY_KEY_HEADER +#undef __THRUST_HOST_SYSTEM_REDUCE_BY_KEY_HEADER + +#define __THRUST_DEVICE_SYSTEM_REDUCE_BY_KEY_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/reduce_by_key.h> +#include __THRUST_DEVICE_SYSTEM_REDUCE_BY_KEY_HEADER +#undef __THRUST_DEVICE_SYSTEM_REDUCE_BY_KEY_HEADER + diff --git a/compat/thrust/system/detail/adl/remove.h b/compat/thrust/system/detail/adl/remove.h new file mode 100644 index 0000000..9d64be8 --- /dev/null +++ b/compat/thrust/system/detail/adl/remove.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the remove.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch remove + +#define __THRUST_HOST_SYSTEM_REMOVE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/remove.h> +#include __THRUST_HOST_SYSTEM_REMOVE_HEADER +#undef __THRUST_HOST_SYSTEM_REMOVE_HEADER + +#define __THRUST_DEVICE_SYSTEM_REMOVE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/remove.h> +#include __THRUST_DEVICE_SYSTEM_REMOVE_HEADER +#undef __THRUST_DEVICE_SYSTEM_REMOVE_HEADER + diff --git a/compat/thrust/system/detail/adl/replace.h b/compat/thrust/system/detail/adl/replace.h new file mode 100644 index 0000000..e4d8bd2 --- /dev/null +++ b/compat/thrust/system/detail/adl/replace.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the replace.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch replace + +#define __THRUST_HOST_SYSTEM_REPLACE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/replace.h> +#include __THRUST_HOST_SYSTEM_REPLACE_HEADER +#undef __THRUST_HOST_SYSTEM_REPLACE_HEADER + +#define __THRUST_DEVICE_SYSTEM_REPLACE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/replace.h> +#include __THRUST_DEVICE_SYSTEM_REPLACE_HEADER +#undef __THRUST_DEVICE_SYSTEM_REPLACE_HEADER + diff --git a/compat/thrust/system/detail/adl/reverse.h b/compat/thrust/system/detail/adl/reverse.h new file mode 100644 index 0000000..8cbcfd8 --- /dev/null +++ b/compat/thrust/system/detail/adl/reverse.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the reverse.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch reverse + +#define __THRUST_HOST_SYSTEM_REVERSE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/reverse.h> +#include __THRUST_HOST_SYSTEM_REVERSE_HEADER +#undef __THRUST_HOST_SYSTEM_REVERSE_HEADER + +#define __THRUST_DEVICE_SYSTEM_REVERSE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/reverse.h> +#include __THRUST_DEVICE_SYSTEM_REVERSE_HEADER +#undef __THRUST_DEVICE_SYSTEM_REVERSE_HEADER + diff --git a/compat/thrust/system/detail/adl/scan.h b/compat/thrust/system/detail/adl/scan.h new file mode 100644 index 0000000..e70cd9f --- /dev/null +++ b/compat/thrust/system/detail/adl/scan.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the scan.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch scan + +#define __THRUST_HOST_SYSTEM_SCAN_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/scan.h> +#include __THRUST_HOST_SYSTEM_SCAN_HEADER +#undef __THRUST_HOST_SYSTEM_SCAN_HEADER + +#define __THRUST_DEVICE_SYSTEM_SCAN_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/scan.h> +#include __THRUST_DEVICE_SYSTEM_SCAN_HEADER +#undef __THRUST_DEVICE_SYSTEM_SCAN_HEADER + diff --git a/compat/thrust/system/detail/adl/scan_by_key.h b/compat/thrust/system/detail/adl/scan_by_key.h new file mode 100644 index 0000000..02c4b84 --- /dev/null +++ b/compat/thrust/system/detail/adl/scan_by_key.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the scan_by_key.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch scan_by_key + +#define __THRUST_HOST_SYSTEM_SCAN_BY_KEY_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/scan_by_key.h> +#include __THRUST_HOST_SYSTEM_SCAN_BY_KEY_HEADER +#undef __THRUST_HOST_SYSTEM_SCAN_BY_KEY_HEADER + +#define __THRUST_DEVICE_SYSTEM_SCAN_BY_KEY_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/scan_by_key.h> +#include __THRUST_DEVICE_SYSTEM_SCAN_BY_KEY_HEADER +#undef __THRUST_DEVICE_SYSTEM_SCAN_BY_KEY_HEADER + diff --git a/compat/thrust/system/detail/adl/scatter.h b/compat/thrust/system/detail/adl/scatter.h new file mode 100644 index 0000000..b94b0d9 --- /dev/null +++ b/compat/thrust/system/detail/adl/scatter.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the scatter.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch scatter + +#define __THRUST_HOST_SYSTEM_SCATTER_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/scatter.h> +#include __THRUST_HOST_SYSTEM_SCATTER_HEADER +#undef __THRUST_HOST_SYSTEM_SCATTER_HEADER + +#define __THRUST_DEVICE_SYSTEM_SCATTER_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/scatter.h> +#include __THRUST_DEVICE_SYSTEM_SCATTER_HEADER +#undef __THRUST_DEVICE_SYSTEM_SCATTER_HEADER + diff --git a/compat/thrust/system/detail/adl/sequence.h b/compat/thrust/system/detail/adl/sequence.h new file mode 100644 index 0000000..07dcc7b --- /dev/null +++ b/compat/thrust/system/detail/adl/sequence.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the sequence.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch sequence + +#define __THRUST_HOST_SYSTEM_SEQUENCE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/sequence.h> +#include __THRUST_HOST_SYSTEM_SEQUENCE_HEADER +#undef __THRUST_HOST_SYSTEM_SEQUENCE_HEADER + +#define __THRUST_DEVICE_SYSTEM_SEQUENCE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/sequence.h> +#include __THRUST_DEVICE_SYSTEM_SEQUENCE_HEADER +#undef __THRUST_DEVICE_SYSTEM_SEQUENCE_HEADER + diff --git a/compat/thrust/system/detail/adl/set_operations.h b/compat/thrust/system/detail/adl/set_operations.h new file mode 100644 index 0000000..9901b46 --- /dev/null +++ b/compat/thrust/system/detail/adl/set_operations.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the set_operations.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch set_operations + +#define __THRUST_HOST_SYSTEM_SET_OPERATIONS_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/set_operations.h> +#include __THRUST_HOST_SYSTEM_SET_OPERATIONS_HEADER +#undef __THRUST_HOST_SYSTEM_SET_OPERATIONS_HEADER + +#define __THRUST_DEVICE_SYSTEM_SET_OPERATIONS_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/set_operations.h> +#include __THRUST_DEVICE_SYSTEM_SET_OPERATIONS_HEADER +#undef __THRUST_DEVICE_SYSTEM_SET_OPERATIONS_HEADER + diff --git a/compat/thrust/system/detail/adl/sort.h b/compat/thrust/system/detail/adl/sort.h new file mode 100644 index 0000000..afcb903 --- /dev/null +++ b/compat/thrust/system/detail/adl/sort.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the sort.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch sort + +#define __THRUST_HOST_SYSTEM_SORT_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/sort.h> +#include __THRUST_HOST_SYSTEM_SORT_HEADER +#undef __THRUST_HOST_SYSTEM_SORT_HEADER + +#define __THRUST_DEVICE_SYSTEM_SORT_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/sort.h> +#include __THRUST_DEVICE_SYSTEM_SORT_HEADER +#undef __THRUST_DEVICE_SYSTEM_SORT_HEADER + diff --git a/compat/thrust/system/detail/adl/swap_ranges.h b/compat/thrust/system/detail/adl/swap_ranges.h new file mode 100644 index 0000000..c006936 --- /dev/null +++ b/compat/thrust/system/detail/adl/swap_ranges.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the swap_ranges.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch swap_ranges + +#define __THRUST_HOST_SYSTEM_SWAP_RANGES_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/swap_ranges.h> +#include __THRUST_HOST_SYSTEM_SWAP_RANGES_HEADER +#undef __THRUST_HOST_SYSTEM_SWAP_RANGES_HEADER + +#define __THRUST_DEVICE_SYSTEM_SWAP_RANGES_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/swap_ranges.h> +#include __THRUST_DEVICE_SYSTEM_SWAP_RANGES_HEADER +#undef __THRUST_DEVICE_SYSTEM_SWAP_RANGES_HEADER + diff --git a/compat/thrust/system/detail/adl/tabulate.h b/compat/thrust/system/detail/adl/tabulate.h new file mode 100644 index 0000000..cb1fdeb --- /dev/null +++ b/compat/thrust/system/detail/adl/tabulate.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the tabulate.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch tabulate + +#define __THRUST_HOST_SYSTEM_TABULATE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/tabulate.h> +#include __THRUST_HOST_SYSTEM_TABULATE_HEADER +#undef __THRUST_HOST_SYSTEM_TABULATE_HEADER + +#define __THRUST_DEVICE_SYSTEM_TABULATE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/tabulate.h> +#include __THRUST_DEVICE_SYSTEM_TABULATE_HEADER +#undef __THRUST_DEVICE_SYSTEM_TABULATE_HEADER + diff --git a/compat/thrust/system/detail/adl/temporary_buffer.h b/compat/thrust/system/detail/adl/temporary_buffer.h new file mode 100644 index 0000000..66df0ea --- /dev/null +++ b/compat/thrust/system/detail/adl/temporary_buffer.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the temporary_buffer.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch get_temporary_buffer or return_temporary_buffer + +#define __THRUST_HOST_SYSTEM_TEMPORARY_BUFFER_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/temporary_buffer.h> +#include __THRUST_HOST_SYSTEM_TEMPORARY_BUFFER_HEADER +#undef __THRUST_HOST_SYSTEM_TEMPORARY_BUFFER_HEADER + +#define __THRUST_DEVICE_SYSTEM_TEMPORARY_BUFFER_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/temporary_buffer.h> +#include __THRUST_DEVICE_SYSTEM_TEMPORARY_BUFFER_HEADER +#undef __THRUST_DEVICE_SYSTEM_TEMPORARY_BUFFER_HEADER + diff --git a/compat/thrust/system/detail/adl/transform.h b/compat/thrust/system/detail/adl/transform.h new file mode 100644 index 0000000..c9e6a01 --- /dev/null +++ b/compat/thrust/system/detail/adl/transform.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the transform.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch transform + +#define __THRUST_HOST_SYSTEM_TRANSFORM_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/transform.h> +#include __THRUST_HOST_SYSTEM_TRANSFORM_HEADER +#undef __THRUST_HOST_SYSTEM_TRANSFORM_HEADER + +#define __THRUST_DEVICE_SYSTEM_TRANSFORM_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/transform.h> +#include __THRUST_DEVICE_SYSTEM_TRANSFORM_HEADER +#undef __THRUST_DEVICE_SYSTEM_TRANSFORM_HEADER + diff --git a/compat/thrust/system/detail/adl/transform_reduce.h b/compat/thrust/system/detail/adl/transform_reduce.h new file mode 100644 index 0000000..0a5d977 --- /dev/null +++ b/compat/thrust/system/detail/adl/transform_reduce.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the transform_reduce.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch transform_reduce + +#define __THRUST_HOST_SYSTEM_TRANSFORM_REDUCE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/transform_reduce.h> +#include __THRUST_HOST_SYSTEM_TRANSFORM_REDUCE_HEADER +#undef __THRUST_HOST_SYSTEM_TRANSFORM_REDUCE_HEADER + +#define __THRUST_DEVICE_SYSTEM_TRANSFORM_REDUCE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/transform_reduce.h> +#include __THRUST_DEVICE_SYSTEM_TRANSFORM_REDUCE_HEADER +#undef __THRUST_DEVICE_SYSTEM_TRANSFORM_REDUCE_HEADER + diff --git a/compat/thrust/system/detail/adl/transform_scan.h b/compat/thrust/system/detail/adl/transform_scan.h new file mode 100644 index 0000000..47c1dc3 --- /dev/null +++ b/compat/thrust/system/detail/adl/transform_scan.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the transform_scan.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch transform_scan + +#define __THRUST_HOST_SYSTEM_TRANSFORM_SCAN_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/transform_scan.h> +#include __THRUST_HOST_SYSTEM_TRANSFORM_SCAN_HEADER +#undef __THRUST_HOST_SYSTEM_TRANSFORM_SCAN_HEADER + +#define __THRUST_DEVICE_SYSTEM_TRANSFORM_SCAN_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/transform_scan.h> +#include __THRUST_DEVICE_SYSTEM_TRANSFORM_SCAN_HEADER +#undef __THRUST_DEVICE_SYSTEM_TRANSFORM_SCAN_HEADER + diff --git a/compat/thrust/system/detail/adl/uninitialized_copy.h b/compat/thrust/system/detail/adl/uninitialized_copy.h new file mode 100644 index 0000000..7cb0b8e --- /dev/null +++ b/compat/thrust/system/detail/adl/uninitialized_copy.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the uninitialized_copy.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch uninitialized_copy + +#define __THRUST_HOST_SYSTEM_UNINITIALIZED_COPY_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/uninitialized_copy.h> +#include __THRUST_HOST_SYSTEM_UNINITIALIZED_COPY_HEADER +#undef __THRUST_HOST_SYSTEM_UNINITIALIZED_COPY_HEADER + +#define __THRUST_DEVICE_SYSTEM_UNINITIALIZED_COPY_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/uninitialized_copy.h> +#include __THRUST_DEVICE_SYSTEM_UNINITIALIZED_COPY_HEADER +#undef __THRUST_DEVICE_SYSTEM_UNINITIALIZED_COPY_HEADER + diff --git a/compat/thrust/system/detail/adl/uninitialized_fill.h b/compat/thrust/system/detail/adl/uninitialized_fill.h new file mode 100644 index 0000000..9f00b51 --- /dev/null +++ b/compat/thrust/system/detail/adl/uninitialized_fill.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the uninitialized_fill.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch uninitialized_fill + +#define __THRUST_HOST_SYSTEM_UNINITIALIZED_FILL_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/uninitialized_fill.h> +#include __THRUST_HOST_SYSTEM_UNINITIALIZED_FILL_HEADER +#undef __THRUST_HOST_SYSTEM_UNINITIALIZED_FILL_HEADER + +#define __THRUST_DEVICE_SYSTEM_UNINITIALIZED_FILL_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/uninitialized_fill.h> +#include __THRUST_DEVICE_SYSTEM_UNINITIALIZED_FILL_HEADER +#undef __THRUST_DEVICE_SYSTEM_UNINITIALIZED_FILL_HEADER + diff --git a/compat/thrust/system/detail/adl/unique.h b/compat/thrust/system/detail/adl/unique.h new file mode 100644 index 0000000..932ff58 --- /dev/null +++ b/compat/thrust/system/detail/adl/unique.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the unique.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch unique + +#define __THRUST_HOST_SYSTEM_UNIQUE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/unique.h> +#include __THRUST_HOST_SYSTEM_UNIQUE_HEADER +#undef __THRUST_HOST_SYSTEM_UNIQUE_HEADER + +#define __THRUST_DEVICE_SYSTEM_UNIQUE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/unique.h> +#include __THRUST_DEVICE_SYSTEM_UNIQUE_HEADER +#undef __THRUST_DEVICE_SYSTEM_UNIQUE_HEADER + diff --git a/compat/thrust/system/detail/adl/unique_by_key.h b/compat/thrust/system/detail/adl/unique_by_key.h new file mode 100644 index 0000000..30e6f2f --- /dev/null +++ b/compat/thrust/system/detail/adl/unique_by_key.h @@ -0,0 +1,32 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// the purpose of this header is to #include the unique_by_key.h header +// of the host and device systems. 
It should be #included in any +// code which uses adl to dispatch unique_by_key + +#define __THRUST_HOST_SYSTEM_UNIQUE_BY_KEY_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/unique_by_key.h> +#include __THRUST_HOST_SYSTEM_UNIQUE_BY_KEY_HEADER +#undef __THRUST_HOST_SYSTEM_UNIQUE_BY_KEY_HEADER + +#define __THRUST_DEVICE_SYSTEM_UNIQUE_BY_KEY_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/unique_by_key.h> +#include __THRUST_DEVICE_SYSTEM_UNIQUE_BY_KEY_HEADER +#undef __THRUST_DEVICE_SYSTEM_UNIQUE_BY_KEY_HEADER + diff --git a/compat/thrust/system/detail/bad_alloc.h b/compat/thrust/system/detail/bad_alloc.h new file mode 100644 index 0000000..bb73d1f --- /dev/null +++ b/compat/thrust/system/detail/bad_alloc.h @@ -0,0 +1,57 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ + +// define our own bad_alloc so we can set its .what(): the message is std::bad_alloc::what(), then ": ", then the caller-supplied string +class bad_alloc + : public std::bad_alloc +{ + public: + inline bad_alloc(const std::string &w) + : std::bad_alloc(), m_what() + { + m_what = std::bad_alloc::what(); // start from the base class's message + m_what += ": "; + m_what += w; // append the caller's detail text + } // end bad_alloc() + + inline virtual ~bad_alloc(void) throw () {}; + + inline virtual const char *what(void) const throw() + { + return m_what.c_str(); // pointer into m_what; valid only while this object is alive + } // end what() + + private: + std::string m_what; // full message returned by what() +}; // end bad_alloc + +} // end detail +} // end system +} // end thrust + diff --git a/compat/thrust/system/detail/errno.h b/compat/thrust/system/detail/errno.h new file mode 100644 index 0000000..34bc8cc --- /dev/null +++ b/compat/thrust/system/detail/errno.h @@ -0,0 +1,120 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#pragma once + +#include + +// The rationale for the existence of these apparently redundant definitions is +// to provide them portably and to avoid bringing in system headers which might +// pollute the global namespace. These identifiers are in lowercase to avoid +// colliding with the real macros in errno.h. 
+ +namespace thrust +{ + +namespace system +{ + +namespace detail +{ + +static const int eafnosupport = 9901; +static const int eaddrinuse = 9902; +static const int eaddrnotavail = 9903; +static const int eisconn = 9904; +static const int ebadmsg = 9905; +static const int econnaborted = 9906; +static const int ealready = 9907; +static const int econnrefused = 9908; +static const int econnreset = 9909; +static const int edestaddrreq = 9910; +static const int ehostunreach = 9911; +static const int eidrm = 9912; +static const int emsgsize = 9913; +static const int enetdown = 9914; +static const int enetreset = 9915; +static const int enetunreach = 9916; +static const int enobufs = 9917; +static const int enolink = 9918; +static const int enodata = 9919; +static const int enomsg = 9920; +static const int enoprotoopt = 9921; +static const int enosr = 9922; +static const int enotsock = 9923; +static const int enostr = 9924; +static const int enotconn = 9925; +static const int enotsup = 9926; +static const int ecanceled = 9927; +static const int einprogress = 9928; +static const int eopnotsupp = 9929; +static const int ewouldblock = 9930; +static const int eownerdead = 9931; +static const int eproto = 9932; +static const int eprotonosupport = 9933; +static const int enotrecoverable = 9934; +static const int etime = 9935; +static const int etxtbsy = 9936; +static const int etimedout = 9938; // NOTE(review): 9937 is skipped in this table -- presumably intentional; confirm before reusing the value +static const int eloop = 9939; +static const int eoverflow = 9940; +static const int eprototype = 9941; +static const int enosys = 9942; +static const int einval = 9943; +static const int erange = 9944; +static const int eilseq = 9945; +static const int e2big = 9946; +static const int edom = 9947; +static const int efault = 9948; +static const int ebadf = 9949; +static const int epipe = 9950; +static const int exdev = 9951; +static const int ebusy = 9952; +static const int enotempty = 9953; +static const int enoexec = 9954; +static const int eexist = 9955; +static const int efbig = 
9956; +static const int enametoolong = 9957; +static const int enotty = 9958; +static const int eintr = 9959; +static const int espipe = 9960; +static const int eio = 9961; +static const int eisdir = 9962; +static const int echild = 9963; +static const int enolck = 9964; +static const int enospc = 9965; +static const int enxio = 9966; +static const int enodev = 9967; +static const int enoent = 9968; +static const int esrch = 9969; +static const int enotdir = 9970; +static const int enomem = 9971; +static const int eperm = 9972; +static const int eacces = 9973; +static const int erofs = 9974; +static const int edeadlk = 9975; +static const int eagain = 9976; +static const int enfile = 9977; +static const int emfile = 9978; +static const int emlink = 9979; + +} // end detail + +} // end system + +} // end thrust + diff --git a/compat/thrust/system/detail/error_category.inl b/compat/thrust/system/detail/error_category.inl new file mode 100644 index 0000000..8e19c89 --- /dev/null +++ b/compat/thrust/system/detail/error_category.inl @@ -0,0 +1,234 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +namespace system +{ + +error_category + ::~error_category(void) +{ + ; +} // end error_category::~error_category() + + +error_condition error_category + ::default_error_condition(int ev) const +{ + return error_condition(ev, *this); // by default the condition keeps the same value and this category +} // end error_category::default_error_condition() + + +bool error_category + ::equivalent(int code, const error_condition &condition) const +{ + return default_error_condition(code) == condition; +} // end error_category::equivalent() + + +bool error_category + ::equivalent(const error_code &code, int condition) const +{ + bool result = (this->operator==(code.category())) && (code.value() == condition); + return result; +} // end error_category::equivalent() + + +bool error_category + ::operator==(const error_category &rhs) const +{ + return this == &rhs; // categories compare by object identity (address) +} // end error_category::operator==() + + +bool error_category + ::operator!=(const error_category &rhs) const +{ + return !this->operator==(rhs); +} // end error_category::operator!=() + + +bool error_category + ::operator<(const error_category &rhs) const +{ + return thrust::less()(this,&rhs); // ordering is by object address, consistent with operator== +} // end error_category::operator<() + + +namespace detail +{ + + +class generic_error_category + : public error_category +{ + public: + inline generic_error_category(void) {} + + inline virtual const char *name(void) const + { + return "generic"; + } + + inline virtual std::string message(int ev) const + { + static const std::string unknown_err("Unknown error"); + + // XXX strerror is not thread-safe: + // prefer strerror_r (which is not provided on windows) + const char *c_str = std::strerror(ev); + return c_str ? 
std::string(c_str) : unknown_err; + } +}; // end generic_error_category + + +class system_error_category + : public error_category +{ + public: + inline system_error_category(void) {} + + inline virtual const char *name(void) const + { + return "system"; + } + + inline virtual std::string message(int ev) const + { + return generic_category().message(ev); // system codes reuse the generic (strerror-based) message text + } + + inline virtual error_condition default_error_condition(int ev) const + { + using namespace errc; + + switch(ev) + { + case eafnosupport: return make_error_condition(address_family_not_supported); + case eaddrinuse: return make_error_condition(address_in_use); + case eaddrnotavail: return make_error_condition(address_not_available); + case eisconn: return make_error_condition(already_connected); + case e2big: return make_error_condition(argument_list_too_long); + case edom: return make_error_condition(argument_out_of_domain); + case efault: return make_error_condition(bad_address); + case ebadf: return make_error_condition(bad_file_descriptor); + case ebadmsg: return make_error_condition(bad_message); + case epipe: return make_error_condition(broken_pipe); + case econnaborted: return make_error_condition(connection_aborted); + case ealready: return make_error_condition(connection_already_in_progress); + case econnrefused: return make_error_condition(connection_refused); + case econnreset: return make_error_condition(connection_reset); + case exdev: return make_error_condition(cross_device_link); + case edestaddrreq: return make_error_condition(destination_address_required); + case ebusy: return make_error_condition(device_or_resource_busy); + case enotempty: return make_error_condition(directory_not_empty); + case enoexec: return make_error_condition(executable_format_error); + case eexist: return make_error_condition(file_exists); + case efbig: return make_error_condition(file_too_large); + case enametoolong: return make_error_condition(filename_too_long); + case enosys: return 
make_error_condition(function_not_supported); + case ehostunreach: return make_error_condition(host_unreachable); + case eidrm: return make_error_condition(identifier_removed); + case eilseq: return make_error_condition(illegal_byte_sequence); + case enotty: return make_error_condition(inappropriate_io_control_operation); + case eintr: return make_error_condition(interrupted); + case einval: return make_error_condition(invalid_argument); + case espipe: return make_error_condition(invalid_seek); + case eio: return make_error_condition(io_error); + case eisdir: return make_error_condition(is_a_directory); + case emsgsize: return make_error_condition(message_size); + case enetdown: return make_error_condition(network_down); + case enetreset: return make_error_condition(network_reset); + case enetunreach: return make_error_condition(network_unreachable); + case enobufs: return make_error_condition(no_buffer_space); + case echild: return make_error_condition(no_child_process); + case enolink: return make_error_condition(no_link); + case enolck: return make_error_condition(no_lock_available); + case enodata: return make_error_condition(no_message_available); + case enomsg: return make_error_condition(no_message); + case enoprotoopt: return make_error_condition(no_protocol_option); + case enospc: return make_error_condition(no_space_on_device); + case enosr: return make_error_condition(no_stream_resources); + case enxio: return make_error_condition(no_such_device_or_address); + case enodev: return make_error_condition(no_such_device); + case enoent: return make_error_condition(no_such_file_or_directory); + case esrch: return make_error_condition(no_such_process); + case enotdir: return make_error_condition(not_a_directory); + case enotsock: return make_error_condition(not_a_socket); + case enostr: return make_error_condition(not_a_stream); + case enotconn: return make_error_condition(not_connected); + case enomem: return make_error_condition(not_enough_memory); + case 
enotsup: return make_error_condition(not_supported); + case ecanceled: return make_error_condition(operation_canceled); + case einprogress: return make_error_condition(operation_in_progress); + case eperm: return make_error_condition(operation_not_permitted); + case eopnotsupp: return make_error_condition(operation_not_supported); + case ewouldblock: return make_error_condition(operation_would_block); + case eownerdead: return make_error_condition(owner_dead); + case eacces: return make_error_condition(permission_denied); + case eproto: return make_error_condition(protocol_error); + case eprotonosupport: return make_error_condition(protocol_not_supported); + case erofs: return make_error_condition(read_only_file_system); + case edeadlk: return make_error_condition(resource_deadlock_would_occur); + case eagain: return make_error_condition(resource_unavailable_try_again); + case erange: return make_error_condition(result_out_of_range); + case enotrecoverable: return make_error_condition(state_not_recoverable); + case etime: return make_error_condition(stream_timeout); + case etxtbsy: return make_error_condition(text_file_busy); + case etimedout: return make_error_condition(timed_out); + case enfile: return make_error_condition(too_many_files_open_in_system); + case emfile: return make_error_condition(too_many_files_open); + case emlink: return make_error_condition(too_many_links); + case eloop: return make_error_condition(too_many_symbolic_link_levels); + case eoverflow: return make_error_condition(value_too_large); + case eprototype: return make_error_condition(wrong_protocol_type); + default: return error_condition(ev,system_category()); // unrecognized values stay in the system category + } + } +}; // end system_error_category + + +} // end detail + + +const error_category &generic_category(void) +{ + static const detail::generic_error_category result; // single function-local instance; callers always get the same object + return result; +} + + +const error_category &system_category(void) +{ + static const detail::system_error_category result; // single function-local instance; callers always get the same object + return result; +} + + +} // end 
system + +} // end thrust + diff --git a/compat/thrust/system/detail/error_code.inl b/compat/thrust/system/detail/error_code.inl new file mode 100644 index 0000000..0cf86b4 --- /dev/null +++ b/compat/thrust/system/detail/error_code.inl @@ -0,0 +1,197 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#pragma once + +#include + +namespace thrust +{ + +namespace system +{ + +error_code + ::error_code(void) + :m_val(0),m_cat(&system_category()) +{ + ; +} // end error_code::error_code() + + +error_code + ::error_code(int val, const error_category &cat) + :m_val(val),m_cat(&cat) +{ + ; +} // end error_code::error_code() + + +template + error_code + ::error_code(ErrorCodeEnum e +// XXX WAR msvc's problem with enable_if +#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC + , typename thrust::detail::enable_if::value>::type * +#endif // THRUST_HOST_COMPILER_MSVC + ) +{ + *this = make_error_code(e); +} // end error_code::error_code() + + +void error_code + ::assign(int val, const error_category &cat) +{ + m_val = val; + m_cat = &cat; +} // end error_code::assign() + + +template +// XXX WAR msvc's problem with enable_if +#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC + typename thrust::detail::enable_if::value, error_code>::type & +#else + error_code & +#endif // THRUST_HOST_COMPILER_MSVC + error_code + ::operator=(ErrorCodeEnum e) +{ + *this = make_error_code(e); + return *this; +} // end 
error_code::operator=() + + +void error_code + ::clear(void) +{ + m_val = 0; + m_cat = &system_category(); +} // end error_code::clear() + + +int error_code + ::value(void) const +{ + return m_val; +} // end error_code::value() + + +const error_category &error_code + ::category(void) const +{ + return *m_cat; +} // end error_code::category() + + +error_condition error_code + ::default_error_condition(void) const +{ + return category().default_error_condition(value()); +} // end error_code::default_error_condition() + + +std::string error_code + ::message(void) const +{ + return category().message(value()); +} // end error_code::message() + + +error_code + ::operator bool (void) const +{ + return value() != 0; +} // end error_code::operator bool () + + +error_code make_error_code(errc::errc_t e) +{ + return error_code(static_cast(e), generic_category()); +} // end make_error_code() + + +bool operator<(const error_code &lhs, const error_code &rhs) +{ + // order by category first; the value only breaks ties between equal categories + if(lhs.category().operator<(rhs.category())) return true; + if(!lhs.category().operator==(rhs.category())) return false; + return lhs.value() < rhs.value(); +} // end operator<() + + +template + std::basic_ostream& + operator<<(std::basic_ostream &os, const error_code &ec) +{ + return os << ec.category().name() << ':' << ec.value(); +} // end operator<<() + + +bool operator==(const error_code &lhs, const error_code &rhs) +{ + return lhs.category().operator==(rhs.category()) && lhs.value() == rhs.value(); +} // end operator==() + + +bool operator==(const error_code &lhs, const error_condition &rhs) +{ + return lhs.category().equivalent(lhs.value(), rhs) || rhs.category().equivalent(lhs,rhs.value()); +} // end operator==() + + +bool operator==(const error_condition &lhs, const error_code &rhs) +{ + return rhs.category().equivalent(rhs.value(), lhs) || lhs.category().equivalent(rhs, lhs.value()); +} // end operator==() -- same test as the (error_code, error_condition) overload with the operands swapped + + +bool operator==(const error_condition &lhs, const error_condition 
&rhs) +{ + return lhs.category().operator==(rhs.category()) && lhs.value() == rhs.value(); +} // end operator==() + + +bool operator!=(const error_code &lhs, const error_code &rhs) +{ + return !(lhs == rhs); +} // end operator!=() + + +bool operator!=(const error_code &lhs, const error_condition &rhs) +{ + return !(lhs == rhs); +} // end operator!=() + + +bool operator!=(const error_condition &lhs, const error_code &rhs) +{ + return !(lhs == rhs); +} // end operator!=() + + +bool operator!=(const error_condition &lhs, const error_condition &rhs) +{ + return !(lhs == rhs); +} // end operator!=() + + +} // end system + +} // end thrust + diff --git a/compat/thrust/system/detail/error_condition.inl b/compat/thrust/system/detail/error_condition.inl new file mode 100644 index 0000000..00fbaf0 --- /dev/null +++ b/compat/thrust/system/detail/error_condition.inl @@ -0,0 +1,133 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include +#include + +namespace thrust +{ + +namespace system +{ + +error_condition + ::error_condition(void) + :m_val(0),m_cat(&generic_category()) +{ + ; +} // end error_condition::error_condition() + + +error_condition + ::error_condition(int val, const error_category &cat) + :m_val(val),m_cat(&cat) +{ + ; +} // end error_condition::error_condition() + + +template + error_condition + ::error_condition(ErrorConditionEnum e +// XXX WAR msvc's problem with enable_if +#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC + , typename thrust::detail::enable_if::value>::type * +#endif // THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC + ) +{ + *this = make_error_condition(e); +} // end error_condition::error_condition() + + +void error_condition + ::assign(int val, const error_category &cat) +{ + m_val = val; + m_cat = &cat; +} // end error_condition::assign() + + +template +// XXX WAR msvc's problem with enable_if +#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC + typename thrust::detail::enable_if::value, error_condition>::type & +#else + error_condition & +#endif // THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC + error_condition + ::operator=(ErrorConditionEnum e) +{ + *this = make_error_condition(e); + return *this; +} // end error_condition::operator=() + + +void error_condition + ::clear(void) +{ + m_val = 0; + m_cat = &generic_category(); +} // end error_condition::clear() -- resets to value 0 in generic_category(), the same state the default constructor sets + + +int error_condition + ::value(void) const +{ + return m_val; +} // end error_condition::value() + + +const error_category &error_condition + ::category(void) const +{ + return *m_cat; +} // end error_condition::category() + + +std::string error_condition + ::message(void) const +{ + return category().message(value()); +} // end error_condition::message() + + +error_condition + ::operator bool (void) const +{ + return value() != 0; +} // end error_condition::operator bool () + + +error_condition make_error_condition(errc::errc_t e) +{ + return 
error_condition(static_cast(e), generic_category()); +} // end make_error_condition() + + +bool operator<(const error_condition &lhs, + const error_condition &rhs) +{ + return lhs.category().operator<(rhs.category()) || (lhs.category().operator==(rhs.category()) && (lhs.value() < rhs.value())); +} // end operator<() + + +} // end system + +} // end thrust + diff --git a/compat/thrust/system/detail/generic/adjacent_difference.h b/compat/thrust/system/detail/generic/adjacent_difference.h new file mode 100644 index 0000000..bb340df --- /dev/null +++ b/compat/thrust/system/detail/generic/adjacent_difference.h @@ -0,0 +1,53 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file adjacent_difference.h + * \brief Generic implementation of adjacent_difference. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template +OutputIterator adjacent_difference(thrust::execution_policy &exec, + InputIterator first, InputIterator last, + OutputIterator result); + +template +OutputIterator adjacent_difference(thrust::execution_policy &exec, + InputIterator first, InputIterator last, + OutputIterator result, + BinaryFunction binary_op); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/adjacent_difference.inl b/compat/thrust/system/detail/generic/adjacent_difference.inl new file mode 100644 index 0000000..619b29f --- /dev/null +++ b/compat/thrust/system/detail/generic/adjacent_difference.inl @@ -0,0 +1,76 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template +OutputIterator adjacent_difference(thrust::execution_policy &exec, + InputIterator first, InputIterator last, + OutputIterator result) +{ + typedef typename thrust::iterator_traits::value_type InputType; + thrust::minus binary_op; + + return thrust::adjacent_difference(exec, first, last, result, binary_op); +} // end adjacent_difference() + +template +OutputIterator adjacent_difference(thrust::execution_policy &exec, + InputIterator first, InputIterator last, + OutputIterator result, + BinaryFunction binary_op) +{ + typedef typename thrust::iterator_traits::value_type InputType; + + if(first == last) + { + // empty range, nothing to do + return result; + } + else + { + // the input is always copied first, so an in-place call (result aliasing first) still reads unmodified values + // XXX a special-purpose kernel would be faster here since + // only block boundaries need to be copied + thrust::detail::temporary_array input_copy(exec, first, last); + + *result = *first; + thrust::transform(exec, input_copy.begin() + 1, input_copy.end(), input_copy.begin(), result + 1, binary_op); + } + + return result + (last - first); +} // end adjacent_difference() + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + + diff --git a/compat/thrust/system/detail/generic/advance.h b/compat/thrust/system/detail/generic/advance.h new file mode 100644 index 0000000..249aac7 --- /dev/null +++ b/compat/thrust/system/detail/generic/advance.h @@ -0,0 +1,40 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#pragma once + +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template +void advance(InputIterator& i, Distance n); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/advance.inl b/compat/thrust/system/detail/generic/advance.inl new file mode 100644 index 0000000..b95737a --- /dev/null +++ b/compat/thrust/system/detail/generic/advance.inl @@ -0,0 +1,62 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ +namespace detail +{ + +template +void advance(InputIterator& i, Distance n, thrust::incrementable_traversal_tag) +{ + while(n) + { + ++i; + --n; + } // end while -- the loop only stops when n counts down to zero, so n must not be negative here +} // end advance() + +template +void advance(InputIterator& i, Distance n, thrust::random_access_traversal_tag) +{ + i += n; +} // end advance() + +} // end detail + +template +void advance(InputIterator& i, Distance n) +{ + // dispatch on iterator traversal + thrust::system::detail::generic::detail::advance(i, n, + typename thrust::iterator_traversal::type()); +} // end advance() + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + + diff --git a/compat/thrust/system/detail/generic/binary_search.h b/compat/thrust/system/detail/generic/binary_search.h new file mode 100644 index 0000000..7fd6c50 --- /dev/null +++ b/compat/thrust/system/detail/generic/binary_search.h @@ -0,0 +1,156 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file binary_search.h + * \brief Generic implementations of binary search functions. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template +ForwardIterator lower_bound(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + const T& value); + +template +ForwardIterator lower_bound(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + const T& value, + StrictWeakOrdering comp); + + +template +ForwardIterator upper_bound(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + const T& value); + +template +ForwardIterator upper_bound(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + const T& value, + StrictWeakOrdering comp); + + +template +bool binary_search(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + const T& value); + +template +bool binary_search(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + const T& value, + StrictWeakOrdering comp); + + +template +OutputIterator lower_bound(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + InputIterator values_begin, + InputIterator values_end, + OutputIterator output); + +template +OutputIterator lower_bound(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + InputIterator values_begin, + InputIterator values_end, + OutputIterator output, + StrictWeakOrdering comp); + + +template +OutputIterator upper_bound(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + InputIterator values_begin, + InputIterator values_end, + OutputIterator output); + +template +OutputIterator upper_bound(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + InputIterator values_begin, + InputIterator values_end, + OutputIterator output, + StrictWeakOrdering comp); + + +template +OutputIterator binary_search(thrust::execution_policy &exec, + 
ForwardIterator begin, + ForwardIterator end, + InputIterator values_begin, + InputIterator values_end, + OutputIterator output); + +template +OutputIterator binary_search(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + InputIterator values_begin, + InputIterator values_end, + OutputIterator output, + StrictWeakOrdering comp); + + +template +thrust::pair +equal_range(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + const LessThanComparable &value); + +template +thrust::pair +equal_range(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + const LessThanComparable &value, + StrictWeakOrdering comp); + + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/binary_search.inl b/compat/thrust/system/detail/generic/binary_search.inl new file mode 100644 index 0000000..151ac0e --- /dev/null +++ b/compat/thrust/system/detail/generic/binary_search.inl @@ -0,0 +1,342 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! 
\file binary_search.inl + * \brief Inline file for binary_search.h + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +namespace thrust +{ +namespace detail +{ + +// XXX WAR circular #inclusion with this forward declaration +template class temporary_array; + +} // end detail +namespace system +{ +namespace detail +{ +namespace generic +{ +namespace detail +{ + + +// short names to avoid nvcc bug +struct lbf +{ + template + __host__ __device__ + typename thrust::iterator_traits::difference_type + operator()(RandomAccessIterator begin, RandomAccessIterator end, const T& value, StrictWeakOrdering comp) + { + return thrust::system::detail::generic::scalar::lower_bound(begin, end, value, comp) - begin; + } +}; + +struct ubf +{ + template + __host__ __device__ + typename thrust::iterator_traits::difference_type + operator()(RandomAccessIterator begin, RandomAccessIterator end, const T& value, StrictWeakOrdering comp){ + return thrust::system::detail::generic::scalar::upper_bound(begin, end, value, comp) - begin; + } +}; + +struct bsf +{ + template + __host__ __device__ + bool operator()(RandomAccessIterator begin, RandomAccessIterator end, const T& value, StrictWeakOrdering comp){ + RandomAccessIterator iter = thrust::system::detail::generic::scalar::lower_bound(begin, end, value, comp); + + thrust::detail::host_device_function wrapped_comp(comp); + + return iter != end && !wrapped_comp(value, *iter); + } +}; + + +template +struct binary_search_functor +{ + ForwardIterator begin; + ForwardIterator end; + StrictWeakOrdering comp; + BinarySearchFunction func; + + binary_search_functor(ForwardIterator begin, ForwardIterator end, StrictWeakOrdering comp, BinarySearchFunction func) + : begin(begin), end(end), comp(comp), func(func) {} + + template + __host__ __device__ + void operator()(Tuple t) + { + thrust::get<1>(t) = func(begin, end, thrust::get<0>(t), comp); + } +}; // 
binary_search_functor + + +// Vector Implementation +template +OutputIterator binary_search(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + InputIterator values_begin, + InputIterator values_end, + OutputIterator output, + StrictWeakOrdering comp, + BinarySearchFunction func) +{ + thrust::for_each(exec, + thrust::make_zip_iterator(thrust::make_tuple(values_begin, output)), + thrust::make_zip_iterator(thrust::make_tuple(values_end, output + thrust::distance(values_begin, values_end))), + detail::binary_search_functor(begin, end, comp, func)); + + return output + thrust::distance(values_begin, values_end); +} + + + +// Scalar Implementation +template +OutputType binary_search(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + const T& value, + StrictWeakOrdering comp, + BinarySearchFunction func) +{ + // use the vectorized path to implement the scalar version + + // allocate device buffers for value and output + thrust::detail::temporary_array d_value(exec,1); + thrust::detail::temporary_array d_output(exec,1); + + // copy value to device + d_value[0] = value; + + // perform the query + thrust::system::detail::generic::detail::binary_search(exec, begin, end, d_value.begin(), d_value.end(), d_output.begin(), comp, func); + + // copy result to host and return + return d_output[0]; +} + +} // end namespace detail + + +////////////////////// +// Scalar Functions // +////////////////////// + +template +ForwardIterator lower_bound(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + const T& value) +{ + return thrust::lower_bound(exec, begin, end, value, thrust::less()); +} + +template +ForwardIterator lower_bound(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + const T& value, + StrictWeakOrdering comp) +{ + typedef typename thrust::iterator_traits::difference_type difference_type; + + return begin + detail::binary_search(exec, begin, end, value, 
comp, detail::lbf()); +} + + +template +ForwardIterator upper_bound(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + const T& value) +{ + return thrust::upper_bound(exec, begin, end, value, thrust::less()); +} + +template +ForwardIterator upper_bound(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + const T& value, + StrictWeakOrdering comp) +{ + typedef typename thrust::iterator_traits::difference_type difference_type; + + return begin + detail::binary_search(exec, begin, end, value, comp, detail::ubf()); +} + + +template +bool binary_search(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + const T& value) +{ + return thrust::binary_search(exec, begin, end, value, thrust::less()); +} + +template +bool binary_search(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + const T& value, + StrictWeakOrdering comp) +{ + return detail::binary_search(exec, begin, end, value, comp, detail::bsf()); +} + + +////////////////////// +// Vector Functions // +////////////////////// + +template +OutputIterator lower_bound(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + InputIterator values_begin, + InputIterator values_end, + OutputIterator output) +{ + typedef typename thrust::iterator_value::type ValueType; + + return thrust::lower_bound(exec, begin, end, values_begin, values_end, output, thrust::less()); +} + +template +OutputIterator lower_bound(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + InputIterator values_begin, + InputIterator values_end, + OutputIterator output, + StrictWeakOrdering comp) +{ + return detail::binary_search(exec, begin, end, values_begin, values_end, output, comp, detail::lbf()); +} + + +template +OutputIterator upper_bound(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + InputIterator values_begin, + InputIterator values_end, + 
OutputIterator output) +{ + typedef typename thrust::iterator_value::type ValueType; + + return thrust::upper_bound(exec, begin, end, values_begin, values_end, output, thrust::less()); +} + +template +OutputIterator upper_bound(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + InputIterator values_begin, + InputIterator values_end, + OutputIterator output, + StrictWeakOrdering comp) +{ + return detail::binary_search(exec, begin, end, values_begin, values_end, output, comp, detail::ubf()); +} + + +template +OutputIterator binary_search(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + InputIterator values_begin, + InputIterator values_end, + OutputIterator output) +{ + typedef typename thrust::iterator_value::type ValueType; + + return thrust::binary_search(exec, begin, end, values_begin, values_end, output, thrust::less()); +} + +template +OutputIterator binary_search(thrust::execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + InputIterator values_begin, + InputIterator values_end, + OutputIterator output, + StrictWeakOrdering comp) +{ + return detail::binary_search(exec, begin, end, values_begin, values_end, output, comp, detail::bsf()); +} + + +template +thrust::pair +equal_range(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + const LessThanComparable &value) +{ + return thrust::equal_range(exec, first, last, value, thrust::less()); +} + + +template +thrust::pair +equal_range(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + const T &value, + StrictWeakOrdering comp) +{ + ForwardIterator lb = thrust::lower_bound(exec, first, last, value, comp); + ForwardIterator ub = thrust::upper_bound(exec, first, last, value, comp); + return thrust::make_pair(lb, ub); +} + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git 
a/compat/thrust/system/detail/generic/copy.h b/compat/thrust/system/detail/generic/copy.h new file mode 100644 index 0000000..8df98fe --- /dev/null +++ b/compat/thrust/system/detail/generic/copy.h @@ -0,0 +1,57 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + OutputIterator copy(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result); + + +template + OutputIterator copy_n(thrust::execution_policy &exec, + InputIterator first, + Size n, + OutputIterator result); + + +} // end generic +} // end detail +} // end system +} // end thrust + +#include + diff --git a/compat/thrust/system/detail/generic/copy.inl b/compat/thrust/system/detail/generic/copy.inl new file mode 100644 index 0000000..e081015 --- /dev/null +++ b/compat/thrust/system/detail/generic/copy.inl @@ -0,0 +1,80 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + OutputIterator copy(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result) +{ + typedef typename thrust::iterator_value::type T; + return thrust::transform(exec, first, last, result, thrust::identity()); +} // end copy() + + +template + OutputIterator copy_n(thrust::execution_policy &exec, + InputIterator first, + Size n, + OutputIterator result) +{ + typedef typename thrust::iterator_value::type value_type; + typedef thrust::identity xfrm_type; + + // XXX why do we need to do this? 
figure out why, and then see if we can do without + typedef typename thrust::detail::unary_transform_functor::type functor_type; + + typedef thrust::tuple iterator_tuple; + typedef thrust::zip_iterator zip_iter; + + zip_iter zipped = thrust::make_zip_iterator(thrust::make_tuple(first,result)); + + return thrust::get<1>(thrust::for_each_n(exec, zipped, n, functor_type(xfrm_type())).get_iterator_tuple()); +} // end copy_n() + + +} // end generic +} // end detail +} // end system +} // end thrust + diff --git a/compat/thrust/system/detail/generic/copy_if.h b/compat/thrust/system/detail/generic/copy_if.h new file mode 100644 index 0000000..183f012 --- /dev/null +++ b/compat/thrust/system/detail/generic/copy_if.h @@ -0,0 +1,62 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + OutputIterator copy_if(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred); + + +template + OutputIterator copy_if(thrust::execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred); + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/copy_if.inl b/compat/thrust/system/detail/generic/copy_if.inl new file mode 100644 index 0000000..145561c --- /dev/null +++ b/compat/thrust/system/detail/generic/copy_if.inl @@ -0,0 +1,155 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ +namespace detail +{ + +template +OutputIterator copy_if(thrust::execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred) +{ + __THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING(IndexType n = thrust::distance(first, last)); + + // compute {0,1} predicates + thrust::detail::temporary_array predicates(exec, n); + thrust::transform(exec, + stencil, + stencil + n, + predicates.begin(), + thrust::detail::predicate_to_integral(pred)); + + // scan {0,1} predicates + thrust::detail::temporary_array scatter_indices(exec, n); + thrust::exclusive_scan(exec, + predicates.begin(), + predicates.end(), + scatter_indices.begin(), + static_cast(0), + thrust::plus()); + + // scatter the true elements + thrust::scatter_if(exec, + first, + last, + scatter_indices.begin(), + predicates.begin(), + result, + thrust::identity()); + + // find the end of the new sequence + IndexType output_size = scatter_indices[n - 1] + predicates[n - 1]; + + return result + output_size; +} + +} // end namespace detail + + +template + OutputIterator copy_if(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred) +{ + // XXX it's potentially expensive to send [first,last) twice + // we should probably specialize this case for POD + // since we can safely keep the input in a temporary instead + // of doing two loads + return thrust::copy_if(exec, first, last, first, result, pred); +} // end copy_if() + + +template + OutputIterator copy_if(thrust::execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred) +{ + typedef typename 
thrust::iterator_traits::difference_type difference_type; + + // empty sequence + if(first == last) + return result; + + difference_type n = thrust::distance(first, last); + + // create an unsigned version of n (we know n is positive from the comparison above) + // to avoid a warning in the compare below + typename thrust::detail::make_unsigned::type unsigned_n(n); + + // use 32-bit indices when possible (almost always) + if(sizeof(difference_type) > sizeof(unsigned int) && unsigned_n > (std::numeric_limits::max)()) + { + result = detail::copy_if(exec, first, last, stencil, result, pred); + } // end if + else + { + result = detail::copy_if(exec, first, last, stencil, result, pred); + } // end else + + return result; +} // end copy_if() + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/count.h b/compat/thrust/system/detail/generic/count.h new file mode 100644 index 0000000..bc4899e --- /dev/null +++ b/compat/thrust/system/detail/generic/count.h @@ -0,0 +1,46 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template +typename thrust::iterator_traits::difference_type +count(thrust::execution_policy &exec, InputIterator first, InputIterator last, const EqualityComparable& value); + +template +typename thrust::iterator_traits::difference_type +count_if(thrust::execution_policy &exec, InputIterator first, InputIterator last, Predicate pred); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/count.inl b/compat/thrust/system/detail/generic/count.inl new file mode 100644 index 0000000..e3ab871 --- /dev/null +++ b/compat/thrust/system/detail/generic/count.inl @@ -0,0 +1,75 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template +struct count_if_transform +{ + __host__ __device__ + count_if_transform(Predicate _pred) : pred(_pred){} + + __host__ __device__ + CountType operator()(const InputType& val) + { + if(pred(val)) + return 1; + else + return 0; + } // end operator() + + Predicate pred; +}; // end count_if_transform + +template +typename thrust::iterator_traits::difference_type +count(thrust::execution_policy &exec, InputIterator first, InputIterator last, const EqualityComparable& value) +{ + typedef typename thrust::iterator_traits::value_type InputType; + + // XXX use placeholder expression here + return thrust::count_if(exec, first, last, thrust::detail::equal_to_value(value)); +} // end count() + +template +typename thrust::iterator_traits::difference_type +count_if(thrust::execution_policy &exec, InputIterator first, InputIterator last, Predicate pred) +{ + typedef typename thrust::iterator_traits::value_type InputType; + typedef typename thrust::iterator_traits::difference_type CountType; + + thrust::system::detail::generic::count_if_transform unary_op(pred); + thrust::plus binary_op; + return thrust::transform_reduce(exec, first, last, unary_op, CountType(0), binary_op); +} // end count_if() + +} // end generic +} // end detail +} // end system +} // end thrust + diff --git a/compat/thrust/system/detail/generic/distance.h b/compat/thrust/system/detail/generic/distance.h new file mode 100644 index 0000000..80f051c --- /dev/null +++ b/compat/thrust/system/detail/generic/distance.h @@ -0,0 +1,42 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + inline typename thrust::iterator_traits::difference_type + distance(InputIterator first, InputIterator last); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/distance.inl b/compat/thrust/system/detail/generic/distance.inl new file mode 100644 index 0000000..a1fdf14 --- /dev/null +++ b/compat/thrust/system/detail/generic/distance.inl @@ -0,0 +1,69 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ +namespace detail +{ + +template + inline typename thrust::iterator_traits::difference_type + distance(InputIterator first, InputIterator last, thrust::incrementable_traversal_tag) +{ + typename thrust::iterator_traits::difference_type result(0); + + while(first != last) + { + ++first; + ++result; + } // end while + + return result; +} // end advance() + +template + inline typename thrust::iterator_traits::difference_type + distance(InputIterator first, InputIterator last, thrust::random_access_traversal_tag) +{ + return last - first; +} // end distance() + +} // end detail + +template + inline typename thrust::iterator_traits::difference_type + distance(InputIterator first, InputIterator last) +{ + // dispatch on iterator traversal + return thrust::system::detail::generic::detail::distance(first, last, + typename thrust::iterator_traversal::type()); +} // end advance() + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/equal.h b/compat/thrust/system/detail/generic/equal.h new file mode 100644 index 0000000..da7d105 --- /dev/null +++ b/compat/thrust/system/detail/generic/equal.h @@ -0,0 +1,43 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template +bool equal(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2); + +template +bool equal(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate binary_pred); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/equal.inl b/compat/thrust/system/detail/generic/equal.inl new file mode 100644 index 0000000..12b8005 --- /dev/null +++ b/compat/thrust/system/detail/generic/equal.inl @@ -0,0 +1,50 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template +bool equal(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2) +{ + typedef typename thrust::iterator_traits::value_type InputType1; + + return thrust::equal(exec, first1, last1, first2, thrust::detail::equal_to()); +} + +template +bool equal(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate binary_pred) +{ + return thrust::mismatch(exec, first1, last1, first2, binary_pred).first == last1; +} + +} // end generic +} // end detail +} // end system +} // end thrust + diff --git a/compat/thrust/system/detail/generic/extrema.h b/compat/thrust/system/detail/generic/extrema.h new file mode 100644 index 0000000..abb4ddc --- /dev/null +++ b/compat/thrust/system/detail/generic/extrema.h @@ -0,0 +1,76 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file extrema.h + * \brief Generic device implementations of extrema functions. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template +ForwardIterator max_element(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last); + +template +ForwardIterator max_element(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp); + +template +ForwardIterator min_element(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last); + +template +ForwardIterator min_element(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp); + +template +thrust::pair minmax_element(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last); + +template +thrust::pair minmax_element(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/extrema.inl b/compat/thrust/system/detail/generic/extrema.inl new file mode 100644 index 0000000..b5f92c3 --- /dev/null +++ b/compat/thrust/system/detail/generic/extrema.inl @@ -0,0 +1,244 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! 
\file distance.h + * \brief Device implementations for distance. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ +namespace detail +{ + +////////////// +// Functors // +////////////// + +// return the smaller/larger element making sure to prefer the +// first occurance of the minimum/maximum element +template +struct min_element_reduction +{ + BinaryPredicate comp; + + __host__ __device__ + min_element_reduction(BinaryPredicate comp) : comp(comp){} + + __host__ __device__ + thrust::tuple + operator()(const thrust::tuple& lhs, + const thrust::tuple& rhs ) + { + if(comp(thrust::get<0>(lhs), thrust::get<0>(rhs))) + return lhs; + if(comp(thrust::get<0>(rhs), thrust::get<0>(lhs))) + return rhs; + + // values are equivalent, prefer value with smaller index + if(thrust::get<1>(lhs) < thrust::get<1>(rhs)) + return lhs; + else + return rhs; + } // end operator()() + +}; // end min_element_reduction + + +template +struct max_element_reduction +{ + BinaryPredicate comp; + + __host__ __device__ + max_element_reduction(BinaryPredicate comp) : comp(comp){} + + __host__ __device__ + thrust::tuple + operator()(const thrust::tuple& lhs, + const thrust::tuple& rhs ) + { + if(comp(thrust::get<0>(lhs), thrust::get<0>(rhs))) + return rhs; + if(comp(thrust::get<0>(rhs), thrust::get<0>(lhs))) + return lhs; + + // values are equivalent, prefer value with smaller index + if(thrust::get<1>(lhs) < thrust::get<1>(rhs)) + return lhs; + else + return rhs; + } // end operator()() + +}; // end max_element_reduction + +// return the smaller & larger element making sure to prefer the +// first occurance of the minimum/maximum element +template +struct minmax_element_reduction +{ + BinaryPredicate comp; + + minmax_element_reduction(BinaryPredicate comp) : comp(comp){} + + __host__ __device__ + thrust::tuple< thrust::tuple, thrust::tuple > + 
operator()(const thrust::tuple< thrust::tuple, thrust::tuple >& lhs, + const thrust::tuple< thrust::tuple, thrust::tuple >& rhs ) + { + + return thrust::make_tuple(min_element_reduction(comp)(thrust::get<0>(lhs), thrust::get<0>(rhs)), + max_element_reduction(comp)(thrust::get<1>(lhs), thrust::get<1>(rhs))); + } // end operator()() +}; // end minmax_element_reduction + +template +struct duplicate_tuple +{ + __host__ __device__ + thrust::tuple< thrust::tuple, thrust::tuple > + operator()(const thrust::tuple& t) + { + return thrust::make_tuple(t, t); + } +}; // end duplicate_tuple + +} // end namespace detail + +template +ForwardIterator min_element(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last) +{ + typedef typename thrust::iterator_value::type value_type; + + return thrust::min_element(exec, first, last, thrust::less()); +} // end min_element() + +template +ForwardIterator min_element(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) +{ + if (first == last) + return last; + + typedef typename thrust::iterator_traits::value_type InputType; + typedef typename thrust::iterator_traits::difference_type IndexType; + + thrust::tuple result = + thrust::reduce + (exec, + thrust::make_zip_iterator(thrust::make_tuple(first, thrust::counting_iterator(0))), + thrust::make_zip_iterator(thrust::make_tuple(first, thrust::counting_iterator(0))) + (last - first), + thrust::tuple(*first, 0), + detail::min_element_reduction(comp)); + + return first + thrust::get<1>(result); +} // end min_element() + +template +ForwardIterator max_element(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last) +{ + typedef typename thrust::iterator_value::type value_type; + + return thrust::max_element(exec, first, last, thrust::less()); +} // end max_element() + +template +ForwardIterator max_element(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + 
BinaryPredicate comp) +{ + if (first == last) + return last; + + typedef typename thrust::iterator_traits::value_type InputType; + typedef typename thrust::iterator_traits::difference_type IndexType; + + thrust::tuple result = + thrust::reduce + (exec, + thrust::make_zip_iterator(thrust::make_tuple(first, thrust::counting_iterator(0))), + thrust::make_zip_iterator(thrust::make_tuple(first, thrust::counting_iterator(0))) + (last - first), + thrust::tuple(*first, 0), + detail::max_element_reduction(comp)); + + return first + thrust::get<1>(result); +} // end max_element() + +template +thrust::pair minmax_element(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last) +{ + typedef typename thrust::iterator_value::type value_type; + + return thrust::minmax_element(exec, first, last, thrust::less()); +} // end minmax_element() + +template +thrust::pair minmax_element(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) +{ + if (first == last) + return thrust::make_pair(last, last); + + typedef typename thrust::iterator_traits::value_type InputType; + typedef typename thrust::iterator_traits::difference_type IndexType; + + thrust::tuple< thrust::tuple, thrust::tuple > result = + thrust::transform_reduce + (exec, + thrust::make_zip_iterator(thrust::make_tuple(first, thrust::counting_iterator(0))), + thrust::make_zip_iterator(thrust::make_tuple(first, thrust::counting_iterator(0))) + (last - first), + detail::duplicate_tuple(), + detail::duplicate_tuple()(thrust::tuple(*first, 0)), + detail::minmax_element_reduction(comp)); + + return thrust::make_pair(first + thrust::get<1>(thrust::get<0>(result)), first + thrust::get<1>(thrust::get<1>(result))); +} // end minmax_element() + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/fill.h b/compat/thrust/system/detail/generic/fill.h new file mode 
100644 index 0000000..9745b1c --- /dev/null +++ b/compat/thrust/system/detail/generic/fill.h @@ -0,0 +1,63 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file fill.h + * \brief Device implementation of fill. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + OutputIterator fill_n(thrust::execution_policy &exec, + OutputIterator first, + Size n, + const T &value) +{ + // XXX consider using the placeholder expression _1 = value + return thrust::generate_n(exec, first, n, thrust::detail::fill_functor(value)); +} + +template + void fill(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + const T &value) +{ + // XXX consider using the placeholder expression _1 = value + thrust::generate(exec, first, last, thrust::detail::fill_functor(value)); +} + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/find.h b/compat/thrust/system/detail/generic/find.h new file mode 100644 index 0000000..08888c5 --- /dev/null +++ b/compat/thrust/system/detail/generic/find.h @@ -0,0 +1,56 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template +InputIterator find(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + const T& value); + +template +InputIterator find_if(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + Predicate pred); + +template +InputIterator find_if_not(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + Predicate pred); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/find.inl b/compat/thrust/system/detail/generic/find.inl new file mode 100644 index 0000000..a3414e1 --- /dev/null +++ b/compat/thrust/system/detail/generic/find.inl @@ -0,0 +1,141 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + + +// Contributed by Erich Elsen + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template +InputIterator find(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + const T& value) +{ + // XXX consider a placeholder expression here + return thrust::find_if(exec, first, last, thrust::detail::equal_to_value(value)); +} // end find() + + +template +struct find_if_functor +{ + __host__ __device__ + TupleType operator()(const TupleType& lhs, const TupleType& rhs) const + { + // select the smallest index among true results + if (thrust::get<0>(lhs) && thrust::get<0>(rhs)) + return TupleType(true, (thrust::min)(thrust::get<1>(lhs), thrust::get<1>(rhs))); + else if (thrust::get<0>(lhs)) + return lhs; + else + return rhs; + } +}; + + +template +InputIterator find_if(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + Predicate pred) +{ + typedef typename thrust::iterator_traits::difference_type difference_type; + typedef typename thrust::tuple result_type; + + // empty sequence + if (first == last) + return last; + + const difference_type n = thrust::distance(first, last); + + // this implementation breaks up the sequence into separate intervals + // in an attempt to early-out as soon as a value is found + + // TODO incorporate sizeof(InputType) into interval_threshold and round to multiple of 32 + const difference_type interval_threshold = 1 << 20; + const difference_type interval_size = (std::min)(interval_threshold, n); + + // force transform_iterator output to bool + typedef thrust::transform_iterator XfrmIterator; + typedef thrust::tuple > IteratorTuple; + typedef thrust::zip_iterator ZipIterator; + + IteratorTuple iter_tuple = thrust::make_tuple(XfrmIterator(first, pred), + thrust::counting_iterator(0)); + + ZipIterator begin = thrust::make_zip_iterator(iter_tuple); + 
ZipIterator end = begin + n; + + for(ZipIterator interval_begin = begin; interval_begin < end; interval_begin += interval_size) + { + ZipIterator interval_end = interval_begin + interval_size; + if(end < interval_end) + { + interval_end = end; + } // end if + + result_type result = thrust::reduce(exec, + interval_begin, interval_end, + result_type(false,interval_end - begin), + find_if_functor()); + + // see if we found something + if (thrust::get<0>(result)) + { + return first + thrust::get<1>(result); + } + } + + //nothing was found if we reach here... + return first + n; +} + + +template +InputIterator find_if_not(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + Predicate pred) +{ + return thrust::find_if(exec, first, last, thrust::detail::not1(pred)); +} // end find() + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/for_each.h b/compat/thrust/system/detail/generic/for_each.h new file mode 100644 index 0000000..61abe20 --- /dev/null +++ b/compat/thrust/system/detail/generic/for_each.h @@ -0,0 +1,72 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file for_each.h + * \brief Generic implementation of for_each & for_each_n. + * It is an error to call these functions; they have no implementation. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template +InputIterator for_each(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + UnaryFunction f) +{ + // unimplemented + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + return first; +} // end for_each() + + +template +InputIterator for_each_n(thrust::execution_policy &exec, + InputIterator first, + Size n, + UnaryFunction f) +{ + // unimplemented + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + return first; +} // end for_each_n() + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/gather.h b/compat/thrust/system/detail/generic/gather.h new file mode 100644 index 0000000..cfb6f85 --- /dev/null +++ b/compat/thrust/system/detail/generic/gather.h @@ -0,0 +1,78 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + OutputIterator gather(thrust::execution_policy &exec, + InputIterator map_first, + InputIterator map_last, + RandomAccessIterator input_first, + OutputIterator result); + + +template + OutputIterator gather_if(thrust::execution_policy &exec, + InputIterator1 map_first, + InputIterator1 map_last, + InputIterator2 stencil, + RandomAccessIterator input_first, + OutputIterator result); + + +template + OutputIterator gather_if(thrust::execution_policy &exec, + InputIterator1 map_first, + InputIterator1 map_last, + InputIterator2 stencil, + RandomAccessIterator input_first, + OutputIterator result, + Predicate pred); + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/gather.inl b/compat/thrust/system/detail/generic/gather.inl new file mode 100644 index 0000000..ab2cdd8 --- /dev/null +++ b/compat/thrust/system/detail/generic/gather.inl @@ -0,0 +1,102 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + OutputIterator gather(thrust::execution_policy &exec, + InputIterator map_first, + InputIterator map_last, + RandomAccessIterator input_first, + OutputIterator result) +{ + return thrust::transform(exec, + thrust::make_permutation_iterator(input_first, map_first), + thrust::make_permutation_iterator(input_first, map_last), + result, + thrust::identity::type>()); +} // end gather() + + +template + OutputIterator gather_if(thrust::execution_policy &exec, + InputIterator1 map_first, + InputIterator1 map_last, + InputIterator2 stencil, + RandomAccessIterator input_first, + OutputIterator result) +{ + typedef typename thrust::iterator_value::type StencilType; + return thrust::gather_if(exec, + map_first, + map_last, + stencil, + input_first, + result, + thrust::identity()); +} // end gather_if() + + +template + OutputIterator gather_if(thrust::execution_policy &exec, + InputIterator1 map_first, + InputIterator1 map_last, + InputIterator2 stencil, + RandomAccessIterator input_first, + OutputIterator result, + Predicate pred) +{ + typedef typename thrust::iterator_value::type InputType; + return thrust::transform_if(exec, + thrust::make_permutation_iterator(input_first, map_first), + thrust::make_permutation_iterator(input_first, map_last), + stencil, + result, + thrust::identity(), + pred); +} // end gather_if() + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/generate.h b/compat/thrust/system/detail/generic/generate.h new file mode 100644 index 0000000..e7a8e00 --- /dev/null +++ b/compat/thrust/system/detail/generic/generate.h @@ -0,0 +1,55 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file 
except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + void generate(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Generator gen); + +template + OutputIterator generate_n(thrust::execution_policy &exec, + OutputIterator first, + Size n, + Generator gen); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/generate.inl b/compat/thrust/system/detail/generic/generate.inl new file mode 100644 index 0000000..4da5763 --- /dev/null +++ b/compat/thrust/system/detail/generic/generate.inl @@ -0,0 +1,59 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + void generate(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Generator gen) +{ + thrust::for_each(exec, first, last, typename thrust::detail::generate_functor::type(gen)); +} // end generate() + +template + OutputIterator generate_n(thrust::execution_policy &exec, + OutputIterator first, + Size n, + Generator gen) +{ + return thrust::for_each_n(exec, first, n, typename thrust::detail::generate_functor::type(gen)); +} // end generate() + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/inner_product.h b/compat/thrust/system/detail/generic/inner_product.h new file mode 100644 index 0000000..9ac5c69 --- /dev/null +++ b/compat/thrust/system/detail/generic/inner_product.h @@ -0,0 +1,54 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + OutputType inner_product(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputType init); + +template +OutputType inner_product(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputType init, + BinaryFunction1 binary_op1, + BinaryFunction2 binary_op2); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/inner_product.inl b/compat/thrust/system/detail/generic/inner_product.inl new file mode 100644 index 0000000..b6a339e --- /dev/null +++ b/compat/thrust/system/detail/generic/inner_product.inl @@ -0,0 +1,70 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template +OutputType inner_product(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputType init) +{ + thrust::plus binary_op1; + thrust::multiplies binary_op2; + return thrust::inner_product(exec, first1, last1, first2, init, binary_op1, binary_op2); +} // end inner_product() + + +template +OutputType inner_product(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputType init, + BinaryFunction1 binary_op1, + BinaryFunction2 binary_op2) +{ + typedef thrust::zip_iterator > ZipIter; + + ZipIter first = thrust::make_zip_iterator(thrust::make_tuple(first1,first2)); + + // only the first iterator in the tuple is relevant for the purposes of last + ZipIter last = thrust::make_zip_iterator(thrust::make_tuple(last1, first2)); + + return thrust::transform_reduce(exec, first, last, thrust::detail::zipped_binary_op(binary_op2), init, binary_op1); +} // end inner_product() + + +} // end generic +} // end detail +} // end system +} // end thrust + diff --git a/compat/thrust/system/detail/generic/logical.h b/compat/thrust/system/detail/generic/logical.h new file mode 100644 index 0000000..e0d01e3 --- /dev/null +++ b/compat/thrust/system/detail/generic/logical.h @@ -0,0 +1,56 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template +bool all_of(thrust::execution_policy &exec, InputIterator first, InputIterator last, Predicate pred) +{ + return thrust::find_if(exec, first, last, thrust::detail::not1(pred)) == last; +} + +template +bool any_of(thrust::execution_policy &exec, InputIterator first, InputIterator last, Predicate pred) +{ + return thrust::find_if(exec, first, last, pred) != last; +} + +template +bool none_of(thrust::execution_policy &exec, InputIterator first, InputIterator last, Predicate pred) +{ + return !thrust::any_of(exec, first, last, pred); +} + +} // end generic +} // end detail +} // end system +} // end thrust + diff --git a/compat/thrust/system/detail/generic/memory.h b/compat/thrust/system/detail/generic/memory.h new file mode 100644 index 0000000..c0fe623 --- /dev/null +++ b/compat/thrust/system/detail/generic/memory.h @@ -0,0 +1,67 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file generic/memory.h + * \brief Generic implementation of memory functions. + * Calling some of these is an error. They have no implementation. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template void malloc(thrust::execution_policy &, Size); + +template +thrust::pointer malloc(thrust::execution_policy &s, std::size_t n); + +template void free(thrust::execution_policy &, Pointer); + +template +__host__ __device__ +void assign_value(tag, Pointer1, Pointer2); + +template +__host__ __device__ +void get_value(thrust::execution_policy &, Pointer); + +template +__host__ __device__ +void iter_swap(tag, Pointer1, Pointer2); + +} // end generic +} // end detail +} // end system +} // end thrust + +#include + diff --git a/compat/thrust/system/detail/generic/memory.inl b/compat/thrust/system/detail/generic/memory.inl new file mode 100644 index 0000000..f89a763 --- /dev/null +++ b/compat/thrust/system/detail/generic/memory.inl @@ -0,0 +1,92 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + void malloc(thrust::execution_policy &, Size) +{ + // unimplemented + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); +} + + +template + thrust::pointer + malloc(thrust::execution_policy &exec, std::size_t n) +{ + thrust::pointer void_ptr = thrust::malloc(exec, sizeof(T) * n); + + return pointer(static_cast(void_ptr.get())); +} // end malloc() + + +template + void free(thrust::execution_policy &, Pointer) +{ + // unimplemented + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); +} + + +template +__host__ __device__ +void assign_value(thrust::execution_policy &, Pointer1, Pointer2) +{ + // unimplemented + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); +} + + +template +__host__ __device__ +void get_value(thrust::execution_policy &, Pointer) +{ + // unimplemented + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); +} + + +template +__host__ __device__ +void iter_swap(tag, Pointer1, Pointer2) +{ + // unimplemented + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); +} + + +} // end generic +} // end detail +} // end system +} // end thrust + diff --git a/compat/thrust/system/detail/generic/merge.h b/compat/thrust/system/detail/generic/merge.h new file mode 100644 index 0000000..5f0b996 --- /dev/null +++ b/compat/thrust/system/detail/generic/merge.h @@ -0,0 +1,87 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +// XXX calling this function is an error; there is no implementation +template + OutputIterator merge(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp); + + +template + OutputIterator merge(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result); + + +template + thrust::pair + merge_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first1, InputIterator1 keys_last1, + InputIterator2 keys_first2, InputIterator2 keys_last2, + InputIterator3 values_first1, InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + Compare comp); + + +template + thrust::pair + merge_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first1, InputIterator1 keys_last1, + InputIterator2 keys_first2, InputIterator2 keys_last2, + InputIterator3 values_first1, InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result); + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/merge.inl b/compat/thrust/system/detail/generic/merge.inl new file mode 100644 index 0000000..b913611 --- /dev/null +++ 
b/compat/thrust/system/detail/generic/merge.inl @@ -0,0 +1,125 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + OutputIterator merge(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + // unimplemented + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + return result; +} // end merge() + + +template + OutputIterator merge(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result) +{ + typedef typename thrust::iterator_value::type value_type; + return thrust::merge(exec,first1,last1,first2,last2,result,thrust::less()); +} // end merge() + + +template + thrust::pair + merge_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first1, InputIterator1 keys_last1, + InputIterator2 keys_first2, InputIterator2 keys_last2, + InputIterator3 values_first1, InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + Compare comp) +{ + typedef thrust::tuple iterator_tuple1; + typedef thrust::tuple iterator_tuple2; + typedef 
thrust::tuple iterator_tuple3; + + typedef thrust::zip_iterator zip_iterator1; + typedef thrust::zip_iterator zip_iterator2; + typedef thrust::zip_iterator zip_iterator3; + + zip_iterator1 zipped_first1 = thrust::make_zip_iterator(thrust::make_tuple(keys_first1, values_first1)); + zip_iterator1 zipped_last1 = thrust::make_zip_iterator(thrust::make_tuple(keys_last1, values_first1)); + + zip_iterator2 zipped_first2 = thrust::make_zip_iterator(thrust::make_tuple(keys_first2, values_first2)); + zip_iterator2 zipped_last2 = thrust::make_zip_iterator(thrust::make_tuple(keys_last2, values_first2)); + + zip_iterator3 zipped_result = thrust::make_zip_iterator(thrust::make_tuple(keys_result, values_result)); + + thrust::detail::compare_first comp_first(comp); + + iterator_tuple3 result = thrust::merge(exec, zipped_first1, zipped_last1, zipped_first2, zipped_last2, zipped_result, comp_first).get_iterator_tuple(); + + return thrust::make_pair(thrust::get<0>(result), thrust::get<1>(result)); +} // end merge_by_key() + + +template + thrust::pair + merge_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first1, InputIterator1 keys_last1, + InputIterator2 keys_first2, InputIterator2 keys_last2, + InputIterator3 values_first1, InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result) +{ + typedef typename thrust::iterator_value::type value_type; + return thrust::merge_by_key(exec, keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, thrust::less()); +} // end merge_by_key() + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/mismatch.h b/compat/thrust/system/detail/generic/mismatch.h new file mode 100644 index 0000000..dc581ff --- /dev/null +++ b/compat/thrust/system/detail/generic/mismatch.h @@ -0,0 +1,56 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed 
under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + thrust::pair + mismatch(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2); + + +template + thrust::pair + mismatch(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + BinaryPredicate pred); + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/mismatch.inl b/compat/thrust/system/detail/generic/mismatch.inl new file mode 100644 index 0000000..923c27f --- /dev/null +++ b/compat/thrust/system/detail/generic/mismatch.inl @@ -0,0 +1,70 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + thrust::pair + mismatch(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2) +{ + typedef typename thrust::iterator_value::type InputType1; + + // XXX use a placeholder expression here + return thrust::mismatch(exec, first1, last1, first2, thrust::detail::equal_to()); +} // end mismatch() + +template + thrust::pair + mismatch(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + BinaryPredicate pred) +{ + // Contributed by Erich Elsen + typedef thrust::tuple IteratorTuple; + typedef thrust::zip_iterator ZipIterator; + + ZipIterator zipped_first = thrust::make_zip_iterator(thrust::make_tuple(first1,first2)); + ZipIterator zipped_last = thrust::make_zip_iterator(thrust::make_tuple(last1, first2)); + + ZipIterator result = thrust::find_if_not(exec, zipped_first, zipped_last, thrust::detail::tuple_binary_predicate(pred)); + + return thrust::make_pair(thrust::get<0>(result.get_iterator_tuple()), + thrust::get<1>(result.get_iterator_tuple())); +} // end mismatch() + +} // end generic +} // end detail +} // end system +} // end thrust + diff --git a/compat/thrust/system/detail/generic/partition.h b/compat/thrust/system/detail/generic/partition.h new file mode 100644 index 0000000..63daa1d --- /dev/null +++ b/compat/thrust/system/detail/generic/partition.h @@ -0,0 +1,150 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file partition.h + * \brief Generic implementations of partition functions. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + ForwardIterator stable_partition(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred); + +template + ForwardIterator stable_partition(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred); + +template + thrust::pair + stable_partition_copy(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred); + +template + thrust::pair + stable_partition_copy(thrust::execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred); + +template + ForwardIterator partition(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred); + +template + ForwardIterator partition(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred); + +template + thrust::pair + partition_copy(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred); + +template + thrust::pair + partition_copy(thrust::execution_policy &exec, + InputIterator1 
first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred); + +template + ForwardIterator partition_point(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred); + +template + bool is_partitioned(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + Predicate pred); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/partition.inl b/compat/thrust/system/detail/generic/partition.inl new file mode 100644 index 0000000..3298afc --- /dev/null +++ b/compat/thrust/system/detail/generic/partition.inl @@ -0,0 +1,238 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + ForwardIterator stable_partition(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + typedef typename thrust::iterator_traits::value_type InputType; + + // copy input to temp buffer + thrust::detail::temporary_array temp(exec, first, last); + + // count the size of the true partition + typename thrust::iterator_difference::type num_true = thrust::count_if(exec, first,last,pred); + + // point to the beginning of the false partition + ForwardIterator out_false = first; + thrust::advance(out_false, num_true); + + return thrust::stable_partition_copy(exec, temp.begin(), temp.end(), first, out_false, pred).first; +} // end stable_partition() + + +template + ForwardIterator stable_partition(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred) +{ + typedef typename thrust::iterator_traits::value_type InputType; + + // copy input to temp buffer + thrust::detail::temporary_array temp(exec, first, last); + + // count the size of the true partition + InputIterator stencil_last = stencil; + thrust::advance(stencil_last, temp.size()); + typename thrust::iterator_difference::type num_true = thrust::count_if(exec, stencil, stencil_last, pred); + + // point to the beginning of the false partition + ForwardIterator out_false = first; + thrust::advance(out_false, num_true); + + return thrust::stable_partition_copy(exec, temp.begin(), temp.end(), stencil, first, out_false, pred).first; +} // end stable_partition() + + +template + thrust::pair + stable_partition_copy(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + 
thrust::detail::unary_negate not_pred(pred); + + // remove_copy_if the true partition to out_true + OutputIterator1 end_of_true_partition = thrust::remove_copy_if(exec, first, last, out_true, not_pred); + + // remove_copy_if the false partition to out_false + OutputIterator2 end_of_false_partition = thrust::remove_copy_if(exec, first, last, out_false, pred); + + return thrust::make_pair(end_of_true_partition, end_of_false_partition); +} // end stable_partition_copy() + + +template + thrust::pair + stable_partition_copy(thrust::execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + thrust::detail::unary_negate not_pred(pred); + + // remove_copy_if the true partition to out_true + OutputIterator1 end_of_true_partition = thrust::remove_copy_if(exec, first, last, stencil, out_true, not_pred); + + // remove_copy_if the false partition to out_false + OutputIterator2 end_of_false_partition = thrust::remove_copy_if(exec, first, last, stencil, out_false, pred); + + return thrust::make_pair(end_of_true_partition, end_of_false_partition); +} // end stable_partition_copy() + + +template + ForwardIterator partition(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + return thrust::stable_partition(exec, first, last, pred); +} // end partition() + + +template + ForwardIterator partition(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred) +{ + return thrust::stable_partition(exec, first, last, stencil, pred); +} // end partition() + + +template + thrust::pair + partition_copy(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + return thrust::stable_partition_copy(exec,first,last,out_true,out_false,pred); +} // end partition_copy() + + 
+template + thrust::pair + partition_copy(thrust::execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + return thrust::stable_partition_copy(exec,first,last,stencil,out_true,out_false,pred); +} // end partition_copy() + + +template + ForwardIterator partition_point(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + return thrust::find_if_not(exec, first, last, pred); +} // end partition_point() + + +template + bool is_partitioned(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + Predicate pred) +{ + return thrust::is_sorted(exec, + thrust::make_transform_iterator(first, thrust::detail::not1(pred)), + thrust::make_transform_iterator(last, thrust::detail::not1(pred))); +} // end is_partitioned() + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/reduce.h b/compat/thrust/system/detail/generic/reduce.h new file mode 100644 index 0000000..2811df1 --- /dev/null +++ b/compat/thrust/system/detail/generic/reduce.h @@ -0,0 +1,52 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + typename thrust::iterator_traits::value_type + reduce(thrust::execution_policy &exec, InputIterator first, InputIterator last); + +template + T reduce(thrust::execution_policy &exec, InputIterator first, InputIterator last, T init); + +template + T reduce(thrust::execution_policy &exec, InputIterator first, InputIterator last, T init, BinaryFunction binary_op); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/reduce.inl b/compat/thrust/system/detail/generic/reduce.inl new file mode 100644 index 0000000..8f52385 --- /dev/null +++ b/compat/thrust/system/detail/generic/reduce.inl @@ -0,0 +1,74 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + typename thrust::iterator_traits::value_type + reduce(thrust::execution_policy &exec, InputIterator first, InputIterator last) +{ + typedef typename thrust::iterator_value::type InputType; + + // use InputType(0) as init by default + return thrust::reduce(exec, first, last, InputType(0)); +} // end reduce() + + +template + T reduce(thrust::execution_policy &exec, InputIterator first, InputIterator last, T init) +{ + // use plus by default + return thrust::reduce(exec, first, last, init, thrust::plus()); +} // end reduce() + + +template + OutputType reduce(thrust::execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last, + OutputType init, + BinaryFunction binary_op) +{ + // unimplemented + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + return OutputType(); +} // end reduce() + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/reduce_by_key.h b/compat/thrust/system/detail/generic/reduce_by_key.h new file mode 100644 index 0000000..c6064ab --- /dev/null +++ b/compat/thrust/system/detail/generic/reduce_by_key.h @@ -0,0 +1,86 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + thrust::pair + reduce_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output); + +template + thrust::pair + reduce_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred); + +template + thrust::pair + reduce_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred, + BinaryFunction binary_op); + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/reduce_by_key.inl b/compat/thrust/system/detail/generic/reduce_by_key.inl new file mode 100644 index 0000000..2ca21a5 --- /dev/null +++ b/compat/thrust/system/detail/generic/reduce_by_key.inl @@ -0,0 +1,212 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file reduce_by_key.inl + * \brief Inline file for reduce_by_key.h. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ +namespace detail +{ + +template +struct reduce_by_key_functor +{ + AssociativeOperator binary_op; + + typedef typename thrust::tuple result_type; + + __host__ __device__ + reduce_by_key_functor(AssociativeOperator _binary_op) : binary_op(_binary_op) {} + + __host__ __device__ + result_type operator()(result_type a, result_type b) + { + return result_type(thrust::get<1>(b) ? thrust::get<0>(b) : binary_op(thrust::get<0>(a), thrust::get<0>(b)), + thrust::get<1>(a) | thrust::get<1>(b)); + } +}; + +} // end namespace detail + + +template + thrust::pair + reduce_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred, + BinaryFunction binary_op) +{ + typedef typename thrust::iterator_traits::difference_type difference_type; + typedef typename thrust::iterator_traits::value_type KeyType; + + typedef unsigned int FlagType; // TODO use difference_type + + // the pseudocode for deducing the type of the temporary used below: + // + // if BinaryFunction is AdaptableBinaryFunction + // TemporaryType = AdaptableBinaryFunction::result_type + // else if OutputIterator2 is a "pure" output iterator + // TemporaryType = InputIterator2::value_type + // else + // TemporaryType = OutputIterator2::value_type + // + // XXX upon c++0x, TemporaryType needs to be: + // result_of::type + + typedef typename thrust::detail::eval_if< + thrust::detail::has_result_type::value, + thrust::detail::result_type, + thrust::detail::eval_if< + thrust::detail::is_output_iterator::value, + thrust::iterator_value, + thrust::iterator_value + > + >::type ValueType; + + if (keys_first == keys_last) + return 
thrust::make_pair(keys_output, values_output); + + // input size + difference_type n = keys_last - keys_first; + + InputIterator2 values_last = values_first + n; + + // compute head flags + thrust::detail::temporary_array head_flags(exec, n); + thrust::transform(exec, keys_first, keys_last - 1, keys_first + 1, head_flags.begin() + 1, thrust::detail::not2(binary_pred)); + head_flags[0] = 1; + + // compute tail flags + thrust::detail::temporary_array tail_flags(exec, n); //COPY INSTEAD OF TRANSFORM + thrust::transform(exec, keys_first, keys_last - 1, keys_first + 1, tail_flags.begin(), thrust::detail::not2(binary_pred)); + tail_flags[n-1] = 1; + + // scan the values by flag + thrust::detail::temporary_array scanned_values(exec, n); + thrust::detail::temporary_array scanned_tail_flags(exec, n); + + thrust::inclusive_scan + (exec, + thrust::make_zip_iterator(thrust::make_tuple(values_first, head_flags.begin())), + thrust::make_zip_iterator(thrust::make_tuple(values_last, head_flags.end())), + thrust::make_zip_iterator(thrust::make_tuple(scanned_values.begin(), scanned_tail_flags.begin())), + detail::reduce_by_key_functor(binary_op)); + + thrust::exclusive_scan(exec, tail_flags.begin(), tail_flags.end(), scanned_tail_flags.begin(), FlagType(0), thrust::plus()); + + // number of unique keys + FlagType N = scanned_tail_flags[n - 1] + 1; + + // scatter the keys and accumulated values + thrust::scatter_if(exec, keys_first, keys_last, scanned_tail_flags.begin(), head_flags.begin(), keys_output); + thrust::scatter_if(exec, scanned_values.begin(), scanned_values.end(), scanned_tail_flags.begin(), tail_flags.begin(), values_output); + + return thrust::make_pair(keys_output + N, values_output + N); +} // end reduce_by_key() + + +template + thrust::pair + reduce_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output) +{ + typedef typename 
thrust::iterator_value::type KeyType; + + // use equal_to as default BinaryPredicate + return thrust::reduce_by_key(exec, keys_first, keys_last, values_first, keys_output, values_output, thrust::equal_to()); +} // end reduce_by_key() + + +template + thrust::pair + reduce_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred) +{ + typedef typename thrust::detail::eval_if< + thrust::detail::is_output_iterator::value, + thrust::iterator_value, + thrust::iterator_value + >::type T; + + // use plus as default BinaryFunction + return thrust::reduce_by_key(exec, + keys_first, keys_last, + values_first, + keys_output, + values_output, + binary_pred, + thrust::plus()); +} // end reduce_by_key() + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/remove.h b/compat/thrust/system/detail/generic/remove.h new file mode 100644 index 0000000..e236735 --- /dev/null +++ b/compat/thrust/system/detail/generic/remove.h @@ -0,0 +1,100 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file remove.h + * \brief Generic implementations of remove functions. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + ForwardIterator remove(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + const T &value); + +template + OutputIterator remove_copy(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + const T &value); + +template + ForwardIterator remove_if(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred); + +template + ForwardIterator remove_if(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred); + +template + OutputIterator remove_copy_if(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred); + +template + OutputIterator remove_copy_if(thrust::execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/remove.inl b/compat/thrust/system/detail/generic/remove.inl new file mode 100644 index 0000000..8a533e0 --- /dev/null +++ b/compat/thrust/system/detail/generic/remove.inl @@ -0,0 +1,144 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file remove.inl + * \brief Inline file for remove.h + */ + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + ForwardIterator remove(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + const T &value) +{ + thrust::detail::equal_to_value pred(value); + + // XXX consider using a placeholder here + return thrust::remove_if(exec, first, last, pred); +} // end remove() + + +template + OutputIterator remove_copy(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + const T &value) +{ + thrust::detail::equal_to_value pred(value); + + // XXX consider using a placeholder here + return thrust::remove_copy_if(exec, first, last, result, pred); +} // end remove_copy() + + +template + ForwardIterator remove_if(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + typedef typename thrust::iterator_traits::value_type InputType; + + // create temporary storage for an intermediate result + thrust::detail::temporary_array temp(exec, first, last); + + // remove into temp + return thrust::remove_copy_if(exec, temp.begin(), temp.end(), temp.begin(), first, pred); +} // end remove_if() + + +template + ForwardIterator remove_if(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred) +{ + typedef typename thrust::iterator_traits::value_type InputType; + + // create temporary storage for an intermediate result + thrust::detail::temporary_array temp(exec, first, last); + + // remove into temp + return thrust::remove_copy_if(exec, temp.begin(), temp.end(), stencil, first, pred); +} // end remove_if() + + +template + OutputIterator 
remove_copy_if(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred) +{ + return thrust::remove_copy_if(exec, first, last, first, result, pred); +} // end remove_copy_if() + + +template + OutputIterator remove_copy_if(thrust::execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred) +{ + return thrust::copy_if(exec, first, last, stencil, result, thrust::detail::not1(pred)); +} // end remove_copy_if() + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/replace.h b/compat/thrust/system/detail/generic/replace.h new file mode 100644 index 0000000..deb2e55 --- /dev/null +++ b/compat/thrust/system/detail/generic/replace.h @@ -0,0 +1,92 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + OutputIterator replace_copy_if(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred, + const T &new_value); + + +template + OutputIterator replace_copy_if(thrust::execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred, + const T &new_value); + + +template + OutputIterator replace_copy(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + const T &old_value, + const T &new_value); + + +template + void replace_if(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred, + const T &new_value); + + +template + void replace_if(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred, + const T &new_value); + + +template + void replace(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + const T &old_value, + const T &new_value); + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/replace.inl b/compat/thrust/system/detail/generic/replace.inl new file mode 100644 index 0000000..52e7118 --- /dev/null +++ b/compat/thrust/system/detail/generic/replace.inl @@ -0,0 +1,168 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ +namespace detail +{ + +// this functor receives x, and returns a new_value if predicate(x) is true; otherwise, +// it returns x +template + struct new_value_if +{ + new_value_if(Predicate p, NewType nv):pred(p),new_value(nv){} + + template + __host__ __device__ + OutputType operator()(const InputType x) const + { + return pred(x) ? new_value : x; + } // end operator()() + + // this version of operator()() works like the previous but + // feeds its second argument to pred + template + __host__ __device__ + OutputType operator()(const InputType x, const PredicateArgumentType y) + { + return pred(y) ? 
new_value : x; + } // end operator()() + + Predicate pred; + NewType new_value; +}; // end new_value_if + +// this unary functor ignores its argument and returns a constant +template + struct constant_unary +{ + constant_unary(T _c):c(_c){} + + template + __host__ __device__ + T operator()(U &x) + { + return c; + } // end operator()() + + T c; +}; // end constant_unary + +} // end detail + +template + OutputIterator replace_copy_if(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred, + const T &new_value) +{ + typedef typename thrust::iterator_traits::value_type InputType; + typedef typename thrust::iterator_traits::value_type OutputType; + + detail::new_value_if op(pred,new_value); + return thrust::transform(exec, first, last, result, op); +} // end replace_copy_if() + +template + OutputIterator replace_copy_if(thrust::execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred, + const T &new_value) +{ + typedef typename thrust::iterator_traits::value_type OutputType; + + detail::new_value_if op(pred,new_value); + return thrust::transform(exec, first, last, stencil, result, op); +} // end replace_copy_if() + + +template + OutputIterator replace_copy(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + const T &old_value, + const T &new_value) +{ + thrust::detail::equal_to_value pred(old_value); + return thrust::replace_copy_if(exec, first, last, result, pred, new_value); +} // end replace_copy() + +template + void replace_if(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred, + const T &new_value) +{ + detail::constant_unary f(new_value); + + // XXX replace this with generate_if: + // constant_nullary f(new_value); + // generate_if(first, last, first, f, pred); + thrust::transform_if(exec, first, last, first, first, f, 
pred); +} // end replace_if() + +template + void replace_if(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred, + const T &new_value) +{ + detail::constant_unary f(new_value); + + // XXX replace this with generate_if: + // constant_nullary f(new_value); + // generate_if(stencil, stencil + n, first, f, pred); + thrust::transform_if(exec, first, last, stencil, first, f, pred); +} // end replace_if() + +template + void replace(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + const T &old_value, + const T &new_value) +{ + thrust::detail::equal_to_value pred(old_value); + return thrust::replace_if(exec, first, last, pred, new_value); +} // end replace() + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/reverse.h b/compat/thrust/system/detail/generic/reverse.h new file mode 100644 index 0000000..327bf22 --- /dev/null +++ b/compat/thrust/system/detail/generic/reverse.h @@ -0,0 +1,51 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + void reverse(thrust::execution_policy &exec, + BidirectionalIterator first, + BidirectionalIterator last); + +template + OutputIterator reverse_copy(thrust::execution_policy &exec, + BidirectionalIterator first, + BidirectionalIterator last, + OutputIterator result); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/reverse.inl b/compat/thrust/system/detail/generic/reverse.inl new file mode 100644 index 0000000..27c1bbf --- /dev/null +++ b/compat/thrust/system/detail/generic/reverse.inl @@ -0,0 +1,70 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + void reverse(thrust::execution_policy &exec, + BidirectionalIterator first, + BidirectionalIterator last) +{ + typedef typename thrust::iterator_difference::type difference_type; + + // find the midpoint of [first,last) + difference_type N = thrust::distance(first, last); + BidirectionalIterator mid(first); + thrust::advance(mid, N / 2); + + // swap elements of [first,mid) with [last - 1, mid) + thrust::swap_ranges(exec, first, mid, thrust::make_reverse_iterator(last)); +} // end reverse() + +template + OutputIterator reverse_copy(thrust::execution_policy &exec, + BidirectionalIterator first, + BidirectionalIterator last, + OutputIterator result) +{ + return thrust::copy(exec, + thrust::make_reverse_iterator(last), + thrust::make_reverse_iterator(first), + result); +} // end reverse_copy() + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + + diff --git a/compat/thrust/system/detail/generic/scalar/binary_search.h b/compat/thrust/system/detail/generic/scalar/binary_search.h new file mode 100644 index 0000000..6ed9e8d --- /dev/null +++ b/compat/thrust/system/detail/generic/scalar/binary_search.h @@ -0,0 +1,85 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ + +namespace system +{ + +namespace detail +{ + +namespace generic +{ + +namespace scalar +{ + +template +__host__ __device__ +RandomAccessIterator lower_bound_n(RandomAccessIterator first, + Size n, + const T &val, + BinaryPredicate comp); + +template +__host__ __device__ +RandomAccessIterator lower_bound(RandomAccessIterator first, RandomAccessIterator last, + const T &val, + BinaryPredicate comp); + +template +__host__ __device__ +RandomAccessIterator upper_bound_n(RandomAccessIterator first, + Size n, + const T &val, + BinaryPredicate comp); + +template +__host__ __device__ +RandomAccessIterator upper_bound(RandomAccessIterator first, RandomAccessIterator last, + const T &val, + BinaryPredicate comp); + +template +__host__ __device__ + pair + equal_range(RandomAccessIterator first, RandomAccessIterator last, + const T &val, + BinaryPredicate comp); + +template +__host__ __device__ +bool binary_search(RandomAccessIterator first, RandomAccessIterator last, const T &value, Compare comp); + +} // end scalar + +} // end generic + +} // end detail + +} // end system + +} // end thrust + +#include + diff --git a/compat/thrust/system/detail/generic/scalar/binary_search.inl b/compat/thrust/system/detail/generic/scalar/binary_search.inl new file mode 100644 index 0000000..5a9d379 --- /dev/null +++ b/compat/thrust/system/detail/generic/scalar/binary_search.inl @@ -0,0 +1,159 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +namespace system +{ + +namespace detail +{ + +namespace generic +{ + +namespace scalar +{ + +template +__host__ __device__ +RandomAccessIterator lower_bound_n(RandomAccessIterator first, + Size n, + const T &val, + BinaryPredicate comp) +{ + // wrap comp + thrust::detail::host_device_function< + BinaryPredicate, + bool + > wrapped_comp(comp); + + Size start = 0, i; + while(start < n) + { + i = (start + n) / 2; + if(wrapped_comp(first[i], val)) + { + start = i + 1; + } + else + { + n = i; + } + } // end while + + return first + start; +} + +// XXX generalize these upon implementation of scalar::distance & scalar::advance + +template +__host__ __device__ +RandomAccessIterator lower_bound(RandomAccessIterator first, RandomAccessIterator last, + const T &val, + BinaryPredicate comp) +{ + typename thrust::iterator_difference::type n = last - first; + return lower_bound_n(first, n, val, comp); +} + +template +__host__ __device__ +RandomAccessIterator upper_bound_n(RandomAccessIterator first, + Size n, + const T &val, + BinaryPredicate comp) +{ + // wrap comp + thrust::detail::host_device_function< + BinaryPredicate, + bool + > wrapped_comp(comp); + + Size start = 0, i; + while(start < n) + { + i = (start + n) / 2; + if(wrapped_comp(val, first[i])) + { + n = i; + } + else + { + start = i + 1; + } + } // end while + + return first + start; +} + +template +__host__ __device__ +RandomAccessIterator upper_bound(RandomAccessIterator first, RandomAccessIterator last, + const T &val, + BinaryPredicate comp) +{ + typename thrust::iterator_difference::type n = last - first; + return upper_bound_n(first, n, val, comp); +} + +template +__host__ __device__ + pair + equal_range(RandomAccessIterator first, RandomAccessIterator last, + const T &val, + BinaryPredicate comp) +{ + 
RandomAccessIterator lb = thrust::system::detail::generic::scalar::lower_bound(first, last, val, comp); + return thrust::make_pair(lb, thrust::system::detail::generic::scalar::upper_bound(lb, last, val, comp)); +} + + +template +__host__ __device__ +bool binary_search(RandomAccessIterator first, RandomAccessIterator last, const T &value, Compare comp) +{ + RandomAccessIterator iter = thrust::system::detail::generic::scalar::lower_bound(first, last, value, comp); + + // wrap comp + thrust::detail::host_device_function< + Compare, + bool + > wrapped_comp(comp); + + return iter != last && !wrapped_comp(value,*iter); +} + +} // end scalar + +} // end generic + +} // end detail + +} // end system + +} // end thrust + +#include + diff --git a/compat/thrust/system/detail/generic/scan.h b/compat/thrust/system/detail/generic/scan.h new file mode 100644 index 0000000..205f87f --- /dev/null +++ b/compat/thrust/system/detail/generic/scan.h @@ -0,0 +1,94 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + OutputIterator inclusive_scan(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result); + + +// XXX it is an error to call this function; it has no implementation +template + OutputIterator inclusive_scan(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + BinaryFunction binary_op); + + +template + OutputIterator exclusive_scan(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result); + + +template + OutputIterator exclusive_scan(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + T init); + + +// XXX it is an error to call this function; it has no implementation +template + OutputIterator exclusive_scan(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + T init, + BinaryFunction binary_op); + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/scan.inl b/compat/thrust/system/detail/generic/scan.inl new file mode 100644 index 0000000..33e0803 --- /dev/null +++ b/compat/thrust/system/detail/generic/scan.inl @@ -0,0 +1,144 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + OutputIterator inclusive_scan(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result) +{ + // the pseudocode for deducing the type of the temporary used below: + // + // if OutputIterator is a "pure" output iterator + // TemporaryType = InputIterator::value_type + // else + // TemporaryType = OutputIterator::value_type + + typedef typename thrust::detail::eval_if< + thrust::detail::is_output_iterator::value, + thrust::iterator_value, + thrust::iterator_value + >::type ValueType; + + // assume plus as the associative operator + return thrust::inclusive_scan(exec, first, last, result, thrust::plus()); +} // end inclusive_scan() + + +template + OutputIterator exclusive_scan(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result) +{ + // the pseudocode for deducing the type of the temporary used below: + // + // if OutputIterator is a "pure" output iterator + // TemporaryType = InputIterator::value_type + // else + // TemporaryType = OutputIterator::value_type + + typedef typename thrust::detail::eval_if< + thrust::detail::is_output_iterator::value, + thrust::iterator_value, + thrust::iterator_value + >::type ValueType; + + // assume 0 as the initialization value + return thrust::exclusive_scan(exec, first, last, result, ValueType(0)); +} // end exclusive_scan() + + +template + OutputIterator exclusive_scan(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + T init) +{ + // assume plus as the associative operator + return thrust::exclusive_scan(exec, first, last, result, init, thrust::plus()); +} // end 
exclusive_scan() + + +template + OutputIterator inclusive_scan(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + BinaryFunction binary_op) +{ + // unimplemented primitive + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + return result; +} // end inclusive_scan + + +template + OutputIterator exclusive_scan(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + T init, + BinaryFunction binary_op) +{ + // unimplemented primitive + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + return result; +} // end exclusive_scan() + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/scan_by_key.h b/compat/thrust/system/detail/generic/scan_by_key.h new file mode 100644 index 0000000..160121b --- /dev/null +++ b/compat/thrust/system/detail/generic/scan_by_key.h @@ -0,0 +1,137 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file scan_by_key.h + * \brief Generic implementations of key-value scans. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + OutputIterator inclusive_scan_by_key(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result); + + +template + OutputIterator inclusive_scan_by_key(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryPredicate binary_pred); + + +template + OutputIterator inclusive_scan_by_key(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryPredicate binary_pred, + AssociativeOperator binary_op); + + +template + OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result); + + +template + OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + T init); + + +template + OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + T init, + BinaryPredicate binary_pred); + + +template + OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + T init, + BinaryPredicate binary_pred, + AssociativeOperator binary_op); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/scan_by_key.inl b/compat/thrust/system/detail/generic/scan_by_key.inl new file mode 100644 index 0000000..d866dde --- /dev/null +++ 
b/compat/thrust/system/detail/generic/scan_by_key.inl @@ -0,0 +1,239 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ +namespace detail +{ + +template +struct segmented_scan_functor +{ + AssociativeOperator binary_op; + + typedef typename thrust::tuple result_type; + + __host__ __device__ + segmented_scan_functor(AssociativeOperator _binary_op) : binary_op(_binary_op) {} + + __host__ __device__ + result_type operator()(result_type a, result_type b) + { + return result_type(thrust::get<1>(b) ? 
thrust::get<0>(b) : binary_op(thrust::get<0>(a), thrust::get<0>(b)), + thrust::get<1>(a) | thrust::get<1>(b)); + } +}; + +} // end namespace detail + + +template + OutputIterator inclusive_scan_by_key(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result) +{ + typedef typename thrust::iterator_traits::value_type InputType1; + return thrust::inclusive_scan_by_key(exec, first1, last1, first2, result, thrust::equal_to()); +} + + +template + OutputIterator inclusive_scan_by_key(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryPredicate binary_pred) +{ + typedef typename thrust::iterator_traits::value_type OutputType; + return thrust::inclusive_scan_by_key(exec, first1, last1, first2, result, binary_pred, thrust::plus()); +} + + +template + OutputIterator inclusive_scan_by_key(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryPredicate binary_pred, + AssociativeOperator binary_op) +{ + typedef typename thrust::iterator_traits::value_type OutputType; + typedef unsigned int HeadFlagType; + + const size_t n = last1 - first1; + + if(n != 0) + { + // compute head flags + thrust::detail::temporary_array flags(exec, n); + flags[0] = 1; thrust::transform(exec, first1, last1 - 1, first1 + 1, flags.begin() + 1, thrust::detail::not2(binary_pred)); + + // scan key-flag tuples, + // For additional details refer to Section 2 of the following paper + // S. Sengupta, M. Harris, and M. Garland. 
"Efficient parallel scan algorithms for GPUs" + // NVIDIA Technical Report NVR-2008-003, December 2008 + // http://mgarland.org/files/papers/nvr-2008-003.pdf + thrust::inclusive_scan + (exec, + thrust::make_zip_iterator(thrust::make_tuple(first2, flags.begin())), + thrust::make_zip_iterator(thrust::make_tuple(first2, flags.begin())) + n, + thrust::make_zip_iterator(thrust::make_tuple(result, flags.begin())), + detail::segmented_scan_functor(binary_op)); + } + + return result + n; +} + + +template + OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result) +{ + typedef typename thrust::iterator_traits::value_type OutputType; + return thrust::exclusive_scan_by_key(exec, first1, last1, first2, result, OutputType(0)); +} + + +template + OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + T init) +{ + typedef typename thrust::iterator_traits::value_type InputType1; + return thrust::exclusive_scan_by_key(exec, first1, last1, first2, result, init, thrust::equal_to()); +} + + +template + OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + T init, + BinaryPredicate binary_pred) +{ + typedef typename thrust::iterator_traits::value_type OutputType; + return thrust::exclusive_scan_by_key(exec, first1, last1, first2, result, init, binary_pred, thrust::plus()); +} + + +template + OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + T init, + BinaryPredicate binary_pred, + AssociativeOperator binary_op) +{ + typedef typename thrust::iterator_traits::value_type OutputType; + typedef unsigned int HeadFlagType; + + const size_t 
n = last1 - first1; + + if(n != 0) + { + InputIterator2 last2 = first2 + n; + + // compute head flags + thrust::detail::temporary_array flags(exec, n); + flags[0] = 1; thrust::transform(exec, first1, last1 - 1, first1 + 1, flags.begin() + 1, thrust::detail::not2(binary_pred)); + + // shift input one to the right and initialize segments with init + thrust::detail::temporary_array temp(exec, n); + thrust::replace_copy_if(exec, first2, last2 - 1, flags.begin() + 1, temp.begin() + 1, thrust::negate(), init); + temp[0] = init; + + // scan key-flag tuples, + // For additional details refer to Section 2 of the following paper + // S. Sengupta, M. Harris, and M. Garland. "Efficient parallel scan algorithms for GPUs" + // NVIDIA Technical Report NVR-2008-003, December 2008 + // http://mgarland.org/files/papers/nvr-2008-003.pdf + thrust::inclusive_scan(exec, + thrust::make_zip_iterator(thrust::make_tuple(temp.begin(), flags.begin())), + thrust::make_zip_iterator(thrust::make_tuple(temp.begin(), flags.begin())) + n, + thrust::make_zip_iterator(thrust::make_tuple(result, flags.begin())), + detail::segmented_scan_functor(binary_op)); + } + + return result + n; +} + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/scatter.h b/compat/thrust/system/detail/generic/scatter.h new file mode 100644 index 0000000..858d11a --- /dev/null +++ b/compat/thrust/system/detail/generic/scatter.h @@ -0,0 +1,76 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + void scatter(thrust::execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 map, + RandomAccessIterator output); + + +template + void scatter_if(thrust::execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 map, + InputIterator3 stencil, + RandomAccessIterator output); + + +template + void scatter_if(thrust::execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 map, + InputIterator3 stencil, + RandomAccessIterator output, + Predicate pred); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/scatter.inl b/compat/thrust/system/detail/generic/scatter.inl new file mode 100644 index 0000000..8c40359 --- /dev/null +++ b/compat/thrust/system/detail/generic/scatter.inl @@ -0,0 +1,93 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + void scatter(thrust::execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 map, + RandomAccessIterator output) +{ + thrust::transform(exec, + first, + last, + thrust::make_permutation_iterator(output, map), + thrust::identity::type>()); +} // end scatter() + + +template + void scatter_if(thrust::execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 map, + InputIterator3 stencil, + RandomAccessIterator output) +{ + // default predicate is identity + typedef typename thrust::iterator_value::type StencilType; + thrust::scatter_if(exec, first, last, map, stencil, output, thrust::identity()); +} // end scatter_if() + + +template + void scatter_if(thrust::execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 map, + InputIterator3 stencil, + RandomAccessIterator output, + Predicate pred) +{ + typedef typename thrust::iterator_value::type InputType; + thrust::transform_if(exec, first, last, stencil, thrust::make_permutation_iterator(output, map), thrust::identity(), pred); +} // end scatter_if() + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/select_system.h b/compat/thrust/system/detail/generic/select_system.h new file mode 100644 index 0000000..250a0bc --- /dev/null +++ b/compat/thrust/system/detail/generic/select_system.h @@ -0,0 +1,182 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ +namespace select_system_detail +{ + + +// min_system case 1: both systems have the same type, just return the first one +template +__host__ __device__ +System &min_system(thrust::execution_policy &system1, + thrust::execution_policy &) +{ + return thrust::detail::derived_cast(system1); +} // end min_system() + + +// min_system case 2: systems have differing type and the first type is considered the minimum +template +__host__ __device__ + typename thrust::detail::enable_if< + thrust::detail::is_same< + System1, + typename thrust::detail::minimum_system::type + >::value, + System1 & + >::type + min_system(thrust::execution_policy &system1, thrust::execution_policy &) +{ + return thrust::detail::derived_cast(system1); +} // end min_system() + + +// min_system case 3: systems have differing type and the second type is considered the minimum +template +__host__ __device__ + typename thrust::detail::enable_if< + thrust::detail::is_same< + System2, + typename thrust::detail::minimum_system::type + >::value, + System2 & + >::type + min_system(thrust::execution_policy &, thrust::execution_policy &system2) +{ + return thrust::detail::derived_cast(system2); +} // end min_system() + + +} // end select_system_detail + + +template +__host__ __device__ + typename thrust::detail::disable_if< + select_system1_exists::value, + System & + >::type + select_system(thrust::execution_policy 
&system) +{ + return thrust::detail::derived_cast(system); +} // end select_system() + + +template +__host__ __device__ + typename thrust::detail::enable_if_defined< + thrust::detail::minimum_system + >::type + &select_system(thrust::execution_policy &system1, + thrust::execution_policy &system2) +{ + return select_system_detail::min_system(system1,system2); +} // end select_system() + + +template +__host__ __device__ + typename thrust::detail::lazy_disable_if< + select_system3_exists::value, + thrust::detail::minimum_system + >::type + &select_system(thrust::execution_policy &system1, + thrust::execution_policy &system2, + thrust::execution_policy &system3) +{ + return select_system(select_system(system1,system2), system3); +} // end select_system() + + +template +__host__ __device__ + typename thrust::detail::lazy_disable_if< + select_system4_exists::value, + thrust::detail::minimum_system + >::type + &select_system(thrust::execution_policy &system1, + thrust::execution_policy &system2, + thrust::execution_policy &system3, + thrust::execution_policy &system4) +{ + return select_system(select_system(system1,system2,system3), system4); +} // end select_system() + + +template +__host__ __device__ + typename thrust::detail::lazy_disable_if< + select_system5_exists::value, + thrust::detail::minimum_system + >::type + &select_system(thrust::execution_policy &system1, + thrust::execution_policy &system2, + thrust::execution_policy &system3, + thrust::execution_policy &system4, + thrust::execution_policy &system5) +{ + return select_system(select_system(system1,system2,system3,system4), system5); +} // end select_system() + + +template +__host__ __device__ + typename thrust::detail::lazy_disable_if< + select_system6_exists::value, + thrust::detail::minimum_system + >::type + &select_system(thrust::execution_policy &system1, + thrust::execution_policy &system2, + thrust::execution_policy &system3, + thrust::execution_policy &system4, + thrust::execution_policy &system5, + 
thrust::execution_policy &system6) +{ + return select_system(select_system(system1,system2,system3,system4,system5), system6); +} // end select_system() + + +// map a single any_system_tag to device_system_tag +inline __host__ __device__ +thrust::device_system_tag select_system(thrust::any_system_tag) +{ + return thrust::device_system_tag(); +} // end select_system() + + +} // end generic +} // end detail +} // end system +} // end thrust + diff --git a/compat/thrust/system/detail/generic/sequence.h b/compat/thrust/system/detail/generic/sequence.h new file mode 100644 index 0000000..b23a7b5 --- /dev/null +++ b/compat/thrust/system/detail/generic/sequence.h @@ -0,0 +1,61 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + void sequence(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last); + + +template + void sequence(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + T init); + + +template + void sequence(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + T init, + T step); + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/sequence.inl b/compat/thrust/system/detail/generic/sequence.inl new file mode 100644 index 0000000..45aec69 --- /dev/null +++ b/compat/thrust/system/detail/generic/sequence.inl @@ -0,0 +1,69 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + void sequence(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last) +{ + typedef typename thrust::iterator_traits::value_type T; + + thrust::sequence(exec, first, last, T(0)); +} // end sequence() + + +template + void sequence(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + T init) +{ + thrust::sequence(exec, first, last, init, T(1)); +} // end sequence() + + +template + void sequence(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + T init, + T step) +{ + thrust::tabulate(exec, first, last, init + step * thrust::placeholders::_1); +} // end sequence() + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/set_operations.h b/compat/thrust/system/detail/generic/set_operations.h new file mode 100644 index 0000000..1ca8d39 --- /dev/null +++ b/compat/thrust/system/detail/generic/set_operations.h @@ -0,0 +1,303 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + OutputIterator set_difference(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result); + + +// XXX it is an error to call this function; it has no implementation +template + OutputIterator set_difference(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp); + + +template + thrust::pair + set_difference_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result); + + +template + thrust::pair + set_difference_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakOrdering comp); + + +template + OutputIterator set_intersection(thrust::execution_policy &system, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result); + + +// XXX it is an error to call this function; it has no implementation +template + OutputIterator set_intersection(thrust::execution_policy &system, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp); + + +template + thrust::pair + set_intersection_by_key(thrust::execution_policy &system, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + 
InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + OutputIterator1 keys_result, + OutputIterator2 values_result); + + +template + thrust::pair + set_intersection_by_key(thrust::execution_policy &system, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakOrdering comp); + + +template + OutputIterator set_symmetric_difference(thrust::execution_policy &system, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result); + + +// XXX it is an error to call this function; it has no implementation +template + OutputIterator set_symmetric_difference(thrust::execution_policy &system, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp); + + +template + thrust::pair + set_symmetric_difference_by_key(thrust::execution_policy &system, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result); + + +template + thrust::pair + set_symmetric_difference_by_key(thrust::execution_policy &system, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakOrdering comp); + + +template + OutputIterator set_union(thrust::execution_policy &system, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result); + + +// XXX it is an error to call this function; it has no implementation 
+template + OutputIterator set_union(thrust::execution_policy &system, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp); + + +template + thrust::pair + set_union_by_key(thrust::execution_policy &system, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result); + + +template + thrust::pair + set_union_by_key(thrust::execution_policy &system, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakOrdering comp); + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/set_operations.inl b/compat/thrust/system/detail/generic/set_operations.inl new file mode 100644 index 0000000..bac9ccd --- /dev/null +++ b/compat/thrust/system/detail/generic/set_operations.inl @@ -0,0 +1,449 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + OutputIterator set_difference(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result) +{ + typedef typename thrust::iterator_value::type value_type; + return thrust::set_difference(exec, first1, last1, first2, last2, result, thrust::less()); +} // end set_difference() + + +template + thrust::pair + set_difference_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result) +{ + typedef typename thrust::iterator_value::type value_type; + return thrust::set_difference_by_key(exec, keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, thrust::less()); +} // end set_difference_by_key() + + +template + thrust::pair + set_difference_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakOrdering comp) +{ + typedef thrust::tuple iterator_tuple1; + typedef thrust::tuple iterator_tuple2; + typedef thrust::tuple iterator_tuple3; + + typedef thrust::zip_iterator zip_iterator1; + typedef thrust::zip_iterator zip_iterator2; + typedef thrust::zip_iterator zip_iterator3; + + zip_iterator1 zipped_first1 = thrust::make_zip_iterator(thrust::make_tuple(keys_first1, values_first1)); + zip_iterator1 zipped_last1 = thrust::make_zip_iterator(thrust::make_tuple(keys_last1, values_first1)); 
+ + zip_iterator2 zipped_first2 = thrust::make_zip_iterator(thrust::make_tuple(keys_first2, values_first2)); + zip_iterator2 zipped_last2 = thrust::make_zip_iterator(thrust::make_tuple(keys_last2, values_first2)); + + zip_iterator3 zipped_result = thrust::make_zip_iterator(thrust::make_tuple(keys_result, values_result)); + + thrust::detail::compare_first comp_first(comp); + + iterator_tuple3 result = thrust::set_difference(exec, zipped_first1, zipped_last1, zipped_first2, zipped_last2, zipped_result, comp_first).get_iterator_tuple(); + + return thrust::make_pair(thrust::get<0>(result), thrust::get<1>(result)); +} // end set_difference_by_key() + + +template + OutputIterator set_intersection(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result) +{ + typedef typename thrust::iterator_value::type value_type; + return thrust::set_intersection(exec, first1, last1, first2, last2, result, thrust::less()); +} // end set_intersection() + + +template + thrust::pair + set_intersection_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + OutputIterator1 keys_result, + OutputIterator2 values_result) +{ + typedef typename thrust::iterator_value::type value_type; + return thrust::set_intersection_by_key(exec, keys_first1, keys_last1, keys_first2, keys_last2, values_first1, keys_result, values_result, thrust::less()); +} // end set_intersection_by_key() + + +template + thrust::pair + set_intersection_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakOrdering comp) +{ + typedef thrust::tuple iterator_tuple1; + typedef thrust::tuple 
iterator_tuple2; + typedef thrust::tuple iterator_tuple3; + + typedef thrust::zip_iterator zip_iterator1; + typedef thrust::zip_iterator zip_iterator2; + typedef thrust::zip_iterator zip_iterator3; + + // fabricate a values_first2 by "sending" keys twice + // it should never be dereferenced by set_intersection + InputIterator2 values_first2 = keys_first2; + + zip_iterator1 zipped_first1 = thrust::make_zip_iterator(thrust::make_tuple(keys_first1, values_first1)); + zip_iterator1 zipped_last1 = thrust::make_zip_iterator(thrust::make_tuple(keys_last1, values_first1)); + + zip_iterator2 zipped_first2 = thrust::make_zip_iterator(thrust::make_tuple(keys_first2, values_first2)); + zip_iterator2 zipped_last2 = thrust::make_zip_iterator(thrust::make_tuple(keys_last2, values_first2)); + + zip_iterator3 zipped_result = thrust::make_zip_iterator(thrust::make_tuple(keys_result, values_result)); + + thrust::detail::compare_first comp_first(comp); + + iterator_tuple3 result = thrust::set_intersection(exec, zipped_first1, zipped_last1, zipped_first2, zipped_last2, zipped_result, comp_first).get_iterator_tuple(); + + return thrust::make_pair(thrust::get<0>(result), thrust::get<1>(result)); +} // end set_intersection_by_key() + + +template + OutputIterator set_symmetric_difference(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result) +{ + typedef typename thrust::iterator_value::type value_type; + return thrust::set_symmetric_difference(exec, first1, last1, first2, last2, result, thrust::less()); +} // end set_symmetric_difference() + + +template + thrust::pair + set_symmetric_difference_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result) +{ + typedef 
typename thrust::iterator_value::type value_type; + return thrust::set_symmetric_difference_by_key(exec, keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, thrust::less()); +} // end set_symmetric_difference_by_key() + + +template + thrust::pair + set_symmetric_difference_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakOrdering comp) +{ + typedef thrust::tuple iterator_tuple1; + typedef thrust::tuple iterator_tuple2; + typedef thrust::tuple iterator_tuple3; + + typedef thrust::zip_iterator zip_iterator1; + typedef thrust::zip_iterator zip_iterator2; + typedef thrust::zip_iterator zip_iterator3; + + zip_iterator1 zipped_first1 = thrust::make_zip_iterator(thrust::make_tuple(keys_first1, values_first1)); + zip_iterator1 zipped_last1 = thrust::make_zip_iterator(thrust::make_tuple(keys_last1, values_first1)); + + zip_iterator2 zipped_first2 = thrust::make_zip_iterator(thrust::make_tuple(keys_first2, values_first2)); + zip_iterator2 zipped_last2 = thrust::make_zip_iterator(thrust::make_tuple(keys_last2, values_first2)); + + zip_iterator3 zipped_result = thrust::make_zip_iterator(thrust::make_tuple(keys_result, values_result)); + + thrust::detail::compare_first comp_first(comp); + + iterator_tuple3 result = thrust::set_symmetric_difference(exec, zipped_first1, zipped_last1, zipped_first2, zipped_last2, zipped_result, comp_first).get_iterator_tuple(); + + return thrust::make_pair(thrust::get<0>(result), thrust::get<1>(result)); +} // end set_symmetric_difference_by_key() + + +template + OutputIterator set_union(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result) +{ + 
typedef typename thrust::iterator_value::type value_type; + return thrust::set_union(exec, first1, last1, first2, last2, result, thrust::less()); +} // end set_union() + + +template + thrust::pair + set_union_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result) +{ + typedef typename thrust::iterator_value::type value_type; + return thrust::set_union_by_key(exec, keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, thrust::less()); +} // end set_union_by_key() + + +template + thrust::pair + set_union_by_key(thrust::execution_policy &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakOrdering comp) +{ + typedef thrust::tuple iterator_tuple1; + typedef thrust::tuple iterator_tuple2; + typedef thrust::tuple iterator_tuple3; + + typedef thrust::zip_iterator zip_iterator1; + typedef thrust::zip_iterator zip_iterator2; + typedef thrust::zip_iterator zip_iterator3; + + zip_iterator1 zipped_first1 = thrust::make_zip_iterator(thrust::make_tuple(keys_first1, values_first1)); + zip_iterator1 zipped_last1 = thrust::make_zip_iterator(thrust::make_tuple(keys_last1, values_first1)); + + zip_iterator2 zipped_first2 = thrust::make_zip_iterator(thrust::make_tuple(keys_first2, values_first2)); + zip_iterator2 zipped_last2 = thrust::make_zip_iterator(thrust::make_tuple(keys_last2, values_first2)); + + zip_iterator3 zipped_result = thrust::make_zip_iterator(thrust::make_tuple(keys_result, values_result)); + + thrust::detail::compare_first comp_first(comp); + + iterator_tuple3 result = 
thrust::set_union(exec, zipped_first1, zipped_last1, zipped_first2, zipped_last2, zipped_result, comp_first).get_iterator_tuple(); + + return thrust::make_pair(thrust::get<0>(result), thrust::get<1>(result)); +} // end set_union_by_key() + + +template + OutputIterator set_difference(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + // unimplemented primitive + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + return result; +} // end set_difference() + + +template + OutputIterator set_intersection(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + // unimplemented primitive + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + return result; +} // end set_intersection() + + +template + OutputIterator set_symmetric_difference(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + // unimplemented primitive + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + return result; +} // end set_symmetric_difference() + + +template + OutputIterator set_union(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + // unimplemented primitive + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + return result; +} // end set_union() + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/sort.h b/compat/thrust/system/detail/generic/sort.h new file 
mode 100644 index 0000000..5498708 --- /dev/null +++ b/compat/thrust/system/detail/generic/sort.h @@ -0,0 +1,142 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + void sort(thrust::execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last); + + +template + void sort(thrust::execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp); + + +template + void sort_by_key(thrust::execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first); + + +template + void sort_by_key(thrust::execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp); + + +template + void stable_sort(thrust::execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last); + + +// XXX it is an error to call this function; it has no implementation +template + void stable_sort(thrust::execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp); + + +template + void stable_sort_by_key(thrust::execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + 
RandomAccessIterator2 values_first); + + +// XXX it is an error to call this function; it has no implementation +template + void stable_sort_by_key(thrust::execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp); + + +template + bool is_sorted(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last); + + +template + bool is_sorted(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Compare comp); + + +template + ForwardIterator is_sorted_until(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last); + + +template + ForwardIterator is_sorted_until(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Compare comp); + + +} // end generic +} // end detail +} // end system +} // end thrust + +#include + diff --git a/compat/thrust/system/detail/generic/sort.inl b/compat/thrust/system/detail/generic/sort.inl new file mode 100644 index 0000000..aabb2ee --- /dev/null +++ b/compat/thrust/system/detail/generic/sort.inl @@ -0,0 +1,202 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + void sort(thrust::execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last) +{ + typedef typename thrust::iterator_value::type value_type; + thrust::sort(exec, first, last, thrust::less()); +} // end sort() + + +template + void sort(thrust::execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp) +{ + // implement with stable_sort + thrust::stable_sort(exec, first, last, comp); +} // end sort() + + +template + void sort_by_key(thrust::execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first) +{ + typedef typename thrust::iterator_value::type value_type; + thrust::sort_by_key(exec, keys_first, keys_last, values_first, thrust::less()); +} // end sort_by_key() + + +template + void sort_by_key(thrust::execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp) +{ + // implement with stable_sort_by_key + thrust::stable_sort_by_key(exec, keys_first, keys_last, values_first, comp); +} // end sort_by_key() + + +template + void stable_sort(thrust::execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last) +{ + typedef typename thrust::iterator_value::type value_type; + thrust::stable_sort(exec, first, last, thrust::less()); +} // end stable_sort() + + +template + void stable_sort_by_key(thrust::execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first) +{ + typedef typename iterator_value::type value_type; + thrust::stable_sort_by_key(exec, keys_first, keys_last, values_first, thrust::less()); +} // end 
stable_sort_by_key() + + +template + bool is_sorted(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last) +{ + return thrust::is_sorted_until(exec, first, last) == last; +} // end is_sorted() + + +template + bool is_sorted(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Compare comp) +{ + return thrust::is_sorted_until(exec, first, last, comp) == last; +} // end is_sorted() + + +template + ForwardIterator is_sorted_until(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last) +{ + typedef typename thrust::iterator_value::type InputType; + + return thrust::is_sorted_until(exec, first, last, thrust::less()); +} // end is_sorted_until() + + +template + ForwardIterator is_sorted_until(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Compare comp) +{ + if(thrust::distance(first,last) < 2) return last; + + typedef thrust::tuple IteratorTuple; + typedef thrust::zip_iterator ZipIterator; + + ForwardIterator first_plus_one = first; + thrust::advance(first_plus_one, 1); + + ZipIterator zipped_first = thrust::make_zip_iterator(thrust::make_tuple(first_plus_one, first)); + ZipIterator zipped_last = thrust::make_zip_iterator(thrust::make_tuple(last, first)); + + return thrust::get<0>(thrust::find_if(exec, zipped_first, zipped_last, thrust::detail::tuple_binary_predicate(comp)).get_iterator_tuple()); +} // end is_sorted_until() + + +template + void stable_sort(tag, + RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp) +{ + // unimplemented primitive + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); +} // end stable_sort() + + +template + void stable_sort_by_key(tag, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp) +{ + // unimplemented primitive + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) 
); +} // end stable_sort_by_key() + + +} // end generic +} // end detail +} // end system +} // end thrust + diff --git a/compat/thrust/system/detail/generic/swap_ranges.h b/compat/thrust/system/detail/generic/swap_ranges.h new file mode 100644 index 0000000..5d640d3 --- /dev/null +++ b/compat/thrust/system/detail/generic/swap_ranges.h @@ -0,0 +1,46 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + ForwardIterator2 swap_ranges(thrust::execution_policy &exec, + ForwardIterator1 first1, + ForwardIterator1 last1, + ForwardIterator2 first2); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/swap_ranges.inl b/compat/thrust/system/detail/generic/swap_ranges.inl new file mode 100644 index 0000000..0e12d07 --- /dev/null +++ b/compat/thrust/system/detail/generic/swap_ranges.inl @@ -0,0 +1,73 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ +namespace detail +{ + +// XXX define this here rather than in internal_functional.h +// to avoid circular dependence between swap.h & internal_functional.h +struct swap_pair_elements +{ + template + __host__ __device__ + void operator()(Tuple t) + { + // use unqualified swap to allow ADL to catch any user-defined swap + using thrust::swap; + swap(thrust::get<0>(t), thrust::get<1>(t)); + } +}; // end swap_pair_elements + +} // end detail + +template + ForwardIterator2 swap_ranges(thrust::execution_policy &exec, + ForwardIterator1 first1, + ForwardIterator1 last1, + ForwardIterator2 first2) +{ + typedef thrust::tuple IteratorTuple; + typedef thrust::zip_iterator ZipIterator; + + ZipIterator result = thrust::for_each(exec, + thrust::make_zip_iterator(thrust::make_tuple(first1, first2)), + thrust::make_zip_iterator(thrust::make_tuple(last1, first2)), + detail::swap_pair_elements()); + return thrust::get<1>(result.get_iterator_tuple()); +} // end swap_ranges() + +} // end generic +} // end detail +} // end system +} // end thrust + diff --git a/compat/thrust/system/detail/generic/tabulate.h b/compat/thrust/system/detail/generic/tabulate.h new file mode 100644 index 0000000..e5911b1 --- /dev/null +++ b/compat/thrust/system/detail/generic/tabulate.h @@ -0,0 +1,55 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use 
this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + void tabulate(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + UnaryOperation unary_op); + +template + OutputIterator tabulate_n(thrust::execution_policy &exec, + OutputIterator first, + Size n, + UnaryOperation unary_op); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/tabulate.inl b/compat/thrust/system/detail/generic/tabulate.inl new file mode 100644 index 0000000..d2ffc26 --- /dev/null +++ b/compat/thrust/system/detail/generic/tabulate.inl @@ -0,0 +1,59 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + void tabulate(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + UnaryOperation unary_op) +{ + typedef typename iterator_difference::type difference_type; + + // by default, counting_iterator uses a 64b difference_type on 32b platforms to avoid overflowing its counter. + // this causes problems when a zip_iterator is created in transform's implementation -- ForwardIterator is + // incremented by a 64b difference_type and some compilers warn + // to avoid this, specify the counting_iterator's difference_type to be the same as ForwardIterator's. + thrust::counting_iterator iter(0); + + thrust::transform(exec, iter, iter + thrust::distance(first, last), first, unary_op); +} // end tabulate() + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + + diff --git a/compat/thrust/system/detail/generic/tag.h b/compat/thrust/system/detail/generic/tag.h new file mode 100644 index 0000000..577d6a3 --- /dev/null +++ b/compat/thrust/system/detail/generic/tag.h @@ -0,0 +1,48 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file generic/tag.h + * \brief Implementation of the generic backend's tag. 
+ */ + +#pragma once + +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +// tag exists only to make the generic entry points the least priority match +// during ADL. tag should not be derived from and is constructible from anything +struct tag +{ + template + __host__ __device__ inline + tag(const T &) {} +}; + +} // end generic +} // end detail +} // end system +} // end thrust + diff --git a/compat/thrust/system/detail/generic/temporary_buffer.h b/compat/thrust/system/detail/generic/temporary_buffer.h new file mode 100644 index 0000000..8cb08b0 --- /dev/null +++ b/compat/thrust/system/detail/generic/temporary_buffer.h @@ -0,0 +1,49 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + thrust::pair, typename thrust::pointer::difference_type> + get_temporary_buffer(thrust::execution_policy &exec, typename thrust::pointer::difference_type n); + + +template + void return_temporary_buffer(thrust::execution_policy &exec, Pointer p); + + +} // end generic +} // end detail +} // end system +} // end thrust + +#include + diff --git a/compat/thrust/system/detail/generic/temporary_buffer.inl b/compat/thrust/system/detail/generic/temporary_buffer.inl new file mode 100644 index 0000000..0a6be7e --- /dev/null +++ b/compat/thrust/system/detail/generic/temporary_buffer.inl @@ -0,0 +1,60 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + thrust::pair, typename thrust::pointer::difference_type> + get_temporary_buffer(thrust::execution_policy &exec, typename thrust::pointer::difference_type n) +{ + thrust::pointer ptr = thrust::malloc(exec, n); + + // check for a failed malloc + if(!ptr.get()) + { + n = 0; + } // end if + + return thrust::make_pair(ptr, n); +} // end get_temporary_buffer() + + +template + void return_temporary_buffer(thrust::execution_policy &exec, Pointer p) +{ + thrust::free(exec, p); +} // end return_temporary_buffer() + + +} // end generic +} // end detail +} // end system +} // end thrust + diff --git a/compat/thrust/system/detail/generic/transform.h b/compat/thrust/system/detail/generic/transform.h new file mode 100644 index 0000000..e98d402 --- /dev/null +++ b/compat/thrust/system/detail/generic/transform.h @@ -0,0 +1,101 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + OutputIterator transform(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + UnaryFunction op); + +template + OutputIterator transform(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryFunction op); + +template + ForwardIterator transform_if(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + ForwardIterator result, + UnaryFunction unary_op, + Predicate pred); + +template + ForwardIterator transform_if(thrust::execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + ForwardIterator result, + UnaryFunction unary_op, + Predicate pred); + +template + ForwardIterator transform_if(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator3 stencil, + ForwardIterator result, + BinaryFunction binary_op, + Predicate pred); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/transform.inl b/compat/thrust/system/detail/generic/transform.inl new file mode 100644 index 0000000..8f09953 --- /dev/null +++ b/compat/thrust/system/detail/generic/transform.inl @@ -0,0 +1,214 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + OutputIterator transform(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + UnaryFunction op) +{ + // XXX WAR the problem of a generic __host__ __device__ functor's inability to invoke + // a function which is only __host__ or __device__ by selecting a generic functor + // which is one or the other + // when nvcc is able to deal with this, remove this WAR + + // given the minimal system, determine the unary transform functor we need + typedef typename thrust::detail::unary_transform_functor::type UnaryTransformFunctor; + + // make an iterator tuple + typedef thrust::tuple IteratorTuple; + typedef thrust::zip_iterator ZipIterator; + + ZipIterator zipped_result = + thrust::for_each(exec, + thrust::make_zip_iterator(thrust::make_tuple(first,result)), + thrust::make_zip_iterator(thrust::make_tuple(last,result)), + UnaryTransformFunctor(op)); + + return thrust::get<1>(zipped_result.get_iterator_tuple()); +} // end transform() + + +template + OutputIterator transform(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryFunction op) +{ + // XXX WAR the problem of a generic __host__ __device__ functor's inability to invoke + // a function which is only __host__ or __device__ by selecting a generic functor + // which is one or the 
other + // when nvcc is able to deal with this, remove this WAR + + // given the minimal system, determine the binary transform functor we need + typedef typename thrust::detail::binary_transform_functor::type BinaryTransformFunctor; + + // make an iterator tuple + typedef thrust::tuple IteratorTuple; + typedef thrust::zip_iterator ZipIterator; + + ZipIterator zipped_result = + thrust::for_each(exec, + thrust::make_zip_iterator(thrust::make_tuple(first1,first2,result)), + thrust::make_zip_iterator(thrust::make_tuple(last1,first2,result)), + BinaryTransformFunctor(op)); + + return thrust::get<2>(zipped_result.get_iterator_tuple()); +} // end transform() + + +template + ForwardIterator transform_if(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + ForwardIterator result, + UnaryFunction unary_op, + Predicate pred) +{ + // XXX WAR the problem of a generic __host__ __device__ functor's inability to invoke + // a function which is only __host__ or __device__ by selecting a generic functor + // which is one or the other + // when nvcc is able to deal with this, remove this WAR + + // given the minimal system, determine the unary transform_if functor we need + typedef typename thrust::detail::unary_transform_if_functor::type UnaryTransformIfFunctor; + + // make an iterator tuple + typedef thrust::tuple IteratorTuple; + typedef thrust::zip_iterator ZipIterator; + + ZipIterator zipped_result = + thrust::for_each(exec, + thrust::make_zip_iterator(thrust::make_tuple(first,result)), + thrust::make_zip_iterator(thrust::make_tuple(last,result)), + UnaryTransformIfFunctor(unary_op,pred)); + + return thrust::get<1>(zipped_result.get_iterator_tuple()); +} // end transform_if() + + +template + ForwardIterator transform_if(thrust::execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + ForwardIterator result, + UnaryFunction unary_op, + Predicate pred) +{ + // XXX WAR the problem of a generic __host__ __device__ 
functor's inability to invoke + // a function which is only __host__ or __device__ by selecting a generic functor + // which is one or the other + // when nvcc is able to deal with this, remove this WAR + + // given the minimal system, determine the unary transform_if functor we need + typedef typename thrust::detail::unary_transform_if_with_stencil_functor::type UnaryTransformIfFunctor; + + // make an iterator tuple + typedef thrust::tuple IteratorTuple; + typedef thrust::zip_iterator ZipIterator; + + ZipIterator zipped_result = + thrust::for_each(exec, + thrust::make_zip_iterator(thrust::make_tuple(first,stencil,result)), + thrust::make_zip_iterator(thrust::make_tuple(last,stencil,result)), + UnaryTransformIfFunctor(unary_op,pred)); + + return thrust::get<2>(zipped_result.get_iterator_tuple()); +} // end transform_if() + + +template + ForwardIterator transform_if(thrust::execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator3 stencil, + ForwardIterator result, + BinaryFunction binary_op, + Predicate pred) +{ + // XXX WAR the problem of a generic __host__ __device__ functor's inability to invoke + // a function which is only __host__ or __device__ by selecting a generic functor + // which is one or the other + // when nvcc is able to deal with this, remove this WAR + + // given the minimal system, determine the binary transform_if functor we need + typedef typename thrust::detail::binary_transform_if_functor::type BinaryTransformIfFunctor; + + // make an iterator tuple + typedef thrust::tuple IteratorTuple; + typedef thrust::zip_iterator ZipIterator; + + ZipIterator zipped_result = + thrust::for_each(exec, + thrust::make_zip_iterator(thrust::make_tuple(first1,first2,stencil,result)), + thrust::make_zip_iterator(thrust::make_tuple(last1,first2,stencil,result)), + BinaryTransformIfFunctor(binary_op,pred)); + + return thrust::get<3>(zipped_result.get_iterator_tuple()); +} // end transform_if() + + +} // end 
generic +} // end detail +} // end system +} // end thrust + diff --git a/compat/thrust/system/detail/generic/transform_reduce.h b/compat/thrust/system/detail/generic/transform_reduce.h new file mode 100644 index 0000000..c1f098f --- /dev/null +++ b/compat/thrust/system/detail/generic/transform_reduce.h @@ -0,0 +1,50 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + OutputType transform_reduce(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + UnaryFunction unary_op, + OutputType init, + BinaryFunction binary_op); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/transform_reduce.inl b/compat/thrust/system/detail/generic/transform_reduce.inl new file mode 100644 index 0000000..ce8b6a1 --- /dev/null +++ b/compat/thrust/system/detail/generic/transform_reduce.inl @@ -0,0 +1,53 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + OutputType transform_reduce(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + UnaryFunction unary_op, + OutputType init, + BinaryFunction binary_op) +{ + thrust::transform_iterator xfrm_first(first, unary_op); + thrust::transform_iterator xfrm_last(last, unary_op); + + return thrust::reduce(exec, xfrm_first, xfrm_last, init, binary_op); +} // end transform_reduce() + +} // end generic +} // end detail +} // end system +} // end thrust + diff --git a/compat/thrust/system/detail/generic/transform_scan.h b/compat/thrust/system/detail/generic/transform_scan.h new file mode 100644 index 0000000..99db86e --- /dev/null +++ b/compat/thrust/system/detail/generic/transform_scan.h @@ -0,0 +1,64 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + OutputIterator transform_inclusive_scan(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + UnaryFunction unary_op, + BinaryFunction binary_op); + +template + OutputIterator transform_exclusive_scan(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + UnaryFunction unary_op, + T init, + AssociativeOperator binary_op); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/transform_scan.inl b/compat/thrust/system/detail/generic/transform_scan.inl new file mode 100644 index 0000000..a95ec20 --- /dev/null +++ b/compat/thrust/system/detail/generic/transform_scan.inl @@ -0,0 +1,124 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + OutputIterator transform_inclusive_scan(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + UnaryFunction unary_op, + BinaryFunction binary_op) +{ + // the pseudocode for deducing the type of the temporary used below: + // + // if UnaryFunction is AdaptableUnaryFunction + // TemporaryType = AdaptableUnaryFunction::result_type + // else if OutputIterator is a "pure" output iterator + // TemporaryType = InputIterator::value_type + // else + // TemporaryType = OutputIterator::value_type + // + // XXX upon c++0x, TemporaryType needs to be: + // result_of::type + + typedef typename thrust::detail::eval_if< + thrust::detail::has_result_type::value, + thrust::detail::result_type, + thrust::detail::eval_if< + thrust::detail::is_output_iterator::value, + thrust::iterator_value, + thrust::iterator_value + > + >::type ValueType; + + thrust::transform_iterator _first(first, unary_op); + thrust::transform_iterator _last(last, unary_op); + + return thrust::inclusive_scan(exec, _first, _last, result, binary_op); +} // end transform_inclusive_scan() + +template + OutputIterator transform_exclusive_scan(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + UnaryFunction unary_op, + T init, + AssociativeOperator binary_op) +{ + // the pseudocode for deducing the type of the temporary used below: + // + // if UnaryFunction is AdaptableUnaryFunction + // TemporaryType = AdaptableUnaryFunction::result_type + // else if OutputIterator is a "pure" output iterator + // TemporaryType = InputIterator::value_type + // else + // TemporaryType = OutputIterator::value_type + // + // XXX upon c++0x, TemporaryType needs to be: + // result_of::type + + typedef typename thrust::detail::eval_if< + 
thrust::detail::has_result_type::value, + thrust::detail::result_type, + thrust::detail::eval_if< + thrust::detail::is_output_iterator::value, + thrust::iterator_value, + thrust::iterator_value + > + >::type ValueType; + + thrust::transform_iterator _first(first, unary_op); + thrust::transform_iterator _last(last, unary_op); + + return thrust::exclusive_scan(exec, _first, _last, result, init, binary_op); +} // end transform_exclusive_scan() + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + + diff --git a/compat/thrust/system/detail/generic/type_traits.h b/compat/thrust/system/detail/generic/type_traits.h new file mode 100644 index 0000000..4011352 --- /dev/null +++ b/compat/thrust/system/detail/generic/type_traits.h @@ -0,0 +1,168 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file generic/type_traits.h + * \brief Introspection for free functions defined in generic. 
+ */ + +#pragma once + +#include + +namespace thrust +{ + +// forward declaration of any_system_tag for any_conversion below +struct any_system_tag; + +namespace system +{ +namespace detail +{ + +// we must define these traits outside of generic's namespace +namespace generic_type_traits_ns +{ + +typedef char yes; +typedef char (&no)[2]; + +struct any_conversion +{ + template any_conversion(const T &); + + // add this extra constructor to disambiguate conversion from any_system_tag + any_conversion(const any_system_tag &); +}; + +namespace select_system_exists_ns +{ + no select_system(const any_conversion &); + no select_system(const any_conversion &, const any_conversion &); + no select_system(const any_conversion &, const any_conversion &, const any_conversion &); + no select_system(const any_conversion &, const any_conversion &, const any_conversion &, const any_conversion &); + no select_system(const any_conversion &, const any_conversion &, const any_conversion &, const any_conversion &, const any_conversion &); + no select_system(const any_conversion &, const any_conversion &, const any_conversion &, const any_conversion &, const any_conversion &, const any_conversion &); + + template yes check(const T &); + + no check(no); + + template + struct select_system1_exists + { + static Tag &tag; + + static const bool value = sizeof(check(select_system(tag))) == sizeof(yes); + }; + + template + struct select_system2_exists + { + static Tag1 &tag1; + static Tag2 &tag2; + + static const bool value = sizeof(check(select_system(tag1,tag2))) == sizeof(yes); + }; + + template + struct select_system3_exists + { + static Tag1 &tag1; + static Tag2 &tag2; + static Tag3 &tag3; + + static const bool value = sizeof(check(select_system(tag1,tag2,tag3))) == sizeof(yes); + }; + + template + struct select_system4_exists + { + static Tag1 &tag1; + static Tag2 &tag2; + static Tag3 &tag3; + static Tag4 &tag4; + + static const bool value = 
sizeof(check(select_system(tag1,tag2,tag3,tag4))) == sizeof(yes); + }; + + template + struct select_system5_exists + { + static Tag1 &tag1; + static Tag2 &tag2; + static Tag3 &tag3; + static Tag4 &tag4; + static Tag5 &tag5; + + static const bool value = sizeof(check(select_system(tag1,tag2,tag3,tag4,tag5))) == sizeof(yes); + }; + + template + struct select_system6_exists + { + static Tag1 &tag1; + static Tag2 &tag2; + static Tag3 &tag3; + static Tag4 &tag4; + static Tag5 &tag5; + static Tag6 &tag6; + + static const bool value = sizeof(check(select_system(tag1,tag2,tag3,tag4,tag5,tag6))) == sizeof(yes); + }; +} // end select_system_exists_ns + +} // end generic_type_traits_ns + +namespace generic +{ + +template + struct select_system1_exists + : generic_type_traits_ns::select_system_exists_ns::select_system1_exists +{}; + +template + struct select_system2_exists + : generic_type_traits_ns::select_system_exists_ns::select_system2_exists +{}; + +template + struct select_system3_exists + : generic_type_traits_ns::select_system_exists_ns::select_system3_exists +{}; + +template + struct select_system4_exists + : generic_type_traits_ns::select_system_exists_ns::select_system4_exists +{}; + +template + struct select_system5_exists + : generic_type_traits_ns::select_system_exists_ns::select_system5_exists +{}; + +template + struct select_system6_exists + : generic_type_traits_ns::select_system_exists_ns::select_system6_exists +{}; + +} // end generic +} // end detail +} // end system +} // end thrust + diff --git a/compat/thrust/system/detail/generic/uninitialized_copy.h b/compat/thrust/system/detail/generic/uninitialized_copy.h new file mode 100644 index 0000000..67e3e68 --- /dev/null +++ b/compat/thrust/system/detail/generic/uninitialized_copy.h @@ -0,0 +1,55 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + ForwardIterator uninitialized_copy(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + ForwardIterator result); + +template + ForwardIterator uninitialized_copy_n(thrust::execution_policy &exec, + InputIterator first, + Size n, + ForwardIterator result); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/uninitialized_copy.inl b/compat/thrust/system/detail/generic/uninitialized_copy.inl new file mode 100644 index 0000000..414e6e4 --- /dev/null +++ b/compat/thrust/system/detail/generic/uninitialized_copy.inl @@ -0,0 +1,187 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ +namespace detail +{ + +template + struct uninitialized_copy_functor +{ + template + __host__ __device__ + void operator()(Tuple t) + { + const InputType &in = thrust::get<0>(t); + OutputType &out = thrust::get<1>(t); + + ::new(static_cast(&out)) OutputType(in); + } // end operator()() +}; // end uninitialized_copy_functor + + +// non-trivial copy constructor path +template + ForwardIterator uninitialized_copy(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + ForwardIterator result, + thrust::detail::false_type) // has_trivial_copy_constructor +{ + // zip up the iterators + typedef thrust::tuple IteratorTuple; + typedef thrust::zip_iterator ZipIterator; + + ZipIterator begin = thrust::make_zip_iterator(thrust::make_tuple(first,result)); + ZipIterator end = begin; + + // get a zip_iterator pointing to the end + const typename thrust::iterator_difference::type n = thrust::distance(first,last); + thrust::advance(end, n); + + // create a functor + typedef typename iterator_traits::value_type InputType; + typedef typename iterator_traits::value_type OutputType; + + detail::uninitialized_copy_functor f; + + // do the for_each + thrust::for_each(exec, begin, end, f); + + // return the end of the output range + return thrust::get<1>(end.get_iterator_tuple()); +} // end uninitialized_copy() + + +// trivial copy constructor path +template + ForwardIterator uninitialized_copy(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + ForwardIterator result, + thrust::detail::true_type) // has_trivial_copy_constructor +{ + return thrust::copy(exec, first, last, result); +} // end uninitialized_copy() + + +// non-trivial copy constructor path +template + ForwardIterator uninitialized_copy_n(thrust::execution_policy &exec, + InputIterator first, + Size n, + 
ForwardIterator result, + thrust::detail::false_type) // has_trivial_copy_constructor +{ + // zip up the iterators + typedef thrust::tuple IteratorTuple; + typedef thrust::zip_iterator ZipIterator; + + ZipIterator zipped_first = thrust::make_zip_iterator(thrust::make_tuple(first,result)); + + // create a functor + typedef typename iterator_traits::value_type InputType; + typedef typename iterator_traits::value_type OutputType; + + detail::uninitialized_copy_functor f; + + // do the for_each_n + ZipIterator zipped_last = thrust::for_each_n(exec, zipped_first, n, f); + + // return the end of the output range + return thrust::get<1>(zipped_last.get_iterator_tuple()); +} // end uninitialized_copy_n() + + +// trivial copy constructor path +template + ForwardIterator uninitialized_copy_n(thrust::execution_policy &exec, + InputIterator first, + Size n, + ForwardIterator result, + thrust::detail::true_type) // has_trivial_copy_constructor +{ + return thrust::copy_n(exec, first, n, result); +} // end uninitialized_copy_n() + + +} // end detail + + +template + ForwardIterator uninitialized_copy(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + ForwardIterator result) +{ + typedef typename iterator_traits::value_type ResultType; + + typedef typename thrust::detail::has_trivial_copy_constructor::type ResultTypeHasTrivialCopyConstructor; + + return thrust::system::detail::generic::detail::uninitialized_copy(exec, first, last, result, ResultTypeHasTrivialCopyConstructor()); +} // end uninitialized_copy() + + +template + ForwardIterator uninitialized_copy_n(thrust::execution_policy &exec, + InputIterator first, + Size n, + ForwardIterator result) +{ + typedef typename iterator_traits::value_type ResultType; + + typedef typename thrust::detail::has_trivial_copy_constructor::type ResultTypeHasTrivialCopyConstructor; + + return thrust::system::detail::generic::detail::uninitialized_copy_n(exec, first, n, result, ResultTypeHasTrivialCopyConstructor()); +} 
// end uninitialized_copy_n() + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/uninitialized_fill.h b/compat/thrust/system/detail/generic/uninitialized_fill.h new file mode 100644 index 0000000..c1df694 --- /dev/null +++ b/compat/thrust/system/detail/generic/uninitialized_fill.h @@ -0,0 +1,55 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + +template + void uninitialized_fill(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + const T &x); + +template + ForwardIterator uninitialized_fill_n(thrust::execution_policy &exec, + ForwardIterator first, + Size n, + const T &x); + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/uninitialized_fill.inl b/compat/thrust/system/detail/generic/uninitialized_fill.inl new file mode 100644 index 0000000..bb30b24 --- /dev/null +++ b/compat/thrust/system/detail/generic/uninitialized_fill.inl @@ -0,0 +1,128 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ +namespace detail +{ + +template + void uninitialized_fill(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + const T &x, + thrust::detail::true_type) // has_trivial_copy_constructor +{ + thrust::fill(exec, first, last, x); +} // end uninitialized_fill() + +template + void uninitialized_fill(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + const T &x, + thrust::detail::false_type) // has_trivial_copy_constructor +{ + typedef typename iterator_traits::value_type ValueType; + + thrust::for_each(exec, first, last, thrust::detail::uninitialized_fill_functor(x)); +} // end uninitialized_fill() + +template + ForwardIterator uninitialized_fill_n(thrust::execution_policy &exec, + ForwardIterator first, + Size n, + const T &x, + thrust::detail::true_type) // has_trivial_copy_constructor +{ + return thrust::fill_n(exec, first, n, x); +} // end uninitialized_fill_n() + +template + ForwardIterator uninitialized_fill_n(thrust::execution_policy &exec, + ForwardIterator first, + Size n, + const T &x, + thrust::detail::false_type) // has_trivial_copy_constructor +{ + typedef typename iterator_traits::value_type ValueType; + + return thrust::for_each_n(exec, first, n, thrust::detail::uninitialized_fill_functor(x)); +} // end uninitialized_fill_n() + +} // end detail + +template + void uninitialized_fill(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + const T &x) +{ + typedef typename iterator_traits::value_type ValueType; + + typedef thrust::detail::has_trivial_copy_constructor ValueTypeHasTrivialCopyConstructor; + + thrust::system::detail::generic::detail::uninitialized_fill(exec, first, last, x, + ValueTypeHasTrivialCopyConstructor()); +} // end uninitialized_fill() + +template + ForwardIterator
uninitialized_fill_n(thrust::execution_policy &exec, + ForwardIterator first, + Size n, + const T &x) +{ + typedef typename iterator_traits::value_type ValueType; + + typedef thrust::detail::has_trivial_copy_constructor ValueTypeHasTrivialCopyConstructor; + + return thrust::system::detail::generic::detail::uninitialized_fill_n(exec, first, n, x, + ValueTypeHasTrivialCopyConstructor()); +} // end uninitialized_fill_n() + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/unique.h b/compat/thrust/system/detail/generic/unique.h new file mode 100644 index 0000000..57e17ca --- /dev/null +++ b/compat/thrust/system/detail/generic/unique.h @@ -0,0 +1,74 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template +ForwardIterator unique(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last); + + +template +ForwardIterator unique(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate binary_pred); + + +template +OutputIterator unique_copy(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator output); + + +template +OutputIterator unique_copy(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator output, + BinaryPredicate binary_pred); + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/generic/unique.inl b/compat/thrust/system/detail/generic/unique.inl new file mode 100644 index 0000000..42d6b15 --- /dev/null +++ b/compat/thrust/system/detail/generic/unique.inl @@ -0,0 +1,114 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file unique.inl + * \brief Inline file for unique.h. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + ForwardIterator unique(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last) +{ + typedef typename thrust::iterator_traits::value_type InputType; + + return thrust::unique(exec, first, last, thrust::equal_to()); +} // end unique() + + +template + ForwardIterator unique(thrust::execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate binary_pred) +{ + typedef typename thrust::iterator_traits::value_type InputType; + + thrust::detail::temporary_array input(exec, first, last); + + return thrust::unique_copy(exec, input.begin(), input.end(), first, binary_pred); +} // end unique() + + +template + OutputIterator unique_copy(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator output) +{ + typedef typename thrust::iterator_value::type value_type; + return thrust::unique_copy(exec, first,last,output,thrust::equal_to()); +} // end unique_copy() + + +template + OutputIterator unique_copy(thrust::execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator output, + BinaryPredicate binary_pred) +{ + // empty sequence + if(first == last) + return output; + + thrust::detail::temporary_array stencil(exec, thrust::distance(first, last)); + + // mark first element in each group + stencil[0] = 1; + thrust::transform(exec, first, last - 1, first + 1, stencil.begin() + 1, thrust::detail::not2(binary_pred)); + + return thrust::copy_if(exec, first, last, stencil.begin(), output, thrust::identity()); +} // end unique_copy() + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/generic/unique_by_key.h 
b/compat/thrust/system/detail/generic/unique_by_key.h new file mode 100644 index 0000000..aa62f73 --- /dev/null +++ b/compat/thrust/system/detail/generic/unique_by_key.h @@ -0,0 +1,91 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + thrust::pair + unique_by_key(thrust::execution_policy &exec, + ForwardIterator1 keys_first, + ForwardIterator1 keys_last, + ForwardIterator2 values_first); + + +template + thrust::pair + unique_by_key(thrust::execution_policy &exec, + ForwardIterator1 keys_first, + ForwardIterator1 keys_last, + ForwardIterator2 values_first, + BinaryPredicate binary_pred); + + +template + thrust::pair + unique_by_key_copy(thrust::execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output); + + +template + thrust::pair + unique_by_key_copy(thrust::execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred); + + +} // end namespace generic +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git 
a/compat/thrust/system/detail/generic/unique_by_key.inl b/compat/thrust/system/detail/generic/unique_by_key.inl new file mode 100644 index 0000000..c780fa7 --- /dev/null +++ b/compat/thrust/system/detail/generic/unique_by_key.inl @@ -0,0 +1,142 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace generic +{ + + +template + thrust::pair + unique_by_key(thrust::execution_policy &exec, + ForwardIterator1 keys_first, + ForwardIterator1 keys_last, + ForwardIterator2 values_first) +{ + typedef typename thrust::iterator_traits::value_type KeyType; + return thrust::unique_by_key(exec, keys_first, keys_last, values_first, thrust::equal_to()); +} // end unique_by_key() + + +template + thrust::pair + unique_by_key(thrust::execution_policy &exec, + ForwardIterator1 keys_first, + ForwardIterator1 keys_last, + ForwardIterator2 values_first, + BinaryPredicate binary_pred) +{ + typedef typename thrust::iterator_traits::value_type InputType1; + typedef typename thrust::iterator_traits::value_type InputType2; + + ForwardIterator2 values_last = values_first + (keys_last - keys_first); + + thrust::detail::temporary_array keys(exec, keys_first, keys_last); + thrust::detail::temporary_array vals(exec, values_first, values_last); + + return 
thrust::unique_by_key_copy(exec, keys.begin(), keys.end(), vals.begin(), keys_first, values_first, binary_pred); +} // end unique_by_key() + + +template + thrust::pair + unique_by_key_copy(thrust::execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output) +{ + typedef typename thrust::iterator_traits::value_type KeyType; + return thrust::unique_by_key_copy(exec, keys_first, keys_last, values_first, keys_output, values_output, thrust::equal_to()); +} // end unique_by_key_copy() + + +template + thrust::pair + unique_by_key_copy(thrust::execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred) +{ + typedef typename thrust::iterator_traits::difference_type difference_type; + + // empty sequence + if(keys_first == keys_last) + return thrust::make_pair(keys_output, values_output); + + difference_type n = thrust::distance(keys_first, keys_last); + + thrust::detail::temporary_array stencil(exec,n); + + // mark first element in each group + stencil[0] = 1; + thrust::transform(exec, keys_first, keys_last - 1, keys_first + 1, stencil.begin() + 1, thrust::detail::not2(binary_pred)); + + thrust::zip_iterator< thrust::tuple > result = + thrust::copy_if(exec, + thrust::make_zip_iterator(thrust::make_tuple(keys_first, values_first)), + thrust::make_zip_iterator(thrust::make_tuple(keys_first, values_first)) + n, + stencil.begin(), + thrust::make_zip_iterator(thrust::make_tuple(keys_output, values_output)), + thrust::identity()); + + difference_type output_size = result - thrust::make_zip_iterator(thrust::make_tuple(keys_output, values_output)); + + return thrust::make_pair(keys_output + output_size, values_output + output_size); +} // end unique_by_key_copy() + + +} // end namespace generic +} // end namespace detail +} 
// end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/decompose.h b/compat/thrust/system/detail/internal/decompose.h new file mode 100644 index 0000000..dea806d --- /dev/null +++ b/compat/thrust/system/detail/internal/decompose.h @@ -0,0 +1,113 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ + + template + class index_range + { + public: + typedef IndexType index_type; + + __host__ __device__ + index_range(index_type begin, index_type end) : m_begin(begin), m_end(end) {} + + __host__ __device__ + index_type begin(void) const { return m_begin; } + + __host__ __device__ + index_type end(void) const { return m_end; } + + __host__ __device__ + index_type size(void) const { return m_end - m_begin; } + + private: + index_type m_begin; + index_type m_end; + }; + + template + class uniform_decomposition + { + public: + typedef IndexType index_type; + typedef index_range range_type; + + uniform_decomposition(index_type N, index_type granularity, index_type max_intervals) + : m_N(N), + m_intervals((N + granularity - 1) / granularity), + m_threshold(0), + m_small_interval(granularity), + m_large_interval(0) + { + if(m_intervals > max_intervals) + { + m_small_interval = granularity * (m_intervals / max_intervals); + m_large_interval = m_small_interval + 
granularity; + m_threshold = m_intervals % max_intervals; + m_intervals = max_intervals; + } + } + + __host__ __device__ + index_range operator[](const index_type& i) const + { + if (i < m_threshold) + { + index_type begin = m_large_interval * i; + index_type end = begin + m_large_interval; + return range_type(begin, end); + } + else + { + index_type begin = m_large_interval * m_threshold + m_small_interval * (i - m_threshold); + index_type end = (begin + m_small_interval < m_N) ? begin + m_small_interval : m_N; + return range_type(begin, end); + } + } + + __host__ __device__ + index_type size(void) const + { + return m_intervals; + } + + private: + + index_type m_N; + index_type m_intervals; + index_type m_threshold; + index_type m_small_interval; + index_type m_large_interval; + }; + + +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/adjacent_difference.h b/compat/thrust/system/detail/internal/scalar/adjacent_difference.h new file mode 100644 index 0000000..d1a95ae --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/adjacent_difference.h @@ -0,0 +1,70 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file adjacent_difference.h + * \brief Sequential implementation of adjacent_difference. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template +OutputIterator adjacent_difference(InputIterator first, + InputIterator last, + OutputIterator result, + BinaryFunction binary_op) +{ + typedef typename thrust::iterator_traits::value_type InputType; + + if (first == last) + return result; + + InputType curr = *first; + + *result = curr; + + while (++first != last) + { + InputType next = *first; + *(++result) = binary_op(next, curr); + curr = next; + } + + return ++result; +} + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/binary_search.h b/compat/thrust/system/detail/internal/scalar/binary_search.h new file mode 100644 index 0000000..c3ac49f --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/binary_search.h @@ -0,0 +1,143 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file binary_search.h + * \brief Sequential implementation of binary search algorithms. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template +ForwardIterator lower_bound(ForwardIterator first, + ForwardIterator last, + const T& val, + StrictWeakOrdering comp) +{ + // wrap comp + thrust::detail::host_function< + StrictWeakOrdering, + bool + > wrapped_comp(comp); + + typedef typename thrust::iterator_difference::type difference_type; + + difference_type len = thrust::distance(first, last); + + while(len > 0) + { + difference_type half = len >> 1; + ForwardIterator middle = first; + + thrust::advance(middle, half); + + if(wrapped_comp(*middle, val)) + { + first = middle; + ++first; + len = len - half - 1; + } + else + { + len = half; + } + } + + return first; +} + + +template +ForwardIterator upper_bound(ForwardIterator first, + ForwardIterator last, + const T& val, + StrictWeakOrdering comp) +{ + // wrap comp + thrust::detail::host_function< + StrictWeakOrdering, + bool + > wrapped_comp(comp); + + typedef typename thrust::iterator_difference::type difference_type; + + difference_type len = thrust::distance(first, last); + + while(len > 0) + { + difference_type half = len >> 1; + ForwardIterator middle = first; + + thrust::advance(middle, half); + + if(wrapped_comp(val, *middle)) + { + len = half; + } + else + { + first = middle; + ++first; + len = len - half - 1; + } + } + + return first; +} + +template +bool binary_search(ForwardIterator first, + ForwardIterator last, + const T& val, + StrictWeakOrdering comp) +{ + ForwardIterator iter = thrust::system::detail::internal::scalar::lower_bound(first, last, val, comp); + + // wrap comp + thrust::detail::host_function< + StrictWeakOrdering, + bool + > wrapped_comp(comp); + + return iter != last && !wrapped_comp(val,*iter); +} + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git 
a/compat/thrust/system/detail/internal/scalar/copy.h b/compat/thrust/system/detail/internal/scalar/copy.h new file mode 100644 index 0000000..42cb385 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/copy.h @@ -0,0 +1,56 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file copy.h + * \brief Sequential implementations of copy algorithms. + */ + +#pragma once + +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template + OutputIterator copy(InputIterator first, + InputIterator last, + OutputIterator result); + +template + OutputIterator copy_n(InputIterator first, + Size n, + OutputIterator result); + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/internal/scalar/copy.inl b/compat/thrust/system/detail/internal/scalar/copy.inl new file mode 100644 index 0000000..8c9f5c2 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/copy.inl @@ -0,0 +1,127 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ +namespace copy_detail +{ + + +// returns the raw pointer associated with a Pointer-like thing +template + typename thrust::detail::pointer_traits::raw_pointer + get(Pointer ptr) +{ + return thrust::detail::pointer_traits::get(ptr); +} + + +template + OutputIterator copy(InputIterator first, + InputIterator last, + OutputIterator result, + thrust::detail::true_type) // is_trivial_copy +{ + typedef typename thrust::iterator_difference::type Size; + + const Size n = last - first; + thrust::system::detail::internal::scalar::trivial_copy_n(get(&*first), n, get(&*result)); + return result + n; +} // end copy() + + +template + OutputIterator copy(InputIterator first, + InputIterator last, + OutputIterator result, + thrust::detail::false_type) // is_trivial_copy +{ + return thrust::system::detail::internal::scalar::general_copy(first,last,result); +} // end copy() + + +template + OutputIterator copy_n(InputIterator first, + Size n, + OutputIterator result, + thrust::detail::true_type) // is_trivial_copy +{ + thrust::system::detail::internal::scalar::trivial_copy_n(get(&*first), n, get(&*result)); + return result + n; +} // end copy_n() + + +template + OutputIterator copy_n(InputIterator first, + Size n, + OutputIterator result, + thrust::detail::false_type) // is_trivial_copy +{ + return thrust::system::detail::internal::scalar::general_copy_n(first,n,result); +} // end copy_n() + 
+} // end namespace copy_detail + + +template + OutputIterator copy(InputIterator first, + InputIterator last, + OutputIterator result) +{ + return thrust::system::detail::internal::scalar::copy_detail::copy(first, last, result, + typename thrust::detail::dispatch::is_trivial_copy::type()); +} // end copy() + + +template + OutputIterator copy_n(InputIterator first, + Size n, + OutputIterator result) +{ + return thrust::system::detail::internal::scalar::copy_detail::copy_n(first, n, result, + typename thrust::detail::dispatch::is_trivial_copy::type()); +} // end copy_n() + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/copy_backward.h b/compat/thrust/system/detail/internal/scalar/copy_backward.h new file mode 100644 index 0000000..36f8f66 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/copy_backward.h @@ -0,0 +1,53 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template +BidirectionalIterator2 copy_backward(BidirectionalIterator1 first, + BidirectionalIterator1 last, + BidirectionalIterator2 result) +{ + while (first != last) + { + --last; + --result; + *result = *last; + } + + return result; +} + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/copy_if.h b/compat/thrust/system/detail/internal/scalar/copy_if.h new file mode 100644 index 0000000..67f9402 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/copy_if.h @@ -0,0 +1,69 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file copy_if.h + * \brief Sequential implementation of copy_if. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template + OutputIterator copy_if(InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred) +{ + thrust::detail::host_function wrapped_pred(pred); + + while(first != last) + { + if(wrapped_pred(*stencil)) + { + *result = *first; + ++result; + } // end if + + ++first; + ++stencil; + } // end while + + return result; +} // end copy_if() + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/extrema.h b/compat/thrust/system/detail/internal/scalar/extrema.h new file mode 100644 index 0000000..ebea756 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/extrema.h @@ -0,0 +1,127 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file extrema.h + * \brief Sequential implementations of extrema functions. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template +ForwardIterator min_element(ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) +{ + // wrap comp + thrust::detail::host_function< + BinaryPredicate, + bool + > wrapped_comp(comp); + + ForwardIterator imin = first; + + for (; first != last; first++) + { + if (wrapped_comp(*first, *imin)) + { + imin = first; + } + } + + return imin; +} + + +template +ForwardIterator max_element(ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) +{ + // wrap comp + thrust::detail::host_function< + BinaryPredicate, + bool + > wrapped_comp(comp); + + ForwardIterator imax = first; + + for (; first != last; first++) + { + if (wrapped_comp(*imax, *first)) + { + imax = first; + } + } + + return imax; +} + + +template +thrust::pair minmax_element(ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) +{ + // wrap comp + thrust::detail::host_function< + BinaryPredicate, + bool + > wrapped_comp(comp); + + ForwardIterator imin = first; + ForwardIterator imax = first; + + for (; first != last; first++) + { + if (wrapped_comp(*first, *imin)) + { + imin = first; + } + + if (wrapped_comp(*imax, *first)) + { + imax = first; + } + } + + return thrust::make_pair(imin, imax); +} + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/find.h b/compat/thrust/system/detail/internal/scalar/find.h new file mode 100644 index 0000000..6b25021 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/find.h @@ -0,0 +1,67 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file find.h + * \brief Sequential implementation of find_if. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template +InputIterator find_if(InputIterator first, + InputIterator last, + Predicate pred) +{ + // wrap pred + thrust::detail::host_function< + Predicate, + bool + > wrapped_pred(pred); + + while(first != last) + { + if (wrapped_pred(*first)) + return first; + + ++first; + } + + // return first so zip_iterator works correctly + return first; +} + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/for_each.h b/compat/thrust/system/detail/internal/scalar/for_each.h new file mode 100644 index 0000000..4e31d91 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/for_each.h @@ -0,0 +1,87 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file for_each.h + * \brief Sequential implementations of for_each functions. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template +InputIterator for_each(InputIterator first, + InputIterator last, + UnaryFunction f) +{ + // wrap f + thrust::detail::host_function< + UnaryFunction, + void + > wrapped_f(f); + + for(; first != last; ++first) + { + wrapped_f(*first); + } + + return first; +} // end for_each() + +template +InputIterator for_each_n(InputIterator first, + Size n, + UnaryFunction f) +{ + // wrap f + thrust::detail::host_function< + UnaryFunction, + void + > wrapped_f(f); + + for(Size i = 0; i != n; i++) + { + // we can dereference an OutputIterator if f does not + // try to use the reference for anything besides assignment + wrapped_f(*first); + ++first; + } + + return first; +} // end for_each_n() + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/general_copy.h b/compat/thrust/system/detail/internal/scalar/general_copy.h new file mode 100644 index 0000000..aae061d --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/general_copy.h @@ -0,0 +1,65 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file general_copy.h + * \brief Sequential copy algorithms for general iterators. + */ + +#pragma once + +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template + OutputIterator general_copy(InputIterator first, + InputIterator last, + OutputIterator result) +{ + for(; first != last; ++first, ++result) + *result = *first; + return result; +} // end general_copy() + + +template + OutputIterator general_copy_n(InputIterator first, + Size n, + OutputIterator result) +{ + for(; n > Size(0); ++first, ++result, --n) + *result = *first; + return result; +} // end general_copy_n() + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/insertion_sort.h b/compat/thrust/system/detail/internal/scalar/insertion_sort.h new file mode 100644 index 0000000..5949ce7 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/insertion_sort.h @@ -0,0 +1,149 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template +void insertion_sort(RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp) +{ + typedef typename thrust::iterator_value::type value_type; + + if (first == last) return; + + // wrap comp + thrust::detail::host_function< + StrictWeakOrdering, + bool + > wrapped_comp(comp); + + for(RandomAccessIterator i = first + 1; i != last; ++i) + { + value_type tmp = *i; + + if (wrapped_comp(tmp, *first)) + { + // tmp is the smallest value encountered so far + thrust::system::detail::internal::scalar::copy_backward(first, i, i + 1); + + *first = tmp; + } + else + { + // tmp is not the smallest value, can avoid checking for j == first + RandomAccessIterator j = i; + RandomAccessIterator k = i - 1; + + while(wrapped_comp(tmp, *k)) + { + *j = *k; + j = k; + --k; + } + + *j = tmp; + } + } +} + +template +void insertion_sort_by_key(RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + StrictWeakOrdering comp) +{ + typedef typename thrust::iterator_value::type value_type1; + typedef typename thrust::iterator_value::type value_type2; + + if (first1 == last1) return; + + // wrap comp + thrust::detail::host_function< + StrictWeakOrdering, + bool + > wrapped_comp(comp); + + RandomAccessIterator1 i1 = first1 + 1; + RandomAccessIterator2 i2 = first2 + 1; + + for(; i1 != last1; ++i1, ++i2) + { + value_type1 tmp1 = *i1; + value_type2 tmp2 = *i2; + + if (wrapped_comp(tmp1, *first1)) + { + // tmp1 is the smallest key encountered so far + thrust::system::detail::internal::scalar::copy_backward(first1, i1, i1 + 1); + thrust::system::detail::internal::scalar::copy_backward(first2, i2, i2 + 1); + + *first1 = tmp1; + *first2 = tmp2; + } + else + { + // tmp1 is not the smallest key, can avoid checking for j1 == first1 + RandomAccessIterator1 j1 =
i1; + RandomAccessIterator1 k1 = i1 - 1; + + RandomAccessIterator2 j2 = i2; + RandomAccessIterator2 k2 = i2 - 1; + + while(wrapped_comp(tmp1, *k1)) + { + *j1 = *k1; + *j2 = *k2; + + j1 = k1; + j2 = k2; + + --k1; + --k2; + } + + *j1 = tmp1; + *j2 = tmp2; + } + } +} + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/merge.h b/compat/thrust/system/detail/internal/scalar/merge.h new file mode 100644 index 0000000..c02fca4 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/merge.h @@ -0,0 +1,73 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file merge.h + * \brief Sequential implementation of merge algorithms.
+ */ + +#pragma once + +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template +OutputIterator merge(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp); + +template +thrust::pair + merge_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakOrdering comp); + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/internal/scalar/merge.inl b/compat/thrust/system/detail/internal/scalar/merge.inl new file mode 100644 index 0000000..a7c2a39 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/merge.inl @@ -0,0 +1,145 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template +OutputIterator merge(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + // wrap comp + thrust::detail::host_function< + StrictWeakOrdering, + bool + > wrapped_comp(comp); + + while(first1 != last1 && first2 != last2) + { + if(wrapped_comp(*first2, *first1)) + { + *result = *first2; + ++first2; + } // end if + else + { + *result = *first1; + ++first1; + } // end else + + ++result; + } // end while + + return thrust::system::detail::internal::scalar::copy(first2, last2, thrust::system::detail::internal::scalar::copy(first1, last1, result)); +} // end merge() + + +template +thrust::pair + merge_by_key(InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakOrdering comp) +{ + // wrap comp + thrust::detail::host_function< + StrictWeakOrdering, + bool + > wrapped_comp(comp); + + while(keys_first1 != keys_last1 && keys_first2 != keys_last2) + { + if(!wrapped_comp(*keys_first2, *keys_first1)) + { + // *keys_first1 <= *keys_first2 + *keys_result = *keys_first1; + *values_result = *values_first1; + ++keys_first1; + ++values_first1; + } + else + { + // *keys_first1 > *keys_first2 + *keys_result = *keys_first2; + *values_result = *values_first2; + ++keys_first2; + ++values_first2; + } + + ++keys_result; + ++values_result; + } + + while(keys_first1 != keys_last1) + { + *keys_result = *keys_first1; + *values_result = *values_first1; + ++keys_first1; + ++values_first1; + ++keys_result; + ++values_result; + } + + while(keys_first2 != keys_last2) + { + *keys_result = *keys_first2; + *values_result = *values_first2; +
++keys_first2; + ++values_first2; + ++keys_result; + ++values_result; + } + + return thrust::make_pair(keys_result, values_result); +} + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/partition.h b/compat/thrust/system/detail/internal/scalar/partition.h new file mode 100644 index 0000000..7ba677e --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/partition.h @@ -0,0 +1,262 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file partition.h + * \brief Sequential implementations of partition functions.
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template +void iter_swap(ForwardIterator1 iter1, ForwardIterator2 iter2) +{ + // XXX this isn't correct because it doesn't use thrust::swap + using namespace thrust::detail; + + typedef typename thrust::iterator_value::type T; + + T temp = *iter1; + *iter1 = *iter2; + *iter2 = temp; +} + +template + ForwardIterator partition(ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + if (first == last) + return first; + + // wrap pred + thrust::detail::host_function< + Predicate, + bool + > wrapped_pred(pred); + + while (wrapped_pred(*first)) + { + if (++first == last) + return first; + } + + ForwardIterator next = first; + + while (++next != last) + { + if (wrapped_pred(*next)) + { + iter_swap(first, next); + ++first; + } + } + + return first; +} + +template + ForwardIterator stable_partition(ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + // wrap pred + thrust::detail::host_function< + Predicate, + bool + > wrapped_pred(pred); + + // XXX the type of exec should be: + // typedef decltype(select_system(first, last)) system; + typedef typename thrust::iterator_system::type ExecutionPolicy; + typedef typename thrust::iterator_value::type T; + + typedef thrust::detail::temporary_array TempRange; + typedef typename TempRange::iterator TempIterator; + + // XXX presumes ExecutionPolicy is default constructible + ExecutionPolicy exec; + TempRange temp(exec, first, last); + + for(TempIterator iter = temp.begin(); iter != temp.end(); ++iter) + { + if (wrapped_pred(*iter)) + { + *first = *iter; + ++first; + } + } + + ForwardIterator middle = first; + + for(TempIterator iter = temp.begin(); iter != temp.end(); ++iter) + { + if (!wrapped_pred(*iter)) + { + *first = *iter; + ++first; + } + } + + return middle; +} + +template + ForwardIterator stable_partition(ForwardIterator 
first, + ForwardIterator last, + InputIterator stencil, + Predicate pred) +{ + // wrap pred + thrust::detail::host_function< + Predicate, + bool + > wrapped_pred(pred); + + // XXX the type of exec should be: + // typedef decltype(select_system(first, stencil)) system; + typedef typename thrust::iterator_system::type ExecutionPolicy; + typedef typename thrust::iterator_value::type T; + + typedef thrust::detail::temporary_array TempRange; + typedef typename TempRange::iterator TempIterator; + + // XXX presumes ExecutionPolicy is default constructible + ExecutionPolicy exec; + TempRange temp(exec, first, last); + + InputIterator stencil_iter = stencil; + for(TempIterator iter = temp.begin(); iter != temp.end(); ++iter, ++stencil_iter) + { + if (wrapped_pred(*stencil_iter)) + { + *first = *iter; + ++first; + } + } + + ForwardIterator middle = first; + stencil_iter = stencil; + + for(TempIterator iter = temp.begin(); iter != temp.end(); ++iter, ++stencil_iter) + { + if (!wrapped_pred(*stencil_iter)) + { + *first = *iter; + ++first; + } + } + + return middle; +} + +template + thrust::pair + stable_partition_copy(InputIterator first, + InputIterator last, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + // wrap pred + thrust::detail::host_function< + Predicate, + bool + > wrapped_pred(pred); + + for(; first != last; ++first) + { + if(wrapped_pred(*first)) + { + *out_true = *first; + ++out_true; + } // end if + else + { + *out_false = *first; + ++out_false; + } // end else + } + + return thrust::make_pair(out_true, out_false); +} + +template + thrust::pair + stable_partition_copy(InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + // wrap pred + thrust::detail::host_function< + Predicate, + bool + > wrapped_pred(pred); + + for(; first != last; ++first, ++stencil) + { + if(wrapped_pred(*stencil)) + { + *out_true = *first; + ++out_true; + } // end 
if + else + { + *out_false = *first; + ++out_false; + } // end else + } + + return thrust::make_pair(out_true, out_false); +} + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/reduce.h b/compat/thrust/system/detail/internal/scalar/reduce.h new file mode 100644 index 0000000..7ad430e --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/reduce.h @@ -0,0 +1,69 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file reduce.h + * \brief Sequential implementation of reduce algorithm. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template + OutputType reduce(InputIterator begin, + InputIterator end, + OutputType init, + BinaryFunction binary_op) +{ + // wrap binary_op + thrust::detail::host_function< + BinaryFunction, + OutputType + > wrapped_binary_op(binary_op); + + // initialize the result + OutputType result = init; + + while(begin != end) + { + result = wrapped_binary_op(result, *begin); + ++begin; + } // end while + + return result; +} + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/reduce_by_key.h b/compat/thrust/system/detail/internal/scalar/reduce_by_key.h new file mode 100644 index 0000000..eeacb9d --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/reduce_by_key.h @@ -0,0 +1,103 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template + thrust::pair + reduce_by_key(InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred, + BinaryFunction binary_op) +{ + typedef typename thrust::iterator_traits::value_type InputKeyType; + typedef typename thrust::iterator_traits::value_type InputValueType; + + typedef typename thrust::detail::intermediate_type_from_function_and_iterators< + InputIterator2, + OutputIterator2, + BinaryFunction + >::type TemporaryType; + + if(keys_first != keys_last) + { + InputKeyType temp_key = *keys_first; + TemporaryType temp_value = *values_first; + + for(++keys_first, ++values_first; + keys_first != keys_last; + ++keys_first, ++values_first) + { + InputKeyType key = *keys_first; + InputValueType value = *values_first; + + if (binary_pred(temp_key, key)) + { + temp_value = binary_op(temp_value, value); + } + else + { + *keys_output = temp_key; + *values_output = temp_value; + + ++keys_output; + ++values_output; + + temp_key = key; + temp_value = value; + } + } + + *keys_output = temp_key; + *values_output = temp_value; + + ++keys_output; + ++values_output; + } + + return thrust::make_pair(keys_output, values_output); +} + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/remove.h b/compat/thrust/system/detail/internal/scalar/remove.h new file mode 100644 index 0000000..2360019 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/remove.h @@ -0,0 +1,185 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance 
with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file remove.h + * \brief Sequential implementations of remove functions. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template + ForwardIterator remove_if(ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + // wrap pred + thrust::detail::host_function< + Predicate, + bool + > wrapped_pred(pred); + + // advance iterators until wrapped_pred(*first) is true or we reach the end of input + while(first != last && !wrapped_pred(*first)) + ++first; + + if(first == last) + return first; + + // result always trails first + ForwardIterator result = first; + + ++first; + + while(first != last) + { + if(!wrapped_pred(*first)) + { + *result = *first; + ++result; + } + ++first; + } + + return result; +} + + +template + ForwardIterator remove_if(ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred) +{ + // wrap pred + thrust::detail::host_function< + Predicate, + bool + > wrapped_pred(pred); + + // advance iterators until wrapped_pred(*stencil) is true or we reach the end of input + while(first != last && !wrapped_pred(*stencil)) + { + ++first; + ++stencil; + } + + if(first == last) + return first; + + // result always trails first + ForwardIterator result = first; + + ++first; + ++stencil; + + while(first != last) + { + if(!wrapped_pred(*stencil)) + { + *result = *first; + ++result; + } + ++first; + ++stencil; + } + + return result; +} + + +template + OutputIterator 
remove_copy_if(InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred) +{ + // wrap pred + thrust::detail::host_function< + Predicate, + bool + > wrapped_pred(pred); + + while (first != last) + { + if (!wrapped_pred(*first)) + { + *result = *first; + ++result; + } + + ++first; + } + + return result; +} + +template + OutputIterator remove_copy_if(InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred) +{ + // wrap pred + thrust::detail::host_function< + Predicate, + bool + > wrapped_pred(pred); + + while (first != last) + { + if (!wrapped_pred(*stencil)) + { + *result = *first; + ++result; + } + + ++first; + ++stencil; + } + + return result; +} + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/scan.h b/compat/thrust/system/detail/internal/scalar/scan.h new file mode 100644 index 0000000..8f41150 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/scan.h @@ -0,0 +1,153 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file scan.h + * \brief Sequential implementations of scan functions. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template + OutputIterator inclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result, + BinaryFunction binary_op) +{ + // the pseudocode for deducing the type of the temporary used below: + // + // if BinaryFunction is AdaptableBinaryFunction + // ValueType = AdaptableBinaryFunction::result_type + // else if OutputIterator is a "pure" output iterator + // ValueType = InputIterator::value_type + // else + // ValueType = OutputIterator::value_type + // + // XXX upon c++0x, ValueType needs to be: + // result_of::type + + using namespace thrust::detail; + + typedef typename eval_if< + has_result_type::value, + result_type, + eval_if< + is_output_iterator::value, + thrust::iterator_value, + thrust::iterator_value + > + >::type ValueType; + + // wrap binary_op + thrust::detail::host_function< + BinaryFunction, + ValueType + > wrapped_binary_op(binary_op); + + if(first != last) + { + ValueType sum = *first; + + *result = sum; + + for(++first, ++result; first != last; ++first, ++result) + *result = sum = wrapped_binary_op(sum,*first); + } + + return result; +} + + +template + OutputIterator exclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result, + T init, + BinaryFunction binary_op) +{ + // the pseudocode for deducing the type of the temporary used below: + // + // if BinaryFunction is AdaptableBinaryFunction + // ValueType = AdaptableBinaryFunction::result_type + // else if OutputIterator is a "pure" output iterator + // ValueType = InputIterator::value_type + // else + // ValueType = OutputIterator::value_type + // + // XXX upon c++0x, ValueType needs to be: + // result_of::type + + using namespace thrust::detail; + + typedef typename eval_if< + has_result_type::value, + result_type, +
eval_if< + is_output_iterator::value, + thrust::iterator_value, + thrust::iterator_value + > + >::type ValueType; + + if(first != last) + { + ValueType tmp = *first; // temporary value allows in-situ scan + ValueType sum = init; + + *result = sum; + sum = binary_op(sum, tmp); + + for(++first, ++result; first != last; ++first, ++result) + { + tmp = *first; + *result = sum; + sum = binary_op(sum, tmp); + } + } + + return result; +} + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/scan_by_key.h b/compat/thrust/system/detail/internal/scalar/scan_by_key.h new file mode 100644 index 0000000..a31fc60 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/scan_by_key.h @@ -0,0 +1,147 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file scan_by_key.h + * \brief Sequential implementation of scan_by_key functions.
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template + OutputIterator inclusive_scan_by_key(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryPredicate binary_pred, + BinaryFunction binary_op) +{ + using namespace thrust::detail; + + typedef typename thrust::iterator_traits::value_type KeyType; + typedef typename thrust::iterator_traits::value_type ValueType; + + // wrap binary_op + thrust::detail::host_function< + BinaryFunction, + ValueType + > wrapped_binary_op(binary_op); + + if(first1 != last1) + { + KeyType prev_key = *first1; + ValueType prev_value = *first2; + + *result = prev_value; + + for(++first1, ++first2, ++result; + first1 != last1; + ++first1, ++first2, ++result) + { + KeyType key = *first1; + + if (binary_pred(prev_key, key)) + *result = prev_value = wrapped_binary_op(prev_value,*first2); + else + *result = prev_value = *first2; + + prev_key = key; + } + } + + return result; +} + + +template + OutputIterator exclusive_scan_by_key(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + T init, + BinaryPredicate binary_pred, + BinaryFunction binary_op) +{ + using namespace thrust::detail; + + typedef typename thrust::iterator_traits::value_type KeyType; + typedef typename thrust::iterator_traits::value_type ValueType; + + if(first1 != last1) + { + KeyType temp_key = *first1; + ValueType temp_value = *first2; + + ValueType next = init; + + // first one is init + *result = next; + + next = binary_op(next, temp_value); + + for(++first1, ++first2, ++result; + first1 != last1; + ++first1, ++first2, ++result) + { + KeyType key = *first1; + + // use temp to permit in-place scans + temp_value = *first2; + + if (!binary_pred(temp_key, key)) + next = init; // reset sum + + *result = next; + next = binary_op(next, temp_value); + + 
temp_key = key; + } + } + + return result; +} + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/set_operations.h b/compat/thrust/system/detail/internal/scalar/set_operations.h new file mode 100644 index 0000000..f85b510 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/set_operations.h @@ -0,0 +1,208 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file set_operations.h + * \brief Sequential implementation of set operation functions. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template + OutputIterator set_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + // wrap comp + thrust::detail::host_function< + StrictWeakOrdering, + bool + > wrapped_comp(comp); + + while(first1 != last1 && first2 != last2) + { + if(wrapped_comp(*first1,*first2)) + { + *result = *first1; + ++first1; + ++result; + } // end if + else if(wrapped_comp(*first2,*first1)) + { + ++first2; + } // end else if + else + { + ++first1; + ++first2; + } // end else + } // end while + + return scalar::copy(first1, last1, result); +} // end set_difference() + + +template + OutputIterator set_intersection(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + // wrap comp + thrust::detail::host_function< + StrictWeakOrdering, + bool + > wrapped_comp(comp); + + while(first1 != last1 && first2 != last2) + { + if(wrapped_comp(*first1,*first2)) + { + ++first1; + } // end if + else if(wrapped_comp(*first2,*first1)) + { + ++first2; + } // end else if + else + { + *result = *first1; + ++first1; + ++first2; + ++result; + } // end else + } // end while + + return result; +} // end set_intersection() + + +template + OutputIterator set_symmetric_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + // wrap comp + thrust::detail::host_function< + StrictWeakOrdering, + bool + > wrapped_comp(comp); + + while(first1 != last1 && first2 != last2) + { + if(wrapped_comp(*first1,*first2)) + { + *result = *first1; + ++first1; + ++result; + } // end if + else if(wrapped_comp(*first2,*first1)) + { + *result = *first2; + 
++first2; + ++result; + } // end else if + else + { + ++first1; + ++first2; + } // end else + } // end while + + return scalar::copy(first2, last2, scalar::copy(first1, last1, result)); +} // end set_symmetric_difference() + + +template + OutputIterator set_union(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + // wrap comp + thrust::detail::host_function< + StrictWeakOrdering, + bool + > wrapped_comp(comp); + + while(first1 != last1 && first2 != last2) + { + if(wrapped_comp(*first1,*first2)) + { + *result = *first1; + ++first1; + } // end if + else if(wrapped_comp(*first2,*first1)) + { + *result = *first2; + ++first2; + } // end else if + else + { + *result = *first1; + ++first1; + ++first2; + } // end else + + ++result; + } // end while + + return scalar::copy(first2, last2, scalar::copy(first1, last1, result)); +} // end set_union() + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/sort.h b/compat/thrust/system/detail/internal/scalar/sort.h new file mode 100644 index 0000000..9e465c8 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/sort.h @@ -0,0 +1,57 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! 
\file sort.h + * \brief Sequential implementations of sort algorithms. + */ + +#pragma once + +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template +void stable_sort(RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp); + +template +void stable_sort_by_key(RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + StrictWeakOrdering comp); + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/internal/scalar/sort.inl b/compat/thrust/system/detail/internal/scalar/sort.inl new file mode 100644 index 0000000..c6ed273 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/sort.inl @@ -0,0 +1,161 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ +namespace sort_detail +{ + +//////////////////// +// Primitive Sort // +//////////////////// + +template +void stable_sort(RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp, + thrust::detail::true_type) +{ + thrust::system::detail::internal::scalar::stable_primitive_sort(first, last); + + // if comp is greater then reverse the keys + typedef typename thrust::iterator_traits::value_type KeyType; + const static bool reverse = thrust::detail::is_same >::value; + + if (reverse) + thrust::reverse(first, last); +} + +template +void stable_sort_by_key(RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + StrictWeakOrdering comp, + thrust::detail::true_type) +{ + // if comp is greater then reverse the keys and values + typedef typename thrust::iterator_traits::value_type KeyType; + const static bool reverse = thrust::detail::is_same >::value; + + // note, we also have to reverse the (unordered) input to preserve stability + if (reverse) + { + thrust::reverse(first1, last1); + thrust::reverse(first2, first2 + (last1 - first1)); + } + + thrust::system::detail::internal::scalar::stable_primitive_sort_by_key(first1, last1, first2); + + if (reverse) + { + thrust::reverse(first1, last1); + thrust::reverse(first2, first2 + (last1 - first1)); + } +} + +//////////////// +// Merge Sort // +//////////////// + +template +void stable_sort(RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp, + thrust::detail::false_type) +{ + thrust::system::detail::internal::scalar::stable_merge_sort(first, last, comp); +} + +template +void stable_sort_by_key(RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + StrictWeakOrdering comp, + thrust::detail::false_type) +{ + 
thrust::system::detail::internal::scalar::stable_merge_sort_by_key(first1, last1, first2, comp); +} + + +} // end namespace sort_detail + +template +void stable_sort(RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp) +{ + typedef typename thrust::iterator_traits::value_type KeyType; + static const bool use_primitive_sort = thrust::detail::is_arithmetic::value && + (thrust::detail::is_same >::value || + thrust::detail::is_same >::value); + + // supress unused variable warning + (void) use_primitive_sort; + + thrust::system::detail::internal::scalar::sort_detail::stable_sort + (first, last, comp, + thrust::detail::integral_constant()); +} + +template +void stable_sort_by_key(RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + StrictWeakOrdering comp) +{ + typedef typename thrust::iterator_traits::value_type KeyType; + static const bool use_primitive_sort = thrust::detail::is_arithmetic::value && + (thrust::detail::is_same >::value || + thrust::detail::is_same >::value); + + // supress unused variable warning + (void) use_primitive_sort; + + thrust::system::detail::internal::scalar::sort_detail::stable_sort_by_key + (first1, last1, first2, comp, + thrust::detail::integral_constant()); +} + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/stable_merge_sort.h b/compat/thrust/system/detail/internal/scalar/stable_merge_sort.h new file mode 100644 index 0000000..f68242c --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/stable_merge_sort.h @@ -0,0 +1,55 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file stable_merge_sort.h + * \brief Sequential implementation of merge sort. + */ + +#pragma once + +#include + +namespace thrust +{ +namespace system +{ +namespace cpp +{ +namespace detail +{ + +template +void stable_merge_sort(RandomAccessIterator begin, + RandomAccessIterator end, + StrictWeakOrdering comp); + +template +void stable_merge_sort_by_key(RandomAccessIterator1 keys_begin, + RandomAccessIterator1 keys_end, + RandomAccessIterator2 values_begin, + StrictWeakOrdering comp); + +} // end namespace detail +} // end namespace cpp +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/internal/scalar/stable_merge_sort.inl b/compat/thrust/system/detail/internal/scalar/stable_merge_sort.inl new file mode 100644 index 0000000..41d320c --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/stable_merge_sort.inl @@ -0,0 +1,150 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ +namespace detail +{ + +template +void inplace_merge(RandomAccessIterator first, + RandomAccessIterator middle, + RandomAccessIterator last, + StrictWeakOrdering comp) +{ + // XXX the type of exec should be: + // typedef decltype(select_system(first, middle, last)) DerivedPolicy; + typedef typename thrust::iterator_system::type DerivedPolicy; + typedef typename thrust::iterator_value::type value_type; + + // XXX assumes DerivedPolicy is default constructible + // XXX find a way to get a stateful execution policy into this function + // or simply pass scratch space + DerivedPolicy exec; + thrust::detail::temporary_array a(exec, first, middle); + thrust::detail::temporary_array b(exec, middle, last); + + thrust::system::detail::internal::scalar::merge(a.begin(), a.end(), b.begin(), b.end(), first, comp); +} + +template +void inplace_merge_by_key(RandomAccessIterator1 first1, + RandomAccessIterator1 middle1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + StrictWeakOrdering comp) +{ + // XXX the type of exec should be: + // typedef decltype(select_system(first1, middle1, last1, first2)) DerivedPolicy; + typedef typename thrust::iterator_system::type DerivedPolicy; + typedef typename thrust::iterator_value::type value_type1; + typedef typename thrust::iterator_value::type value_type2; + + RandomAccessIterator2 middle2 = first2 + (middle1 - first1); + RandomAccessIterator2 last2 = first2 + (last1 - first1); + + // XXX assumes DerivedPolicy is default constructible + // XXX find a way to get a stateful exec into this function + // or simply pass scratch space + DerivedPolicy exec; + thrust::detail::temporary_array lhs1(exec, first1, middle1); + thrust::detail::temporary_array rhs1(exec, middle1, last1); + thrust::detail::temporary_array lhs2(exec, first2, middle2); + thrust::detail::temporary_array 
rhs2(exec, middle2, last2); + + thrust::system::detail::internal::scalar::merge_by_key + (lhs1.begin(), lhs1.end(), rhs1.begin(), rhs1.end(), + lhs2.begin(), rhs2.begin(), + first1, first2, comp); +} + +} // end namespace detail + +////////////// +// Key Sort // +////////////// + +template +void stable_merge_sort(RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp) +{ + if (last - first < 32) + { + thrust::system::detail::internal::scalar::insertion_sort(first, last, comp); + } + else + { + RandomAccessIterator middle = first + (last - first) / 2; + + thrust::system::detail::internal::scalar::stable_merge_sort(first, middle, comp); + thrust::system::detail::internal::scalar::stable_merge_sort(middle, last, comp); + detail::inplace_merge(first, middle, last, comp); + } +} + + +//////////////////// +// Key-Value Sort // +//////////////////// + +template +void stable_merge_sort_by_key(RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + StrictWeakOrdering comp) +{ + if (last1 - first1 <= 32) + { + thrust::system::detail::internal::scalar::insertion_sort_by_key(first1, last1, first2, comp); + } + else + { + RandomAccessIterator1 middle1 = first1 + (last1 - first1) / 2; + RandomAccessIterator2 middle2 = first2 + (last1 - first1) / 2; + + thrust::system::detail::internal::scalar::stable_merge_sort_by_key(first1, middle1, first2, comp); + thrust::system::detail::internal::scalar::stable_merge_sort_by_key(middle1, last1, middle2, comp); + detail::inplace_merge_by_key(first1, middle1, last1, first2, comp); + } +} + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/stable_primitive_sort.h b/compat/thrust/system/detail/internal/scalar/stable_primitive_sort.h new file mode 100644 index 0000000..f37bf27 --- /dev/null +++ 
b/compat/thrust/system/detail/internal/scalar/stable_primitive_sort.h @@ -0,0 +1,49 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template +void stable_primitive_sort(RandomAccessIterator first, + RandomAccessIterator last); + +template +void stable_primitive_sort_by_key(RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first); + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/internal/scalar/stable_primitive_sort.inl b/compat/thrust/system/detail/internal/scalar/stable_primitive_sort.inl new file mode 100644 index 0000000..c22b15c --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/stable_primitive_sort.inl @@ -0,0 +1,142 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ +namespace stable_primitive_sort_detail +{ + + +template + struct enable_if_bool_sort + : thrust::detail::enable_if< + thrust::detail::is_same< + bool, + typename thrust::iterator_value::type + >::value + > +{}; + + +template + struct disable_if_bool_sort + : thrust::detail::disable_if< + thrust::detail::is_same< + bool, + typename thrust::iterator_value::type + >::value + > +{}; + + + +template + typename enable_if_bool_sort::type + stable_primitive_sort(RandomAccessIterator first, RandomAccessIterator last) +{ + // use stable_partition if we're sorting bool + // stable_partition puts true values first, so we need to logical_not + scalar::stable_partition(first, last, thrust::logical_not()); +} + + +template + typename disable_if_bool_sort::type + stable_primitive_sort(RandomAccessIterator first, RandomAccessIterator last) +{ + // call stable_radix_sort + scalar::stable_radix_sort(first,last); +} + + +struct logical_not_first +{ + template + __host__ __device__ + bool operator()(Tuple t) + { + return !thrust::get<0>(t); + } +}; + + +template + typename enable_if_bool_sort::type + stable_primitive_sort_by_key(RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first) +{ + // use stable_partition if we're sorting bool + // stable_partition puts true values first, so we need to logical_not + 
scalar::stable_partition(thrust::make_zip_iterator(thrust::make_tuple(keys_first, values_first)), + thrust::make_zip_iterator(thrust::make_tuple(keys_last, values_first)), + logical_not_first()); +} + + +template + typename disable_if_bool_sort::type + stable_primitive_sort_by_key(RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first) +{ + // call stable_radix_sort_by_key + scalar::stable_radix_sort_by_key(keys_first, keys_last, values_first); +} + + +} + +template +void stable_primitive_sort(RandomAccessIterator first, + RandomAccessIterator last) +{ + scalar::stable_primitive_sort_detail::stable_primitive_sort(first,last); +} + +template +void stable_primitive_sort_by_key(RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first) +{ + scalar::stable_primitive_sort_detail::stable_primitive_sort_by_key(keys_first, keys_last, values_first); +} + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/internal/scalar/stable_radix_sort.h b/compat/thrust/system/detail/internal/scalar/stable_radix_sort.h new file mode 100644 index 0000000..f2af222 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/stable_radix_sort.h @@ -0,0 +1,54 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file stable_radix_sort.h + * \brief Sequential implementation of radix sort. + */ + +#pragma once + +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template +void stable_radix_sort(RandomAccessIterator begin, + RandomAccessIterator end); + +template +void stable_radix_sort_by_key(RandomAccessIterator1 keys_begin, + RandomAccessIterator1 keys_end, + RandomAccessIterator2 values_begin); + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/detail/internal/scalar/stable_radix_sort.inl b/compat/thrust/system/detail/internal/scalar/stable_radix_sort.inl new file mode 100644 index 0000000..98846ab --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/stable_radix_sort.inl @@ -0,0 +1,434 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#include + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ +namespace detail +{ + +template +struct RadixEncoder : public thrust::identity +{}; + +template <> +struct RadixEncoder : public thrust::unary_function +{ + unsigned char operator()(char x) const + { + if(std::numeric_limits::is_signed) + return x ^ static_cast(1) << (8 * sizeof(unsigned char) - 1); + else + return x; + } +}; + +template <> +struct RadixEncoder : public thrust::unary_function +{ + unsigned char operator()(signed char x) const + { + return x ^ static_cast(1) << (8 * sizeof(unsigned char) - 1); + } +}; + +template <> +struct RadixEncoder : public thrust::unary_function +{ + unsigned short operator()(short x) const + { + return x ^ static_cast(1) << (8 * sizeof(unsigned short) - 1); + } +}; + +template <> +struct RadixEncoder : public thrust::unary_function +{ + unsigned long operator()(long x) const + { + return x ^ static_cast(1) << (8 * sizeof(unsigned int) - 1); + } +}; + +template <> +struct RadixEncoder : public thrust::unary_function +{ + unsigned long operator()(long x) const + { + return x ^ static_cast(1) << (8 * sizeof(unsigned long) - 1); + } +}; + +template <> +struct RadixEncoder : public thrust::unary_function +{ + unsigned long long operator()(long long x) const + { + return x ^ static_cast(1) << (8 * sizeof(unsigned long long) - 1); + } +}; + +// ideally we'd use uint32 here and uint64 below +template <> +struct RadixEncoder : public thrust::unary_function +{ + thrust::detail::uint32_t operator()(float x) const + { + union { float f; thrust::detail::uint32_t i; } u; + u.f = x; + thrust::detail::uint32_t mask = -static_cast(u.i >> 31) | (static_cast(1) << 31); + return u.i ^ mask; + } +}; + +template <> +struct RadixEncoder : public thrust::unary_function +{ + thrust::detail::uint64_t operator()(double x) const + { + union { double f; 
thrust::detail::uint64_t i; } u; + u.f = x; + thrust::detail::uint64_t mask = -static_cast(u.i >> 63) | (static_cast(1) << 63); + return u.i ^ mask; + } +}; + + +template +void radix_sort(RandomAccessIterator1 keys1, + RandomAccessIterator2 keys2, + RandomAccessIterator3 vals1, + RandomAccessIterator4 vals2, + const size_t N) +{ + typedef typename thrust::iterator_value::type KeyType; + + typedef RadixEncoder Encoder; + typedef typename Encoder::result_type EncodedType; + + static const unsigned int NumHistograms = (8 * sizeof(EncodedType) + (RadixBits - 1)) / RadixBits; + static const unsigned int HistogramSize = 1 << RadixBits; + + static const EncodedType BitMask = static_cast((1 << RadixBits) - 1); + + Encoder encode; + + // storage for histograms + size_t histograms[NumHistograms][HistogramSize] = {{0}}; + + // see which passes can be eliminated + bool skip_shuffle[NumHistograms] = {false}; + + // false if most recent data is stored in (keys1,vals1) + bool flip = false; + + // compute histograms + for (size_t i = 0; i < N; i++) + { + const EncodedType x = encode(keys1[i]); + + for (unsigned int j = 0; j < NumHistograms; j++) + { + const EncodedType BitShift = RadixBits * j; + histograms[j][(x >> BitShift) & BitMask]++; + } + } + + // scan histograms + for (unsigned int i = 0; i < NumHistograms; i++) + { + size_t sum = 0; + + for (unsigned int j = 0; j < HistogramSize; j++) + { + size_t bin = histograms[i][j]; + + if (bin == N) + skip_shuffle[i] = true; + + histograms[i][j] = sum; + + sum = sum + bin; + } + } + + // shuffle keys and (optionally) values + for (unsigned int i = 0; i < NumHistograms; i++) + { + const EncodedType BitShift = static_cast(RadixBits * i); + + if (!skip_shuffle[i]) + { + if (flip) + { + for (size_t j = 0; j < N; j++) + { + const EncodedType x = encode(keys2[j]); + size_t position = histograms[i][(x >> BitShift) & BitMask]++; + + RandomAccessIterator1 temp_keys1 = keys1; + temp_keys1 += position; + + RandomAccessIterator2 temp_keys2 = 
keys2; + temp_keys2 += j; + + // keys1[position] = keys2[j] + *temp_keys1 = *temp_keys2; + + if (HasValues) + { + RandomAccessIterator3 temp_vals1 = vals1; + temp_vals1 += position; + + RandomAccessIterator4 temp_vals2 = vals2; + temp_vals2 += j; + + // vals1[position] = vals2[j] + *temp_vals1 = *temp_vals2; + } + } + } + else + { + for (size_t j = 0; j < N; j++) + { + const EncodedType x = encode(keys1[j]); + size_t position = histograms[i][(x >> BitShift) & BitMask]++; + + RandomAccessIterator1 temp_keys1 = keys1; + temp_keys1 += j; + + RandomAccessIterator2 temp_keys2 = keys2; + temp_keys2 += position; + + // keys2[position] = keys1[j]; + *temp_keys2 = *temp_keys1; + + if (HasValues) + { + RandomAccessIterator3 temp_vals1 = vals1; + temp_vals1 += j; + + RandomAccessIterator4 temp_vals2 = vals2; + temp_vals2 += position; + + // vals2[position] = vals1[j] + *temp_vals2 = *temp_vals1; + } + } + } + + flip = (flip) ? false : true; + } + } + + // ensure final values are in (keys1,vals1) + if (flip) + { + thrust::copy(keys2, keys2 + N, keys1); + if (HasValues) + thrust::copy(vals2, vals2 + N, vals1); + } +} + + +// Select best radix sort parameters based on sizeof(T) and input size +// These particular values were determined through empirical testing on a Core i7 950 CPU +template +struct radix_sort_dispatcher +{ +}; + +template <> +struct radix_sort_dispatcher<1> +{ + template + void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, const size_t N) + { + detail::radix_sort<8,false>(keys1, keys2, static_cast(0), static_cast(0), N); + } + template + void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, RandomAccessIterator3 vals1, RandomAccessIterator4 vals2, const size_t N) + { + detail::radix_sort<8,true>(keys1, keys2, vals1, vals2, N); + } +}; + +template <> +struct radix_sort_dispatcher<2> +{ + template + void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, const size_t N) + { + if (N < (1 << 16)) + 
detail::radix_sort<8,false>(keys1, keys2, static_cast(0), static_cast(0), N); + else + detail::radix_sort<16,false>(keys1, keys2, static_cast(0), static_cast(0), N); + } + template + void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, RandomAccessIterator3 vals1, RandomAccessIterator4 vals2, const size_t N) + { + if (N < (1 << 15)) + detail::radix_sort<8,true>(keys1, keys2, vals1, vals2, N); + else + detail::radix_sort<16,true>(keys1, keys2, vals1, vals2, N); + } +}; + +template <> +struct radix_sort_dispatcher<4> +{ + template + void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, const size_t N) + { + if (N < (1 << 22)) + detail::radix_sort<8,false>(keys1, keys2, static_cast(0), static_cast(0), N); + else + detail::radix_sort<4,false>(keys1, keys2, static_cast(0), static_cast(0), N); + } + template + void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, RandomAccessIterator3 vals1, RandomAccessIterator4 vals2, const size_t N) + { + if (N < (1 << 22)) + detail::radix_sort<8,true>(keys1, keys2, vals1, vals2, N); + else + detail::radix_sort<3,true>(keys1, keys2, vals1, vals2, N); + } +}; + +template <> +struct radix_sort_dispatcher<8> +{ + template + void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, const size_t N) + { + if (N < (1 << 21)) + detail::radix_sort<8,false>(keys1, keys2, static_cast(0), static_cast(0), N); + else + detail::radix_sort<4,false>(keys1, keys2, static_cast(0), static_cast(0), N); + } + template + void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, RandomAccessIterator3 vals1, RandomAccessIterator4 vals2, const size_t N) + { + if (N < (1 << 21)) + detail::radix_sort<8,true>(keys1, keys2, vals1, vals2, N); + else + detail::radix_sort<3,true>(keys1, keys2, vals1, vals2, N); + } +}; + +template +void radix_sort(RandomAccessIterator1 keys1, + RandomAccessIterator2 keys2, + const size_t N) +{ + typedef typename thrust::iterator_value::type 
KeyType; + radix_sort_dispatcher()(keys1, keys2, N); +} + +template +void radix_sort(RandomAccessIterator1 keys1, + RandomAccessIterator2 keys2, + RandomAccessIterator3 vals1, + RandomAccessIterator4 vals2, + const size_t N) +{ + typedef typename thrust::iterator_value::type KeyType; + radix_sort_dispatcher()(keys1, keys2, vals1, vals2, N); +} + +} // namespace detail + +////////////// +// Key Sort // +////////////// + +template +void stable_radix_sort(RandomAccessIterator first, + RandomAccessIterator last) +{ + typedef typename thrust::iterator_system::type ExecutionPolicy; + typedef typename thrust::iterator_value::type KeyType; + + size_t N = last - first; + + // XXX assumes ExecutionPolicy is default constructible + // XXX consider how to get stateful systems into this function + ExecutionPolicy exec; + thrust::detail::temporary_array temp(exec, N); + + detail::radix_sort(first, temp.begin(), N); +} + + +//////////////////// +// Key-Value Sort // +//////////////////// + +template +void stable_radix_sort_by_key(RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2) +{ + // XXX the type of exec should be + // typedef decltype(select_system(first1,last1,first2)) system; + typedef typename thrust::iterator_system::type ExecutionPolicy; + typedef typename thrust::iterator_value::type KeyType; + typedef typename thrust::iterator_value::type ValueType; + + size_t N = last1 - first1; + + // XXX assumes ExecutionPolicy is default constructible + // XXX consider how to get stateful systems into this function + ExecutionPolicy exec; + thrust::detail::temporary_array temp1(exec, N); + thrust::detail::temporary_array temp2(exec, N); + + detail::radix_sort(first1, temp1.begin(), first2, temp2.begin(), N); +} + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/trivial_copy.h 
b/compat/thrust/system/detail/internal/scalar/trivial_copy.h new file mode 100644 index 0000000..8f008b5 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/trivial_copy.h @@ -0,0 +1,51 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file trivial_copy.h + * \brief Sequential copy algorithms for plain-old-data. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template + T *trivial_copy_n(const T *first, + std::ptrdiff_t n, + T *result) +{ + std::memmove(result, first, n * sizeof(T)); + return result + n; +} // end trivial_copy_n() + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/unique.h b/compat/thrust/system/detail/internal/scalar/unique.h new file mode 100644 index 0000000..cfc60c9 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/unique.h @@ -0,0 +1,90 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file unique.h + * \brief Sequential implementations of unique algorithms. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template + OutputIterator unique_copy(InputIterator first, + InputIterator last, + OutputIterator output, + BinaryPredicate binary_pred) +{ + typedef typename thrust::iterator_traits::value_type T; + + if(first != last) + { + T prev = *first; + + for(++first; first != last; ++first) + { + T temp = *first; + + if (!binary_pred(prev, temp)) + { + *output = prev; + + ++output; + + prev = temp; + } + } + + *output = prev; + ++output; + } + + return output; +} // end unique_copy() + + +template + ForwardIterator unique(ForwardIterator first, + ForwardIterator last, + BinaryPredicate binary_pred) +{ + // unique_copy() permits in-situ operation + return thrust::system::detail::internal::scalar::unique_copy(first, last, first, binary_pred); +} // end unique() + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/internal/scalar/unique_by_key.h b/compat/thrust/system/detail/internal/scalar/unique_by_key.h new file mode 100644 index 0000000..b0be266 --- /dev/null +++ b/compat/thrust/system/detail/internal/scalar/unique_by_key.h @@ -0,0 +1,109 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use 
this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file unique_by_key.h + * \brief Sequential implementations of unique_by_key algorithms. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace detail +{ +namespace internal +{ +namespace scalar +{ + +template + thrust::pair + unique_by_key_copy(InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred) +{ + typedef typename thrust::iterator_traits::value_type InputKeyType; + typedef typename thrust::iterator_traits::value_type OutputValueType; + + if(keys_first != keys_last) + { + InputKeyType temp_key = *keys_first; + OutputValueType temp_value = *values_first; + + for(++keys_first, ++values_first; + keys_first != keys_last; + ++keys_first, ++values_first) + { + InputKeyType key = *keys_first; + OutputValueType value = *values_first; + + if(!binary_pred(temp_key, key)) + { + *keys_output = temp_key; + *values_output = temp_value; + + ++keys_output; + ++values_output; + + temp_key = key; + temp_value = value; + } + } + + *keys_output = temp_key; + *values_output = temp_value; + + ++keys_output; + ++values_output; + } + + return thrust::make_pair(keys_output, values_output); +} // end unique_by_key_copy() + + +template + thrust::pair + unique_by_key(ForwardIterator1 keys_first, + ForwardIterator1 keys_last, + ForwardIterator2 values_first, + BinaryPredicate binary_pred) +{ + // unique_by_key_copy() permits in-situ 
operation + return thrust::system::detail::internal::scalar::unique_by_key_copy(keys_first, keys_last, values_first, keys_first, values_first, binary_pred); +} // end unique_by_key() + +} // end namespace scalar +} // end namespace internal +} // end namespace detail +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/detail/system_error.inl b/compat/thrust/system/detail/system_error.inl new file mode 100644 index 0000000..74909be --- /dev/null +++ b/compat/thrust/system/detail/system_error.inl @@ -0,0 +1,111 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include + +namespace thrust +{ + +namespace system +{ + + +system_error + ::system_error(error_code ec, const std::string &what_arg) + : std::runtime_error(what_arg), m_error_code(ec) +{ + +} // end system_error::system_error() + + +system_error + ::system_error(error_code ec, const char *what_arg) + : std::runtime_error(what_arg), m_error_code(ec) +{ + ; +} // end system_error::system_error() + + +system_error + ::system_error(error_code ec) + : std::runtime_error(""), m_error_code(ec) +{ + ; +} // end system_error::system_error() + + +system_error + ::system_error(int ev, const error_category &ecat, const std::string &what_arg) + : std::runtime_error(what_arg), m_error_code(ev,ecat) +{ + ; +} // end system_error::system_error() + + +system_error + ::system_error(int ev, const error_category &ecat, const char *what_arg) + : std::runtime_error(what_arg), m_error_code(ev,ecat) +{ + ; +} // end system_error::system_error() + + +system_error + ::system_error(int ev, const error_category &ecat) + : std::runtime_error(""), m_error_code(ev,ecat) +{ + ; +} // end system_error::system_error() + + +const error_code &system_error + ::code(void) const throw() +{ + return m_error_code; +} // end system_error::code() + + +const char *system_error + ::what(void) const throw() +{ + if(m_what.empty()) + { + try + { + m_what = this->std::runtime_error::what(); + if(m_error_code) + { + if(!m_what.empty()) m_what += ": "; + m_what += m_error_code.message(); + } + } + catch(...) 
+ { + return std::runtime_error::what(); + } + } + + return m_what.c_str(); +} // end system_error::what() + + +} // end system + +} // end thrust + diff --git a/compat/thrust/system/error_code.h b/compat/thrust/system/error_code.h new file mode 100644 index 0000000..2b6582c --- /dev/null +++ b/compat/thrust/system/error_code.h @@ -0,0 +1,521 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file error_code.h + * \brief An object used to hold error values, such as those originating from the + * operating system or other low-level application program interfaces. + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +namespace system +{ + + +/*! \addtogroup system_diagnostics + * \{ + */ + +class error_condition; +class error_code; + +/*! A metafunction returning whether or not the parameter is an \p error_code enum. + */ +template struct is_error_code_enum : public thrust::detail::false_type {}; + +/*! A metafunction returning whether or not the parameter is an \p error_condition enum. + */ +template struct is_error_condition_enum : public thrust::detail::false_type {}; + + +// XXX N3092 prefers enum class errc { ... 
} +namespace errc +{ + +enum errc_t +{ + address_family_not_supported = detail::eafnosupport, + address_in_use = detail::eaddrinuse, + address_not_available = detail::eaddrnotavail, + already_connected = detail::eisconn, + argument_list_too_long = detail::e2big, + argument_out_of_domain = detail::edom, + bad_address = detail::efault, + bad_file_descriptor = detail::ebadf, + bad_message = detail::ebadmsg, + broken_pipe = detail::epipe, + connection_aborted = detail::econnaborted, + connection_already_in_progress = detail::ealready, + connection_refused = detail::econnrefused, + connection_reset = detail::econnreset, + cross_device_link = detail::exdev, + destination_address_required = detail::edestaddrreq, + device_or_resource_busy = detail::ebusy, + directory_not_empty = detail::enotempty, + executable_format_error = detail::enoexec, + file_exists = detail::eexist, + file_too_large = detail::efbig, + filename_too_long = detail::enametoolong, + function_not_supported = detail::enosys, + host_unreachable = detail::ehostunreach, + identifier_removed = detail::eidrm, + illegal_byte_sequence = detail::eilseq, + inappropriate_io_control_operation = detail::enotty, + interrupted = detail::eintr, + invalid_argument = detail::einval, + invalid_seek = detail::espipe, + io_error = detail::eio, + is_a_directory = detail::eisdir, + message_size = detail::emsgsize, + network_down = detail::enetdown, + network_reset = detail::enetreset, + network_unreachable = detail::enetunreach, + no_buffer_space = detail::enobufs, + no_child_process = detail::echild, + no_link = detail::enolink, + no_lock_available = detail::enolck, + no_message_available = detail::enodata, + no_message = detail::enomsg, + no_protocol_option = detail::enoprotoopt, + no_space_on_device = detail::enospc, + no_stream_resources = detail::enosr, + no_such_device_or_address = detail::enxio, + no_such_device = detail::enodev, + no_such_file_or_directory = detail::enoent, + no_such_process = detail::esrch, + 
not_a_directory = detail::enotdir, + not_a_socket = detail::enotsock, + not_a_stream = detail::enostr, + not_connected = detail::enotconn, + not_enough_memory = detail::enomem, + not_supported = detail::enotsup, + operation_canceled = detail::ecanceled, + operation_in_progress = detail::einprogress, + operation_not_permitted = detail::eperm, + operation_not_supported = detail::eopnotsupp, + operation_would_block = detail::ewouldblock, + owner_dead = detail::eownerdead, + permission_denied = detail::eacces, + protocol_error = detail::eproto, + protocol_not_supported = detail::eprotonosupport, + read_only_file_system = detail::erofs, + resource_deadlock_would_occur = detail::edeadlk, + resource_unavailable_try_again = detail::eagain, + result_out_of_range = detail::erange, + state_not_recoverable = detail::enotrecoverable, + stream_timeout = detail::etime, + text_file_busy = detail::etxtbsy, + timed_out = detail::etimedout, + too_many_files_open_in_system = detail::enfile, + too_many_files_open = detail::emfile, + too_many_links = detail::emlink, + too_many_symbolic_link_levels = detail::eloop, + value_too_large = detail::eoverflow, + wrong_protocol_type = detail::eprototype +}; // end errc_t + +} // end namespace errc + + +/*! Specialization of \p is_error_condition_enum for \p errc::errc_t + */ +template<> struct is_error_condition_enum : public thrust::detail::true_type {}; + + +// [19.5.1.1] class error_category + +/*! \brief The class \p error_category serves as a base class for types used to identify the + * source and encoding of a particular category of error code. Classes may be derived + * from \p error_category to support categories of errors in addition to those defined + * in the C++ International Standard. + */ +class error_category +{ + public: + /*! Destructor does nothing. 
+ */ + inline virtual ~error_category(void); + + // XXX enable upon c++0x + // error_category(const error_category &) = delete; + // error_category &operator=(const error_category &) = delete; + + /*! \return A string naming the error category. + */ + inline virtual const char *name(void) const = 0; + + /*! \return \p error_condition(ev, *this). + */ + inline virtual error_condition default_error_condition(int ev) const; + + /*! \return default_error_condition(code) == condition + */ + inline virtual bool equivalent(int code, const error_condition &condition) const; + + /*! \return *this == code.category() && code.value() == condition + */ + inline virtual bool equivalent(const error_code &code, int condition) const; + + /*! \return A string that describes the error condition denoted by \p ev. + */ + virtual std::string message(int ev) const = 0; + + /*! \return this == &rhs + */ + inline bool operator==(const error_category &rhs) const; + + /*! \return !(*this == rhs) + */ + inline bool operator!=(const error_category &rhs) const; + + /*! \return less()(this, &rhs) + * \note \c less provides a total ordering for pointers. + */ + inline bool operator<(const error_category &rhs) const; +}; // end error_category + + +// [19.5.1.5] error_category objects + + +/*! \return A reference to an object of a type derived from class \p error_category. + * \note The object's \p default_error_condition and \p equivalent virtual functions + * shall behave as specified for the class \p error_category. The object's + * \p name virtual function shall return a pointer to the string "generic". + */ +inline const error_category &generic_category(void); + + +/*! \return A reference to an object of a type derived from class \p error_category. + * \note The object's \p equivalent virtual functions shall behave as specified for + * class \p error_category. The object's \p name virtual function shall return + * a pointer to the string "system".
The object's \p default_error_condition + * virtual function shall behave as follows: + * + * If the argument ev corresponds to a POSIX errno value + * \c posv, the function shall return error_condition(posv,generic_category()). + * Otherwise, the function shall return error_condition(ev,system_category()). + * What constitutes correspondence for any given operating system is unspecified. + */ +inline const error_category &system_category(void); + + +// [19.5.2] Class error_code + + +/*! \brief The class \p error_code describes an object used to hold error code values, such as + * those originating from the operating system or other low-level application program + * interfaces. + */ +class error_code +{ + public: + // [19.5.2.2] constructors: + + /*! Effects: Constructs an object of type \p error_code. + * \post value() == 0 and category() == system_category(). + */ + inline error_code(void); + + /*! Effects: Constructs an object of type \p error_code. + * \post value() == val and category() == cat. + */ + inline error_code(int val, const error_category &cat); + + /*! Effects: Constructs an object of type \p error_code. + * \post *this == make_error_code(e). + */ + template + error_code(ErrorCodeEnum e +// XXX WAR msvc's problem with enable_if +#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC + , typename thrust::detail::enable_if::value>::type * = 0 +#endif // THRUST_HOST_COMPILER_MSVC + ); + + // [19.5.2.3] modifiers: + + /*! \post value() == val and category() == cat. + */ + inline void assign(int val, const error_category &cat); + + /*! \post *this == make_error_code(e). + */ + template +// XXX WAR msvc's problem with enable_if +#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC + typename thrust::detail::enable_if::value, error_code>::type & +#else + error_code & +#endif // THRUST_HOST_COMPILER_MSVC + operator=(ErrorCodeEnum e); + + /*! \post value() == 0 and category() == system_category().
+ */ + inline void clear(void); + + // [19.5.2.4] observers: + + /*! \return An integral value of this \p error_code object. + */ + inline int value(void) const; + + /*! \return An \p error_category describing the category of this \p error_code object. + */ + inline const error_category &category(void) const; + + /*! \return category().default_error_condition(value()). + */ + inline error_condition default_error_condition(void) const; + + /*! \return category().message(value()). + */ + inline std::string message(void) const; + + // XXX replace the below upon c++0x + // inline explicit operator bool (void) const; + + /*! \return value() != 0. + */ + inline operator bool (void) const; + + /*! \cond + */ + private: + int m_val; + const error_category *m_cat; + /*! \endcond + */ +}; // end error_code + + +// [19.5.2.5] Class error_code non-member functions + + +// XXX replace errc::errc_t with errc upon c++0x +/*! \return error_code(static_cast(e), generic_category()) + */ +inline error_code make_error_code(errc::errc_t e); + + +/*! \return lhs.category() < rhs.category() || lhs.category() == rhs.category() && lhs.value() < rhs.value(). + */ +inline bool operator<(const error_code &lhs, const error_code &rhs); + + +/*! Effects: os << ec.category().name() << ':' << ec.value(). + */ +template + std::basic_ostream& + operator<<(std::basic_ostream& os, const error_code &ec); + + +// [19.5.3] class error_condition + + +/*! \brief The class \p error_condition describes an object used to hold values identifying + * error conditions. + * + * \note \p error_condition values are portable abstractions, while \p error_code values + * are implementation specific. + */ +class error_condition +{ + public: + // [19.5.3.2] constructors + + /*! Constructs an object of type \p error_condition. + * \post value() == 0. + * \post category() == generic_category(). + */ + inline error_condition(void); + + /*! Constructs an object of type \p error_condition. + * \post value() == val.
+ * \post category() == cat. + */ + inline error_condition(int val, const error_category &cat); + + /*! Constructs an object of type \p error_condition. + * \post *this == make_error_condition(e). + * \note This constructor shall not participate in overload resolution unless + * is_error_condition_enum::value is true. + */ + template + error_condition(ErrorConditionEnum e +// XXX WAR msvc's problem with enable_if +#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC + , typename thrust::detail::enable_if::value>::type * = 0 +#endif // THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC + ); + + // [19.5.3.3] modifiers + + /*! Assigns to this \p error_condition object from an error value and an \p error_category. + * \param val The new value to return from value(). + * \param cat The new \p error_category to return from category(). + * \post value() == val. + * \post category() == cat. + */ + inline void assign(int val, const error_category &cat); + + /*! Assigns to this \p error_condition object from an error condition enumeration. + * \return *this + * \post *this == make_error_condition(e). + * \note This operator shall not participate in overload resolution unless + * is_error_condition_enum::value is true. + */ + template +// XXX WAR msvc's problem with enable_if +#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC + typename thrust::detail::enable_if::value, error_condition>::type & +#else + error_condition & +#endif // THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC + operator=(ErrorConditionEnum e); + + /*! Clears this \p error_condition object. + * \post value() == 0 + * \post category() == generic_category(). + */ + inline void clear(void); + + // [19.5.3.4] observers + + /*! \return The value encoded by this \p error_condition. + */ + inline int value(void) const; + + /*! \return A const reference to the \p error_category encoded by this \p error_condition. + */ + inline const error_category &category(void) const; + + /*! \return category().message(value()).
+ */ + inline std::string message(void) const; + + // XXX replace below with this upon c++0x + //explicit operator bool (void) const; + + /*! \return value() != 0. + */ + inline operator bool (void) const; + + /*! \cond + */ + + private: + int m_val; + const error_category *m_cat; + + /*! \endcond + */ +}; // end error_condition + + + +// [19.5.3.5] Class error_condition non-member functions + +// XXX replace errc::errc_t with errc upon c++0x +/*! \return error_condition(static_cast(e), generic_category()). + */ +inline error_condition make_error_condition(errc::errc_t e); + + +/*! \return lhs.category() < rhs.category() || lhs.category() == rhs.category() && lhs.value() < rhs.value(). + */ +inline bool operator<(const error_condition &lhs, const error_condition &rhs); + + +// [19.5.4] Comparison operators + + +/*! \return lhs.category() == rhs.category() && lhs.value() == rhs.value(). + */ +inline bool operator==(const error_code &lhs, const error_code &rhs); + + +/*! \return lhs.category().equivalent(lhs.value(), rhs) || rhs.category().equivalent(lhs,rhs.value()). + */ +inline bool operator==(const error_code &lhs, const error_condition &rhs); + + +/*! \return rhs.category().equivalent(rhs.value(), lhs) || lhs.category().equivalent(rhs, lhs.value()). + */ +inline bool operator==(const error_condition &lhs, const error_code &rhs); + + +/*! \return lhs.category() == rhs.category() && lhs.value() == rhs.value() + */ +inline bool operator==(const error_condition &lhs, const error_condition &rhs); + + +/*! \return !(lhs == rhs) + */ +inline bool operator!=(const error_code &lhs, const error_code &rhs); + + +/*! \return !(lhs == rhs) + */ +inline bool operator!=(const error_code &lhs, const error_condition &rhs); + + +/*! \return !(lhs == rhs) + */ +inline bool operator!=(const error_condition &lhs, const error_code &rhs); + + +/*! \return !(lhs == rhs) + */ +inline bool operator!=(const error_condition &lhs, const error_condition &rhs); + +/*!
\} // end system_diagnostics + */ + + +} // end system + + +// import names into thrust:: +using system::error_category; +using system::error_code; +using system::error_condition; +using system::is_error_code_enum; +using system::is_error_condition_enum; +using system::make_error_code; +using system::make_error_condition; + +// XXX replace with using system::errc upon c++0x +namespace errc = system::errc; + +using system::generic_category; +using system::system_category; + +} // end thrust + +#include +#include +#include + diff --git a/compat/thrust/system/omp/detail/adjacent_difference.h b/compat/thrust/system/omp/detail/adjacent_difference.h new file mode 100644 index 0000000..0bbc188 --- /dev/null +++ b/compat/thrust/system/omp/detail/adjacent_difference.h @@ -0,0 +1,50 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + +template + OutputIterator adjacent_difference(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + BinaryFunction binary_op) +{ + // omp prefers generic::adjacent_difference to cpp::adjacent_difference + return thrust::system::detail::generic::adjacent_difference(exec, first, last, result, binary_op); +} // end adjacent_difference() + +} // end detail +} // end omp +} // end system +} // end thrust + diff --git a/compat/thrust/system/omp/detail/assign_value.h b/compat/thrust/system/omp/detail/assign_value.h new file mode 100644 index 0000000..eda3b97 --- /dev/null +++ b/compat/thrust/system/omp/detail/assign_value.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits assign_value +#include + diff --git a/compat/thrust/system/omp/detail/binary_search.h b/compat/thrust/system/omp/detail/binary_search.h new file mode 100644 index 0000000..254e6fd --- /dev/null +++ b/compat/thrust/system/omp/detail/binary_search.h @@ -0,0 +1,73 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + + +template +ForwardIterator lower_bound(execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + const T& value, + StrictWeakOrdering comp) +{ + // omp prefers generic::lower_bound to cpp::lower_bound + return thrust::system::detail::generic::lower_bound(exec, begin, end, value, comp); +} + + +template +ForwardIterator upper_bound(execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + const T& value, + StrictWeakOrdering comp) +{ + // omp prefers generic::upper_bound to cpp::upper_bound + return thrust::system::detail::generic::upper_bound(exec, begin, end, value, comp); +} + + +template +bool binary_search(execution_policy &exec, + ForwardIterator begin, + ForwardIterator end, + const T& value, + StrictWeakOrdering comp) +{ + // omp prefers generic::binary_search to cpp::binary_search + return thrust::system::detail::generic::binary_search(exec, begin, end, value, comp); +} + + +} // end detail +} // end omp +} // end system +} // end thrust + diff --git a/compat/thrust/system/omp/detail/copy.h b/compat/thrust/system/omp/detail/copy.h new file mode 100644 index 0000000..b23ac18 --- /dev/null +++ b/compat/thrust/system/omp/detail/copy.h @@ -0,0 +1,57 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + + +template +OutputIterator copy(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result); + + +template +OutputIterator copy_n(execution_policy &exec, + InputIterator first, + Size n, + OutputIterator result); + + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/omp/detail/copy.inl b/compat/thrust/system/omp/detail/copy.inl new file mode 100644 index 0000000..915ff92 --- /dev/null +++ b/compat/thrust/system/omp/detail/copy.inl @@ -0,0 +1,147 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ +namespace dispatch +{ + +template + OutputIterator copy(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + thrust::incrementable_traversal_tag) +{ + return thrust::system::cpp::detail::copy(exec, first, last, result); +} // end copy() + + +template + OutputIterator copy(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + thrust::random_access_traversal_tag) +{ + // XXX WAR problems reconciling unrelated types such as omp & tbb + // reinterpret iterators as the policy we were passed + // this ensures that generic::copy's implementation, which eventually results in + // zip_iterator works correctly + thrust::detail::tagged_iterator retagged_result(result); + + return thrust::system::detail::generic::copy(exec, thrust::reinterpret_tag(first), thrust::reinterpret_tag(last), retagged_result).base(); +} // end copy() + + +template + OutputIterator copy_n(execution_policy &exec, + InputIterator first, + Size n, + OutputIterator result, + thrust::incrementable_traversal_tag) +{ + return thrust::system::cpp::detail::copy_n(exec, first, n, result); +} // end copy_n() + + +template + OutputIterator copy_n(execution_policy &exec, + InputIterator first, + Size n, + OutputIterator result, + thrust::random_access_traversal_tag) +{ + // XXX WAR problems reconciling unrelated types such as omp & tbb + // reinterpret iterators as the policy we were passed + // this ensures that generic::copy's implementation, which eventually results in + // zip_iterator works correctly + thrust::detail::tagged_iterator retagged_result(result); + + return thrust::system::detail::generic::copy_n(exec, thrust::reinterpret_tag(first), n, retagged_result).base(); +} // end copy_n() + +} // end dispatch + + +template +OutputIterator 
copy(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result) +{ + typedef typename thrust::iterator_traversal::type traversal1; + typedef typename thrust::iterator_traversal::type traversal2; + + typedef typename thrust::detail::minimum_type::type traversal; + + // dispatch on minimum traversal + return thrust::system::omp::detail::dispatch::copy(exec, first,last,result,traversal()); +} // end copy() + + + +template +OutputIterator copy_n(execution_policy &exec, + InputIterator first, + Size n, + OutputIterator result) +{ + typedef typename thrust::iterator_traversal::type traversal1; + typedef typename thrust::iterator_traversal::type traversal2; + + typedef typename thrust::detail::minimum_type::type traversal; + + // dispatch on minimum traversal + return thrust::system::omp::detail::dispatch::copy_n(exec,first,n,result,traversal()); +} // end copy_n() + + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/omp/detail/copy_if.h b/compat/thrust/system/omp/detail/copy_if.h new file mode 100644 index 0000000..46754a9 --- /dev/null +++ b/compat/thrust/system/omp/detail/copy_if.h @@ -0,0 +1,51 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + + +template + OutputIterator copy_if(execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred); + + +} // end detail +} // end omp +} // end system +} // end thrust + +#include + diff --git a/compat/thrust/system/omp/detail/copy_if.inl b/compat/thrust/system/omp/detail/copy_if.inl new file mode 100644 index 0000000..1af6a21 --- /dev/null +++ b/compat/thrust/system/omp/detail/copy_if.inl @@ -0,0 +1,54 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + + +template + OutputIterator copy_if(execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred) +{ + // omp prefers generic::copy_if to cpp::copy_if + return thrust::system::detail::generic::copy_if(exec, first, last, stencil, result, pred); +} // end copy_if() + + +} // end detail +} // end omp +} // end system +} // end thrust + diff --git a/compat/thrust/system/omp/detail/count.h b/compat/thrust/system/omp/detail/count.h new file mode 100644 index 0000000..da31ee8 --- /dev/null +++ b/compat/thrust/system/omp/detail/count.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits count +#include + diff --git a/compat/thrust/system/omp/detail/default_decomposition.h b/compat/thrust/system/omp/detail/default_decomposition.h new file mode 100644 index 0000000..f1904c2 --- /dev/null +++ b/compat/thrust/system/omp/detail/default_decomposition.h @@ -0,0 +1,45 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file default_decomposition.h + * \brief Return a decomposition that is appropriate for the OpenMP backend. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + +template +thrust::system::detail::internal::uniform_decomposition default_decomposition(IndexType n); + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/omp/detail/default_decomposition.inl b/compat/thrust/system/omp/detail/default_decomposition.inl new file mode 100644 index 0000000..366b4f5 --- /dev/null +++ b/compat/thrust/system/omp/detail/default_decomposition.inl @@ -0,0 +1,56 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +// don't attempt to #include this file without omp support +#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE) +#include +#endif // omp support + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + +template +thrust::system::detail::internal::uniform_decomposition default_decomposition(IndexType n) +{ + // we're attempting to launch an omp kernel, assert we're compiling with omp support + // ======================================================================== + // X Note to the user: If you've found this line due to a compiler error, X + // X you need to enable OpenMP support in your compiler. X + // ======================================================================== + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + +#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE) + return thrust::system::detail::internal::uniform_decomposition(n, 1, omp_get_num_procs()); +#else + return thrust::system::detail::internal::uniform_decomposition(n, 1, 1); +#endif +} + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/omp/detail/equal.h b/compat/thrust/system/omp/detail/equal.h new file mode 100644 index 0000000..74e5518 --- /dev/null +++ b/compat/thrust/system/omp/detail/equal.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits equal +#include + diff --git a/compat/thrust/system/omp/detail/execution_policy.h b/compat/thrust/system/omp/detail/execution_policy.h new file mode 100644 index 0000000..1b06224 --- /dev/null +++ b/compat/thrust/system/omp/detail/execution_policy.h @@ -0,0 +1,110 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +// put the canonical tag in the same ns as the backend's entry points +namespace omp +{ +namespace detail +{ + +// this awkward sequence of definitions arises +// from the desire both for tag to derive +// from execution_policy and for execution_policy +// to convert to tag (when execution_policy is not +// an ancestor of tag) + +// forward declaration of tag +struct tag; + +// forward declaration of execution_policy +template struct execution_policy; + +// specialize execution_policy for tag +template<> + struct execution_policy + : thrust::system::cpp::detail::execution_policy +{}; + +// tag's definition comes before the +// generic definition of execution_policy +struct tag : execution_policy {}; + +// allow conversion to tag when it is not a successor +template + struct execution_policy + : thrust::system::cpp::detail::execution_policy +{ + // allow conversion to tag + inline operator tag () const + { + return tag(); + } +}; + + +// overloads of select_system + +// XXX select_system(tbb, omp) & select_system(omp, tbb) are ambiguous +// because both convert to cpp without these overloads, which we +// arbitrarily define in the omp backend + +template +inline __host__ __device__ + System1 select_system(execution_policy s, thrust::system::tbb::detail::execution_policy) +{ + return thrust::detail::derived_cast(s); +} // end select_system() + + +template +inline __host__ __device__ + System2 select_system(thrust::system::tbb::detail::execution_policy, execution_policy s) +{ + return thrust::detail::derived_cast(s); +} // end select_system() + + +} // end detail + +// alias execution_policy and tag here +using thrust::system::omp::detail::execution_policy; +using thrust::system::omp::detail::tag; + +} // end omp +} // end system + +// alias items at top-level +namespace omp +{ + +using thrust::system::omp::execution_policy; +using
thrust::system::omp::tag; + +} // end omp +} // end thrust + diff --git a/compat/thrust/system/omp/detail/extrema.h b/compat/thrust/system/omp/detail/extrema.h new file mode 100644 index 0000000..fb96770 --- /dev/null +++ b/compat/thrust/system/omp/detail/extrema.h @@ -0,0 +1,67 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + +template +ForwardIterator max_element(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) +{ + // omp prefers generic::max_element to cpp::max_element + return thrust::system::detail::generic::max_element(exec, first, last, comp); +} // end max_element() + +template +ForwardIterator min_element(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) +{ + // omp prefers generic::min_element to cpp::min_element + return thrust::system::detail::generic::min_element(exec, first, last, comp); +} // end min_element() + +template +thrust::pair minmax_element(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) +{ + // omp prefers generic::minmax_element to cpp::minmax_element + return thrust::system::detail::generic::minmax_element(exec, first, last, comp); +} // end minmax_element() + +} // end detail +} // end omp +} // end 
system +} // end thrust + + diff --git a/compat/thrust/system/omp/detail/fill.h b/compat/thrust/system/omp/detail/fill.h new file mode 100644 index 0000000..5219e1c --- /dev/null +++ b/compat/thrust/system/omp/detail/fill.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits fill +#include + diff --git a/compat/thrust/system/omp/detail/find.h b/compat/thrust/system/omp/detail/find.h new file mode 100644 index 0000000..a8dca5a --- /dev/null +++ b/compat/thrust/system/omp/detail/find.h @@ -0,0 +1,51 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file find.h + * \brief OpenMP implementation of find_if. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + +template +InputIterator find_if(execution_policy &exec, + InputIterator first, + InputIterator last, + Predicate pred) +{ + // omp prefers generic::find_if to cpp::find_if + return thrust::system::detail::generic::find_if(exec, first, last, pred); +} + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/omp/detail/for_each.h b/compat/thrust/system/omp/detail/for_each.h new file mode 100644 index 0000000..1030623 --- /dev/null +++ b/compat/thrust/system/omp/detail/for_each.h @@ -0,0 +1,60 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file for_each.h + * \brief Defines the interface for a function that executes a + * function or functional for each value in a given range. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + +template + RandomAccessIterator for_each(execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last, + UnaryFunction f); + +template + RandomAccessIterator for_each_n(execution_policy &exec, + RandomAccessIterator first, + Size n, + UnaryFunction f); + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/omp/detail/for_each.inl b/compat/thrust/system/omp/detail/for_each.inl new file mode 100644 index 0000000..c6ab827 --- /dev/null +++ b/compat/thrust/system/omp/detail/for_each.inl @@ -0,0 +1,97 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file for_each.inl + * \brief Inline file for for_each.h. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + +template +RandomAccessIterator for_each_n(execution_policy &, + RandomAccessIterator first, + Size n, + UnaryFunction f) +{ + // we're attempting to launch an omp kernel, assert we're compiling with omp support + // ======================================================================== + // X Note to the user: If you've found this line due to a compiler error, X + // X you need to enable OpenMP support in your compiler. X + // ======================================================================== + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + + if (n <= 0) return first; //empty range + + // create a wrapped function for f + typedef typename thrust::iterator_reference::type reference; + thrust::detail::host_function wrapped_f(f); + +// do not attempt to compile the body of this function, which depends on #pragma omp, +// without support from the compiler +// XXX implement the body of this function in another file to eliminate this ugliness +#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE) + // use a signed type for the iteration variable or suffer the consequences of warnings + typedef typename thrust::iterator_difference::type DifferenceType; + DifferenceType signed_n = n; +#pragma omp parallel for + for(DifferenceType i = 0; + i < signed_n; + ++i) + { + RandomAccessIterator temp = first + i; + wrapped_f(*temp); + } +#endif // THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE + + return first + n; +} // end for_each_n() + +template + RandomAccessIterator for_each(execution_policy &s, + RandomAccessIterator first, + RandomAccessIterator last, + UnaryFunction f) +{ + return omp::detail::for_each_n(s, first, thrust::distance(first,last), f); +} // end for_each() + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + diff 
--git a/compat/thrust/system/omp/detail/gather.h b/compat/thrust/system/omp/detail/gather.h new file mode 100644 index 0000000..dfb7d7f --- /dev/null +++ b/compat/thrust/system/omp/detail/gather.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits gather +#include + diff --git a/compat/thrust/system/omp/detail/generate.h b/compat/thrust/system/omp/detail/generate.h new file mode 100644 index 0000000..0cb33b9 --- /dev/null +++ b/compat/thrust/system/omp/detail/generate.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system inherits generate +#include + diff --git a/compat/thrust/system/omp/detail/get_value.h b/compat/thrust/system/omp/detail/get_value.h new file mode 100644 index 0000000..e376e65 --- /dev/null +++ b/compat/thrust/system/omp/detail/get_value.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits get_value +#include + diff --git a/compat/thrust/system/omp/detail/inner_product.h b/compat/thrust/system/omp/detail/inner_product.h new file mode 100644 index 0000000..351421a --- /dev/null +++ b/compat/thrust/system/omp/detail/inner_product.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system inherits inner_product +#include + diff --git a/compat/thrust/system/omp/detail/iter_swap.h b/compat/thrust/system/omp/detail/iter_swap.h new file mode 100644 index 0000000..16176ec --- /dev/null +++ b/compat/thrust/system/omp/detail/iter_swap.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits iter_swap +#include + diff --git a/compat/thrust/system/omp/detail/logical.h b/compat/thrust/system/omp/detail/logical.h new file mode 100644 index 0000000..b2a80de --- /dev/null +++ b/compat/thrust/system/omp/detail/logical.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system inherits logical +#include + diff --git a/compat/thrust/system/omp/detail/malloc_and_free.h b/compat/thrust/system/omp/detail/malloc_and_free.h new file mode 100644 index 0000000..811a552 --- /dev/null +++ b/compat/thrust/system/omp/detail/malloc_and_free.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits malloc and free +#include + diff --git a/compat/thrust/system/omp/detail/memory.inl b/compat/thrust/system/omp/detail/memory.inl new file mode 100644 index 0000000..7d53de6 --- /dev/null +++ b/compat/thrust/system/omp/detail/memory.inl @@ -0,0 +1,110 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ + + +template + template + reference & + reference + ::operator=(const reference &other) +{ + return super_t::operator=(other); +} // end reference::operator=() + +template + reference & + reference + ::operator=(const value_type &x) +{ + return super_t::operator=(x); +} // end reference::operator=() + +template +__host__ __device__ +void swap(reference a, reference b) +{ + a.swap(b); +} // end swap() + +namespace detail +{ + +// XXX circular #inclusion problems cause the compiler to believe that cpp::malloc +// is not defined +// WAR the problem by using adl to call cpp::malloc, which requires it to depend +// on a template parameter +template + pointer malloc_workaround(Tag t, std::size_t n) +{ + return pointer(malloc(t, n)); +} // end malloc_workaround() + +// XXX circular #inclusion problems cause the compiler to believe that cpp::free +// is not defined +// WAR the problem by using adl to call cpp::free, which requires it to depend +// on a template parameter +template + void free_workaround(Tag t, pointer ptr) +{ + free(t, ptr.get()); +} // end free_workaround() + +} // end detail + +inline pointer malloc(std::size_t n) +{ + // XXX this is how we'd like to implement this function, + // if not for circular #inclusion problems: + // + // return pointer(thrust::system::cpp::malloc(n)) + // + return detail::malloc_workaround(cpp::tag(), n); +} // end malloc() + +template +pointer malloc(std::size_t n) +{ + pointer raw_ptr = thrust::system::omp::malloc(sizeof(T) * n); + return pointer(reinterpret_cast(raw_ptr.get())); +} // end malloc() + +inline void free(pointer ptr) +{ + // XXX this is how we'd like to implement this function, + // if not for circular #inclusion problems: + // + // thrust::system::cpp::free(ptr) + // + detail::free_workaround(cpp::tag(), ptr); +} // end free() + +} // end omp +} // end system +} // end thrust + diff --git 
a/compat/thrust/system/omp/detail/merge.h b/compat/thrust/system/omp/detail/merge.h new file mode 100644 index 0000000..a7047aa --- /dev/null +++ b/compat/thrust/system/omp/detail/merge.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits merge +#include + diff --git a/compat/thrust/system/omp/detail/mismatch.h b/compat/thrust/system/omp/detail/mismatch.h new file mode 100644 index 0000000..03980cf --- /dev/null +++ b/compat/thrust/system/omp/detail/mismatch.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system inherits mismatch +#include + diff --git a/compat/thrust/system/omp/detail/par.h b/compat/thrust/system/omp/detail/par.h new file mode 100644 index 0000000..fa6d18e --- /dev/null +++ b/compat/thrust/system/omp/detail/par.h @@ -0,0 +1,66 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + + +struct par_t : thrust::system::omp::detail::execution_policy +{ + par_t() : thrust::system::omp::detail::execution_policy() {} + + template + thrust::detail::execute_with_allocator + operator()(Allocator &alloc) const + { + return thrust::detail::execute_with_allocator(alloc); + } +}; + + +} // end detail + + +static const detail::par_t par; + + +} // end omp +} // end system + + +// alias par here +namespace omp +{ + + +using thrust::system::omp::par; + + +} // end omp +} // end thrust + diff --git a/compat/thrust/system/omp/detail/partition.h b/compat/thrust/system/omp/detail/partition.h new file mode 100644 index 0000000..edcbc30 --- /dev/null +++ b/compat/thrust/system/omp/detail/partition.h @@ -0,0 +1,91 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file partition.h + * \brief OpenMP implementation of partition algorithms. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + + +template + ForwardIterator stable_partition(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred); + +template + ForwardIterator stable_partition(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred); + +template + thrust::pair + stable_partition_copy(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred); + +template + thrust::pair + stable_partition_copy(execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred); + + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/omp/detail/partition.inl b/compat/thrust/system/omp/detail/partition.inl new file mode 100644 index 0000000..da629e5 --- /dev/null +++ b/compat/thrust/system/omp/detail/partition.inl @@ -0,0 +1,108 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file partition.inl + * \brief Inline file for partition.h. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + + +template + ForwardIterator stable_partition(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + // omp prefers generic::stable_partition to cpp::stable_partition + return thrust::system::detail::generic::stable_partition(exec, first, last, pred); +} // end stable_partition() + + +template + ForwardIterator stable_partition(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred) +{ + // omp prefers generic::stable_partition to cpp::stable_partition + return thrust::system::detail::generic::stable_partition(exec, first, last, stencil, pred); +} // end stable_partition() + + +template + thrust::pair + stable_partition_copy(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + // omp prefers generic::stable_partition_copy to cpp::stable_partition_copy + return thrust::system::detail::generic::stable_partition_copy(exec, first, last, out_true, out_false, pred); +} // end stable_partition_copy() + + +template + thrust::pair + stable_partition_copy(execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + // omp prefers
generic::stable_partition_copy to cpp::stable_partition_copy + return thrust::system::detail::generic::stable_partition_copy(exec, first, last, stencil, out_true, out_false, pred); +} // end stable_partition_copy() + + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/omp/detail/reduce.h b/compat/thrust/system/omp/detail/reduce.h new file mode 100644 index 0000000..0cc5ceb --- /dev/null +++ b/compat/thrust/system/omp/detail/reduce.h @@ -0,0 +1,54 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file reduce.h + * \brief OpenMP implementation of reduce algorithms. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + + +template + OutputType reduce(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputType init, + BinaryFunction binary_op); + + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/omp/detail/reduce.inl b/compat/thrust/system/omp/detail/reduce.inl new file mode 100644 index 0000000..1347bfd --- /dev/null +++ b/compat/thrust/system/omp/detail/reduce.inl @@ -0,0 +1,72 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + + +template + OutputType reduce(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputType init, + BinaryFunction binary_op) +{ + typedef typename thrust::iterator_difference::type difference_type; + + const difference_type n = thrust::distance(first,last); + + // determine first and second level decomposition + thrust::system::detail::internal::uniform_decomposition decomp1 = thrust::system::omp::detail::default_decomposition(n); + thrust::system::detail::internal::uniform_decomposition decomp2(decomp1.size() + 1, 1, 1); + + // allocate storage for the initializer and partial sums + // XXX use select_system for Tag + thrust::detail::temporary_array partial_sums(exec, decomp1.size() + 1); + + // set first element of temp array to init + partial_sums[0] = init; + + // accumulate partial sums (first level reduction) + thrust::system::omp::detail::reduce_intervals(exec, first, partial_sums.begin() + 1, binary_op, decomp1); + + // reduce partial sums (second level reduction) + thrust::system::omp::detail::reduce_intervals(exec, partial_sums.begin(), partial_sums.begin(), binary_op, decomp2); + + return partial_sums[0]; +} // end reduce() + + +} // end detail +} // end omp +} // end system +} // end thrust + diff --git a/compat/thrust/system/omp/detail/reduce_by_key.h b/compat/thrust/system/omp/detail/reduce_by_key.h new file mode 100644 index 0000000..d7243ee --- /dev/null +++ b/compat/thrust/system/omp/detail/reduce_by_key.h @@ -0,0 +1,61 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file reduce_by_key.h + * \brief OpenMP implementation of reduce_by_key algorithms. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + + +template + thrust::pair + reduce_by_key(execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred, + BinaryFunction binary_op); + + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/omp/detail/reduce_by_key.inl b/compat/thrust/system/omp/detail/reduce_by_key.inl new file mode 100644 index 0000000..91402d8 --- /dev/null +++ b/compat/thrust/system/omp/detail/reduce_by_key.inl @@ -0,0 +1,57 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + +template + thrust::pair + reduce_by_key(execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred, + BinaryFunction binary_op) +{ + // omp prefers generic::reduce_by_key to cpp::reduce_by_key + return thrust::system::detail::generic::reduce_by_key(exec, keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op); +} // end reduce_by_key() + + +} // end detail +} // end omp +} // end system +} // end thrust + diff --git a/compat/thrust/system/omp/detail/reduce_intervals.h b/compat/thrust/system/omp/detail/reduce_intervals.h new file mode 100644 index 0000000..7bce207 --- /dev/null +++ b/compat/thrust/system/omp/detail/reduce_intervals.h @@ -0,0 +1,53 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file reduce_intervals.h + * \brief OpenMP implementations of reduce_intervals algorithms. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + +template +void reduce_intervals(execution_policy &exec, + InputIterator input, + OutputIterator output, + BinaryFunction binary_op, + Decomposition decomp); + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/omp/detail/reduce_intervals.inl b/compat/thrust/system/omp/detail/reduce_intervals.inl new file mode 100644 index 0000000..0752b8a --- /dev/null +++ b/compat/thrust/system/omp/detail/reduce_intervals.inl @@ -0,0 +1,93 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + +template +void reduce_intervals(execution_policy &, + InputIterator input, + OutputIterator output, + BinaryFunction binary_op, + Decomposition decomp) +{ + // we're attempting to launch an omp kernel, assert we're compiling with omp support + // ======================================================================== + // X Note to the user: If you've found this line due to a compiler error, X + // X you need to enable OpenMP support in your compiler. 
X + // ======================================================================== + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + +#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE) + typedef typename thrust::iterator_value::type OutputType; + + // wrap binary_op + thrust::detail::host_function wrapped_binary_op(binary_op); + + typedef thrust::detail::intptr_t index_type; + + index_type n = static_cast(decomp.size()); + +#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE) +# pragma omp parallel for +#endif // THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE + for(index_type i = 0; i < n; i++) + { + InputIterator begin = input + decomp[i].begin(); + InputIterator end = input + decomp[i].end(); + + if (begin != end) + { + OutputType sum = thrust::raw_reference_cast(*begin); + + ++begin; + + while (begin != end) + { + sum = wrapped_binary_op(sum, *begin); + ++begin; + } + + OutputIterator tmp = output + i; + *tmp = sum; + } + } +#endif // THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE +} + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/omp/detail/remove.h b/compat/thrust/system/omp/detail/remove.h new file mode 100644 index 0000000..ebcb496 --- /dev/null +++ b/compat/thrust/system/omp/detail/remove.h @@ -0,0 +1,81 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + +template + ForwardIterator remove_if(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred); + + +template + ForwardIterator remove_if(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred); + + +template + OutputIterator remove_copy_if(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred); + + +template + OutputIterator remove_copy_if(execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred); + + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/omp/detail/remove.inl b/compat/thrust/system/omp/detail/remove.inl new file mode 100644 index 0000000..c056f96 --- /dev/null +++ b/compat/thrust/system/omp/detail/remove.inl @@ -0,0 +1,94 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + +template + ForwardIterator remove_if(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + // omp prefers generic::remove_if to cpp::remove_if + return thrust::system::detail::generic::remove_if(exec, first, last, pred); +} + + +template + ForwardIterator remove_if(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred) +{ + // omp prefers generic::remove_if to cpp::remove_if + return thrust::system::detail::generic::remove_if(exec, first, last, stencil, pred); +} + + +template + OutputIterator remove_copy_if(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred) +{ + // omp prefers generic::remove_copy_if to cpp::remove_copy_if + return thrust::system::detail::generic::remove_copy_if(exec, first, last, result, pred); +} + +template + OutputIterator remove_copy_if(execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred) +{ + // omp prefers generic::remove_copy_if to cpp::remove_copy_if + return thrust::system::detail::generic::remove_copy_if(exec, first, last, stencil, result, pred); +} + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/omp/detail/replace.h b/compat/thrust/system/omp/detail/replace.h new file mode 100644 index 0000000..c48555d --- /dev/null +++ b/compat/thrust/system/omp/detail/replace.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits this algorithm +#include + diff --git a/compat/thrust/system/omp/detail/reverse.h b/compat/thrust/system/omp/detail/reverse.h new file mode 100644 index 0000000..04923d1 --- /dev/null +++ b/compat/thrust/system/omp/detail/reverse.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits reverse +#include + diff --git a/compat/thrust/system/omp/detail/scan.h b/compat/thrust/system/omp/detail/scan.h new file mode 100644 index 0000000..c105951 --- /dev/null +++ b/compat/thrust/system/omp/detail/scan.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits scan +#include + diff --git a/compat/thrust/system/omp/detail/scan_by_key.h b/compat/thrust/system/omp/detail/scan_by_key.h new file mode 100644 index 0000000..bfbd5d6 --- /dev/null +++ b/compat/thrust/system/omp/detail/scan_by_key.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits this algorithm +#include + diff --git a/compat/thrust/system/omp/detail/scatter.h b/compat/thrust/system/omp/detail/scatter.h new file mode 100644 index 0000000..c48555d --- /dev/null +++ b/compat/thrust/system/omp/detail/scatter.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits this algorithm +#include + diff --git a/compat/thrust/system/omp/detail/sequence.h b/compat/thrust/system/omp/detail/sequence.h new file mode 100644 index 0000000..811d8f5 --- /dev/null +++ b/compat/thrust/system/omp/detail/sequence.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits sequence +#include + diff --git a/compat/thrust/system/omp/detail/set_operations.h b/compat/thrust/system/omp/detail/set_operations.h new file mode 100644 index 0000000..687edb2 --- /dev/null +++ b/compat/thrust/system/omp/detail/set_operations.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits set_operations +#include + diff --git a/compat/thrust/system/omp/detail/sort.h b/compat/thrust/system/omp/detail/sort.h new file mode 100644 index 0000000..9a480f2 --- /dev/null +++ b/compat/thrust/system/omp/detail/sort.h @@ -0,0 +1,55 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + +template +void stable_sort(execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp); + +template +void stable_sort_by_key(execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp); + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/omp/detail/sort.inl b/compat/thrust/system/omp/detail/sort.inl new file mode 100644 index 0000000..ab4f4a1 --- /dev/null +++ b/compat/thrust/system/omp/detail/sort.inl @@ -0,0 +1,249 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#include + +// don't attempt to #include this file without omp support +#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE) +#include +#endif // omp support + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ +namespace sort_detail +{ + + +template +void inplace_merge(execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator middle, + RandomAccessIterator last, + StrictWeakOrdering comp) +{ + typedef typename thrust::iterator_value::type value_type; + + thrust::detail::temporary_array a(exec, first, middle); + thrust::detail::temporary_array b(exec, middle, last); + + thrust::system::cpp::detail::merge(exec, a.begin(), a.end(), b.begin(), b.end(), first, comp); +} + + +template +void inplace_merge_by_key(execution_policy &exec, + RandomAccessIterator1 first1, + RandomAccessIterator1 middle1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + StrictWeakOrdering comp) +{ + typedef typename thrust::iterator_value::type value_type1; + typedef typename thrust::iterator_value::type value_type2; + + RandomAccessIterator2 middle2 = first2 + (middle1 - first1); + RandomAccessIterator2 last2 = first2 + (last1 - first1); + + thrust::detail::temporary_array lhs1(exec, first1, middle1); + thrust::detail::temporary_array rhs1(exec, middle1, last1); + thrust::detail::temporary_array lhs2(exec, first2, middle2); + thrust::detail::temporary_array rhs2(exec, middle2, last2); + + thrust::system::cpp::detail::merge_by_key + (exec, + lhs1.begin(), lhs1.end(), rhs1.begin(), rhs1.end(), + lhs2.begin(), rhs2.begin(), + first1, first2, comp); +} + + +} // end namespace sort_detail + + +template +void stable_sort(execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp) +{ + // we're attempting to launch an omp kernel, assert we're compiling with omp support + // 
======================================================================== + // X Note to the user: If you've found this line due to a compiler error, X + // X you need to enable OpenMP support in your compiler. X + // ======================================================================== + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + +#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE) + typedef typename thrust::iterator_difference::type IndexType; + + if (first == last) + return; + + #pragma omp parallel + { + thrust::system::detail::internal::uniform_decomposition decomp(last - first, 1, omp_get_num_threads()); + + // index of this thread within the OpenMP team (not an OS process id) + IndexType p_i = omp_get_thread_num(); + + // every thread sorts its own tile + if (p_i < decomp.size()) + { + thrust::system::cpp::detail::stable_sort(exec, + first + decomp[p_i].begin(), + first + decomp[p_i].end(), + comp); + } + + #pragma omp barrier + + IndexType nseg = decomp.size(); + IndexType h = 2; + + // tiles a..b are the sorted run this thread currently owns; c is the last tile of the neighbouring run to merge in + IndexType a=p_i, b=p_i, c=p_i+1; + + while( nseg>1 ) + { + if(c >= decomp.size()) + c = decomp.size() - 1; + + if((p_i % h) == 0 && c > b) + { + thrust::system::omp::detail::sort_detail::inplace_merge + (exec, + first + decomp[a].begin(), + first + decomp[b].end(), + first + decomp[c].end(), + comp); + b = c; + c += h; + } + + nseg = (nseg + 1) / 2; + h *= 2; + + #pragma omp barrier + } + } +#endif // THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE +} + + +template +void stable_sort_by_key(execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp) +{ + // we're attempting to launch an omp kernel, assert we're compiling with omp support + // ======================================================================== + // X Note to the user: If you've found this line due to a compiler error, X + // X you need to enable OpenMP support in your compiler. 
X + // ======================================================================== + THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); + +#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE) + typedef typename thrust::iterator_difference::type IndexType; + + if (keys_first == keys_last) + return; + + #pragma omp parallel + { + thrust::system::detail::internal::uniform_decomposition decomp(keys_last - keys_first, 1, omp_get_num_threads()); + + // index of this thread within the OpenMP team (not an OS process id) + IndexType p_i = omp_get_thread_num(); + + // every thread sorts its own tile + if (p_i < decomp.size()) + { + thrust::system::cpp::detail::stable_sort_by_key(exec, + keys_first + decomp[p_i].begin(), + keys_first + decomp[p_i].end(), + values_first + decomp[p_i].begin(), + comp); + } + + #pragma omp barrier + + IndexType nseg = decomp.size(); + IndexType h = 2; + + // tiles a..b are the sorted run this thread currently owns; c is the last tile of the neighbouring run to merge in + IndexType a=p_i, b=p_i, c=p_i+1; + + while( nseg>1 ) + { + if(c >= decomp.size()) + c = decomp.size() - 1; + + if((p_i % h) == 0 && c > b) + { + thrust::system::omp::detail::sort_detail::inplace_merge_by_key + (exec, + keys_first + decomp[a].begin(), + keys_first + decomp[b].end(), + keys_first + decomp[c].end(), + values_first + decomp[a].begin(), + comp); + b = c; + c += h; + } + + nseg = (nseg + 1) / 2; + h *= 2; + + #pragma omp barrier + } + } +#endif // THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE +} + + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/omp/detail/swap_ranges.h b/compat/thrust/system/omp/detail/swap_ranges.h new file mode 100644 index 0000000..e683aaa --- /dev/null +++ b/compat/thrust/system/omp/detail/swap_ranges.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// omp inherits swap_ranges +#include + diff --git a/compat/thrust/system/omp/detail/tabulate.h b/compat/thrust/system/omp/detail/tabulate.h new file mode 100644 index 0000000..da65d8e --- /dev/null +++ b/compat/thrust/system/omp/detail/tabulate.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits tabulate +#include + diff --git a/compat/thrust/system/omp/detail/temporary_buffer.h b/compat/thrust/system/omp/detail/temporary_buffer.h new file mode 100644 index 0000000..628bd75 --- /dev/null +++ b/compat/thrust/system/omp/detail/temporary_buffer.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special temporary buffer functions + diff --git a/compat/thrust/system/omp/detail/transform.h b/compat/thrust/system/omp/detail/transform.h new file mode 100644 index 0000000..70ce1f4 --- /dev/null +++ b/compat/thrust/system/omp/detail/transform.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// omp inherits transform +#include + diff --git a/compat/thrust/system/omp/detail/transform_reduce.h b/compat/thrust/system/omp/detail/transform_reduce.h new file mode 100644 index 0000000..23ed070 --- /dev/null +++ b/compat/thrust/system/omp/detail/transform_reduce.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits transform_reduce +#include + diff --git a/compat/thrust/system/omp/detail/transform_scan.h b/compat/thrust/system/omp/detail/transform_scan.h new file mode 100644 index 0000000..fc2e55d --- /dev/null +++ b/compat/thrust/system/omp/detail/transform_scan.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits transform_scan +#include + diff --git a/compat/thrust/system/omp/detail/uninitialized_copy.h b/compat/thrust/system/omp/detail/uninitialized_copy.h new file mode 100644 index 0000000..944f4ba --- /dev/null +++ b/compat/thrust/system/omp/detail/uninitialized_copy.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits uninitialized_copy +#include + diff --git a/compat/thrust/system/omp/detail/uninitialized_fill.h b/compat/thrust/system/omp/detail/uninitialized_fill.h new file mode 100644 index 0000000..b9d6de2 --- /dev/null +++ b/compat/thrust/system/omp/detail/uninitialized_fill.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits uninitialized_fill +#include + diff --git a/compat/thrust/system/omp/detail/unique.h b/compat/thrust/system/omp/detail/unique.h new file mode 100644 index 0000000..60c617b --- /dev/null +++ b/compat/thrust/system/omp/detail/unique.h @@ -0,0 +1,59 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + + +template + ForwardIterator unique(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate binary_pred); + + +template + OutputIterator unique_copy(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator output, + BinaryPredicate binary_pred); + + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/omp/detail/unique.inl b/compat/thrust/system/omp/detail/unique.inl new file mode 100644 index 0000000..d66ac3b --- /dev/null +++ b/compat/thrust/system/omp/detail/unique.inl @@ -0,0 +1,66 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + + +template + ForwardIterator unique(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate binary_pred) +{ + // omp prefers generic::unique to cpp::unique + return thrust::system::detail::generic::unique(exec,first,last,binary_pred); +} // end unique() + + +template + OutputIterator unique_copy(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator output, + BinaryPredicate binary_pred) +{ + // omp prefers generic::unique_copy to cpp::unique_copy + return thrust::system::detail::generic::unique_copy(exec,first,last,output,binary_pred); +} // end unique_copy() + + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/omp/detail/unique_by_key.h b/compat/thrust/system/omp/detail/unique_by_key.h new file mode 100644 index 0000000..8fdde66 --- /dev/null +++ b/compat/thrust/system/omp/detail/unique_by_key.h @@ -0,0 +1,67 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + + +template + thrust::pair + unique_by_key(execution_policy &exec, + ForwardIterator1 keys_first, + ForwardIterator1 keys_last, + ForwardIterator2 values_first, + BinaryPredicate binary_pred); + + +template + thrust::pair + unique_by_key_copy(execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred); + + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/omp/detail/unique_by_key.inl b/compat/thrust/system/omp/detail/unique_by_key.inl new file mode 100644 index 0000000..644b5ed --- /dev/null +++ b/compat/thrust/system/omp/detail/unique_by_key.inl @@ -0,0 +1,74 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ +namespace detail +{ + + +template + thrust::pair + unique_by_key(execution_policy &exec, + ForwardIterator1 keys_first, + ForwardIterator1 keys_last, + ForwardIterator2 values_first, + BinaryPredicate binary_pred) +{ + // omp prefers generic::unique_by_key to cpp::unique_by_key + return thrust::system::detail::generic::unique_by_key(exec,keys_first,keys_last,values_first,binary_pred); +} // end unique_by_key() + + +template + thrust::pair + unique_by_key_copy(execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred) +{ + // omp prefers generic::unique_by_key_copy to cpp::unique_by_key_copy + return thrust::system::detail::generic::unique_by_key_copy(exec,keys_first,keys_last,values_first,keys_output,values_output,binary_pred); +} // end unique_by_key_copy() + + +} // end namespace detail +} // end namespace omp +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/omp/detail/vector.inl b/compat/thrust/system/omp/detail/vector.inl new file mode 100644 index 0000000..32c845c --- /dev/null +++ b/compat/thrust/system/omp/detail/vector.inl @@ -0,0 +1,97 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ + +template + vector + ::vector() + : super_t() +{} + +template + vector + ::vector(size_type n) + : super_t(n) +{} + +template + vector + ::vector(size_type n, const value_type &value) + : super_t(n,value) +{} + +template + vector + ::vector(const vector &x) + : super_t(x) +{} + +template + template + vector + ::vector(const thrust::detail::vector_base &x) + : super_t(x) +{} + +template + template + vector + ::vector(const std::vector &x) + : super_t(x) +{} + +template + template + vector + ::vector(InputIterator first, InputIterator last) + : super_t(first,last) +{} + +template + template + vector & + vector + ::operator=(const std::vector &x) +{ + super_t::operator=(x); + return *this; +} + +template + template + vector & + vector + ::operator=(const thrust::detail::vector_base &x) +{ + super_t::operator=(x); + return *this; +} + +} // end omp +} // end system +} // end thrust + diff --git a/compat/thrust/system/omp/execution_policy.h b/compat/thrust/system/omp/execution_policy.h new file mode 100644 index 0000000..7d5d1d8 --- /dev/null +++ b/compat/thrust/system/omp/execution_policy.h @@ -0,0 +1,156 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +/*! \file thrust/system/omp/execution_policy.h + * \brief Execution policies for Thrust's OpenMP system. 
+ */ + +#include + +// get the execution policies definitions first +#include + +// get the definition of par +#include + +// now get all the algorithm definitions + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +// define these entities here for the purpose of Doxygenating them +// they are actually defined elsewhere +#if 0 +namespace thrust +{ +namespace system +{ +namespace omp +{ + + +/*! \addtogroup execution_policies + * \{ + */ + + +/*! \p thrust::omp::execution_policy is the base class for all Thrust parallel execution + * policies which are derived from Thrust's OpenMP backend system. + */ +template +struct execution_policy : thrust::execution_policy +{}; + + +/*! \p omp::tag is a type representing Thrust's standard C++ backend system in C++'s type system. + * Iterators "tagged" with a type which is convertible to \p omp::tag assert that they may be + * "dispatched" to algorithm implementations in the \p omp system. + */ +struct tag : thrust::system::omp::execution_policy { unspecified }; + + +/*! \p thrust::omp::par is the parallel execution policy associated with Thrust's OpenMP + * backend system. + * + * Instead of relying on implicit algorithm dispatch through iterator system tags, users may + * directly target Thrust's OpenMP backend system by providing \p thrust::omp::par as an algorithm + * parameter. + * + * Explicit dispatch can be useful in avoiding the introduction of data copies into containers such + * as \p thrust::omp::vector. + * + * The type of \p thrust::omp::par is implementation-defined. 
+ * + * The following code snippet demonstrates how to use \p thrust::omp::par to explicitly dispatch an + * invocation of \p thrust::for_each to the OpenMP backend system: + * + * \code + * #include + * #include + * #include + * + * struct printf_functor + * { + * __host__ __device__ + * void operator()(int x) + * { + * printf("%d\n", x); + * } + * }; + * ... + * int vec[3]; + * vec[0] = 0; vec[1] = 1; vec[2] = 2; + * + * thrust::for_each(thrust::omp::par, vec, vec + 3, printf_functor()); + * + * // 0 1 2 is printed to standard output in some unspecified order + * \endcode + */ +static const unspecified par; + + +/*! \} + */ + + +} // end omp +} // end system +} // end thrust +#endif + + diff --git a/compat/thrust/system/omp/memory.h b/compat/thrust/system/omp/memory.h new file mode 100644 index 0000000..0a23434 --- /dev/null +++ b/compat/thrust/system/omp/memory.h @@ -0,0 +1,414 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/omp/memory.h + * \brief Managing memory associated with Thrust's OpenMP system. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace omp +{ + +template class pointer; + +} // end omp +} // end system +} // end thrust + + +/*! 
\cond + */ + +// specialize std::iterator_traits to avoid problems with the name of +// pointer's constructor shadowing its nested pointer type +// do this before pointer is defined so the specialization is correctly +// used inside the definition +namespace std +{ + +template + struct iterator_traits > +{ + private: + typedef thrust::system::omp::pointer ptr; + + public: + typedef typename ptr::iterator_category iterator_category; + typedef typename ptr::value_type value_type; + typedef typename ptr::difference_type difference_type; + typedef ptr pointer; + typedef typename ptr::reference reference; +}; // end iterator_traits + +} // end std + +/*! \endcond + */ + + +namespace thrust +{ +namespace system +{ + +/*! \addtogroup system_backends Systems + * \ingroup system + * \{ + */ + +/*! \namespace thrust::system::omp + * \brief \p thrust::system::omp is the namespace containing functionality for allocating, manipulating, + * and deallocating memory available to Thrust's OpenMP backend system. + * The identifiers are provided in a separate namespace underneath thrust::system + * for import convenience but are also aliased in the top-level thrust::omp + * namespace for easy access. + * + */ +namespace omp +{ + +// forward declaration of reference for pointer +template class reference; + +/*! \cond + */ + +// XXX nvcc + msvc have trouble instantiating reference below +// this is a workaround +namespace detail +{ + +template + struct reference_msvc_workaround +{ + typedef thrust::system::omp::reference type; +}; // end reference_msvc_workaround + +} // end detail + +/*! \endcond + */ + + +/*! \p pointer stores a pointer to an object allocated in memory available to the omp system. + * This type provides type safety when dispatching standard algorithms on ranges resident + * in omp memory. + * + * \p pointer has pointer semantics: it may be dereferenced and manipulated with pointer arithmetic. 
+ * + * \p pointer can be created with the function \p omp::malloc, or by explicitly calling its constructor + * with a raw pointer. + * + * The raw pointer encapsulated by a \p pointer may be obtained by either its get member function + * or the \p raw_pointer_cast function. + * + * \note \p pointer is not a "smart" pointer; it is the programmer's responsibility to deallocate memory + * pointed to by \p pointer. + * + * \tparam T specifies the type of the pointee. + * + * \see omp::malloc + * \see omp::free + * \see raw_pointer_cast + */ +template + class pointer + : public thrust::pointer< + T, + thrust::system::omp::tag, + thrust::system::omp::reference, + thrust::system::omp::pointer + > +{ + /*! \cond + */ + + private: + typedef thrust::pointer< + T, + thrust::system::omp::tag, + //thrust::system::omp::reference, + typename detail::reference_msvc_workaround::type, + thrust::system::omp::pointer + > super_t; + + /*! \endcond + */ + + public: + // note that omp::pointer's member functions need __host__ __device__ + // to interoperate with nvcc + iterators' dereference member function + + /*! \p pointer's no-argument constructor initializes its encapsulated pointer to \c 0. + */ + __host__ __device__ + pointer() : super_t() {} + + /*! This constructor allows construction of a pointer from a T*. + * + * \param ptr A raw pointer to copy from, presumed to point to a location in memory + * accessible by the \p omp system. + * \tparam OtherT \p OtherT shall be convertible to \p T. + */ + template + __host__ __device__ + explicit pointer(OtherT *ptr) : super_t(ptr) {} + + /*! This constructor allows construction from another pointer-like object with related type. + * + * \param other The \p OtherPointer to copy. + * \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible + * to \p thrust::system::omp::tag and its element type shall be convertible to \p T. 
+ */ + template + __host__ __device__ + pointer(const OtherPointer &other, + typename thrust::detail::enable_if_pointer_is_convertible< + OtherPointer, + pointer + >::type * = 0) : super_t(other) {} + + /*! Assignment operator allows assigning from another pointer-like object with related type. + * + * \param other The other pointer-like object to assign from. + * \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible + * to \p thrust::system::omp::tag and its element type shall be convertible to \p T. + */ + template + __host__ __device__ + typename thrust::detail::enable_if_pointer_is_convertible< + OtherPointer, + pointer, + pointer & + >::type + operator=(const OtherPointer &other) + { + return super_t::operator=(other); + } +}; // end pointer + + +/*! \p reference is a wrapped reference to an object stored in memory available to the \p omp system. + * \p reference is the type of the result of dereferencing a \p omp::pointer. + * + * \tparam T Specifies the type of the referenced object. + */ +template + class reference + : public thrust::reference< + T, + thrust::system::omp::pointer, + thrust::system::omp::reference + > +{ + /*! \cond + */ + + private: + typedef thrust::reference< + T, + thrust::system::omp::pointer, + thrust::system::omp::reference + > super_t; + + /*! \endcond + */ + + public: + /*! \cond + */ + + typedef typename super_t::value_type value_type; + typedef typename super_t::pointer pointer; + + /*! \endcond + */ + + /*! This constructor initializes this \p reference to refer to an object + * pointed to by the given \p pointer. After this \p reference is constructed, + * it shall refer to the object pointed to by \p ptr. + * + * \param ptr A \p pointer to copy from. + */ + __host__ __device__ + explicit reference(const pointer &ptr) + : super_t(ptr) + {} + + /*! This constructor accepts a const reference to another \p reference of related type. 
+ * After this \p reference is constructed, it shall refer to the same object as \p other. + * + * \param other A \p reference to copy from. + * \tparam OtherT The element type of the other \p reference. + * + * \note This constructor is templated primarily to allow initialization of reference + * from reference. + */ + template + __host__ __device__ + reference(const reference &other, + typename thrust::detail::enable_if_convertible< + typename reference::pointer, + pointer + >::type * = 0) + : super_t(other) + {} + + /*! Copy assignment operator copy assigns from another \p reference of related type. + * + * \param other The other \p reference to assign from. + * \return *this + * \tparam OtherT The element type of the other \p reference. + */ + template + reference &operator=(const reference &other); + + /*! Assignment operator assigns from a \p value_type. + * + * \param x The \p value_type to assign from. + * \return *this + */ + reference &operator=(const value_type &x); +}; // end reference + +/*! Exchanges the values of two objects referred to by \p reference. + * \param x The first \p reference of interest. + * \param y The second \p reference of interest. + */ +template +__host__ __device__ +void swap(reference x, reference y); + +/*! Allocates an area of memory available to Thrust's omp system. + * \param n Number of bytes to allocate. + * \return A omp::pointer pointing to the beginning of the newly + * allocated memory. A null omp::pointer is returned if + * an error occurs. + * \note The omp::pointer returned by this function must be + * deallocated with \p omp::free. + * \see omp::free + * \see std::malloc + */ +inline pointer malloc(std::size_t n); + +/*! Allocates a typed area of memory available to Thrust's omp system. + * \param n Number of elements to allocate. + * \return A omp::pointer pointing to the beginning of the newly + * allocated memory. A null omp::pointer is returned if + * an error occurs. 
+ * \note The omp::pointer returned by this function must be + * deallocated with \p omp::free. + * \see omp::free + * \see std::malloc + */ +template +inline pointer malloc(std::size_t n); + +/*! Deallocates an area of memory previously allocated by omp::malloc. + * \param ptr A omp::pointer pointing to the beginning of an area + * of memory previously allocated with omp::malloc. + * \see omp::malloc + * \see std::free + */ +inline void free(pointer ptr); + +// XXX upon c++11 +// template using allocator = thrust::detail::malloc_allocator >; + +/*! \p omp::allocator is the default allocator used by the \p omp system's containers such as + * omp::vector if no user-specified allocator is provided. \p omp::allocator allocates + * (deallocates) storage with \p omp::malloc (\p omp::free). + */ +template + struct allocator + : thrust::detail::malloc_allocator< + T, + tag, + pointer + > +{ + /*! The \p rebind metafunction provides the type of an \p allocator + * instantiated with another type. + * + * \tparam U The other type to use for instantiation. + */ + template + struct rebind + { + /*! The typedef \p other gives the type of the rebound \p allocator. + */ + typedef allocator other; + }; + + /*! No-argument constructor has no effect. + */ + __host__ __device__ + inline allocator() {} + + /*! Copy constructor has no effect. + */ + __host__ __device__ + inline allocator(const allocator &) {} + + /*! Constructor from other \p allocator has no effect. + */ + template + __host__ __device__ + inline allocator(const allocator &) {} + + /*! Destructor has no effect. + */ + __host__ __device__ + inline ~allocator() {} +}; // end allocator + +} // end omp + +/*! \} + */ + +} // end system + +/*! \namespace thrust::omp + * \brief \p thrust::omp is a top-level alias for thrust::system::omp. 
+ */ +namespace omp +{ + +using thrust::system::omp::pointer; +using thrust::system::omp::reference; +using thrust::system::omp::malloc; +using thrust::system::omp::free; +using thrust::system::omp::allocator; + +} // end omp + +} // end thrust + +#include + diff --git a/compat/thrust/system/omp/vector.h b/compat/thrust/system/omp/vector.h new file mode 100644 index 0000000..5f45a91 --- /dev/null +++ b/compat/thrust/system/omp/vector.h @@ -0,0 +1,149 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/omp/vector.h + * \brief A dynamically-sizable array of elements which reside in memory available to + * Thrust's OpenMP system. + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ + +// forward declaration of host_vector +// XXX why is this here? it doesn't seem necessary for anything below +template class host_vector; + +namespace system +{ +namespace omp +{ + +// XXX upon c++11 +// template > using vector = thrust::detail::vector_base; + +/*! \p omp::vector is a container that supports random access to elements, + * constant time removal of elements at the end, and linear time insertion + * and removal of elements at the beginning or in the middle. The number of + * elements in a \p omp::vector may vary dynamically; memory management is + * automatic. The elements contained in an \p omp::vector reside in memory + * available to the \p omp system. 
+ * + * \tparam T The element type of the \p omp::vector. + * \tparam Allocator The allocator type of the \p omp::vector. Defaults to \p omp::allocator. + * + * \see http://www.sgi.com/tech/stl/Vector.html + * \see host_vector For the documentation of the complete interface which is + * shared by \p omp::vector + * \see device_vector + */ +template > + class vector + : public thrust::detail::vector_base +{ + /*! \cond + */ + private: + typedef thrust::detail::vector_base super_t; + /*! \endcond + */ + + public: + + /*! \cond + */ + typedef typename super_t::size_type size_type; + typedef typename super_t::value_type value_type; + /*! \endcond + */ + + /*! This constructor creates an empty \p omp::vector. + */ + vector(); + + /*! This constructor creates a \p omp::vector with \p n default-constructed elements. + * \param n The size of the \p omp::vector to create. + */ + explicit vector(size_type n); + + /*! This constructor creates a \p omp::vector with \p n copies of \p value. + * \param n The size of the \p omp::vector to create. + * \param value An element to copy. + */ + explicit vector(size_type n, const value_type &value); + + /*! Copy constructor copies from another \p omp::vector. + * \param x The other \p omp::vector to copy. + */ + vector(const vector &x); + + /*! This constructor copies from another Thrust vector-like object. + * \param x The other object to copy from. + */ + template + vector(const thrust::detail::vector_base &x); + + /*! This constructor copies from a \c std::vector. + * \param x The \c std::vector to copy from. + */ + template + vector(const std::vector &x); + + /*! This constructor creates an \p omp::vector by copying from a range. + * \param first The beginning of the range. + * \param last The end of the range. + */ + template + vector(InputIterator first, InputIterator last); + + // XXX vector_base should take a Derived type so we don't have to define these superfluous assigns + + /*! 
Assignment operator assigns from a \c std::vector. + * \param x The \c std::vector to assign from. + * \return *this + */ + template + vector &operator=(const std::vector &x); + + /*! Assignment operator assigns from another Thrust vector-like object. + * \param x The other object to assign from. + * \return *this + */ + template + vector &operator=(const thrust::detail::vector_base &x); +}; // end vector + +} // end omp +} // end system + +// alias system::omp names at top-level +namespace omp +{ + +using thrust::system::omp::vector; + +} // end omp + +} // end thrust + +#include + diff --git a/compat/thrust/system/system_error.h b/compat/thrust/system/system_error.h new file mode 100644 index 0000000..6f94b61 --- /dev/null +++ b/compat/thrust/system/system_error.h @@ -0,0 +1,179 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file system/system_error.h + * \brief An exception object used to report error conditions that have an + * associated error code + */ + +#pragma once + +#include +#include +#include + +#include + +namespace thrust +{ + +namespace system +{ + +// [19.5.5] Class system_error + +// [19.5.5.1] Class system_error overview + +/*! \addtogroup system_diagnostics System Diagnostics + * \ingroup system + * \{ + */ + +/*! \brief The class \p system_error describes an exception object used to report error + * conditions that have an associated \p error_code. 
Such error conditions typically + * originate from the operating system or other low-level application program interfaces. + * + * Thrust uses \p system_error to report the error codes returned from device backends + * such as the CUDA runtime. + * + * The following code listing demonstrates how to catch a \p system_error to recover + * from an error. + * + * \code + * + * #include + * #include + * #include + * + * void terminate_gracefully(void) + * { + * // application-specific termination code here + * ... + * } + * + * int main(void) + * { + * try + * { + * thrust::device_vector vec; + * thrust::sort(vec.begin(), vec.end()); + * } + * catch(thrust::system_error e) + * { + * std::cerr << "Error inside sort: " << e.what() << std::endl; + * terminate_gracefully(); + * } + * + * return 0; + * } + * + * \endcode + * + * \note If an error represents an out-of-memory condition, implementations are encouraged + * to throw an exception object of type \p std::bad_alloc rather than \p system_error. + */ +class system_error + : public std::runtime_error +{ + public: + // [19.5.5.2] Class system_error members + + /*! Constructs an object of class \p system_error. + * \param ec The value returned by \p code(). + * \param what_arg A string to include in the result returned by \p what(). + * \post code() == ec. + * \post std::string(what()).find(what_arg) != string::npos. + */ + inline system_error(error_code ec, const std::string &what_arg); + + /*! Constructs an object of class \p system_error. + * \param ec The value returned by \p code(). + * \param what_arg A string to include in the result returned by \p what(). + * \post code() == ec. + * \post std::string(what()).find(what_arg) != string::npos. + */ + inline system_error(error_code ec, const char *what_arg); + + /*! Constructs an object of class \p system_error. + * \param ec The value returned by \p code(). + * \post code() == ec. + */ + inline system_error(error_code ec); + + /*! 
Constructs an object of class \p system_error. + * \param ev The error value used to create an \p error_code. + * \param ecat The \p error_category used to create an \p error_code. + * \param what_arg A string to include in the result returned by \p what(). + * \post code() == error_code(ev, ecat). + * \post std::string(what()).find(what_arg) != string::npos. + */ + inline system_error(int ev, const error_category &ecat, const std::string &what_arg); + + /*! Constructs an object of class \p system_error. + * \param ev The error value used to create an \p error_code. + * \param ecat The \p error_category used to create an \p error_code. + * \param what_arg A string to include in the result returned by \p what(). + * \post code() == error_code(ev, ecat). + * \post std::string(what()).find(what_arg) != string::npos. + */ + inline system_error(int ev, const error_category &ecat, const char *what_arg); + + /*! Constructs an object of class \p system_error. + * \param ev The error value used to create an \p error_code. + * \param ecat The \p error_category used to create an \p error_code. + * \post code() == error_code(ev, ecat). + */ + inline system_error(int ev, const error_category &ecat); + + /*! Destructor does not throw. + */ + inline virtual ~system_error(void) throw () {}; + + /*! Returns an object encoding the error. + * \return ec or error_code(ev, ecat), from the + * constructor, as appropriate. + */ + inline const error_code &code(void) const throw(); + + /*! Returns a human-readable string indicating the nature of the error. + * \return a string incorporating code().message() and the + * arguments supplied in the constructor. + */ + inline const char *what(void) const throw(); + + /*! \cond + */ + private: + error_code m_error_code; + mutable std::string m_what; + + /*! \endcond + */ +}; // end system_error + +} // end system + +/*! 
\} // end system_diagnostics + */ + +// import names into thrust:: +using system::system_error; + +} // end thrust + +#include + diff --git a/compat/thrust/system/tbb/detail/adjacent_difference.h b/compat/thrust/system/tbb/detail/adjacent_difference.h new file mode 100644 index 0000000..37c9adc --- /dev/null +++ b/compat/thrust/system/tbb/detail/adjacent_difference.h @@ -0,0 +1,50 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + +template + OutputIterator adjacent_difference(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + BinaryFunction binary_op) +{ + // tbb prefers generic::adjacent_difference to cpp::adjacent_difference + return thrust::system::detail::generic::adjacent_difference(exec, first, last, result, binary_op); +} // end adjacent_difference() + +} // end detail +} // end tbb +} // end system +} // end thrust + diff --git a/compat/thrust/system/tbb/detail/assign_value.h b/compat/thrust/system/tbb/detail/assign_value.h new file mode 100644 index 0000000..eda3b97 --- /dev/null +++ b/compat/thrust/system/tbb/detail/assign_value.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with 
the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits assign_value +#include + diff --git a/compat/thrust/system/tbb/detail/binary_search.h b/compat/thrust/system/tbb/detail/binary_search.h new file mode 100644 index 0000000..8dec989 --- /dev/null +++ b/compat/thrust/system/tbb/detail/binary_search.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits binary_search +#include + diff --git a/compat/thrust/system/tbb/detail/copy.h b/compat/thrust/system/tbb/detail/copy.h new file mode 100644 index 0000000..7604e6f --- /dev/null +++ b/compat/thrust/system/tbb/detail/copy.h @@ -0,0 +1,57 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + + +template +OutputIterator copy(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result); + + +template +OutputIterator copy_n(execution_policy &exec, + InputIterator first, + Size n, + OutputIterator result); + + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/tbb/detail/copy.inl b/compat/thrust/system/tbb/detail/copy.inl new file mode 100644 index 0000000..6d354d0 --- /dev/null +++ b/compat/thrust/system/tbb/detail/copy.inl @@ -0,0 +1,134 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ +namespace dispatch +{ + +template + OutputIterator copy(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + thrust::incrementable_traversal_tag) +{ + return thrust::system::cpp::detail::copy(exec, first, last, result); +} // end copy() + + +template + OutputIterator copy(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + thrust::random_access_traversal_tag) +{ + return thrust::system::detail::generic::copy(exec, first, last, result); +} // end copy() + + +template + OutputIterator copy_n(execution_policy &exec, + InputIterator first, + Size n, + OutputIterator result, + thrust::incrementable_traversal_tag) +{ + return thrust::system::cpp::detail::copy_n(exec, first, n, result); +} // end copy_n() + + +template + OutputIterator copy_n(execution_policy &exec, + InputIterator first, + Size n, + OutputIterator result, + thrust::random_access_traversal_tag) +{ + return thrust::system::detail::generic::copy_n(exec, first, n, result); +} // end copy_n() + +} // end dispatch + + +template +OutputIterator copy(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result) +{ + typedef typename thrust::iterator_traversal::type traversal1; + typedef typename thrust::iterator_traversal::type traversal2; + + typedef typename thrust::detail::minimum_type::type traversal; + + // dispatch on minimum traversal + return thrust::system::tbb::detail::dispatch::copy(exec,first,last,result,traversal()); +} // end copy() + + + +template +OutputIterator copy_n(execution_policy &exec, + InputIterator first, + Size n, + OutputIterator result) +{ + typedef typename thrust::iterator_traversal::type traversal1; + typedef typename thrust::iterator_traversal::type traversal2; + + typedef typename 
thrust::detail::minimum_type::type traversal; + + // dispatch on minimum traversal + return thrust::system::tbb::detail::dispatch::copy_n(exec,first,n,result,traversal()); +} // end copy_n() + + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/tbb/detail/copy_if.h b/compat/thrust/system/tbb/detail/copy_if.h new file mode 100644 index 0000000..ffbd4f8 --- /dev/null +++ b/compat/thrust/system/tbb/detail/copy_if.h @@ -0,0 +1,50 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + + +template + OutputIterator copy_if(tag, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred); + + +} // end detail +} // end tbb +} // end system +} // end thrust + +#include + diff --git a/compat/thrust/system/tbb/detail/copy_if.inl b/compat/thrust/system/tbb/detail/copy_if.inl new file mode 100644 index 0000000..4353b3b --- /dev/null +++ b/compat/thrust/system/tbb/detail/copy_if.inl @@ -0,0 +1,131 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ +namespace copy_if_detail +{ + +template +struct body +{ + + InputIterator1 first; + InputIterator2 stencil; + OutputIterator result; + thrust::detail::host_function pred; + Size sum; + + body(InputIterator1 first, InputIterator2 stencil, OutputIterator result, Predicate pred) + : first(first), stencil(stencil), result(result), pred(pred), sum(0) + {} + + body(body& b, ::tbb::split) + : first(b.first), stencil(b.stencil), result(b.result), pred(b.pred), sum(0) + {} + + void operator()(const ::tbb::blocked_range& r, ::tbb::pre_scan_tag) + { + InputIterator2 iter = stencil + r.begin(); + + for (Size i = r.begin(); i != r.end(); ++i, ++iter) + { + if (pred(*iter)) + ++sum; + } + } + + void operator()(const ::tbb::blocked_range& r, ::tbb::final_scan_tag) + { + InputIterator1 iter1 = first + r.begin(); + InputIterator2 iter2 = stencil + r.begin(); + OutputIterator iter3 = result + sum; + + for (Size i = r.begin(); i != r.end(); ++i, ++iter1, ++iter2) + { + if (pred(*iter2)) + { + *iter3 = *iter1; + ++sum; + ++iter3; + } + } + } + + void reverse_join(body& b) + { + sum = b.sum + sum; + } + + void assign(body& b) + { + sum = b.sum; + } +}; // end body + +} // end copy_if_detail + +template + OutputIterator copy_if(tag, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred) +{ + typedef typename 
thrust::iterator_difference::type Size; + typedef typename copy_if_detail::body Body; + + Size n = thrust::distance(first, last); + + if (n != 0) + { + Body body(first, stencil, result, pred); + ::tbb::parallel_scan(::tbb::blocked_range(0,n), body); + thrust::advance(result, body.sum); + } + + return result; +} // end copy_if() + +} // end detail +} // end tbb +} // end system +} // end thrust + diff --git a/compat/thrust/system/tbb/detail/count.h b/compat/thrust/system/tbb/detail/count.h new file mode 100644 index 0000000..da31ee8 --- /dev/null +++ b/compat/thrust/system/tbb/detail/count.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits count +#include + diff --git a/compat/thrust/system/tbb/detail/equal.h b/compat/thrust/system/tbb/detail/equal.h new file mode 100644 index 0000000..74e5518 --- /dev/null +++ b/compat/thrust/system/tbb/detail/equal.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits equal +#include + diff --git a/compat/thrust/system/tbb/detail/execution_policy.h b/compat/thrust/system/tbb/detail/execution_policy.h new file mode 100644 index 0000000..167d1dc --- /dev/null +++ b/compat/thrust/system/tbb/detail/execution_policy.h @@ -0,0 +1,86 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +// put the canonical tag in the same ns as the backend's entry points +namespace tbb +{ +namespace detail +{ + +// this awkward sequence of definitions arise +// from the desire both for tag to derive +// from execution_policy and for execution_policy +// to convert to tag (when execution_policy is not +// an ancestor of tag) + +// forward declaration of tag +struct tag; + +// forward declaration of execution_policy +template struct execution_policy; + +// specialize execution_policy for tag +template<> + struct execution_policy + : thrust::system::cpp::detail::execution_policy +{}; + +// tag's definition comes before the +// generic definition of execution_policy +struct tag : execution_policy {}; + +// allow conversion to tag when it is not a successor +template + struct execution_policy + : thrust::system::cpp::detail::execution_policy +{ + // allow conversion to tag + inline operator tag () const + { + return tag(); + } +}; + +} // end detail + +// alias execution_policy and tag here +using thrust::system::tbb::detail::execution_policy; +using thrust::system::tbb::detail::tag; + +} // end tbb +} // end system + +// alias items at top-level +namespace tbb +{ + +using thrust::system::tbb::execution_policy; +using thrust::system::tbb::tag; + +} // end tbb +} // end thrust + diff --git a/compat/thrust/system/tbb/detail/extrema.h b/compat/thrust/system/tbb/detail/extrema.h new file mode 100644 index 0000000..4715a89 --- /dev/null +++ b/compat/thrust/system/tbb/detail/extrema.h @@ -0,0 +1,67 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + +template +ForwardIterator max_element(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) +{ + // tbb prefers generic::max_element to cpp::max_element + return thrust::system::detail::generic::max_element(exec, first, last, comp); +} // end max_element() + +template +ForwardIterator min_element(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) +{ + // tbb prefers generic::min_element to cpp::min_element + return thrust::system::detail::generic::min_element(exec, first, last, comp); +} // end min_element() + +template +thrust::pair minmax_element(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) +{ + // tbb prefers generic::minmax_element to cpp::minmax_element + return thrust::system::detail::generic::minmax_element(exec, first, last, comp); +} // end minmax_element() + +} // end detail +} // end tbb +} // end system +} // end thrust + + diff --git a/compat/thrust/system/tbb/detail/fill.h b/compat/thrust/system/tbb/detail/fill.h new file mode 100644 index 0000000..5219e1c --- /dev/null +++ b/compat/thrust/system/tbb/detail/fill.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits fill +#include + diff --git a/compat/thrust/system/tbb/detail/find.h b/compat/thrust/system/tbb/detail/find.h new file mode 100644 index 0000000..d351454 --- /dev/null +++ b/compat/thrust/system/tbb/detail/find.h @@ -0,0 +1,46 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + +template +InputIterator find_if(execution_policy &exec, + InputIterator first, + InputIterator last, + Predicate pred) +{ + // tbb prefers generic::find_if to cpp::find_if + return thrust::system::detail::generic::find_if(exec, first, last, pred); +} + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/tbb/detail/for_each.h b/compat/thrust/system/tbb/detail/for_each.h new file mode 100644 index 0000000..573bb81 --- /dev/null +++ b/compat/thrust/system/tbb/detail/for_each.h @@ -0,0 +1,54 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + +template + RandomAccessIterator for_each(execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last, + UnaryFunction f); + +template + RandomAccessIterator for_each_n(execution_policy &exec, + RandomAccessIterator first, + Size n, + UnaryFunction f); + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/tbb/detail/for_each.inl b/compat/thrust/system/tbb/detail/for_each.inl new file mode 100644 index 0000000..b09c7be --- /dev/null +++ b/compat/thrust/system/tbb/detail/for_each.inl @@ -0,0 +1,100 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ +namespace for_each_detail +{ + +template + struct body +{ + RandomAccessIterator m_first; + UnaryFunction m_f; + + body(RandomAccessIterator first, UnaryFunction f) + : m_first(first), m_f(f) + {} + + void operator()(const ::tbb::blocked_range &r) const + { + // we assume that blocked_range specifies a contiguous range of integers + thrust::system::detail::internal::scalar::for_each_n(m_first + r.begin(), r.size(), m_f); + } // end operator()() +}; // end body + + +template + body + make_body(RandomAccessIterator first, UnaryFunction f) +{ + return body(first, f); +} // end make_body() + + +} // end for_each_detail + + +template +RandomAccessIterator for_each_n(execution_policy &, + RandomAccessIterator first, + Size n, + UnaryFunction f) +{ + ::tbb::parallel_for(::tbb::blocked_range(0,n), for_each_detail::make_body(first,f)); + + // return the end of the range + return first + n; +} // end for_each_n + + +template + RandomAccessIterator for_each(execution_policy &s, + RandomAccessIterator first, + RandomAccessIterator last, + UnaryFunction f) +{ + return tbb::detail::for_each_n(s, first, thrust::distance(first,last), f); +} // end for_each() + + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/tbb/detail/gather.h b/compat/thrust/system/tbb/detail/gather.h new file mode 100644 index 0000000..dfb7d7f --- /dev/null +++ b/compat/thrust/system/tbb/detail/gather.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits gather +#include + diff --git a/compat/thrust/system/tbb/detail/generate.h b/compat/thrust/system/tbb/detail/generate.h new file mode 100644 index 0000000..0cb33b9 --- /dev/null +++ b/compat/thrust/system/tbb/detail/generate.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits generate +#include + diff --git a/compat/thrust/system/tbb/detail/get_value.h b/compat/thrust/system/tbb/detail/get_value.h new file mode 100644 index 0000000..e376e65 --- /dev/null +++ b/compat/thrust/system/tbb/detail/get_value.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits get_value +#include + diff --git a/compat/thrust/system/tbb/detail/inner_product.h b/compat/thrust/system/tbb/detail/inner_product.h new file mode 100644 index 0000000..351421a --- /dev/null +++ b/compat/thrust/system/tbb/detail/inner_product.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits inner_product +#include + diff --git a/compat/thrust/system/tbb/detail/iter_swap.h b/compat/thrust/system/tbb/detail/iter_swap.h new file mode 100644 index 0000000..16176ec --- /dev/null +++ b/compat/thrust/system/tbb/detail/iter_swap.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits iter_swap +#include + diff --git a/compat/thrust/system/tbb/detail/logical.h b/compat/thrust/system/tbb/detail/logical.h new file mode 100644 index 0000000..b2a80de --- /dev/null +++ b/compat/thrust/system/tbb/detail/logical.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits logical +#include + diff --git a/compat/thrust/system/tbb/detail/malloc_and_free.h b/compat/thrust/system/tbb/detail/malloc_and_free.h new file mode 100644 index 0000000..811a552 --- /dev/null +++ b/compat/thrust/system/tbb/detail/malloc_and_free.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits malloc and free +#include + diff --git a/compat/thrust/system/tbb/detail/memory.inl b/compat/thrust/system/tbb/detail/memory.inl new file mode 100644 index 0000000..420a8a1 --- /dev/null +++ b/compat/thrust/system/tbb/detail/memory.inl @@ -0,0 +1,110 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ + + +template + template + reference & + reference + ::operator=(const reference &other) +{ + return super_t::operator=(other); +} // end reference::operator=() + +template + reference & + reference + ::operator=(const value_type &x) +{ + return super_t::operator=(x); +} // end reference::operator=() + +template +__host__ __device__ +void swap(reference a, reference b) +{ + a.swap(b); +} // end swap() + +namespace detail +{ + +// XXX circular #inclusion problems cause the compiler to believe that cpp::malloc +// is not defined +// WAR the problem by using adl to call cpp::malloc, which requires it to depend +// on a template parameter +template + pointer malloc_workaround(Tag t, std::size_t n) +{ + return pointer(malloc(t, n)); +} // end malloc_workaround() + +// XXX circular #inclusion problems cause the compiler to believe that cpp::free +// is not defined +// WAR the problem by using adl to call cpp::free, which requires it to depend +// on a template parameter +template + void free_workaround(Tag t, pointer ptr) +{ + free(t, ptr.get()); +} // end free_workaround() + +} // end detail + +inline pointer malloc(std::size_t n) +{ + // XXX this is how we'd like to implement this function, + // if not for circular #inclusion problems: + // + // return pointer(thrust::system::cpp::malloc(n)) + // + return detail::malloc_workaround(cpp::tag(), n); +} // end malloc() + +template +pointer malloc(std::size_t n) +{ + pointer raw_ptr = thrust::system::tbb::malloc(sizeof(T) * n); + return pointer(reinterpret_cast(raw_ptr.get())); +} // end malloc() + +inline void free(pointer ptr) +{ + // XXX this is how we'd like to implement this function, + // if not for circular #inclusion problems: + // + // thrust::system::cpp::free(ptr) + // + detail::free_workaround(cpp::tag(), ptr); +} // end free() + +} // end tbb +} // end system +} // end thrust + diff --git 
a/compat/thrust/system/tbb/detail/merge.h b/compat/thrust/system/tbb/detail/merge.h new file mode 100644 index 0000000..7b203ec --- /dev/null +++ b/compat/thrust/system/tbb/detail/merge.h @@ -0,0 +1,70 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + +template +OutputIterator merge(execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp); + +template +thrust::pair + merge_by_key(execution_policy &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first3, + InputIterator4 values_first4, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakOrdering comp); + +} // end detail +} // end tbb +} // end system +} // end thrust + +#include + diff --git a/compat/thrust/system/tbb/detail/merge.inl b/compat/thrust/system/tbb/detail/merge.inl new file mode 100644 index 0000000..cc902af --- /dev/null +++ b/compat/thrust/system/tbb/detail/merge.inl @@ -0,0 +1,285 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ +namespace merge_detail +{ + +template +struct range +{ + InputIterator1 first1, last1; + InputIterator2 first2, last2; + OutputIterator result; + StrictWeakOrdering comp; + size_t grain_size; + + range(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp, + size_t grain_size = 1024) + : first1(first1), last1(last1), + first2(first2), last2(last2), + result(result), comp(comp), grain_size(grain_size) + {} + + range(range& r, ::tbb::split) + : first1(r.first1), last1(r.last1), + first2(r.first2), last2(r.last2), + result(r.result), comp(r.comp), grain_size(r.grain_size) + { + // we can assume n1 and n2 are not both 0 + size_t n1 = thrust::distance(first1, last1); + size_t n2 = thrust::distance(first2, last2); + + InputIterator1 mid1 = first1; + InputIterator2 mid2 = first2; + + if (n1 > n2) + { + mid1 += n1 / 2; + mid2 = thrust::system::detail::internal::scalar::lower_bound(first2, last2, raw_reference_cast(*mid1), comp); + } + else + { + mid2 += n2 / 2; + mid1 = thrust::system::detail::internal::scalar::upper_bound(first1, last1, raw_reference_cast(*mid2), comp); + } + + // set first range to [first1, mid1), [first2, mid2), result + r.last1 = mid1; + r.last2 = mid2; + + // set second range to [mid1, last1), [mid2, last2), result + (mid1 - first1) + (mid2 - first2) + first1 = mid1; + first2 = mid2; + result += 
thrust::distance(r.first1, mid1) + thrust::distance(r.first2, mid2); + } + + bool empty(void) const + { + return (first1 == last1) && (first2 == last2); + } + + bool is_divisible(void) const + { + return static_cast(thrust::distance(first1, last1) + thrust::distance(first2, last2)) > grain_size; + } +}; + +struct body +{ + template + void operator()(Range& r) const + { + thrust::system::detail::internal::scalar::merge + (r.first1, r.last1, + r.first2, r.last2, + r.result, + r.comp); + } +}; + +} // end namespace merge_detail + +namespace merge_by_key_detail +{ + +template +struct range +{ + InputIterator1 keys_first1, keys_last1; + InputIterator2 keys_first2, keys_last2; + InputIterator3 values_first1; + InputIterator4 values_first2; + OutputIterator1 keys_result; + OutputIterator2 values_result; + StrictWeakOrdering comp; + size_t grain_size; + + range(InputIterator1 keys_first1, InputIterator1 keys_last1, + InputIterator2 keys_first2, InputIterator2 keys_last2, + InputIterator3 values_first1, + InputIterator4 values_first2, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakOrdering comp, + size_t grain_size = 1024) + : keys_first1(keys_first1), keys_last1(keys_last1), + keys_first2(keys_first2), keys_last2(keys_last2), + values_first1(values_first1), + values_first2(values_first2), + keys_result(keys_result), values_result(values_result), + comp(comp), grain_size(grain_size) + {} + + range(range& r, ::tbb::split) + : keys_first1(r.keys_first1), keys_last1(r.keys_last1), + keys_first2(r.keys_first2), keys_last2(r.keys_last2), + values_first1(r.values_first1), + values_first2(r.values_first2), + keys_result(r.keys_result), values_result(r.values_result), + comp(r.comp), grain_size(r.grain_size) + { + // we can assume n1 and n2 are not both 0 + size_t n1 = thrust::distance(keys_first1, keys_last1); + size_t n2 = thrust::distance(keys_first2, keys_last2); + + InputIterator1 mid1 = keys_first1; + InputIterator2 mid2 = keys_first2; + + if (n1 
> n2) + { + mid1 += n1 / 2; + mid2 = thrust::system::detail::internal::scalar::lower_bound(keys_first2, keys_last2, raw_reference_cast(*mid1), comp); + } + else + { + mid2 += n2 / 2; + mid1 = thrust::system::detail::internal::scalar::upper_bound(keys_first1, keys_last1, raw_reference_cast(*mid2), comp); + } + + // set first range to [keys_first1, mid1), [keys_first2, mid2), keys_result, values_result + r.keys_last1 = mid1; + r.keys_last2 = mid2; + + // set second range to [mid1, keys_last1), [mid2, keys_last2), keys_result + (mid1 - keys_first1) + (mid2 - keys_first2), values_result + (mid1 - keys_first1) + (mid2 - keys_first2) + keys_first1 = mid1; + keys_first2 = mid2; + values_first1 += thrust::distance(r.keys_first1, mid1); + values_first2 += thrust::distance(r.keys_first2, mid2); + keys_result += thrust::distance(r.keys_first1, mid1) + thrust::distance(r.keys_first2, mid2); + values_result += thrust::distance(r.keys_first1, mid1) + thrust::distance(r.keys_first2, mid2); + } + + bool empty(void) const + { + return (keys_first1 == keys_last1) && (keys_first2 == keys_last2); + } + + bool is_divisible(void) const + { + return static_cast(thrust::distance(keys_first1, keys_last1) + thrust::distance(keys_first2, keys_last2)) > grain_size; + } +}; + +struct body +{ + template + void operator()(Range& r) const + { + thrust::system::detail::internal::scalar::merge_by_key + (r.keys_first1, r.keys_last1, + r.keys_first2, r.keys_last2, + r.values_first1, + r.values_first2, + r.keys_result, + r.values_result, + r.comp); + } +}; + +} // end namespace merge_by_key_detail + + +template +OutputIterator merge(execution_policy &exec, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + typedef typename merge_detail::range Range; + typedef merge_detail::body Body; + Range range(first1, last1, first2, last2, result, comp); + Body body; + + ::tbb::parallel_for(range, body); + + 
thrust::advance(result, thrust::distance(first1, last1) + thrust::distance(first2, last2)); + + return result; +} // end merge() + +template +thrust::pair + merge_by_key(execution_policy &exec, + InputIterator1 keys_first1, + InputIterator1 keys_last1, + InputIterator2 keys_first2, + InputIterator2 keys_last2, + InputIterator3 values_first3, + InputIterator4 values_first4, + OutputIterator1 keys_result, + OutputIterator2 values_result, + StrictWeakOrdering comp) +{ + typedef typename merge_by_key_detail::range Range; + typedef merge_by_key_detail::body Body; + + Range range(keys_first1, keys_last1, keys_first2, keys_last2, values_first3, values_first4, keys_result, values_result, comp); + Body body; + + ::tbb::parallel_for(range, body); + + thrust::advance(keys_result, thrust::distance(keys_first1, keys_last1) + thrust::distance(keys_first2, keys_last2)); + thrust::advance(values_result, thrust::distance(keys_first1, keys_last1) + thrust::distance(keys_first2, keys_last2)); + + return thrust::make_pair(keys_result,values_result); +} + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/tbb/detail/mismatch.h b/compat/thrust/system/tbb/detail/mismatch.h new file mode 100644 index 0000000..03980cf --- /dev/null +++ b/compat/thrust/system/tbb/detail/mismatch.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// this system inherits mismatch +#include + diff --git a/compat/thrust/system/tbb/detail/par.h b/compat/thrust/system/tbb/detail/par.h new file mode 100644 index 0000000..74801ab --- /dev/null +++ b/compat/thrust/system/tbb/detail/par.h @@ -0,0 +1,66 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + + +struct par_t : thrust::system::tbb::detail::execution_policy +{ + par_t() : thrust::system::tbb::detail::execution_policy() {} + + template + thrust::detail::execute_with_allocator + operator()(Allocator &alloc) const + { + return thrust::detail::execute_with_allocator(alloc); + } +}; + + +} // end detail + + +static const detail::par_t par; + + +} // end tbb +} // end system + + +// alias par here +namespace tbb +{ + + +using thrust::system::tbb::par; + + +} // end tbb +} // end thrust + diff --git a/compat/thrust/system/tbb/detail/partition.h b/compat/thrust/system/tbb/detail/partition.h new file mode 100644 index 0000000..af37121 --- /dev/null +++ b/compat/thrust/system/tbb/detail/partition.h @@ -0,0 +1,87 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + + +template + ForwardIterator stable_partition(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred); + +template + ForwardIterator stable_partition(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred); + +template + thrust::pair + stable_partition_copy(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred); + + +template + thrust::pair + stable_partition_copy(execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred); + + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/tbb/detail/partition.inl b/compat/thrust/system/tbb/detail/partition.inl new file mode 100644 index 0000000..1e421e1 --- /dev/null +++ b/compat/thrust/system/tbb/detail/partition.inl @@ -0,0 +1,102 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + + +template + ForwardIterator stable_partition(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + // tbb prefers generic::stable_partition to cpp::stable_partition + return thrust::system::detail::generic::stable_partition(exec, first, last, pred); +} // end stable_partition() + + +template + ForwardIterator stable_partition(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred) +{ + // tbb prefers generic::stable_partition to cpp::stable_partition + return thrust::system::detail::generic::stable_partition(exec, first, last, stencil, pred); +} // end stable_partition() + +template + thrust::pair + stable_partition_copy(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + // tbb prefers generic::stable_partition_copy to cpp::stable_partition_copy + return thrust::system::detail::generic::stable_partition_copy(exec, first, last, out_true, out_false, pred); +} // end stable_partition_copy() + + +template + thrust::pair + stable_partition_copy(execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator1 out_true, + OutputIterator2 out_false, + Predicate pred) +{ + // tbb prefers generic::stable_partition_copy to cpp::stable_partition_copy + return 
thrust::system::detail::generic::stable_partition_copy(exec, first, last, stencil, out_true, out_false, pred); +} // end stable_partition_copy() + + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/tbb/detail/reduce.h b/compat/thrust/system/tbb/detail/reduce.h new file mode 100644 index 0000000..83a7cc3 --- /dev/null +++ b/compat/thrust/system/tbb/detail/reduce.h @@ -0,0 +1,54 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file reduce.h + * \brief TBB implementation of reduce. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + + +template + OutputType reduce(execution_policy &exec, + InputIterator begin, + InputIterator end, + OutputType init, + BinaryFunction binary_op); + + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/tbb/detail/reduce.inl b/compat/thrust/system/tbb/detail/reduce.inl new file mode 100644 index 0000000..c249852 --- /dev/null +++ b/compat/thrust/system/tbb/detail/reduce.inl @@ -0,0 +1,131 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ +namespace reduce_detail +{ + +template +struct body +{ + RandomAccessIterator first; + OutputType sum; + bool first_call; // TBB can invoke operator() multiple times on the same body + thrust::detail::host_function binary_op; + + // note: we only initialize sum with init to avoid calling OutputType's default constructor + body(RandomAccessIterator first, OutputType init, BinaryFunction binary_op) + : first(first), sum(init), first_call(true), binary_op(binary_op) + {} + + // note: we only initialize sum with b.sum to avoid calling OutputType's default constructor + body(body& b, ::tbb::split) + : first(b.first), sum(b.sum), first_call(true), binary_op(b.binary_op) + {} + + template + void operator()(const ::tbb::blocked_range &r) + { + // we assume that blocked_range specifies a contiguous range of integers + + if (r.empty()) return; // nothing to do + + RandomAccessIterator iter = first + r.begin(); + + OutputType temp = thrust::raw_reference_cast(*iter); + + ++iter; + + for (Size i = r.begin() + 1; i != r.end(); ++i, ++iter) + temp = binary_op(temp, *iter); + + + if (first_call) + { + // first time body has been invoked + first_call = false; + sum = temp; + } + else + { + // body has been previously invoked, accumulate temp into sum + sum = binary_op(sum, temp); + } + } // end operator()() + + void join(body& b) + { + sum = binary_op(sum, b.sum); + } +}; // end
body + +} // end reduce_detail + + +template + OutputType reduce(execution_policy &exec, + InputIterator begin, + InputIterator end, + OutputType init, + BinaryFunction binary_op) +{ + typedef typename thrust::iterator_difference::type Size; + + Size n = thrust::distance(begin, end); + + if (n == 0) + { + return init; + } + else + { + typedef typename reduce_detail::body Body; + Body reduce_body(begin, init, binary_op); + ::tbb::parallel_reduce(::tbb::blocked_range(0,n), reduce_body); + return binary_op(init, reduce_body.sum); + } +} + + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/tbb/detail/reduce_by_key.h b/compat/thrust/system/tbb/detail/reduce_by_key.h new file mode 100644 index 0000000..0149a76 --- /dev/null +++ b/compat/thrust/system/tbb/detail/reduce_by_key.h @@ -0,0 +1,57 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + + +template + thrust::pair + reduce_by_key(execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred, + BinaryFunction binary_op); + + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/tbb/detail/reduce_by_key.inl b/compat/thrust/system/tbb/detail/reduce_by_key.inl new file mode 100644 index 0000000..10d2d8b --- /dev/null +++ b/compat/thrust/system/tbb/detail/reduce_by_key.inl @@ -0,0 +1,344 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ +namespace reduce_by_key_detail +{ + + +template + inline L divide_ri(const L x, const R y) +{ + return (x + (y - 1)) / y; +} + + +template + struct partial_sum_type + : thrust::detail::eval_if< + thrust::detail::has_result_type::value, + thrust::detail::result_type, + thrust::detail::eval_if< + thrust::detail::is_output_iterator::value, + thrust::iterator_value, + thrust::iterator_value + > + > +{}; + + +template + struct partial_sum_type + : thrust::detail::eval_if< + thrust::detail::has_result_type::value, + thrust::detail::result_type, + thrust::iterator_value + > +{}; + + +template + thrust::pair< + InputIterator1, + thrust::pair< + typename InputIterator1::value_type, + typename partial_sum_type::type + > + > + reduce_last_segment_backward(InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + BinaryPredicate binary_pred, + BinaryFunction binary_op) +{ + typename thrust::iterator_difference::type n = keys_last - keys_first; + + // reverse the ranges and consume from the end + thrust::reverse_iterator keys_first_r(keys_last); + thrust::reverse_iterator keys_last_r(keys_first); + thrust::reverse_iterator values_first_r(values_first + n); + + typename InputIterator1::value_type result_key = *keys_first_r; + typename partial_sum_type::type result_value = *values_first_r; + + // consume the entirety of the first key's sequence + for(++keys_first_r, ++values_first_r; + (keys_first_r != keys_last_r) && binary_pred(*keys_first_r, result_key); + ++keys_first_r, ++values_first_r) + { + result_value = binary_op(result_value, *values_first_r); + } + + return thrust::make_pair(keys_first_r.base(), thrust::make_pair(result_key, result_value)); +} + + +template + thrust::tuple< + OutputIterator1, + 
OutputIterator2, + typename InputIterator1::value_type, + typename partial_sum_type::type + > + reduce_by_key_with_carry(InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred, + BinaryFunction binary_op) +{ + // first, consume the last sequence to produce the carry + // XXX is there an elegant way to pose this such that we don't need to default construct carry? + thrust::pair< + typename InputIterator1::value_type, + typename partial_sum_type::type + > carry; + + thrust::tie(keys_last, carry) = reduce_last_segment_backward(keys_first, keys_last, values_first, binary_pred, binary_op); + + // finish with sequential reduce_by_key + thrust::cpp::tag seq; + thrust::tie(keys_output, values_output) = + thrust::reduce_by_key(seq, keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op); + + return thrust::make_tuple(keys_output, values_output, carry.first, carry.second); +} + + +template + bool interval_has_carry(size_t interval_idx, size_t interval_size, size_t num_intervals, Iterator tail_flags) +{ + // to discover whether the interval has a carry, look at the tail_flag corresponding to its last element + // the final interval never has a carry by definition + return (interval_idx + 1 < num_intervals) ? 
!tail_flags[(interval_idx + 1) * interval_size - 1] : false; +} + + +template + struct serial_reduce_by_key_body +{ + typedef typename thrust::iterator_difference::type size_type; + + Iterator1 keys_first; + Iterator2 values_first; + Iterator3 result_offset; + Iterator4 keys_result; + Iterator5 values_result; + Iterator6 carry_result; + + size_type n; + size_type interval_size; + size_type num_intervals; + + BinaryPredicate binary_pred; + BinaryFunction binary_op; + + serial_reduce_by_key_body(Iterator1 keys_first, Iterator2 values_first, Iterator3 result_offset, Iterator4 keys_result, Iterator5 values_result, Iterator6 carry_result, size_type n, size_type interval_size, size_type num_intervals, BinaryPredicate binary_pred, BinaryFunction binary_op) + : keys_first(keys_first), values_first(values_first), + result_offset(result_offset), + keys_result(keys_result), + values_result(values_result), + carry_result(carry_result), + n(n), + interval_size(interval_size), + num_intervals(num_intervals), + binary_pred(binary_pred), + binary_op(binary_op) + {} + + void operator()(const ::tbb::blocked_range &r) const + { + assert(r.size() == 1); + + const size_type interval_idx = r.begin(); + + const size_type offset_to_first = interval_size * interval_idx; + const size_type offset_to_last = thrust::min(n, offset_to_first + interval_size); + + Iterator1 my_keys_first = keys_first + offset_to_first; + Iterator1 my_keys_last = keys_first + offset_to_last; + Iterator2 my_values_first = values_first + offset_to_first; + Iterator3 my_result_offset = result_offset + interval_idx; + Iterator4 my_keys_result = keys_result + *my_result_offset; + Iterator5 my_values_result = values_result + *my_result_offset; + Iterator6 my_carry_result = carry_result + interval_idx; + + // consume the rest of the interval with reduce_by_key + typedef typename thrust::iterator_value::type key_type; + typedef typename partial_sum_type::type value_type; + + // XXX is there a way to pose this so that we 
don't require default construction of carry? + thrust::pair carry; + + thrust::tie(my_keys_result, my_values_result, carry.first, carry.second) = + reduce_by_key_with_carry(my_keys_first, + my_keys_last, + my_values_first, + my_keys_result, + my_values_result, + binary_pred, + binary_op); + + // store to carry only when we actually have a carry + // store to my_keys_result & my_values_result otherwise + + // create tail_flags so we can check for a carry + thrust::detail::tail_flags flags = thrust::detail::make_tail_flags(keys_first, keys_first + n, binary_pred); + + if(interval_has_carry(interval_idx, interval_size, num_intervals, flags.begin())) + { + // we can ignore the carry's key + // XXX because the carry result is uninitialized, we should copy construct + *my_carry_result = carry.second; + } + else + { + *my_keys_result = carry.first; + *my_values_result = carry.second; + } + } +}; + + +template + serial_reduce_by_key_body + make_serial_reduce_by_key_body(Iterator1 keys_first, Iterator2 values_first, Iterator3 result_offset, Iterator4 keys_result, Iterator5 values_result, Iterator6 carry_result, typename thrust::iterator_difference::type n, size_t interval_size, size_t num_intervals, BinaryPredicate binary_pred, BinaryFunction binary_op) +{ + return serial_reduce_by_key_body(keys_first, values_first, result_offset, keys_result, values_result, carry_result, n, interval_size, num_intervals, binary_pred, binary_op); +} + + +} // end reduce_by_key_detail + + +template + thrust::pair + reduce_by_key(thrust::tbb::execution_policy &exec, + Iterator1 keys_first, Iterator1 keys_last, + Iterator2 values_first, + Iterator3 keys_result, + Iterator4 values_result, + BinaryPredicate binary_pred, + BinaryFunction binary_op) +{ + + typedef typename thrust::iterator_difference::type difference_type; + difference_type n = keys_last - keys_first; + if(n == 0) return thrust::make_pair(keys_result, values_result); + + // XXX this value is a tuning opportunity + const 
difference_type parallelism_threshold = 10000; + + if(n < parallelism_threshold) + { + // don't bother parallelizing for small n + thrust::cpp::tag seq; + return thrust::reduce_by_key(seq, keys_first, keys_last, values_first, keys_result, values_result, binary_pred, binary_op); + } + + // count the number of processors + const unsigned int p = thrust::max(1u, ::tbb::tbb_thread::hardware_concurrency()); + + // generate O(P) intervals of sequential work + // XXX oversubscribing is a tuning opportunity + const unsigned int subscription_rate = 1; + difference_type interval_size = thrust::min(parallelism_threshold, thrust::max(n, n / (subscription_rate * p))); + difference_type num_intervals = reduce_by_key_detail::divide_ri(n, interval_size); + + // decompose the input into intervals of size N / num_intervals + // add one extra element to this vector to store the size of the entire result + thrust::detail::temporary_array interval_output_offsets(0, exec, num_intervals + 1); + + // first count the number of tail flags in each interval + thrust::detail::tail_flags tail_flags = thrust::detail::make_tail_flags(keys_first, keys_last, binary_pred); + thrust::system::tbb::detail::reduce_intervals(exec, tail_flags.begin(), tail_flags.end(), interval_size, interval_output_offsets.begin() + 1, thrust::plus()); + interval_output_offsets[0] = 0; + + // scan the counts to get each body's output offset + thrust::cpp::tag seq; + thrust::inclusive_scan(seq, + interval_output_offsets.begin() + 1, interval_output_offsets.end(), + interval_output_offsets.begin() + 1); + + // do a reduce_by_key serially in each thread + // the final interval never has a carry by definition, so don't reserve space for it + typedef typename reduce_by_key_detail::partial_sum_type::type carry_type; + thrust::detail::temporary_array carries(0, exec, num_intervals - 1); + + // force grainsize == 1 with simple_partitioner() + ::tbb::parallel_for(::tbb::blocked_range(0, num_intervals, 1), +
reduce_by_key_detail::make_serial_reduce_by_key_body(keys_first, values_first, interval_output_offsets.begin(), keys_result, values_result, carries.begin(), n, interval_size, num_intervals, binary_pred, binary_op), + ::tbb::simple_partitioner()); + + difference_type size_of_result = interval_output_offsets[num_intervals]; + + // sequentially accumulate the carries + // note that the last interval does not have a carry + // XXX find a way to express this loop via a sequential algorithm, perhaps reduce_by_key + for(typename thrust::detail::temporary_array::size_type i = 0; i < carries.size(); ++i) + { + // if our interval has a carry, then we need to sum the carry to the next interval's output offset + // if it does not have a carry, then we need to ignore carry_value[i] + if(reduce_by_key_detail::interval_has_carry(i, interval_size, num_intervals, tail_flags.begin())) + { + difference_type output_idx = interval_output_offsets[i+1]; + + values_result[output_idx] = binary_op(values_result[output_idx], carries[i]); + } + } + + return thrust::make_pair(keys_result + size_of_result, values_result + size_of_result); +} + + +} // end detail +} // end tbb +} // end system +} // end thrust + diff --git a/compat/thrust/system/tbb/detail/reduce_intervals.h b/compat/thrust/system/tbb/detail/reduce_intervals.h new file mode 100644 index 0000000..0647ffd --- /dev/null +++ b/compat/thrust/system/tbb/detail/reduce_intervals.h @@ -0,0 +1,126 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ +namespace reduce_intervals_detail +{ + + +template + inline L divide_ri(const L x, const R y) +{ + return (x + (y - 1)) / y; +} + + +template + struct body +{ + RandomAccessIterator1 first; + RandomAccessIterator2 result; + Size n, interval_size; + BinaryFunction binary_op; + + body(RandomAccessIterator1 first, RandomAccessIterator2 result, Size n, Size interval_size, BinaryFunction binary_op) + : first(first), result(result), n(n), interval_size(interval_size), binary_op(binary_op) + {} + + void operator()(const ::tbb::blocked_range &r) const + { + assert(r.size() == 1); + + Size interval_idx = r.begin(); + + Size offset_to_first = interval_size * interval_idx; + Size offset_to_last = thrust::min(n, offset_to_first + interval_size); + + RandomAccessIterator1 my_first = first + offset_to_first; + RandomAccessIterator1 my_last = first + offset_to_last; + + thrust::cpp::tag seq; + + // carefully pass the init value for the interval with raw_reference_cast + typedef typename BinaryFunction::result_type sum_type; + result[interval_idx] = + thrust::reduce(seq, my_first + 1, my_last, sum_type(thrust::raw_reference_cast(*my_first)), binary_op); + } +}; + + +template + body + make_body(RandomAccessIterator1 first, RandomAccessIterator2 result, Size n, Size interval_size, BinaryFunction binary_op) +{ + return body(first, result, n, interval_size, binary_op); +} + + +} // end reduce_intervals_detail + + +template + void reduce_intervals(thrust::tbb::execution_policy &, + RandomAccessIterator1 first, + RandomAccessIterator1 last, + Size interval_size, + RandomAccessIterator2 result, + BinaryFunction binary_op) +{ + typename thrust::iterator_difference::type n = last - first; + + Size 
num_intervals = reduce_intervals_detail::divide_ri(n, interval_size); + + ::tbb::parallel_for(::tbb::blocked_range(0, num_intervals, 1), reduce_intervals_detail::make_body(first, result, Size(n), interval_size, binary_op), ::tbb::simple_partitioner()); +} + + +template + void reduce_intervals(thrust::tbb::execution_policy &exec, + RandomAccessIterator1 first, + RandomAccessIterator1 last, + Size interval_size, + RandomAccessIterator2 result) +{ + typedef typename thrust::iterator_value::type value_type; + + return thrust::system::tbb::detail::reduce_intervals(exec, first, last, interval_size, result, thrust::plus()); +} + + +} // end detail +} // end tbb +} // end system +} // end thrust + diff --git a/compat/thrust/system/tbb/detail/remove.h b/compat/thrust/system/tbb/detail/remove.h new file mode 100644 index 0000000..48cbb5c --- /dev/null +++ b/compat/thrust/system/tbb/detail/remove.h @@ -0,0 +1,81 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + +template + ForwardIterator remove_if(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred); + + +template + ForwardIterator remove_if(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred); + + +template + OutputIterator remove_copy_if(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred); + + +template + OutputIterator remove_copy_if(execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred); + + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/tbb/detail/remove.inl b/compat/thrust/system/tbb/detail/remove.inl new file mode 100644 index 0000000..01916c5 --- /dev/null +++ b/compat/thrust/system/tbb/detail/remove.inl @@ -0,0 +1,94 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + +template + ForwardIterator remove_if(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + Predicate pred) +{ + // tbb prefers generic::remove_if to cpp::remove_if + return thrust::system::detail::generic::remove_if(exec, first, last, pred); +} + + +template + ForwardIterator remove_if(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + InputIterator stencil, + Predicate pred) +{ + // tbb prefers generic::remove_if to cpp::remove_if + return thrust::system::detail::generic::remove_if(exec, first, last, stencil, pred); +} + + +template + OutputIterator remove_copy_if(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred) +{ + // tbb prefers generic::remove_copy_if to cpp::remove_copy_if + return thrust::system::detail::generic::remove_copy_if(exec, first, last, result, pred); +} + +template + OutputIterator remove_copy_if(execution_policy &exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + OutputIterator result, + Predicate pred) +{ + // tbb prefers generic::remove_copy_if to cpp::remove_copy_if + return thrust::system::detail::generic::remove_copy_if(exec, first, last, stencil, result, pred); +} + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/tbb/detail/replace.h b/compat/thrust/system/tbb/detail/replace.h new file mode 100644 index 0000000..c48555d --- /dev/null +++ b/compat/thrust/system/tbb/detail/replace.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits this algorithm +#include + diff --git a/compat/thrust/system/tbb/detail/reverse.h b/compat/thrust/system/tbb/detail/reverse.h new file mode 100644 index 0000000..04923d1 --- /dev/null +++ b/compat/thrust/system/tbb/detail/reverse.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits reverse +#include + diff --git a/compat/thrust/system/tbb/detail/scan.h b/compat/thrust/system/tbb/detail/scan.h new file mode 100644 index 0000000..ed5cacd --- /dev/null +++ b/compat/thrust/system/tbb/detail/scan.h @@ -0,0 +1,64 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file scan.h + * \brief TBB implementations of scan functions. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + +template + OutputIterator inclusive_scan(tag, + InputIterator first, + InputIterator last, + OutputIterator result, + BinaryFunction binary_op); + + +template + OutputIterator exclusive_scan(tag, + InputIterator first, + InputIterator last, + OutputIterator result, + T init, + BinaryFunction binary_op); + + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/tbb/detail/scan.inl b/compat/thrust/system/tbb/detail/scan.inl new file mode 100644 index 0000000..4887824 --- /dev/null +++ b/compat/thrust/system/tbb/detail/scan.inl @@ -0,0 +1,293 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ +namespace scan_detail +{ + +template +struct inclusive_body +{ + InputIterator input; + OutputIterator output; + thrust::detail::host_function binary_op; + ValueType sum; + bool first_call; + + inclusive_body(InputIterator input, OutputIterator output, BinaryFunction binary_op, ValueType dummy) + : input(input), output(output), binary_op(binary_op), sum(dummy), first_call(true) + {} + + inclusive_body(inclusive_body& b, ::tbb::split) + : input(b.input), output(b.output), binary_op(b.binary_op), sum(b.sum), first_call(true) + {} + + template + void operator()(const ::tbb::blocked_range& r, ::tbb::pre_scan_tag) + { + InputIterator iter = input + r.begin(); + + ValueType temp = *iter; + + ++iter; + + for (Size i = r.begin() + 1; i != r.end(); ++i, ++iter) + temp = binary_op(temp, *iter); + + if (first_call) + sum = temp; + else + sum = binary_op(sum, temp); + + first_call = false; + } + + template + void operator()(const ::tbb::blocked_range& r, ::tbb::final_scan_tag) + { + InputIterator iter1 = input + r.begin(); + OutputIterator iter2 = output + r.begin(); + + if (first_call) + { + *iter2 = sum = *iter1; + ++iter1; + ++iter2; + for (Size i = r.begin() + 1; i != r.end(); ++i, ++iter1, ++iter2) + *iter2 = sum = binary_op(sum, *iter1); + } + else + { + for (Size i = r.begin(); i != r.end(); ++i, ++iter1, ++iter2) + *iter2 = sum = binary_op(sum, *iter1); + } + + first_call = false; + } + + void reverse_join(inclusive_body& b) + { + sum = binary_op(b.sum, sum); + } + + void assign(inclusive_body& b) + { + sum = b.sum; + } +}; + + +template +struct exclusive_body +{ + InputIterator input; + OutputIterator output; + thrust::detail::host_function binary_op; + ValueType sum; + bool first_call; + + exclusive_body(InputIterator input, OutputIterator output, 
BinaryFunction binary_op, ValueType init) + : input(input), output(output), binary_op(binary_op), sum(init), first_call(true) + {} + + exclusive_body(exclusive_body& b, ::tbb::split) + : input(b.input), output(b.output), binary_op(b.binary_op), sum(b.sum), first_call(true) + {} + + template + void operator()(const ::tbb::blocked_range& r, ::tbb::pre_scan_tag) + { + InputIterator iter = input + r.begin(); + + ValueType temp = *iter; + + ++iter; + + for (Size i = r.begin() + 1; i != r.end(); ++i, ++iter) + temp = binary_op(temp, *iter); + + if (first_call && r.begin() > 0) + sum = temp; + else + sum = binary_op(sum, temp); + + first_call = false; + } + + template + void operator()(const ::tbb::blocked_range& r, ::tbb::final_scan_tag) + { + InputIterator iter1 = input + r.begin(); + OutputIterator iter2 = output + r.begin(); + + for (Size i = r.begin(); i != r.end(); ++i, ++iter1, ++iter2) + { + ValueType temp = binary_op(sum, *iter1); + *iter2 = sum; + sum = temp; + } + + first_call = false; + } + + void reverse_join(exclusive_body& b) + { + sum = binary_op(b.sum, sum); + } + + void assign(exclusive_body& b) + { + sum = b.sum; + } +}; + +} // end scan_detail + + + +template + OutputIterator inclusive_scan(tag, + InputIterator first, + InputIterator last, + OutputIterator result, + BinaryFunction binary_op) +{ + // the pseudocode for deducing the type of the temporary used below: + // + // if BinaryFunction is AdaptableBinaryFunction + // TemporaryType = AdaptableBinaryFunction::result_type + // else if OutputIterator is a "pure" output iterator + // TemporaryType = InputIterator::value_type + // else + // TemporaryType = OutputIterator::value_type + // + // XXX upon c++0x, TemporaryType needs to be: + // result_of::type + + using namespace thrust::detail; + + typedef typename eval_if< + has_result_type::value, + result_type, + eval_if< + is_output_iterator::value, + thrust::iterator_value, + thrust::iterator_value + > + >::type ValueType; + + typedef typename 
thrust::iterator_difference::type Size; + + Size n = thrust::distance(first, last); + + if (n != 0) + { + typedef typename scan_detail::inclusive_body Body; + Body scan_body(first, result, binary_op, *first); + ::tbb::parallel_scan(::tbb::blocked_range(0,n), scan_body); + } + + thrust::advance(result, n); + + return result; +} + + +template + OutputIterator exclusive_scan(tag, + InputIterator first, + InputIterator last, + OutputIterator result, + T init, + BinaryFunction binary_op) +{ + // the pseudocode for deducing the type of the temporary used below: + // + // if BinaryFunction is AdaptableBinaryFunction + // TemporaryType = AdaptableBinaryFunction::result_type + // else if OutputIterator is a "pure" output iterator + // TemporaryType = InputIterator::value_type + // else + // TemporaryType = OutputIterator::value_type + // + // XXX upon c++0x, TemporaryType needs to be: + // result_of::type + + using namespace thrust::detail; + + typedef typename eval_if< + has_result_type::value, + result_type, + eval_if< + is_output_iterator::value, + thrust::iterator_value, + thrust::iterator_value + > + >::type ValueType; + + typedef typename thrust::iterator_difference::type Size; + + Size n = thrust::distance(first, last); + + if (n != 0) + { + typedef typename scan_detail::exclusive_body Body; + Body scan_body(first, result, binary_op, init); + ::tbb::parallel_scan(::tbb::blocked_range(0,n), scan_body); + } + + thrust::advance(result, n); + + return result; +} + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/tbb/detail/scan_by_key.h b/compat/thrust/system/tbb/detail/scan_by_key.h new file mode 100644 index 0000000..cad4fc1 --- /dev/null +++ b/compat/thrust/system/tbb/detail/scan_by_key.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with 
the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits scan_by_key +#include + diff --git a/compat/thrust/system/tbb/detail/scatter.h b/compat/thrust/system/tbb/detail/scatter.h new file mode 100644 index 0000000..c48555d --- /dev/null +++ b/compat/thrust/system/tbb/detail/scatter.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits this algorithm +#include + diff --git a/compat/thrust/system/tbb/detail/sequence.h b/compat/thrust/system/tbb/detail/sequence.h new file mode 100644 index 0000000..811d8f5 --- /dev/null +++ b/compat/thrust/system/tbb/detail/sequence.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits sequence +#include + diff --git a/compat/thrust/system/tbb/detail/set_operations.h b/compat/thrust/system/tbb/detail/set_operations.h new file mode 100644 index 0000000..687edb2 --- /dev/null +++ b/compat/thrust/system/tbb/detail/set_operations.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits set_operations +#include + diff --git a/compat/thrust/system/tbb/detail/sort.h b/compat/thrust/system/tbb/detail/sort.h new file mode 100644 index 0000000..3b6f630 --- /dev/null +++ b/compat/thrust/system/tbb/detail/sort.h @@ -0,0 +1,55 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + +template + void stable_sort(execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp); + +template + void stable_sort_by_key(execution_policy &exec, + RandomAccessIterator1 keys_first, + RandomAccessIterator1 keys_last, + RandomAccessIterator2 values_first, + StrictWeakOrdering comp); + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/tbb/detail/sort.inl b/compat/thrust/system/tbb/detail/sort.inl new file mode 100644 index 0000000..f292789 --- /dev/null +++ b/compat/thrust/system/tbb/detail/sort.inl @@ -0,0 +1,251 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ +namespace sort_detail +{ + +// TODO tune this based on data type and comp +const static int threshold = 128 * 1024; + +template +void merge_sort(execution_policy &exec, Iterator1 first1, Iterator1 last1, Iterator2 first2, StrictWeakOrdering comp, bool inplace); + +template +struct merge_sort_closure +{ + execution_policy &exec; + Iterator1 first1, last1; + Iterator2 first2; + StrictWeakOrdering comp; + bool inplace; + + merge_sort_closure(execution_policy &exec, Iterator1 first1, Iterator1 last1, Iterator2 first2, StrictWeakOrdering comp, bool inplace) + : exec(exec), first1(first1), last1(last1), first2(first2), comp(comp), inplace(inplace) + {} + + void operator()(void) const + { + merge_sort(exec, first1, last1, first2, comp, inplace); + } +}; + + +template +void merge_sort(execution_policy &exec, Iterator1 first1, Iterator1 last1, Iterator2 first2, StrictWeakOrdering comp, bool inplace) +{ + typedef typename thrust::iterator_difference::type difference_type; + + difference_type n = thrust::distance(first1, last1); + + if (n < threshold) + { + thrust::system::detail::internal::scalar::stable_sort(first1, last1, comp); + + if (!inplace) + thrust::system::detail::internal::scalar::copy(first1, last1, first2); + + return; + } + + Iterator1 mid1 = first1 + (n / 2); + Iterator2 mid2 = first2 + (n / 2); + Iterator2 last2 = first2 + n; + + typedef merge_sort_closure Closure; + + Closure left (exec, first1, mid1, first2, comp, !inplace); + Closure right(exec, mid1, last1, mid2, comp, !inplace); + + ::tbb::parallel_invoke(left, right); + + if (inplace) thrust::merge(exec, first2, mid2, mid2, last2, first1, comp); + else thrust::merge(exec, first1, mid1, mid1, last1, first2, comp); +} + +} // end namespace sort_detail + + +namespace sort_by_key_detail +{ + +// TODO tune this based on data type and comp 
+const static int threshold = 128 * 1024; + +template +void merge_sort_by_key(execution_policy &exec, + Iterator1 first1, + Iterator1 last1, + Iterator2 first2, + Iterator3 first3, + Iterator4 first4, + StrictWeakOrdering comp, + bool inplace); + +template +struct merge_sort_by_key_closure +{ + execution_policy &exec; + Iterator1 first1, last1; + Iterator2 first2; + Iterator3 first3; + Iterator4 first4; + StrictWeakOrdering comp; + bool inplace; + + merge_sort_by_key_closure(execution_policy &exec, + Iterator1 first1, + Iterator1 last1, + Iterator2 first2, + Iterator3 first3, + Iterator4 first4, + StrictWeakOrdering comp, + bool inplace) + : exec(exec), first1(first1), last1(last1), first2(first2), first3(first3), first4(first4), comp(comp), inplace(inplace) + {} + + void operator()(void) const + { + merge_sort_by_key(exec, first1, last1, first2, first3, first4, comp, inplace); + } +}; + + +template +void merge_sort_by_key(execution_policy &exec, + Iterator1 first1, + Iterator1 last1, + Iterator2 first2, + Iterator3 first3, + Iterator4 first4, + StrictWeakOrdering comp, + bool inplace) +{ + typedef typename thrust::iterator_difference::type difference_type; + + difference_type n = thrust::distance(first1, last1); + + Iterator1 mid1 = first1 + (n / 2); + Iterator2 mid2 = first2 + (n / 2); + Iterator3 mid3 = first3 + (n / 2); + Iterator4 mid4 = first4 + (n / 2); + Iterator2 last2 = first2 + n; + Iterator3 last3 = first3 + n; + + if (n < threshold) + { + thrust::system::detail::internal::scalar::stable_sort_by_key(first1, last1, first2, comp); + + if (!inplace) + { + thrust::system::detail::internal::scalar::copy(first1, last1, first3); + thrust::system::detail::internal::scalar::copy(first2, last2, first4); + } + + return; + } + + typedef merge_sort_by_key_closure Closure; + + Closure left (exec, first1, mid1, first2, first3, first4, comp, !inplace); + Closure right(exec, mid1, last1, mid2, mid3, mid4, comp, !inplace); + + ::tbb::parallel_invoke(left, right); + + 
if(inplace) + { + thrust::merge_by_key(exec, first3, mid3, mid3, last3, first4, mid4, first1, first2, comp); + } + else + { + thrust::merge_by_key(exec, first1, mid1, mid1, last1, first2, mid2, first3, first4, comp); + } +} + +} // end namespace sort_detail + +template +void stable_sort(execution_policy &exec, + RandomAccessIterator first, + RandomAccessIterator last, + StrictWeakOrdering comp) +{ + typedef typename thrust::iterator_value::type key_type; + + thrust::detail::temporary_array temp(exec, first, last); + + sort_detail::merge_sort(exec, first, last, temp.begin(), comp, true); +} + +template + void stable_sort_by_key(execution_policy &exec, + RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + StrictWeakOrdering comp) +{ + typedef typename thrust::iterator_value::type key_type; + typedef typename thrust::iterator_value::type val_type; + + RandomAccessIterator2 last2 = first2 + thrust::distance(first1, last1); + + thrust::detail::temporary_array temp1(exec, first1, last1); + thrust::detail::temporary_array temp2(exec, first2, last2); + + sort_by_key_detail::merge_sort_by_key(exec, first1, last1, first2, temp1.begin(), temp2.begin(), comp, true); +} + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/tbb/detail/swap_ranges.h b/compat/thrust/system/tbb/detail/swap_ranges.h new file mode 100644 index 0000000..15f8f55 --- /dev/null +++ b/compat/thrust/system/tbb/detail/swap_ranges.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// tbb inherits swap_ranges +#include + diff --git a/compat/thrust/system/tbb/detail/tabulate.h b/compat/thrust/system/tbb/detail/tabulate.h new file mode 100644 index 0000000..da65d8e --- /dev/null +++ b/compat/thrust/system/tbb/detail/tabulate.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits tabulate +#include + diff --git a/compat/thrust/system/tbb/detail/temporary_buffer.h b/compat/thrust/system/tbb/detail/temporary_buffer.h new file mode 100644 index 0000000..628bd75 --- /dev/null +++ b/compat/thrust/system/tbb/detail/temporary_buffer.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system has no special temporary buffer functions + diff --git a/compat/thrust/system/tbb/detail/transform.h b/compat/thrust/system/tbb/detail/transform.h new file mode 100644 index 0000000..70ce1f4 --- /dev/null +++ b/compat/thrust/system/tbb/detail/transform.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// tbb inherits transform +#include + diff --git a/compat/thrust/system/tbb/detail/transform_reduce.h b/compat/thrust/system/tbb/detail/transform_reduce.h new file mode 100644 index 0000000..23ed070 --- /dev/null +++ b/compat/thrust/system/tbb/detail/transform_reduce.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits transform_reduce +#include + diff --git a/compat/thrust/system/tbb/detail/transform_scan.h b/compat/thrust/system/tbb/detail/transform_scan.h new file mode 100644 index 0000000..fc2e55d --- /dev/null +++ b/compat/thrust/system/tbb/detail/transform_scan.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits transform_scan +#include + diff --git a/compat/thrust/system/tbb/detail/uninitialized_copy.h b/compat/thrust/system/tbb/detail/uninitialized_copy.h new file mode 100644 index 0000000..944f4ba --- /dev/null +++ b/compat/thrust/system/tbb/detail/uninitialized_copy.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits uninitialized_copy +#include + diff --git a/compat/thrust/system/tbb/detail/uninitialized_fill.h b/compat/thrust/system/tbb/detail/uninitialized_fill.h new file mode 100644 index 0000000..b9d6de2 --- /dev/null +++ b/compat/thrust/system/tbb/detail/uninitialized_fill.h @@ -0,0 +1,23 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// this system inherits uninitialized_fill +#include + diff --git a/compat/thrust/system/tbb/detail/unique.h b/compat/thrust/system/tbb/detail/unique.h new file mode 100644 index 0000000..34538ca --- /dev/null +++ b/compat/thrust/system/tbb/detail/unique.h @@ -0,0 +1,59 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + + +template + ForwardIterator unique(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate binary_pred); + + +template + OutputIterator unique_copy(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator output, + BinaryPredicate binary_pred); + + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/tbb/detail/unique.inl b/compat/thrust/system/tbb/detail/unique.inl new file mode 100644 index 0000000..06e6a30 --- /dev/null +++ b/compat/thrust/system/tbb/detail/unique.inl @@ -0,0 +1,66 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + + +template + ForwardIterator unique(execution_policy &exec, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate binary_pred) +{ + // tbb prefers generic::unique to cpp::unique + return thrust::system::detail::generic::unique(exec,first,last,binary_pred); +} // end unique() + + +template + OutputIterator unique_copy(execution_policy &exec, + InputIterator first, + InputIterator last, + OutputIterator output, + BinaryPredicate binary_pred) +{ + // tbb prefers generic::unique_copy to cpp::unique_copy + return thrust::system::detail::generic::unique_copy(exec,first,last,output,binary_pred); +} // end unique_copy() + + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/tbb/detail/unique_by_key.h b/compat/thrust/system/tbb/detail/unique_by_key.h new file mode 100644 index 0000000..c6d0532 --- /dev/null +++ b/compat/thrust/system/tbb/detail/unique_by_key.h @@ -0,0 +1,67 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + + +template + thrust::pair + unique_by_key(execution_policy &exec, + ForwardIterator1 keys_first, + ForwardIterator1 keys_last, + ForwardIterator2 values_first, + BinaryPredicate binary_pred); + + +template + thrust::pair + unique_by_key_copy(execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred); + + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + +#include + diff --git a/compat/thrust/system/tbb/detail/unique_by_key.inl b/compat/thrust/system/tbb/detail/unique_by_key.inl new file mode 100644 index 0000000..7747ca4 --- /dev/null +++ b/compat/thrust/system/tbb/detail/unique_by_key.inl @@ -0,0 +1,74 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ +namespace detail +{ + + +template + thrust::pair + unique_by_key(execution_policy &exec, + ForwardIterator1 keys_first, + ForwardIterator1 keys_last, + ForwardIterator2 values_first, + BinaryPredicate binary_pred) +{ + // tbb prefers generic::unique_by_key to cpp::unique_by_key + return thrust::system::detail::generic::unique_by_key(exec,keys_first,keys_last,values_first,binary_pred); +} // end unique_by_key() + + +template + thrust::pair + unique_by_key_copy(execution_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred) +{ + // tbb prefers generic::unique_by_key_copy to cpp::unique_by_key_copy + return thrust::system::detail::generic::unique_by_key_copy(exec,keys_first,keys_last,values_first,keys_output,values_output,binary_pred); +} // end unique_by_key_copy() + + +} // end namespace detail +} // end namespace tbb +} // end namespace system +} // end namespace thrust + diff --git a/compat/thrust/system/tbb/detail/vector.inl b/compat/thrust/system/tbb/detail/vector.inl new file mode 100644 index 0000000..d87e670 --- /dev/null +++ b/compat/thrust/system/tbb/detail/vector.inl @@ -0,0 +1,97 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#pragma once + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ + +template + vector + ::vector() + : super_t() +{} + +template + vector + ::vector(size_type n) + : super_t(n) +{} + +template + vector + ::vector(size_type n, const value_type &value) + : super_t(n,value) +{} + +template + vector + ::vector(const vector &x) + : super_t(x) +{} + +template + template + vector + ::vector(const thrust::detail::vector_base &x) + : super_t(x) +{} + +template + template + vector + ::vector(const std::vector &x) + : super_t(x) +{} + +template + template + vector + ::vector(InputIterator first, InputIterator last) + : super_t(first,last) +{} + +template + template + vector & + vector + ::operator=(const std::vector &x) +{ + super_t::operator=(x); + return *this; +} + +template + template + vector & + vector + ::operator=(const thrust::detail::vector_base &x) +{ + super_t::operator=(x); + return *this; +} + +} // end tbb +} // end system +} // end thrust + diff --git a/compat/thrust/system/tbb/execution_policy.h b/compat/thrust/system/tbb/execution_policy.h new file mode 100644 index 0000000..c462586 --- /dev/null +++ b/compat/thrust/system/tbb/execution_policy.h @@ -0,0 +1,156 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +/*! \file thrust/system/tbb/execution_policy.h + * \brief Execution policies for Thrust's TBB system. 
+ */ + +#include + +// get the execution policies definitions first +#include + +// get the definition of par +#include + +// now get all the algorithm definitions + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +// define these entities here for the purpose of Doxygenating them +// they are actually defined elsewhere +#if 0 +namespace thrust +{ +namespace system +{ +namespace tbb +{ + + +/*! \addtogroup execution_policies + * \{ + */ + + +/*! \p thrust::tbb::execution_policy is the base class for all Thrust parallel execution + * policies which are derived from Thrust's TBB backend system. + */ +template +struct execution_policy : thrust::execution_policy +{}; + + +/*! \p tbb::tag is a type representing Thrust's TBB backend system in C++'s type system. + * Iterators "tagged" with a type which is convertible to \p tbb::tag assert that they may be + * "dispatched" to algorithm implementations in the \p tbb system. + */ +struct tag : thrust::system::tbb::execution_policy { unspecified }; + + +/*! \p thrust::tbb::par is the parallel execution policy associated with Thrust's TBB + * backend system. + * + * Instead of relying on implicit algorithm dispatch through iterator system tags, users may + * directly target Thrust's TBB backend system by providing \p thrust::tbb::par as an algorithm + * parameter. + * + * Explicit dispatch can be useful in avoiding the introduction of data copies into containers such + * as \p thrust::tbb::vector. + * + * The type of \p thrust::tbb::par is implementation-defined. 
+ * + * The following code snippet demonstrates how to use \p thrust::tbb::par to explicitly dispatch an + * invocation of \p thrust::for_each to the TBB backend system: + * + * \code + * #include + * #include + * #include + * + * struct printf_functor + * { + * __host__ __device__ + * void operator()(int x) + * { + * printf("%d\n", x); + * } + * }; + * ... + * int vec[3]; + * vec[0] = 0; vec[1] = 1; vec[2] = 2; + * + * thrust::for_each(thrust::tbb::par, vec, vec + 3, printf_functor()); + * + * // 0 1 2 is printed to standard output in some unspecified order + * \endcode + */ +static const unspecified par; + + +/*! \} + */ + + +} // end tbb +} // end system +} // end thrust +#endif + + diff --git a/compat/thrust/system/tbb/memory.h b/compat/thrust/system/tbb/memory.h new file mode 100644 index 0000000..deea7ee --- /dev/null +++ b/compat/thrust/system/tbb/memory.h @@ -0,0 +1,414 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/tbb/memory.h + * \brief Managing memory associated with Thrust's TBB system. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ + +template class pointer; + +} // end tbb +} // end system +} // end thrust + + +/*!
\cond + */ + +// specialize std::iterator_traits to avoid problems with the name of +// pointer's constructor shadowing its nested pointer type +// do this before pointer is defined so the specialization is correctly +// used inside the definition +namespace std +{ + +template + struct iterator_traits > +{ + private: + typedef thrust::system::tbb::pointer ptr; + + public: + typedef typename ptr::iterator_category iterator_category; + typedef typename ptr::value_type value_type; + typedef typename ptr::difference_type difference_type; + typedef ptr pointer; + typedef typename ptr::reference reference; +}; // end iterator_traits + +} // end std + +/*! \endcond + */ + + +namespace thrust +{ +namespace system +{ + +/*! \addtogroup system_backends Systems + * \ingroup system + * \{ + */ + +/*! \namespace thrust::system::tbb + * \brief \p thrust::system::tbb is the namespace containing functionality for allocating, manipulating, + * and deallocating memory available to Thrust's TBB backend system. + * The identifiers are provided in a separate namespace underneath thrust::system + * for import convenience but are also aliased in the top-level thrust::tbb + * namespace for easy access. + * + */ +namespace tbb +{ + +// forward declaration of reference for pointer +template class reference; + +/*! \cond + */ + +// XXX nvcc + msvc have trouble instantiating reference below +// this is a workaround +namespace detail +{ + +template + struct reference_msvc_workaround +{ + typedef thrust::system::tbb::reference type; +}; // end reference_msvc_workaround + +} // end detail + +/*! \endcond + */ + + +/*! \p pointer stores a pointer to an object allocated in memory available to the tbb system. + * This type provides type safety when dispatching standard algorithms on ranges resident + * in tbb memory. + * + * \p pointer has pointer semantics: it may be dereferenced and manipulated with pointer arithmetic. 
+ * + * \p pointer can be created with the function \p tbb::malloc, or by explicitly calling its constructor + * with a raw pointer. + * + * The raw pointer encapsulated by a \p pointer may be obtained by either its get member function + * or the \p raw_pointer_cast function. + * + * \note \p pointer is not a "smart" pointer; it is the programmer's responsibility to deallocate memory + * pointed to by \p pointer. + * + * \tparam T specifies the type of the pointee. + * + * \see tbb::malloc + * \see tbb::free + * \see raw_pointer_cast + */ +template + class pointer + : public thrust::pointer< + T, + thrust::system::tbb::tag, + thrust::system::tbb::reference, + thrust::system::tbb::pointer + > +{ + /*! \cond + */ + + private: + typedef thrust::pointer< + T, + thrust::system::tbb::tag, + //thrust::system::tbb::reference, + typename detail::reference_msvc_workaround::type, + thrust::system::tbb::pointer + > super_t; + + /*! \endcond + */ + + public: + // note that tbb::pointer's member functions need __host__ __device__ + // to interoperate with nvcc + iterators' dereference member function + + /*! \p pointer's no-argument constructor initializes its encapsulated pointer to \c 0. + */ + __host__ __device__ + pointer() : super_t() {} + + /*! This constructor allows construction of a pointer from a T*. + * + * \param ptr A raw pointer to copy from, presumed to point to a location in memory + * accessible by the \p tbb system. + * \tparam OtherT \p OtherT shall be convertible to \p T. + */ + template + __host__ __device__ + explicit pointer(OtherT *ptr) : super_t(ptr) {} + + /*! This constructor allows construction from another pointer-like object with related type. + * + * \param other The \p OtherPointer to copy. + * \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible + * to \p thrust::system::tbb::tag and its element type shall be convertible to \p T.
+ */ + template + __host__ __device__ + pointer(const OtherPointer &other, + typename thrust::detail::enable_if_pointer_is_convertible< + OtherPointer, + pointer + >::type * = 0) : super_t(other) {} + + /*! Assignment operator allows assigning from another pointer-like object with related type. + * + * \param other The other pointer-like object to assign from. + * \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible + * to \p thrust::system::tbb::tag and its element type shall be convertible to \p T. + */ + template + __host__ __device__ + typename thrust::detail::enable_if_pointer_is_convertible< + OtherPointer, + pointer, + pointer & + >::type + operator=(const OtherPointer &other) + { + return super_t::operator=(other); + } +}; // end pointer + + +/*! \p reference is a wrapped reference to an object stored in memory available to the \p tbb system. + * \p reference is the type of the result of dereferencing a \p tbb::pointer. + * + * \tparam T Specifies the type of the referenced object. + */ +template + class reference + : public thrust::reference< + T, + thrust::system::tbb::pointer, + thrust::system::tbb::reference + > +{ + /*! \cond + */ + + private: + typedef thrust::reference< + T, + thrust::system::tbb::pointer, + thrust::system::tbb::reference + > super_t; + + /*! \endcond + */ + + public: + /*! \cond + */ + + typedef typename super_t::value_type value_type; + typedef typename super_t::pointer pointer; + + /*! \endcond + */ + + /*! This constructor initializes this \p reference to refer to an object + * pointed to by the given \p pointer. After this \p reference is constructed, + * it shall refer to the object pointed to by \p ptr. + * + * \param ptr A \p pointer to copy from. + */ + __host__ __device__ + explicit reference(const pointer &ptr) + : super_t(ptr) + {} + + /*! This constructor accepts a const reference to another \p reference of related type. 
+ * After this \p reference is constructed, it shall refer to the same object as \p other. + * + * \param other A \p reference to copy from. + * \tparam OtherT The element type of the other \p reference. + * + * \note This constructor is templated primarily to allow initialization of reference + * from reference. + */ + template + __host__ __device__ + reference(const reference &other, + typename thrust::detail::enable_if_convertible< + typename reference::pointer, + pointer + >::type * = 0) + : super_t(other) + {} + + /*! Copy assignment operator copy assigns from another \p reference of related type. + * + * \param other The other \p reference to assign from. + * \return *this + * \tparam OtherT The element type of the other \p reference. + */ + template + reference &operator=(const reference &other); + + /*! Assignment operator assigns from a \p value_type. + * + * \param x The \p value_type to assign from. + * \return *this + */ + reference &operator=(const value_type &x); +}; // end reference + +/*! Exchanges the values of two objects referred to by \p reference. + * \p x The first \p reference of interest. + * \p y The second \p reference of interest. + */ +template +__host__ __device__ +void swap(reference x, reference y); + +/*! Allocates an area of memory available to Thrust's tbb system. + * \param n Number of bytes to allocate. + * \return A tbb::pointer pointing to the beginning of the newly + * allocated memory. A null tbb::pointer is returned if + * an error occurs. + * \note The tbb::pointer returned by this function must be + * deallocated with \p tbb::free. + * \see tbb::free + * \see std::malloc + */ +inline pointer malloc(std::size_t n); + +/*! Allocates a typed area of memory available to Thrust's tbb system. + * \param n Number of elements to allocate. + * \return A tbb::pointer pointing to the beginning of the newly + * allocated memory. A null tbb::pointer is returned if + * an error occurs.
+ * \note The tbb::pointer returned by this function must be + * deallocated with \p tbb::free. + * \see tbb::free + * \see std::malloc + */ +template +inline pointer malloc(std::size_t n); + +/*! Deallocates an area of memory previously allocated by tbb::malloc. + * \param ptr A tbb::pointer pointing to the beginning of an area + * of memory previously allocated with tbb::malloc. + * \see tbb::malloc + * \see std::free + */ +inline void free(pointer ptr); + +// XXX upon c++11 +// template using allocator = thrust::detail::malloc_allocator >; + +/*! \p tbb::allocator is the default allocator used by the \p tbb system's containers such as + * tbb::vector if no user-specified allocator is provided. \p tbb::allocator allocates + * (deallocates) storage with \p tbb::malloc (\p tbb::free). + */ +template + struct allocator + : thrust::detail::malloc_allocator< + T, + tag, + pointer + > +{ + /*! The \p rebind metafunction provides the type of an \p allocator + * instantiated with another type. + * + * \tparam U The other type to use for instantiation. + */ + template + struct rebind + { + /*! The typedef \p other gives the type of the rebound \p allocator. + */ + typedef allocator other; + }; + + /*! No-argument constructor has no effect. + */ + __host__ __device__ + inline allocator() {} + + /*! Copy constructor has no effect. + */ + __host__ __device__ + inline allocator(const allocator &) {} + + /*! Constructor from other \p allocator has no effect. + */ + template + __host__ __device__ + inline allocator(const allocator &) {} + + /*! Destructor has no effect. + */ + __host__ __device__ + inline ~allocator() {} +}; // end allocator + +} // end tbb + +/*! \} + */ + +} // end system + +/*! \namespace thrust::tbb + * \brief \p thrust::tbb is a top-level alias for thrust::system::tbb. 
+ */ +namespace tbb +{ + +using thrust::system::tbb::pointer; +using thrust::system::tbb::reference; +using thrust::system::tbb::malloc; +using thrust::system::tbb::free; +using thrust::system::tbb::allocator; + +} // end tbb + +} // end thrust + +#include + diff --git a/compat/thrust/system/tbb/vector.h b/compat/thrust/system/tbb/vector.h new file mode 100644 index 0000000..1c49c3f --- /dev/null +++ b/compat/thrust/system/tbb/vector.h @@ -0,0 +1,144 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/tbb/vector.h + * \brief A dynamically-sizable array of elements which reside in memory available to + * Thrust's TBB system. + */ + +#pragma once + +#include +#include +#include +#include + +namespace thrust +{ +namespace system +{ +namespace tbb +{ + +// XXX upon c++11 +// template > using vector = thrust::detail::vector_base; + +/*! \p tbb::vector is a container that supports random access to elements, + * constant time removal of elements at the end, and linear time insertion + * and removal of elements at the beginning or in the middle. The number of + * elements in a \p tbb::vector may vary dynamically; memory management is + * automatic. The elements contained in a \p tbb::vector reside in memory + * available to the \p tbb system. + * + * \tparam T The element type of the \p tbb::vector. + * \tparam Allocator The allocator type of the \p tbb::vector.
Defaults to \p tbb::allocator. + * + * \see http://www.sgi.com/tech/stl/Vector.html + * \see host_vector For the documentation of the complete interface which is + * shared by \p tbb::vector + * \see device_vector + */ +template > + class vector + : public thrust::detail::vector_base +{ + /*! \cond + */ + private: + typedef thrust::detail::vector_base super_t; + /*! \endcond + */ + + public: + + /*! \cond + */ + typedef typename super_t::size_type size_type; + typedef typename super_t::value_type value_type; + /*! \endcond + */ + + /*! This constructor creates an empty \p tbb::vector. + */ + vector(); + + /*! This constructor creates a \p tbb::vector with \p n default-constructed elements. + * \param n The size of the \p tbb::vector to create. + */ + explicit vector(size_type n); + + /*! This constructor creates a \p tbb::vector with \p n copies of \p value. + * \param n The size of the \p tbb::vector to create. + * \param value An element to copy. + */ + explicit vector(size_type n, const value_type &value); + + /*! Copy constructor copies from another \p tbb::vector. + * \param x The other \p tbb::vector to copy. + */ + vector(const vector &x); + + /*! This constructor copies from another Thrust vector-like object. + * \param x The other object to copy from. + */ + template + vector(const thrust::detail::vector_base &x); + + /*! This constructor copies from a \c std::vector. + * \param x The \c std::vector to copy from. + */ + template + vector(const std::vector &x); + + /*! This constructor creates a \p tbb::vector by copying from a range. + * \param first The beginning of the range. + * \param last The end of the range. + */ + template + vector(InputIterator first, InputIterator last); + + // XXX vector_base should take a Derived type so we don't have to define these superfluous assigns + + /*! Assignment operator assigns from a \c std::vector. + * \param x The \c std::vector to assign from. 
+ * \return *this + */ + template + vector &operator=(const std::vector &x); + + /*! Assignment operator assigns from another Thrust vector-like object. + * \param x The other object to assign from. + * \return *this + */ + template + vector &operator=(const thrust::detail::vector_base &x); +}; // end vector + +} // end tbb +} // end system + +// alias system::tbb names at top-level +namespace tbb +{ + +using thrust::system::tbb::vector; + +} // end tbb + +} // end thrust + +#include + diff --git a/compat/thrust/system_error.h b/compat/thrust/system_error.h new file mode 100644 index 0000000..ce88fe6 --- /dev/null +++ b/compat/thrust/system_error.h @@ -0,0 +1,51 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system_error.h + * \brief System diagnostics + */ + +#pragma once + +#include + +namespace thrust +{ + +/*! \addtogroup system System Access + * \{ + */ + +/*! \namespace thrust::system + * \brief \p thrust::system is the namespace which contains functionality for manipulating + * memory specific to one of Thrust's backend systems. It also contains functionality + * for reporting error conditions originating from the operating system or other + * low-level application program interfaces such as the CUDA runtime. + * They are provided in a separate namespace for import convenience but are + * also aliased in the top-level \p thrust namespace for easy access. 
+ */ +namespace system +{ +} // end system + +/*! \} // end system + */ + +} // end thrust + +#include +#include + diff --git a/compat/thrust/tabulate.h b/compat/thrust/tabulate.h new file mode 100644 index 0000000..c87edf0 --- /dev/null +++ b/compat/thrust/tabulate.h @@ -0,0 +1,128 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file tabulate.h + * \brief Fills a range with the tabulation of a function + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \addtogroup transformations + * \{ + */ + + +/*! \p tabulate fills the range [first, last) with the value of a function applied to each + * element's index. + * + * For each iterator \c i in the range [first, last), \p tabulate performs the assignment + * *i = unary_op(i - first). + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the range. + * \param last The end of the range. + * \param unary_op The unary operation to apply. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable, + * and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then x + y is defined, + * and if \c T is \p ForwardIterator's \c value_type, then T(0) is defined. 
+ * \tparam UnaryOperation is a model of Unary Function + * and \c UnaryOperation's \c result_type is convertible to \c ForwardIterator's \c value_type.
+ * const int N = 10; + * int A[N]; + * thrust::tabulate(A, A + 10, thrust::negate()); + * // A is now {0, -1, -2, -3, -4, -5, -6, -7, -8, -9} + * \endcode + * + * \see thrust::fill + * \see thrust::generate + * \see thrust::sequence + */ +template + void tabulate(ForwardIterator first, + ForwardIterator last, + UnaryOperation unary_op); + + +/*! \} // end transformations + */ + + +} // end namespace thrust + +#include + diff --git a/compat/thrust/transform.h b/compat/thrust/transform.h new file mode 100644 index 0000000..1ada105 --- /dev/null +++ b/compat/thrust/transform.h @@ -0,0 +1,720 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file transform.h + * \brief Transforms input ranges using a function object + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \addtogroup algorithms + */ + +/*! \addtogroup transformations + * \ingroup algorithms + * \{ + */ + + +/*! This version of \p transform applies a unary function to each element + * of an input sequence and stores the result in the corresponding + * position in an output sequence. Specifically, for each iterator + * i in the range [\p first, \p last) the operation + * op(*i) is performed and the result is assigned to *o, + * where o is the corresponding output iterator in the range + * [\p result, \p result + (\p last - \p first) ). 
The input and + * output sequences may coincide, resulting in an in-place transformation. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param result The beginning of the output sequence. + * \param op The tranformation operation. + * \return The end of the output sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator + * and \c InputIterator's \c value_type is convertible to \c UnaryFunction's \c argument_type. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam UnaryFunction is a model of Unary Function + * and \c UnaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. + * + * \pre \p first may equal \p result, but the range [first, last) shall not overlap the range [result, result + (last - first)) otherwise. + * + * The following code snippet demonstrates how to use \p transform to negate a range in-place + * using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... + * + * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; + * + * thrust::negate op; + * + * thrust::transform(thrust::host, data, data + 10, data, op); // in-place transformation + * + * // data is now {5, 0, -2, 3, -2, -4, 0, 1, -2, -8}; + * \endcode + * + * \see http://www.sgi.com/tech/stl/transform.html + */ +template + OutputIterator transform(const thrust::detail::execution_policy_base &exec, + InputIterator first, InputIterator last, + OutputIterator result, + UnaryFunction op); + + +/*! This version of \p transform applies a unary function to each element + * of an input sequence and stores the result in the corresponding + * position in an output sequence. 
Specifically, for each iterator + * i in the range [\p first, \p last) the operation + * op(*i) is performed and the result is assigned to *o, + * where o is the corresponding output iterator in the range + * [\p result, \p result + (\p last - \p first) ). The input and + * output sequences may coincide, resulting in an in-place transformation. + * + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param result The beginning of the output sequence. + * \param op The tranformation operation. + * \return The end of the output sequence. + * + * \tparam InputIterator is a model of Input Iterator + * and \c InputIterator's \c value_type is convertible to \c UnaryFunction's \c argument_type. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam UnaryFunction is a model of Unary Function + * and \c UnaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. + * + * \pre \p first may equal \p result, but the range [first, last) shall not overlap the range [result, result + (last - first)) otherwise. + * + * The following code snippet demonstrates how to use \p transform + * + * \code + * #include + * #include + * + * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; + * + * thrust::negate op; + * + * thrust::transform(data, data + 10, data, op); // in-place transformation + * + * // data is now {5, 0, -2, 3, -2, -4, 0, 1, -2, -8}; + * \endcode + * + * \see http://www.sgi.com/tech/stl/transform.html + */ +template + OutputIterator transform(InputIterator first, InputIterator last, + OutputIterator result, + UnaryFunction op); + + +/*! This version of \p transform applies a binary function to each pair + * of elements from two input sequences and stores the result in the + * corresponding position in an output sequence. 
Specifically, for + * each iterator i in the range [\p first1, \p last1) and + * j = first2 + (i - first1) in the range [\p first2, \p first2 + (\p last1 - \p first1) ) + * the operation op(*i,*j) is performed and the result is + * assigned to *o, where o is the corresponding + * output iterator in the range [\p result, \p result + (\p last1 - \p first1) ).
+ * + * The following code snippet demonstrates how to use \p transform to compute the sum of two + * ranges using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... + * + * int input1[6] = {-5, 0, 2, 3, 2, 4}; + * int input2[6] = { 3, 6, -2, 1, 2, 3}; + * int output[6]; + * + * thrust::plus op; + * + * thrust::transform(thrust::host, input1, input1 + 6, input2, output, op); + * + * // output is now {-2, 6, 0, 4, 4, 7}; + * \endcode + * + * \see http://www.sgi.com/tech/stl/transform.html + */ +template + OutputIterator transform(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryFunction op); + + +/*! This version of \p transform applies a binary function to each pair + * of elements from two input sequences and stores the result in the + * corresponding position in an output sequence. Specifically, for + * each iterator i in the range [\p first1, \p last1) and + * j = first + (i - first1) in the range [\p first2, \p last2) + * the operation op(*i,*j) is performed and the result is + * assigned to *o, where o is the corresponding + * output iterator in the range [\p result, \p result + (\p last - \p first) ). + * The input and output sequences may coincide, resulting in an + * in-place transformation. + * + * \param first1 The beginning of the first input sequence. + * \param last1 The end of the first input sequence. + * \param first2 The beginning of the second input sequence. + * \param result The beginning of the output sequence. + * \param op The tranformation operation. + * \return The end of the output sequence. + * + * \tparam InputIterator1 is a model of Input Iterator + * and \c InputIterator1's \c value_type is convertible to \c BinaryFunction's \c first_argument_type. 
+ * \tparam InputIterator2 is a model of Input Iterator + * and \c InputIterator2's \c value_type is convertible to \c BinaryFunction's \c second_argument_type. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam BinaryFunction is a model of Binary Function + * and \c BinaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. + * + * \pre \p first1 may equal \p result, but the range [first1, last1) shall not overlap the range [result, result + (last1 - first1)) otherwise. + * \pre \p first2 may equal \p result, but the range [first2, first2 + (last1 - first1)) shall not overlap the range [result, result + (last1 - first1)) otherwise. + * + * The following code snippet demonstrates how to use \p transform + * + * \code + * #include + * #include + * + * int input1[6] = {-5, 0, 2, 3, 2, 4}; + * int input2[6] = { 3, 6, -2, 1, 2, 3}; + * int output[6]; + * + * thrust::plus op; + * + * thrust::transform(input1, input1 + 6, input2, output, op); + * + * // output is now {-2, 6, 0, 4, 4, 7}; + * \endcode + * + * \see http://www.sgi.com/tech/stl/transform.html + */ +template + OutputIterator transform(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryFunction op); + + +/*! This version of \p transform_if conditionally applies a unary function + * to each element of an input sequence and stores the result in the corresponding + * position in an output sequence if the corresponding position in the input sequence + * satifies a predicate. Otherwise, the corresponding position in the + * output sequence is not modified. + * + * Specifically, for each iterator i in the range [first, last) the + * predicate pred(*i) is evaluated. If this predicate + * evaluates to \c true, the result of op(*i) is assigned to *o, + * where o is the corresponding output iterator in the range + * [result, result + (last - first) ). Otherwise, op(*i) is + * not evaluated and no assignment occurs. 
The input and output sequences may coincide, + * resulting in an in-place transformation. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param result The beginning of the output sequence. + * \param op The tranformation operation. + * \param pred The predicate operation. + * \return The end of the output sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator, + * and \c InputIterator's \c value_type is convertible to \c Predicate's \c argument_type, + * and \c InputIterator's \c value_type is convertible to \c UnaryFunction's \c argument_type. + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam UnaryFunction is a model of Unary Function + * and \c UnaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. + * \tparam Predicate is a model of Predicate. + * + * \pre \p first may equal \p result, but the range [first, last) shall not overlap the range [result, result + (last - first)) otherwise. + * + * The following code snippet demonstrates how to use \p transform_if to negate the odd-valued + * elements of a range using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... 
+ * + * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; + * + * struct is_odd + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return x % 2; + * } + * }; + * + * thrust::negate op; + * thrust::identity identity; + * + * // negate odd elements + * thrust::transform_if(thrust::host, data, data + 10, data, op, is_odd()); // in-place transformation + * + * // data is now {5, 0, 2, 3, 2, 4, 0, 1, 2, 8}; + * \endcode + * + * \see thrust::transform + */ +template + ForwardIterator transform_if(const thrust::detail::execution_policy_base &exec, + InputIterator first, InputIterator last, + ForwardIterator result, + UnaryFunction op, + Predicate pred); + + +/*! This version of \p transform_if conditionally applies a unary function + * to each element of an input sequence and stores the result in the corresponding + * position in an output sequence if the corresponding position in the input sequence + * satifies a predicate. Otherwise, the corresponding position in the + * output sequence is not modified. + * + * Specifically, for each iterator i in the range [first, last) the + * predicate pred(*i) is evaluated. If this predicate + * evaluates to \c true, the result of op(*i) is assigned to *o, + * where o is the corresponding output iterator in the range + * [result, result + (last - first) ). Otherwise, op(*i) is + * not evaluated and no assignment occurs. The input and output sequences may coincide, + * resulting in an in-place transformation. + * + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param result The beginning of the output sequence. + * \param op The tranformation operation. + * \param pred The predicate operation. + * \return The end of the output sequence. 
+ * + * \tparam InputIterator is a model of Input Iterator, + * and \c InputIterator's \c value_type is convertible to \c Predicate's \c argument_type, + * and \c InputIterator's \c value_type is convertible to \c UnaryFunction's \c argument_type. + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam UnaryFunction is a model of Unary Function + * and \c UnaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. + * \tparam Predicate is a model of Predicate. + * + * \pre \p first may equal \p result, but the range [first, last) shall not overlap the range [result, result + (last - first)) otherwise. + * + * The following code snippet demonstrates how to use \p transform_if: + * + * \code + * #include + * #include + * + * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; + * + * struct is_odd + * { + * __host__ __device__ + * bool operator()(int x) + * { + * return x % 2; + * } + * }; + * + * thrust::negate op; + * thrust::identity identity; + * + * // negate odd elements + * thrust::transform_if(data, data + 10, data, op, is_odd()); // in-place transformation + * + * // data is now {5, 0, 2, 3, 2, 4, 0, 1, 2, 8}; + * \endcode + * + * \see thrust::transform + */ +template + ForwardIterator transform_if(InputIterator first, InputIterator last, + ForwardIterator result, + UnaryFunction op, + Predicate pred); + + +/*! This version of \p transform_if conditionally applies a unary function + * to each element of an input sequence and stores the result in the corresponding + * position in an output sequence if the corresponding position in a stencil sequence + * satisfies a predicate. Otherwise, the corresponding position in the + * output sequence is not modified. + * + * Specifically, for each iterator i in the range [first, last) the + * predicate pred(*s) is evaluated, where s is the corresponding input + * iterator in the range [stencil, stencil + (last - first) ). 
If this predicate + * evaluates to \c true, the result of op(*i) is assigned to *o, + * where o is the corresponding output iterator in the range + * [result, result + (last - first) ). Otherwise, op(*i) is + * not evaluated and no assignment occurs. The input and output sequences may coincide, + * resulting in an in-place transformation. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param stencil The beginning of the stencil sequence. + * \param result The beginning of the output sequence. + * \param op The tranformation operation. + * \param pred The predicate operation. + * \return The end of the output sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator + * and \c InputIterator1's \c value_type is convertible to \c UnaryFunction's \c argument_type. + * \tparam InputIterator2 is a model of Input Iterator + * and \c InputIterator2's \c value_type is convertible to \c Predicate's \c argument_type. + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam UnaryFunction is a model of Unary Function + * and \c UnaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. + * \tparam Predicate is a model of Predicate. + * + * \pre \p first may equal \p result, but the range [first, last) shall not overlap the range [result, result + (last - first)) otherwise. + * \pre \p stencil may equal \p result, but the range [stencil, stencil + (last - first)) shall not overlap the range [result, result + (last - first)) otherwise. + * + * The following code snippet demonstrates how to use \p transform_if using the \p thrust::host + * execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... 
+ * + * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; + * int stencil[10] = { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; + * + * thrust::negate op; + * thrust::identity identity; + * + * thrust::transform_if(thrust::host, data, data + 10, stencil, data, op, identity); // in-place transformation + * + * // data is now {5, 0, -2, -3, -2, 4, 0, -1, -2, 8}; + * \endcode + * + * \see thrust::transform + */ +template + ForwardIterator transform_if(const thrust::detail::execution_policy_base &exec, + InputIterator1 first, InputIterator1 last, + InputIterator2 stencil, + ForwardIterator result, + UnaryFunction op, + Predicate pred); + + +/*! This version of \p transform_if conditionally applies a unary function + * to each element of an input sequence and stores the result in the corresponding + * position in an output sequence if the corresponding position in a stencil sequence + * satisfies a predicate. Otherwise, the corresponding position in the + * output sequence is not modified. + * + * Specifically, for each iterator i in the range [first, last) the + * predicate pred(*s) is evaluated, where s is the corresponding input + * iterator in the range [stencil, stencil + (last - first) ). If this predicate + * evaluates to \c true, the result of op(*i) is assigned to *o, + * where o is the corresponding output iterator in the range + * [result, result + (last - first) ). Otherwise, op(*i) is + * not evaluated and no assignment occurs. The input and output sequences may coincide, + * resulting in an in-place transformation. + * + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param stencil The beginning of the stencil sequence. + * \param result The beginning of the output sequence. + * \param op The tranformation operation. + * \param pred The predicate operation. + * \return The end of the output sequence. 
+ * + * \tparam InputIterator1 is a model of Input Iterator + * and \c InputIterator1's \c value_type is convertible to \c UnaryFunction's \c argument_type. + * \tparam InputIterator2 is a model of Input Iterator + * and \c InputIterator2's \c value_type is convertible to \c Predicate's \c argument_type. + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam UnaryFunction is a model of Unary Function + * and \c UnaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. + * \tparam Predicate is a model of Predicate. + * + * \pre \p first may equal \p result, but the range [first, last) shall not overlap the range [result, result + (last - first)) otherwise. + * \pre \p stencil may equal \p result, but the range [stencil, stencil + (last - first)) shall not overlap the range [result, result + (last - first)) otherwise. + * + * The following code snippet demonstrates how to use \p transform_if: + * + * \code + * #include + * #include + * + * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; + * int stencil[10] = { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; + * + * thrust::negate op; + * thrust::identity identity; + * + * thrust::transform_if(data, data + 10, stencil, data, op, identity); // in-place transformation + * + * // data is now {5, 0, -2, -3, -2, 4, 0, -1, -2, 8}; + * \endcode + * + * \see thrust::transform + */ +template + ForwardIterator transform_if(InputIterator1 first, InputIterator1 last, + InputIterator2 stencil, + ForwardIterator result, + UnaryFunction op, + Predicate pred); + + +/*! This version of \p transform_if conditionally applies a binary function + * to each pair of elements from two input sequences and stores the result in the corresponding + * position in an output sequence if the corresponding position in a stencil sequence + * satifies a predicate. Otherwise, the corresponding position in the + * output sequence is not modified. 
+ * + * Specifically, for each iterator i in the range [first1, last1) and + * j = first2 + (i - first1) in the range [first2, first2 + (last1 - first1) ), + * the predicate pred(*s) is evaluated, where s is the corresponding input + * iterator in the range [stencil, stencil + (last1 - first1) ). If this predicate + * evaluates to \c true, the result of binary_op(*i,*j) is assigned to *o, + * where o is the corresponding output iterator in the range + * [result, result + (last1 - first1) ). Otherwise, binary_op(*i,*j) is + * not evaluated and no assignment occurs. The input and output sequences may coincide, + * resulting in an in-place transformation. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first1 The beginning of the first input sequence. + * \param last1 The end of the first input sequence. + * \param first2 The beginning of the second input sequence. + * \param stencil The beginning of the stencil sequence. + * \param result The beginning of the output sequence. + * \param binary_op The transformation operation. + * \param pred The predicate operation. + * \return The end of the output sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator + * and \c InputIterator1's \c value_type is convertible to \c BinaryFunction's \c first_argument_type. + * \tparam InputIterator2 is a model of Input Iterator + * and \c InputIterator2's \c value_type is convertible to \c BinaryFunction's \c second_argument_type. + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam BinaryFunction is a model of Binary Function + * and \c BinaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. + * \tparam Predicate is a model of Predicate. 
+ * + * \pre \p first1 may equal \p result, but the range [first1, last1) shall not overlap the range [result, result + (last1 - first1)) otherwise. + * \pre \p first2 may equal \p result, but the range [first2, first2 + (last1 - first1)) shall not overlap the range [result, result + (last1 - first1)) otherwise. + * \pre \p stencil may equal \p result, but the range [stencil, stencil + (last1 - first1)) shall not overlap the range [result, result + (last1 - first1)) otherwise. + * + * The following code snippet demonstrates how to use \p transform_if using the \p thrust::host + * execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * ... + * + * int input1[6] = {-5, 0, 2, 3, 2, 4}; + * int input2[6] = { 3, 6, -2, 1, 2, 3}; + * int stencil[8] = { 1, 0, 1, 0, 1, 0}; + * int output[6]; + * + * thrust::plus op; + * thrust::identity identity; + * + * thrust::transform_if(thrust::host, input1, input1 + 6, input2, stencil, output, op, identity); + * + * // output is now {-2, 0, 0, 3, 4, 4}; + * \endcode + * + * \see thrust::transform + */ +template + ForwardIterator transform_if(const thrust::detail::execution_policy_base &exec, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, + InputIterator3 stencil, + ForwardIterator result, + BinaryFunction binary_op, + Predicate pred); + + +/*! This version of \p transform_if conditionally applies a binary function + * to each pair of elements from two input sequences and stores the result in the corresponding + * position in an output sequence if the corresponding position in a stencil sequence + * satifies a predicate. Otherwise, the corresponding position in the + * output sequence is not modified. 
+ * + * Specifically, for each iterator i in the range [first1, last1) and + * j = first2 + (i - first1) in the range [first2, first2 + (last1 - first1) ), + * the predicate pred(*s) is evaluated, where s is the corresponding input + * iterator in the range [stencil, stencil + (last1 - first1) ). If this predicate + * evaluates to \c true, the result of binary_op(*i,*j) is assigned to *o, + * where o is the corresponding output iterator in the range + * [result, result + (last1 - first1) ). Otherwise, binary_op(*i,*j) is + * not evaluated and no assignment occurs. The input and output sequences may coincide, + * resulting in an in-place transformation. + * + * \param first1 The beginning of the first input sequence. + * \param last1 The end of the first input sequence. + * \param first2 The beginning of the second input sequence. + * \param stencil The beginning of the stencil sequence. + * \param result The beginning of the output sequence. + * \param binary_op The transformation operation. + * \param pred The predicate operation. + * \return The end of the output sequence. + * + * \tparam InputIterator1 is a model of Input Iterator + * and \c InputIterator1's \c value_type is convertible to \c BinaryFunction's \c first_argument_type. + * \tparam InputIterator2 is a model of Input Iterator + * and \c InputIterator2's \c value_type is convertible to \c BinaryFunction's \c second_argument_type. + * \tparam ForwardIterator is a model of Forward Iterator. + * \tparam BinaryFunction is a model of Binary Function + * and \c BinaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. + * \tparam Predicate is a model of Predicate. + * + * \pre \p first1 may equal \p result, but the range [first1, last1) shall not overlap the range [result, result + (last1 - first1)) otherwise. + * \pre \p first2 may equal \p result, but the range [first2, first2 + (last1 - first1)) shall not overlap the range [result, result + (last1 - first1)) otherwise. 
+ * \pre \p stencil may equal \p result, but the range [stencil, stencil + (last1 - first1)) shall not overlap the range [result, result + (last1 - first1)) otherwise. + * + * The following code snippet demonstrates how to use \p transform_if: + * + * \code + * #include + * #include + * + * int input1[6] = {-5, 0, 2, 3, 2, 4}; + * int input2[6] = { 3, 6, -2, 1, 2, 3}; + * int stencil[8] = { 1, 0, 1, 0, 1, 0}; + * int output[6]; + * + * thrust::plus op; + * thrust::identity identity; + * + * thrust::transform_if(input1, input1 + 6, input2, stencil, output, op, identity); + * + * // output is now {-2, 0, 0, 3, 4, 4}; + * \endcode + * + * \see thrust::transform + */ +template + ForwardIterator transform_if(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, + InputIterator3 stencil, + ForwardIterator result, + BinaryFunction binary_op, + Predicate pred); + + +/*! \} // end transformations + */ + + +} // end namespace thrust + +#include + diff --git a/compat/thrust/transform_reduce.h b/compat/thrust/transform_reduce.h new file mode 100644 index 0000000..3ef5efd --- /dev/null +++ b/compat/thrust/transform_reduce.h @@ -0,0 +1,197 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file transform_reduce.h + * \brief Fused transform / reduction + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! 
\addtogroup reductions + * \{ + * \addtogroup transformed_reductions Transformed Reductions + * \ingroup reductions + * \{ + */ + + +/*! \p transform_reduce fuses the \p transform and \p reduce operations. + * \p transform_reduce is equivalent to performing a transformation defined by + * \p unary_op into a temporary sequence and then performing \p reduce on the + * transformed sequence. In most cases, fusing these two operations together is + * more efficient, since fewer memory reads and writes are required. + * + * \p transform_reduce performs a reduction on the transformation of the + * sequence [first, last) according to \p unary_op. Specifically, + * \p unary_op is applied to each element of the sequence and then the result + * is reduced to a single value with \p binary_op using the initial value + * \p init. Note that the transformation \p unary_op is not applied to + * the initial value \p init. The order of reduction is not specified, + * so \p binary_op must be both commutative and associative. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param unary_op The function to apply to each element of the input sequence. + * \param init The result is initialized to this value. + * \param binary_op The reduction operation. + * \return The result of the transformed reduction. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p UnaryFunction's \c argument_type. + * \tparam UnaryFunction is a model of Unary Function, + * and \p UnaryFunction's \c result_type is convertible to \c OutputType. + * \tparam OutputType is a model of Assignable, + * and is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type. 
+ * \tparam BinaryFunction is a model of Binary Function, + * and \p BinaryFunction's \c result_type is convertible to \p OutputType. + * + * The following code snippet demonstrates how to use \p transform_reduce + * to compute the maximum value of the absolute value of the elements + * of a range using the \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * + * template + * struct absolute_value : public unary_function + * { + * __host__ __device__ T operator()(const T &x) const + * { + * return x < T(0) ? -x : x; + * } + * }; + * + * ... + * + * int data[6] = {-1, 0, -2, -2, 1, -3}; + * int result = thrust::transform_reduce(thrust::host, + * data, data + 6, + * absolute_value(), + * 0, + * thrust::maximum()); + * // result == 3 + * \endcode + * + * \see \c transform + * \see \c reduce + */ +template + OutputType transform_reduce(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + UnaryFunction unary_op, + OutputType init, + BinaryFunction binary_op); + + +/*! \p transform_reduce fuses the \p transform and \p reduce operations. + * \p transform_reduce is equivalent to performing a transformation defined by + * \p unary_op into a temporary sequence and then performing \p reduce on the + * transformed sequence. In most cases, fusing these two operations together is + * more efficient, since fewer memory reads and writes are required. + * + * \p transform_reduce performs a reduction on the transformation of the + * sequence [first, last) according to \p unary_op. Specifically, + * \p unary_op is applied to each element of the sequence and then the result + * is reduced to a single value with \p binary_op using the initial value + * \p init. Note that the transformation \p unary_op is not applied to + * the initial value \p init. The order of reduction is not specified, + * so \p binary_op must be both commutative and associative. 
+ * + * \param first The beginning of the sequence. + * \param last The end of the sequence. + * \param unary_op The function to apply to each element of the input sequence. + * \param init The result is initialized to this value. + * \param binary_op The reduction operation. + * \return The result of the transformed reduction. + * + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is convertible to \p UnaryFunction's \c argument_type. + * \tparam UnaryFunction is a model of Unary Function, + * and \p UnaryFunction's \c result_type is convertible to \c OutputType. + * \tparam OutputType is a model of Assignable, + * and is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type. + * \tparam BinaryFunction is a model of Binary Function, + * and \p BinaryFunction's \c result_type is convertible to \p OutputType. + * + * The following code snippet demonstrates how to use \p transform_reduce + * to compute the maximum value of the absolute value of the elements + * of a range. + * + * \code + * #include + * #include + * + * template + * struct absolute_value : public unary_function + * { + * __host__ __device__ T operator()(const T &x) const + * { + * return x < T(0) ? -x : x; + * } + * }; + * + * ... + * + * int data[6] = {-1, 0, -2, -2, 1, -3}; + * int result = thrust::transform_reduce(data, data + 6, + * absolute_value(), + * 0, + * thrust::maximum()); + * // result == 3 + * \endcode + * + * \see \c transform + * \see \c reduce + */ +template + OutputType transform_reduce(InputIterator first, + InputIterator last, + UnaryFunction unary_op, + OutputType init, + BinaryFunction binary_op); + + +/*! 
\} // end transformed_reductions + * \} // end reductions + */ + + +} // end namespace thrust + +#include + diff --git a/compat/thrust/transform_scan.h b/compat/thrust/transform_scan.h new file mode 100644 index 0000000..e9943e4 --- /dev/null +++ b/compat/thrust/transform_scan.h @@ -0,0 +1,322 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file transform_scan.h + * \brief Fused transform / prefix-sum + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \addtogroup algorithms + */ + +/*! \addtogroup prefixsums Prefix Sums + * \ingroup algorithms + * \{ + */ + +/*! \addtogroup transformed_prefixsums Transformed Prefix Sums + * \ingroup prefixsums + * \{ + */ + + +/*! \p transform_inclusive_scan fuses the \p transform and \p inclusive_scan + * operations. \p transform_inclusive_scan is equivalent to performing a + * transformation defined by \p unary_op into a temporary sequence and then + * performing an \p inclusive_scan on the transformed sequence. In most + * cases, fusing these two operations together is more efficient, since + * fewer memory reads and writes are required. In \p transform_inclusive_scan, + * unary_op(\*first) is assigned to \*result and the result + * of binary_op(unary_op(\*first), unary_op(\*(first + 1))) is + * assigned to \*(result + 1), and so on. The transform scan + * operation is permitted to be in-place.
+ * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param result The beginning of the output sequence. + * \param unary_op The function used to transform the input sequence. + * \param binary_op The associative operator used to 'sum' transformed values. + * \return The end of the output sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator + * and \c InputIterator's \c value_type is convertible to \c unary_op's input type. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam UnaryFunction is a model of Unary Function + * and accepts inputs of \c InputIterator's \c value_type. \c UnaryFunction's result_type + * is convertible to \c OutputIterator's \c value_type. + * \tparam AssociativeOperator is a model of Binary Function + * and \c AssociativeOperator's \c result_type is + * convertible to \c OutputIterator's \c value_type. + * + * \pre \p first may equal \p result, but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p transform_inclusive_scan using the + * \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * ...
+ * + * int data[6] = {1, 0, 2, 2, 1, 3}; + * + * thrust::negate unary_op; + * thrust::plus binary_op; + * + * thrust::transform_inclusive_scan(thrust::host, data, data + 6, data, unary_op, binary_op); // in-place scan + * + * // data is now {-1, -1, -3, -5, -6, -9} + * \endcode + * + * \see \p transform + * \see \p inclusive_scan + * + */ +template + OutputIterator transform_inclusive_scan(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + UnaryFunction unary_op, + AssociativeOperator binary_op); + + +/*! \p transform_inclusive_scan fuses the \p transform and \p inclusive_scan + * operations. \p transform_inclusive_scan is equivalent to performing a + * transformation defined by \p unary_op into a temporary sequence and then + * performing an \p inclusive_scan on the transformed sequence. In most + * cases, fusing these two operations together is more efficient, since + * fewer memory reads and writes are required. In \p transform_inclusive_scan, + * unary_op(\*first) is assigned to \*result and the result + * of binary_op(unary_op(\*first), unary_op(\*(first + 1))) is + * assigned to \*(result + 1), and so on. The transform scan + * operation is permitted to be in-place. + * + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param result The beginning of the output sequence. + * \param unary_op The function used to transform the input sequence. + * \param binary_op The associative operator used to 'sum' transformed values. + * \return The end of the output sequence. + * + * \tparam InputIterator is a model of Input Iterator + * and \c InputIterator's \c value_type is convertible to \c unary_op's input type. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam UnaryFunction is a model of Unary Function + * and accepts inputs of \c InputIterator's \c value_type.
\c UnaryFunction's result_type + * is convertible to \c OutputIterator's \c value_type. + * \tparam AssociativeOperator is a model of Binary Function + * and \c AssociativeOperator's \c result_type is + * convertible to \c OutputIterator's \c value_type. + * + * \pre \p first may equal \p result, but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p transform_inclusive_scan + * + * \code + * #include + * + * int data[6] = {1, 0, 2, 2, 1, 3}; + * + * thrust::negate unary_op; + * thrust::plus binary_op; + * + * thrust::transform_inclusive_scan(data, data + 6, data, unary_op, binary_op); // in-place scan + * + * // data is now {-1, -1, -3, -5, -6, -9} + * \endcode + * + * \see \p transform + * \see \p inclusive_scan + * + */ +template + OutputIterator transform_inclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result, + UnaryFunction unary_op, + AssociativeOperator binary_op); + + +/*! \p transform_exclusive_scan fuses the \p transform and \p exclusive_scan + * operations. \p transform_exclusive_scan is equivalent to performing a + * transformation defined by \p unary_op into a temporary sequence and then + * performing an \p exclusive_scan on the transformed sequence. In most + * cases, fusing these two operations together is more efficient, since + * fewer memory reads and writes are required. In + * \p transform_exclusive_scan, \p init is assigned to \*result + * and the result of binary_op(init, unary_op(\*first)) is assigned + * to \*(result + 1), and so on. The transform scan operation is + * permitted to be in-place. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param result The beginning of the output sequence.
+ * \param unary_op The function used to transform the input sequence. + * \param init The initial value of the \p exclusive_scan + * \param binary_op The associative operator used to 'sum' transformed values. + * \return The end of the output sequence. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator + * and \c InputIterator's \c value_type is convertible to \c unary_op's input type. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam UnaryFunction is a model of Unary Function + * and accepts inputs of \c InputIterator's \c value_type. \c UnaryFunction's result_type + * is convertible to \c OutputIterator's \c value_type. + * \tparam T is convertible to \c OutputIterator's \c value_type. + * \tparam AssociativeOperator is a model of Binary Function + * and \c AssociativeOperator's \c result_type is + * convertible to \c OutputIterator's \c value_type. + * + * \pre \p first may equal \p result, but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p transform_exclusive_scan using the + * \p thrust::host execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * + * int data[6] = {1, 0, 2, 2, 1, 3}; + * + * thrust::negate unary_op; + * thrust::plus binary_op; + * + * thrust::transform_exclusive_scan(thrust::host, data, data + 6, data, unary_op, 4, binary_op); // in-place scan + * + * // data is now {4, 3, 3, 1, -1, -2} + * \endcode + * + * \see \p transform + * \see \p exclusive_scan + * + */ +template + OutputIterator transform_exclusive_scan(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + UnaryFunction unary_op, + T init, + AssociativeOperator binary_op); + + +/*! \p transform_exclusive_scan fuses the \p transform and \p exclusive_scan + * operations.
\p transform_exclusive_scan is equivalent to performing a + * transformation defined by \p unary_op into a temporary sequence and then + * performing an \p exclusive_scan on the transformed sequence. In most + * cases, fusing these two operations together is more efficient, since + * fewer memory reads and writes are required. In + * \p transform_exclusive_scan, \p init is assigned to \*result + * and the result of binary_op(init, unary_op(\*first)) is assigned + * to \*(result + 1), and so on. The transform scan operation is + * permitted to be in-place. + * + * \param first The beginning of the input sequence. + * \param last The end of the input sequence. + * \param result The beginning of the output sequence. + * \param unary_op The function used to transform the input sequence. + * \param init The initial value of the \p exclusive_scan + * \param binary_op The associative operator used to 'sum' transformed values. + * \return The end of the output sequence. + * + * \tparam InputIterator is a model of Input Iterator + * and \c InputIterator's \c value_type is convertible to \c unary_op's input type. + * \tparam OutputIterator is a model of Output Iterator. + * \tparam UnaryFunction is a model of Unary Function + * and accepts inputs of \c InputIterator's \c value_type. \c UnaryFunction's result_type + * is convertible to \c OutputIterator's \c value_type. + * \tparam T is convertible to \c OutputIterator's \c value_type. + * \tparam AssociativeOperator is a model of Binary Function + * and \c AssociativeOperator's \c result_type is + * convertible to \c OutputIterator's \c value_type. + * + * \pre \p first may equal \p result, but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise.
+ * + * The following code snippet demonstrates how to use \p transform_exclusive_scan + * + * \code + * #include + * + * int data[6] = {1, 0, 2, 2, 1, 3}; + * + * thrust::negate unary_op; + * thrust::plus binary_op; + * + * thrust::transform_exclusive_scan(data, data + 6, data, unary_op, 4, binary_op); // in-place scan + * + * // data is now {4, 3, 3, 1, -1, -2} + * \endcode + * + * \see \p transform + * \see \p exclusive_scan + * + */ +template + OutputIterator transform_exclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result, + UnaryFunction unary_op, + T init, + AssociativeOperator binary_op); + + +/*! \} // end transformed_prefixsums + */ + + +/*! \} // end prefixsums + */ + + +} // end namespace thrust + +#include + diff --git a/compat/thrust/tuple.h b/compat/thrust/tuple.h new file mode 100644 index 0000000..3961d98 --- /dev/null +++ b/compat/thrust/tuple.h @@ -0,0 +1,583 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file tuple.h + * \brief A type encapsulating a heterogeneous collection of elements + */ + +/* + * Copyright (C) 1999, 2000 Jaakko Järvi (jaakko.jarvi@cs.utu.fi) + * + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying NOTICE file for the complete license) + * + * For more information, see http://www.boost.org + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ + +/*! 
\addtogroup utility + * \{ + */ + +/*! \addtogroup tuple + * \{ + */ + +/*! \cond + */ + +struct null_type; + +/*! \endcond + */ + +/*! This metafunction returns the type of a + * \p tuple's Nth element. + * + * \tparam N This parameter selects the element of interest. + * \tparam T A \c tuple type of interest. + * + * \see pair + * \see tuple + */ +template + struct tuple_element +{ + private: + typedef typename T::tail_type Next; + + public: + /*! The result of this metafunction is returned in \c type. + */ + typedef typename tuple_element::type type; +}; // end tuple_element + +/*! This metafunction returns the number of elements + * of a \p tuple type of interest. + * + * \tparam T A \c tuple type of interest. + * + * \see pair + * \see tuple + */ +template + struct tuple_size +{ + /*! The result of this metafunction is returned in \c value. + */ + static const int value = 1 + tuple_size::value; +}; // end tuple_size + +// get function for non-const cons-lists, returns a reference to the element + +/*! The \p get function returns a reference to a \p tuple element of + * interest. + * + * \param t A reference to a \p tuple of interest. + * \return A reference to \p t's Nth element. + * + * \tparam N The index of the element of interest. + * + * The following code snippet demonstrates how to use \p get to print + * the value of a \p tuple element. + * + * \code + * #include + * #include + * ... + * thrust::tuple t(13, "thrust"); + * + * std::cout << "The 1st value of t is " << thrust::get<1>(t) << std::endl; + * \endcode + * + * \see pair + * \see tuple + */ +template +__host__ __device__ +inline typename access_traits< + typename tuple_element >::type + >::non_const_type +get(detail::cons& t); + + +/*! The \p get function returns a \c const reference to a \p tuple element of + * interest. + * + * \param t A reference to a \p tuple of interest. + * \return A \c const reference to \p t's Nth element. + * + * \tparam N The index of the element of interest. 
+ * + * The following code snippet demonstrates how to use \p get to print + * the value of a \p tuple element. + * + * \code + * #include + * #include + * ... + * thrust::tuple t(13, "thrust"); + * + * std::cout << "The 1st value of t is " << thrust::get<1>(t) << std::endl; + * \endcode + * + * \see pair + * \see tuple + */ +template +__host__ __device__ +inline typename access_traits< + typename tuple_element >::type + >::const_type +get(const detail::cons& t); + + + +/*! \p tuple is a class template that can be instantiated with up to ten arguments. + * Each template argument specifies the type of element in the \p tuple. + * Consequently, tuples are heterogeneous, fixed-size collections of values. An + * instantiation of \p tuple with two arguments is similar to an instantiation + * of \p pair with the same two arguments. Individual elements of a \p tuple may + * be accessed with the \p get function. + * + * \tparam TN The type of the N \c tuple element. Thrust's \p tuple + * type currently supports up to ten elements. + * + * The following code snippet demonstrates how to create a new \p tuple object + * and inspect and modify the value of its elements. + * + * \code + * #include + * #include + * ... + * // create a tuple containing an int, a float, and a string + * thrust::tuple t(13, 0.1f, "thrust"); + * + * // individual members are accessed with the free function get + * std::cout << "The first element's value is " << thrust::get<0>(t) << std::endl; + * + * // or the member function get + * std::cout << "The second element's value is " << t.get<1>() << std::endl; + * + * // we can also modify elements with the same function + * thrust::get<0>(t) += 10; + * \endcode + * + * \see pair + * \see get + * \see make_tuple + * \see tuple_element + * \see tuple_size + * \see tie + */ +template + class tuple : + public detail::map_tuple_to_cons::type +{ + /*! \cond + */ + + private: + typedef typename detail::map_tuple_to_cons::type inherited; + + /*! 
\endcond + */ + + public: + /*! \p tuple's no-argument constructor initializes each element. + */ + inline __host__ __device__ + tuple(void) {} + + /*! \p tuple's one-argument constructor copy constructs the first element from the given parameter + * and initializes all other elements. + * \param t0 The value to assign to this \p tuple's first element. + */ + inline __host__ __device__ + tuple(typename access_traits::parameter_type t0) + : inherited(t0, + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type())) {} + + /*! \p tuple's two-argument constructor copy constructs the first two elements from the given parameters + * and initializes all other elements. + * \param t0 The value to assign to this \p tuple's first element. + * \param t1 The value to assign to this \p tuple's second element. + * \note \p tuple's constructor has ten variants of this form, the rest of which are omitted here for brevity. + */ + inline __host__ __device__ + tuple(typename access_traits::parameter_type t0, + typename access_traits::parameter_type t1) + : inherited(t0, t1, + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type())) {} + + /*!
\cond + */ + + inline __host__ __device__ + tuple(typename access_traits::parameter_type t0, + typename access_traits::parameter_type t1, + typename access_traits::parameter_type t2) + : inherited(t0, t1, t2, + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type())) {} + + inline __host__ __device__ + tuple(typename access_traits::parameter_type t0, + typename access_traits::parameter_type t1, + typename access_traits::parameter_type t2, + typename access_traits::parameter_type t3) + : inherited(t0, t1, t2, t3, + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type())) {} + + inline __host__ __device__ + tuple(typename access_traits::parameter_type t0, + typename access_traits::parameter_type t1, + typename access_traits::parameter_type t2, + typename access_traits::parameter_type t3, + typename access_traits::parameter_type t4) + : inherited(t0, t1, t2, t3, t4, + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type())) {} + + inline __host__ __device__ + tuple(typename access_traits::parameter_type t0, + typename access_traits::parameter_type t1, + typename access_traits::parameter_type t2, + typename access_traits::parameter_type t3, + typename access_traits::parameter_type t4, + typename access_traits::parameter_type t5) + : inherited(t0, t1, t2, t3, t4, t5, + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type())) {} + + inline __host__ __device__ + tuple(typename access_traits::parameter_type t0, + typename access_traits::parameter_type t1, + typename access_traits::parameter_type t2, + typename access_traits::parameter_type t3, + typename access_traits::parameter_type t4, + typename 
access_traits::parameter_type t5, + typename access_traits::parameter_type t6) + : inherited(t0, t1, t2, t3, t4, t5, t6, + static_cast(null_type()), + static_cast(null_type()), + static_cast(null_type())) {} + + inline __host__ __device__ + tuple(typename access_traits::parameter_type t0, + typename access_traits::parameter_type t1, + typename access_traits::parameter_type t2, + typename access_traits::parameter_type t3, + typename access_traits::parameter_type t4, + typename access_traits::parameter_type t5, + typename access_traits::parameter_type t6, + typename access_traits::parameter_type t7) + : inherited(t0, t1, t2, t3, t4, t5, t6, t7, + static_cast(null_type()), + static_cast(null_type())) {} + + inline __host__ __device__ + tuple(typename access_traits::parameter_type t0, + typename access_traits::parameter_type t1, + typename access_traits::parameter_type t2, + typename access_traits::parameter_type t3, + typename access_traits::parameter_type t4, + typename access_traits::parameter_type t5, + typename access_traits::parameter_type t6, + typename access_traits::parameter_type t7, + typename access_traits::parameter_type t8) + : inherited(t0, t1, t2, t3, t4, t5, t6, t7, t8, + static_cast(null_type())) {} + + inline __host__ __device__ + tuple(typename access_traits::parameter_type t0, + typename access_traits::parameter_type t1, + typename access_traits::parameter_type t2, + typename access_traits::parameter_type t3, + typename access_traits::parameter_type t4, + typename access_traits::parameter_type t5, + typename access_traits::parameter_type t6, + typename access_traits::parameter_type t7, + typename access_traits::parameter_type t8, + typename access_traits::parameter_type t9) + : inherited(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9) {} + + + template + inline __host__ __device__ + tuple(const detail::cons& p) : inherited(p) {} + + template + inline __host__ __device__ + tuple& operator=(const detail::cons& k) + { + inherited::operator=(k); + return 
*this; + } + + /*! \endcond + */ + + /*! This assignment operator allows assigning the first two elements of this \p tuple from a \p pair. + * \param k A \p pair to assign from. + */ + template + __host__ __device__ inline + tuple& operator=(const thrust::pair& k) { + //BOOST_STATIC_ASSERT(length::value == 2);// check_length = 2 + this->head = k.first; + this->tail.head = k.second; + return *this; + } + + /*! \p swap swaps the elements of two tuples. + * + * \param t The other tuple with which to swap. + */ + inline __host__ __device__ + void swap(tuple &t) + { + inherited::swap(t); + } +}; + +/*! \cond + */ + +template <> +class tuple : + public null_type +{ +public: + typedef null_type inherited; +}; + +/*! \endcond + */ + + +/*! This version of \p make_tuple creates a new \c tuple object from a + * single object. + * + * \param t0 The object to copy from. + * \return A \p tuple object with a single member which is a copy of \p t0. + */ +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const T0& t0); + +/*! This version of \p make_tuple creates a new \c tuple object from two + * objects. + * + * \param t0 The first object to copy from. + * \param t1 The second object to copy from. + * \return A \p tuple object with two members which are copies of \p t0 + * and \p t1. + * + * \note \p make_tuple has ten variants, the rest of which are omitted here + * for brevity. + */ +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const T0& t0, const T1& t1); + +/*! This version of \p tie creates a new \c tuple whose single element is + * a reference which refers to this function's argument. + * + * \param t0 The object to reference. + * \return A \p tuple object with one member which is a reference to \p t0. + */ +template +__host__ __device__ inline +tuple tie(T0& t0); + +/*! This version of \p tie creates a new \c tuple of references object which + * refers to this function's arguments. 
+ * + * \param t0 The first object to reference. + * \param t1 The second object to reference. + * \return A \p tuple object with two members which are references to \p t0 + * and \p t1. + * + * \note \p tie has ten variants, the rest of which are omitted here for + * brevity. + */ +template +__host__ __device__ inline +tuple tie(T0& t0, T1& t1); + +/*! \p swap swaps the contents of two tuples. + * + * \param x The first \p tuple to swap. + * \param y The second \p tuple to swap. + */ +template< + typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9, + typename U0, typename U1, typename U2, typename U3, typename U4, typename U5, typename U6, typename U7, typename U8, typename U9 +> +inline __host__ __device__ +void swap(tuple &x, + tuple &y); + + + +/*! \cond + */ + +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const T0& t0, const T1& t1, const T2& t2); + +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3); + +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4); + +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5); + +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6); + +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7); + +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const 
T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7, const T8& t8); + +template +__host__ __device__ inline + typename detail::make_tuple_mapper::type + make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7, const T8& t8, const T9& t9); + +template +__host__ __device__ inline +tuple tie(T0 &t0, T1 &t1, T2 &t2); + +template +__host__ __device__ inline +tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3); + +template +__host__ __device__ inline +tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4); + +template +__host__ __device__ inline +tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5); + +template +__host__ __device__ inline +tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6); + +template +__host__ __device__ inline +tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7); + +template +__host__ __device__ inline +tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7, T8 &t8); + +template +__host__ __device__ inline +tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7, T8 &t8, T9 &t9); + + +__host__ __device__ inline +bool operator==(const null_type&, const null_type&); + +__host__ __device__ inline +bool operator>=(const null_type&, const null_type&); + +__host__ __device__ inline +bool operator<=(const null_type&, const null_type&); + +__host__ __device__ inline +bool operator!=(const null_type&, const null_type&); + +__host__ __device__ inline +bool operator<(const null_type&, const null_type&); + +__host__ __device__ inline +bool operator>(const null_type&, const null_type&); + +/*! \endcond + */ + +/*! \} // tuple + */ + +/*! 
\} // utility + */ + +} // end thrust + diff --git a/compat/thrust/uninitialized_copy.h b/compat/thrust/uninitialized_copy.h new file mode 100644 index 0000000..77b673c --- /dev/null +++ b/compat/thrust/uninitialized_copy.h @@ -0,0 +1,301 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file uninitialized_copy.h + * \brief Copy construction into a range of uninitialized elements from a source range + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \addtogroup copying + * \{ + */ + + +/*! In \c thrust, the function \c thrust::device_new allocates memory for + * an object and then creates an object at that location by calling a constructor. + * Occasionally, however, it is useful to separate those two operations. + * If each iterator in the range [result, result + (last - first)) points + * to uninitialized memory, then \p uninitialized_copy creates a copy of + * [first, last) in that range. That is, for each iterator \c i in + * the input, \p uninitialized_copy creates a copy of \c *i in the location pointed + * to by the corresponding iterator in the output range by \p ForwardIterator's + * \c value_type's copy constructor with *i as its argument. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The first element of the input range to copy from. 
+ * \param last The last element of the input range to copy from. + * \param result The first element of the output range to copy to. + * \return An iterator pointing to the last element of the output range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator. + * \tparam ForwardIterator is a model of Forward Iterator, + * \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that takes + * a single argument whose type is \p InputIterator's \c value_type. + * + * \pre \p first may equal \p result, but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p uninitialized_copy to initialize + * a range of uninitialized memory using the \p thrust::device execution policy for + * parallelization: + * + * \code + * #include + * #include + * #include + * #include + * + * struct Int + * { + * __host__ __device__ + * Int(int x) : val(x) {} + * int val; + * }; + * ... + * const int N = 137; + * + * Int val(46); + * thrust::device_vector input(N, val); + * thrust::device_ptr array = thrust::device_malloc(N); + * thrust::uninitialized_copy(thrust::device, input.begin(), input.end(), array); + * + * // Int x = array[i]; + * // x.val == 46 for all 0 <= i < N + * \endcode + * + * \see http://www.sgi.com/tech/stl/uninitialized_copy.html + * \see \c copy + * \see \c uninitialized_fill + * \see \c device_new + * \see \c device_malloc + */ +template + ForwardIterator uninitialized_copy(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + ForwardIterator result); + + +/*! In \c thrust, the function \c thrust::device_new allocates memory for + * an object and then creates an object at that location by calling a constructor. + * Occasionally, however, it is useful to separate those two operations. 
+ * If each iterator in the range [result, result + (last - first)) points + * to uninitialized memory, then \p uninitialized_copy creates a copy of + * [first, last) in that range. That is, for each iterator \c i in + * the input, \p uninitialized_copy creates a copy of \c *i in the location pointed + * to by the corresponding iterator in the output range by \p ForwardIterator's + * \c value_type's copy constructor with *i as its argument. + * + * \param first The first element of the input range to copy from. + * \param last The last element of the input range to copy from. + * \param result The first element of the output range to copy to. + * \return An iterator pointing to the last element of the output range. + * + * \tparam InputIterator is a model of Input Iterator. + * \tparam ForwardIterator is a model of Forward Iterator, + * \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that takes + * a single argument whose type is \p InputIterator's \c value_type. + * + * \pre \p first may equal \p result, but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p uninitialized_copy to initialize + * a range of uninitialized memory. + * + * \code + * #include + * #include + * #include + * + * struct Int + * { + * __host__ __device__ + * Int(int x) : val(x) {} + * int val; + * }; + * ... 
+ * const int N = 137; + * + * Int val(46); + * thrust::device_vector input(N, val); + * thrust::device_ptr array = thrust::device_malloc(N); + * thrust::uninitialized_copy(input.begin(), input.end(), array); + * + * // Int x = array[i]; + * // x.val == 46 for all 0 <= i < N + * \endcode + * + * \see http://www.sgi.com/tech/stl/uninitialized_copy.html + * \see \c copy + * \see \c uninitialized_fill + * \see \c device_new + * \see \c device_malloc + */ +template + ForwardIterator uninitialized_copy(InputIterator first, + InputIterator last, + ForwardIterator result); + + +/*! In \c thrust, the function \c thrust::device_new allocates memory for + * an object and then creates an object at that location by calling a constructor. + * Occasionally, however, it is useful to separate those two operations. + * If each iterator in the range [result, result + n) points + * to uninitialized memory, then \p uninitialized_copy_n creates a copy of + * [first, first + n) in that range. That is, for each iterator \c i in + * the input, \p uninitialized_copy_n creates a copy of \c *i in the location pointed + * to by the corresponding iterator in the output range by \p InputIterator's + * \c value_type's copy constructor with *i as its argument. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The first element of the input range to copy from. + * \param n The number of elements to copy. + * \param result The first element of the output range to copy to. + * \return An iterator pointing to the last element of the output range. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator. + * \tparam Size is an integral type. 
+ * \tparam ForwardIterator is a model of Forward Iterator, + * \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that takes + * a single argument whose type is \p InputIterator's \c value_type. + * + * \pre \p first may equal \p result, but the range [first, first + n) and the range [result, result + n) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p uninitialized_copy_n to initialize + * a range of uninitialized memory using the \p thrust::device execution policy for + * parallelization: + * + * \code + * #include + * #include + * #include + * #include + * + * struct Int + * { + * __host__ __device__ + * Int(int x) : val(x) {} + * int val; + * }; + * ... + * const int N = 137; + * + * Int val(46); + * thrust::device_vector input(N, val); + * thrust::device_ptr array = thrust::device_malloc(N); + * thrust::uninitialized_copy_n(thrust::device, input.begin(), N, array); + * + * // Int x = array[i]; + * // x.val == 46 for all 0 <= i < N + * \endcode + * + * \see http://www.sgi.com/tech/stl/uninitialized_copy.html + * \see \c uninitialized_copy + * \see \c copy + * \see \c uninitialized_fill + * \see \c device_new + * \see \c device_malloc + */ +template + ForwardIterator uninitialized_copy_n(const thrust::detail::execution_policy_base &exec, + InputIterator first, + Size n, + ForwardIterator result); + + +/*! In \c thrust, the function \c thrust::device_new allocates memory for + * an object and then creates an object at that location by calling a constructor. + * Occasionally, however, it is useful to separate those two operations. + * If each iterator in the range [result, result + n) points + * to uninitialized memory, then \p uninitialized_copy_n creates a copy of + * [first, first + n) in that range.
That is, for each iterator \c i in + * the input, \p uninitialized_copy_n creates a copy of \c *i in the location pointed + * to by the corresponding iterator in the output range by \p InputIterator's + * \c value_type's copy constructor with *i as its argument. + * + * \param first The first element of the input range to copy from. + * \param n The number of elements to copy. + * \param result The first element of the output range to copy to. + * \return An iterator pointing to the last element of the output range. + * + * \tparam InputIterator is a model of Input Iterator. + * \tparam Size is an integral type. + * \tparam ForwardIterator is a model of Forward Iterator, + * \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that takes + * a single argument whose type is \p InputIterator's \c value_type. + * + * \pre \p first may equal \p result, but the range [first, first + n) and the range [result, result + n) shall not overlap otherwise. + * + * The following code snippet demonstrates how to use \p uninitialized_copy_n to initialize + * a range of uninitialized memory. + * + * \code + * #include + * #include + * #include + * + * struct Int + * { + * __host__ __device__ + * Int(int x) : val(x) {} + * int val; + * }; + * ... + * const int N = 137; + * + * Int val(46); + * thrust::device_vector input(N, val); + * thrust::device_ptr array = thrust::device_malloc(N); + * thrust::uninitialized_copy_n(input.begin(), N, array); + * + * // Int x = array[i]; + * // x.val == 46 for all 0 <= i < N + * \endcode + * + * \see http://www.sgi.com/tech/stl/uninitialized_copy.html + * \see \c uninitialized_copy + * \see \c copy + * \see \c uninitialized_fill + * \see \c device_new + * \see \c device_malloc + */ +template + ForwardIterator uninitialized_copy_n(InputIterator first, + Size n, + ForwardIterator result); + + +/*!
\} // copying + */ + + +} // end thrust + +#include + diff --git a/compat/thrust/uninitialized_fill.h b/compat/thrust/uninitialized_fill.h new file mode 100644 index 0000000..c726241 --- /dev/null +++ b/compat/thrust/uninitialized_fill.h @@ -0,0 +1,273 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file uninitialized_fill.h + * \brief Copy construction into a range of uninitialized elements from a source value + */ + +#pragma once + +#include +#include + +namespace thrust +{ + + +/*! \addtogroup filling + * \ingroup transformations + * \{ + */ + + +/*! In \c thrust, the function \c thrust::device_new allocates memory for + * an object and then creates an object at that location by calling a + * constructor. Occasionally, however, it is useful to separate those two + * operations. If each iterator in the range [first, last) points + * to uninitialized memory, then \p uninitialized_fill creates copies of \c x + * in that range. That is, for each iterator \c i in the range [first, last), + * \p uninitialized_fill creates a copy of \c x in the location pointed to by \c i by + * calling \p ForwardIterator's \c value_type's copy constructor. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The first element of the range of interest. + * \param last The last element of the range of interest.
+ * \param x The value to use as the exemplar of the copy constructor. + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that + * takes a single argument of type \p T. + * + * The following code snippet demonstrates how to use \p uninitialized_fill to initialize a range of + * uninitialized memory using the \p thrust::device execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * + * struct Int + * { + * __host__ __device__ + * Int(int x) : val(x) {} + * int val; + * }; + * ... + * const int N = 137; + * + * Int val(46); + * thrust::device_ptr array = thrust::device_malloc(N); + * thrust::uninitialized_fill(thrust::device, array, array + N, val); + * + * // Int x = array[i]; + * // x.val == 46 for all 0 <= i < N + * \endcode + * + * \see http://www.sgi.com/tech/stl/uninitialized_fill.html + * \see \c uninitialized_fill_n + * \see \c fill + * \see \c uninitialized_copy + * \see \c device_new + * \see \c device_malloc + */ +template + void uninitialized_fill(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + const T &x); + + +/*! In \c thrust, the function \c thrust::device_new allocates memory for + * an object and then creates an object at that location by calling a + * constructor. Occasionally, however, it is useful to separate those two + * operations. If each iterator in the range [first, last) points + * to uninitialized memory, then \p uninitialized_fill creates copies of \c x + * in that range. That is, for each iterator \c i in the range [first, last), + * \p uninitialized_fill creates a copy of \c x in the location pointed to by \c i by + * calling \p ForwardIterator's \c value_type's copy constructor. + * + * \param first The first element of the range of interest.
+ * \param last The last element of the range of interest. + * \param x The value to use as the exemplar of the copy constructor. + * + * \tparam ForwardIterator is a model of Forward Iterator, + * \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that + * takes a single argument of type \p T. + * + * The following code snippet demonstrates how to use \p uninitialized_fill to initialize a range of + * uninitialized memory. + * + * \code + * #include + * #include + * + * struct Int + * { + * __host__ __device__ + * Int(int x) : val(x) {} + * int val; + * }; + * ... + * const int N = 137; + * + * Int val(46); + * thrust::device_ptr array = thrust::device_malloc(N); + * thrust::uninitialized_fill(array, array + N, val); + * + * // Int x = array[i]; + * // x.val == 46 for all 0 <= i < N + * \endcode + * + * \see http://www.sgi.com/tech/stl/uninitialized_fill.html + * \see \c uninitialized_fill_n + * \see \c fill + * \see \c uninitialized_copy + * \see \c device_new + * \see \c device_malloc + */ +template + void uninitialized_fill(ForwardIterator first, + ForwardIterator last, + const T &x); + + +/*! In \c thrust, the function \c thrust::device_new allocates memory for + * an object and then creates an object at that location by calling a + * constructor. Occasionally, however, it is useful to separate those two + * operations. If each iterator in the range [first, first+n) points + * to uninitialized memory, then \p uninitialized_fill_n creates copies of \c x + * in that range. That is, for each iterator \c i in the range [first, first+n), + * \p uninitialized_fill_n creates a copy of \c x in the location pointed to by \c i by + * calling \p ForwardIterator's \c value_type's copy constructor. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The first element of the range of interest. + * \param n The size of the range of interest.
+ * \param x The value to use as the exemplar of the copy constructor. + * \return first+n + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that + * takes a single argument of type \p T. + * + * The following code snippet demonstrates how to use \p uninitialized_fill_n to initialize a range of + * uninitialized memory using the \p thrust::device execution policy for parallelization: + * + * \code + * #include + * #include + * #include + * + * struct Int + * { + * __host__ __device__ + * Int(int x) : val(x) {} + * int val; + * }; + * ... + * const int N = 137; + * + * Int val(46); + * thrust::device_ptr array = thrust::device_malloc(N); + * thrust::uninitialized_fill_n(thrust::device, array, N, val); + * + * // Int x = array[i]; + * // x.val == 46 for all 0 <= i < N + * \endcode + * + * \see http://www.sgi.com/tech/stl/uninitialized_fill.html + * \see \c uninitialized_fill + * \see \c fill + * \see \c uninitialized_copy_n + * \see \c device_new + * \see \c device_malloc + */ +template + ForwardIterator uninitialized_fill_n(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + Size n, + const T &x); + + +/*! In \c thrust, the function \c thrust::device_new allocates memory for + * an object and then creates an object at that location by calling a + * constructor. Occasionally, however, it is useful to separate those two + * operations. If each iterator in the range [first, first+n) points + * to uninitialized memory, then \p uninitialized_fill_n creates copies of \c x + * in that range. That is, for each iterator \c i in the range [first, first+n), + * \p uninitialized_fill_n creates a copy of \c x in the location pointed to by \c i by + * calling \p ForwardIterator's \c value_type's copy constructor. + * + * \param first The first element of the range of interest.
+ * \param n The size of the range of interest. + * \param x The value to use as the exemplar of the copy constructor. + * \return first+n + * + * \tparam ForwardIterator is a model of Forward Iterator, + * \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that + * takes a single argument of type \p T. + * + * The following code snippet demonstrates how to use \p uninitialized_fill_n to initialize a range of + * uninitialized memory. + * + * \code + * #include + * #include + * + * struct Int + * { + * __host__ __device__ + * Int(int x) : val(x) {} + * int val; + * }; + * ... + * const int N = 137; + * + * Int val(46); + * thrust::device_ptr array = thrust::device_malloc(N); + * thrust::uninitialized_fill_n(array, N, val); + * + * // Int x = array[i]; + * // x.val == 46 for all 0 <= i < N + * \endcode + * + * \see http://www.sgi.com/tech/stl/uninitialized_fill.html + * \see \c uninitialized_fill + * \see \c fill + * \see \c uninitialized_copy_n + * \see \c device_new + * \see \c device_malloc + */ +template + ForwardIterator uninitialized_fill_n(ForwardIterator first, + Size n, + const T &x); + +/*! \} // end filling + * \} // transformations + */ + +} // end thrust + +#include + diff --git a/compat/thrust/unique.h b/compat/thrust/unique.h new file mode 100644 index 0000000..98150f3 --- /dev/null +++ b/compat/thrust/unique.h @@ -0,0 +1,960 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file unique.h + * \brief Move unique elements to the front of a range + */ + +#pragma once + +#include +#include +#include + +namespace thrust +{ + + +/*! \addtogroup stream_compaction + * \{ + */ + + +/*! For each group of consecutive elements in the range [first, last) + * with the same value, \p unique removes all but the first element of + * the group. The return value is an iterator \c new_last such that + * no two consecutive elements in the range [first, new_last) are + * equal. The iterators in the range [new_last, last) are all still + * dereferenceable, but the elements that they point to are unspecified. + * \p unique is stable, meaning that the relative order of elements that are + * not removed is unchanged. + * + * This version of \p unique uses \c operator== to test for equality. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the input range. + * \param last The end of the input range. + * \return The end of the unique range [first, new_last). + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable, + * and \p ForwardIterator's \c value_type is a model of Equality Comparable. + * + * The following code snippet demonstrates how to use \p unique to + * compact a sequence of numbers to remove consecutive duplicates using the \p thrust::host execution policy + * for parallelization: + * + * \code + * #include + * #include + * ... + * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; + * int *new_end = thrust::unique(thrust::host, A, A + N); + * // The first four values of A are now {1, 3, 2, 1} + * // Values beyond new_end are unspecified. 
+ * \endcode + * + * \see http://www.sgi.com/tech/stl/unique.html + * \see unique_copy + */ +template +ForwardIterator unique(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last); + + +/*! For each group of consecutive elements in the range [first, last) + * with the same value, \p unique removes all but the first element of + * the group. The return value is an iterator \c new_last such that + * no two consecutive elements in the range [first, new_last) are + * equal. The iterators in the range [new_last, last) are all still + * dereferenceable, but the elements that they point to are unspecified. + * \p unique is stable, meaning that the relative order of elements that are + * not removed is unchanged. + * + * This version of \p unique uses \c operator== to test for equality. + * + * \param first The beginning of the input range. + * \param last The end of the input range. + * \return The end of the unique range [first, new_last). + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable, + * and \p ForwardIterator's \c value_type is a model of Equality Comparable. + * + * The following code snippet demonstrates how to use \p unique to + * compact a sequence of numbers to remove consecutive duplicates. + * + * \code + * #include + * ... + * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; + * int *new_end = thrust::unique(A, A + N); + * // The first four values of A are now {1, 3, 2, 1} + * // Values beyond new_end are unspecified. + * \endcode + * + * \see http://www.sgi.com/tech/stl/unique.html + * \see unique_copy + */ +template +ForwardIterator unique(ForwardIterator first, + ForwardIterator last); + + +/*! For each group of consecutive elements in the range [first, last) + * with the same value, \p unique removes all but the first element of + * the group. 
The return value is an iterator \c new_last such that + * no two consecutive elements in the range [first, new_last) are + * equal. The iterators in the range [new_last, last) are all still + * dereferenceable, but the elements that they point to are unspecified. + * \p unique is stable, meaning that the relative order of elements that are + * not removed is unchanged. + * + * This version of \p unique uses the function object \p binary_pred to test + * for equality. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the input range. + * \param last The end of the input range. + * \param binary_pred The binary predicate used to determine equality. + * \return The end of the unique range [first, new_last) + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable, + * and \p ForwardIterator's \c value_type is convertible to \p BinaryPredicate's \c first_argument_type and to \p BinaryPredicate's \c second_argument_type. + * \tparam BinaryPredicate is a model of Binary Predicate. + * + * The following code snippet demonstrates how to use \p unique to + * compact a sequence of numbers to remove consecutive duplicates using the \p thrust::host execution policy + * for parallelization: + * + * \code + * #include + * #include + * ... + * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; + * int *new_end = thrust::unique(thrust::host, A, A + N, thrust::equal_to()); + * // The first four values of A are now {1, 3, 2, 1} + * // Values beyond new_end are unspecified. + * \endcode + * + * \see http://www.sgi.com/tech/stl/unique.html + * \see unique_copy + */ +template +ForwardIterator unique(const thrust::detail::execution_policy_base &exec, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate binary_pred); + + +/*! 
For each group of consecutive elements in the range [first, last) + * with the same value, \p unique removes all but the first element of + * the group. The return value is an iterator \c new_last such that + * no two consecutive elements in the range [first, new_last) are + * equal. The iterators in the range [new_last, last) are all still + * dereferenceable, but the elements that they point to are unspecified. + * \p unique is stable, meaning that the relative order of elements that are + * not removed is unchanged. + * + * This version of \p unique uses the function object \p binary_pred to test + * for equality. + * + * \param first The beginning of the input range. + * \param last The end of the input range. + * \param binary_pred The binary predicate used to determine equality. + * \return The end of the unique range [first, new_last) + * + * \tparam ForwardIterator is a model of Forward Iterator, + * and \p ForwardIterator is mutable, + * and \p ForwardIterator's \c value_type is convertible to \p BinaryPredicate's \c first_argument_type and to \p BinaryPredicate's \c second_argument_type. + * \tparam BinaryPredicate is a model of Binary Predicate. + * + * The following code snippet demonstrates how to use \p unique to + * compact a sequence of numbers to remove consecutive duplicates. + * + * \code + * #include + * ... + * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; + * int *new_end = thrust::unique(A, A + N, thrust::equal_to()); + * // The first four values of A are now {1, 3, 2, 1} + * // Values beyond new_end are unspecified. + * \endcode + * + * \see http://www.sgi.com/tech/stl/unique.html + * \see unique_copy + */ +template +ForwardIterator unique(ForwardIterator first, + ForwardIterator last, + BinaryPredicate binary_pred); + + +/*! \p unique_copy copies elements from the range [first, last) + * to a range beginning with \p result, except that in a consecutive group + * of duplicate elements only the first one is copied. 
The return value + * is the end of the range to which the elements are copied. + * + * The reason there are two different versions of unique_copy is that there + * are two different definitions of what it means for a consecutive group of + * elements to be duplicates. In the first version, the test is simple + * equality: the elements in a range [f, l) are duplicates if, + * for every iterator \p i in the range, either i == f or else + * *i == *(i-1). In the second, the test is an arbitrary + * \p BinaryPredicate \p binary_pred: the elements in [f, l) are + * duplicates if, for every iterator \p i in the range, either i == f + * or else binary_pred(*i, *(i-1)) is \p true. + * + * This version of \p unique_copy uses \c operator== to test for equality. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the input range. + * \param last The end of the input range. + * \param result The beginning of the output range. + * \return The end of the unique range [result, result_end). + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is a model of Equality Comparable. + * \tparam OutputIterator is a model of Output Iterator, + * and \p InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. + * + * \pre The range [first,last) and the range [result, result + (last - first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p unique_copy to + * compact a sequence of numbers to remove consecutive duplicates using the \p thrust::host execution + * policy for parallelization: + * + * \code + * #include + * #include + * ...
+ * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; + * int B[N]; + * int *result_end = thrust::unique_copy(thrust::host, A, A + N, B); + * // The first four values of B are now {1, 3, 2, 1} and (result_end - B) is 4 + * // Values beyond result_end are unspecified + * \endcode + * + * \see unique + * \see http://www.sgi.com/tech/stl/unique_copy.html + */ +template +OutputIterator unique_copy(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result); + + +/*! \p unique_copy copies elements from the range [first, last) + * to a range beginning with \p result, except that in a consecutive group + * of duplicate elements only the first one is copied. The return value + * is the end of the range to which the elements are copied. + * + * The reason there are two different versions of unique_copy is that there + * are two different definitions of what it means for a consecutive group of + * elements to be duplicates. In the first version, the test is simple + * equality: the elements in a range [f, l) are duplicates if, + * for every iterator \p i in the range, either i == f or else + * *i == *(i-1). In the second, the test is an arbitrary + * \p BinaryPredicate \p binary_pred: the elements in [f, l) are + * duplicates if, for every iterator \p i in the range, either i == f + * or else binary_pred(*i, *(i-1)) is \p true. + * + * This version of \p unique_copy uses \c operator== to test for equality. + * + * \param first The beginning of the input range. + * \param last The end of the input range. + * \param result The beginning of the output range. + * \return The end of the unique range [result, result_end). + * + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is a model of Equality Comparable. + * \tparam OutputIterator is a model of Output Iterator and + * and \p InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. 
+ * + * \pre The range [first,last) and the range [result, result + (last - first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p unique_copy to + * compact a sequence of numbers to remove consecutive duplicates. + * + * \code + * #include + * ... + * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; + * int B[N]; + * int *result_end = thrust::unique_copy(A, A + N, B); + * // The first four values of B are now {1, 3, 2, 1} and (result_end - B) is 4 + * // Values beyond result_end are unspecified + * \endcode + * + * \see unique + * \see http://www.sgi.com/tech/stl/unique_copy.html + */ +template +OutputIterator unique_copy(InputIterator first, + InputIterator last, + OutputIterator result); + + +/*! \p unique_copy copies elements from the range [first, last) + * to a range beginning with \p result, except that in a consecutive group + * of duplicate elements only the first one is copied. The return value + * is the end of the range to which the elements are copied. + * + * This version of \p unique_copy uses the function object \c binary_pred + * to test for equality. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param first The beginning of the input range. + * \param last The end of the input range. + * \param result The beginning of the output range. + * \param binary_pred The binary predicate used to determine equality. + * \return The end of the unique range [result, result_end). + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is a model of Equality Comparable. + * \tparam OutputIterator is a model of Output Iterator and + * and \p InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. + * \tparam BinaryPredicate is a model of Binary Predicate. 
+ * + * \pre The range [first,last) and the range [result, result + (last - first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p unique_copy to + * compact a sequence of numbers to remove consecutive duplicates using the \p thrust::host execution + * policy for parallelization: + * + * \code + * #include + * #include + * ... + * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; + * int B[N]; + * int *result_end = thrust::unique_copy(thrust::host, A, A + N, B, thrust::equal_to()); + * // The first four values of B are now {1, 3, 2, 1} and (result_end - B) is 4 + * // Values beyond result_end are unspecified. + * \endcode + * + * \see unique + * \see http://www.sgi.com/tech/stl/unique_copy.html + */ +template +OutputIterator unique_copy(const thrust::detail::execution_policy_base &exec, + InputIterator first, + InputIterator last, + OutputIterator result, + BinaryPredicate binary_pred); + + +/*! \p unique_copy copies elements from the range [first, last) + * to a range beginning with \p result, except that in a consecutive group + * of duplicate elements only the first one is copied. The return value + * is the end of the range to which the elements are copied. + * + * This version of \p unique_copy uses the function object \c binary_pred + * to test for equality. + * + * \param first The beginning of the input range. + * \param last The end of the input range. + * \param result The beginning of the output range. + * \param binary_pred The binary predicate used to determine equality. + * \return The end of the unique range [result, result_end). + * + * \tparam InputIterator is a model of Input Iterator, + * and \p InputIterator's \c value_type is a model of Equality Comparable. + * \tparam OutputIterator is a model of Output Iterator and + * and \p InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. + * \tparam BinaryPredicate is a model of Binary Predicate. 
+ * + * \pre The range [first,last) and the range [result, result + (last - first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p unique_copy to + * compact a sequence of numbers to remove consecutive duplicates. + * + * \code + * #include + * ... + * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; + * int B[N]; + * int *result_end = thrust::unique_copy(A, A + N, B, thrust::equal_to()); + * // The first four values of B are now {1, 3, 2, 1} and (result_end - B) is 4 + * // Values beyond result_end are unspecified. + * \endcode + * + * \see unique + * \see http://www.sgi.com/tech/stl/unique_copy.html + */ +template +OutputIterator unique_copy(InputIterator first, + InputIterator last, + OutputIterator result, + BinaryPredicate binary_pred); + + +/*! \p unique_by_key is a generalization of \p unique to key-value pairs. + * For each group of consecutive keys in the range [keys_first, keys_last) + * that are equal, \p unique_by_key removes all but the first element of + * the group. Similarly, the corresponding values in the range + * [values_first, values_first + (keys_last - keys_first)) + * are also removed. + * + * The return value is a \p pair of iterators (new_keys_last,new_values_last) + * such that no two consecutive elements in the range [keys_first, new_keys_last) + * are equal. + * + * This version of \p unique_by_key uses \c operator== to test for equality and + * \c project1st to reduce values with equal keys. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first The beginning of the key range. + * \param keys_last The end of the key range. + * \param values_first The beginning of the value range. + * \return A pair of iterators at end of the ranges [key_first, keys_new_last) and [values_first, values_new_last). + * + * \tparam DerivedPolicy The name of the derived execution policy. 
+ * \tparam ForwardIterator1 is a model of Forward Iterator, + * and \p ForwardIterator1 is mutable, + * and \p ForwardIterator's \c value_type is a model of Equality Comparable. + * \tparam ForwardIterator2 is a model of Forward Iterator, + * and \p ForwardIterator2 is mutable. + * + * \pre The range [keys_first, keys_last) and the range [values_first, values_first + (keys_last - keys_first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p unique_by_key to + * compact a sequence of key/value pairs to remove consecutive duplicates using the \p thrust::host + * execution policy for parallelization: + * + * \code + * #include + * #include + * ... + * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // keys + * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // values + * + * thrust::pair new_end; + * new_end = thrust::unique_by_key(thrust::host, A, A + N, B); + * + * // The first four keys in A are now {1, 3, 2, 1} and new_end.first - A is 4. + * // The first four values in B are now {9, 8, 5, 3} and new_end.second - B is 4. + * \endcode + * + * \see unique + * \see unique_by_key_copy + * \see reduce_by_key + */ +template + thrust::pair + unique_by_key(const thrust::detail::execution_policy_base &exec, + ForwardIterator1 keys_first, + ForwardIterator1 keys_last, + ForwardIterator2 values_first); + + +/*! \p unique_by_key is a generalization of \p unique to key-value pairs. + * For each group of consecutive keys in the range [keys_first, keys_last) + * that are equal, \p unique_by_key removes all but the first element of + * the group. Similarly, the corresponding values in the range + * [values_first, values_first + (keys_last - keys_first)) + * are also removed. + * + * The return value is a \p pair of iterators (new_keys_last,new_values_last) + * such that no two consecutive elements in the range [keys_first, new_keys_last) + * are equal. 
+ * + * This version of \p unique_by_key uses \c operator== to test for equality and + * \c project1st to reduce values with equal keys. + * + * \param keys_first The beginning of the key range. + * \param keys_last The end of the key range. + * \param values_first The beginning of the value range. + * \return A pair of iterators at end of the ranges [key_first, keys_new_last) and [values_first, values_new_last). + * + * \tparam ForwardIterator1 is a model of Forward Iterator, + * and \p ForwardIterator1 is mutable, + * and \p ForwardIterator's \c value_type is a model of Equality Comparable. + * \tparam ForwardIterator2 is a model of Forward Iterator, + * and \p ForwardIterator2 is mutable. + * + * \pre The range [keys_first, keys_last) and the range [values_first, values_first + (keys_last - keys_first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p unique_by_key to + * compact a sequence of key/value pairs to remove consecutive duplicates. + * + * \code + * #include + * ... + * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // keys + * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // values + * + * thrust::pair new_end; + * new_end = thrust::unique_by_key(A, A + N, B); + * + * // The first four keys in A are now {1, 3, 2, 1} and new_end.first - A is 4. + * // The first four values in B are now {9, 8, 5, 3} and new_end.second - B is 4. + * \endcode + * + * \see unique + * \see unique_by_key_copy + * \see reduce_by_key + */ +template + thrust::pair + unique_by_key(ForwardIterator1 keys_first, + ForwardIterator1 keys_last, + ForwardIterator2 values_first); + + +/*! \p unique_by_key is a generalization of \p unique to key-value pairs. + * For each group of consecutive keys in the range [keys_first, keys_last) + * that are equal, \p unique_by_key removes all but the first element of + * the group. Similarly, the corresponding values in the range + * [values_first, values_first + (keys_last - keys_first)) + * are also removed. 
+ * + * This version of \p unique_by_key uses the function object \c binary_pred + * to test for equality and \c project1st to reduce values with equal keys. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first The beginning of the key range. + * \param keys_last The end of the key range. + * \param values_first The beginning of the value range. + * \param binary_pred The binary predicate used to determine equality. + * \return A pair of iterators at the end of the ranges [keys_first, keys_new_last) and [values_first, values_new_last). + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam ForwardIterator1 is a model of Forward Iterator, + * and \p ForwardIterator1 is mutable, + * and \p ForwardIterator1's \c value_type is a model of Equality Comparable. + * \tparam ForwardIterator2 is a model of Forward Iterator, + * and \p ForwardIterator2 is mutable. + * \tparam BinaryPredicate is a model of Binary Predicate. + * + * \pre The range [keys_first, keys_last) and the range [values_first, values_first + (keys_last - keys_first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p unique_by_key to + * compact a sequence of key/value pairs to remove consecutive duplicates using the \p thrust::host + * execution policy for parallelization: + * + * \code + * #include <thrust/unique.h> + * #include <thrust/execution_policy.h> + * ... + * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // keys + * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // values + * + * thrust::pair<int*,int*> new_end; + * thrust::equal_to<int> binary_pred; + * new_end = thrust::unique_by_key(thrust::host, A, A + N, B, binary_pred); + * + * // The first four keys in A are now {1, 3, 2, 1} and new_end.first - A is 4. + * // The first four values in B are now {9, 8, 5, 3} and new_end.second - B is 4.
+ * \endcode + * + * \see unique + * \see unique_by_key_copy + * \see reduce_by_key + */ +template + thrust::pair + unique_by_key(const thrust::detail::execution_policy_base &exec, + ForwardIterator1 keys_first, + ForwardIterator1 keys_last, + ForwardIterator2 values_first, + BinaryPredicate binary_pred); + + +/*! \p unique_by_key is a generalization of \p unique to key-value pairs. + * For each group of consecutive keys in the range [keys_first, keys_last) + * that are equal, \p unique_by_key removes all but the first element of + * the group. Similarly, the corresponding values in the range + * [values_first, values_first + (keys_last - keys_first)) + * are also removed. + * + * This version of \p unique_by_key uses the function object \c binary_pred + * to test for equality and \c project1st to reduce values with equal keys. + * + * \param keys_first The beginning of the key range. + * \param keys_last The end of the key range. + * \param values_first The beginning of the value range. + * \param binary_pred The binary predicate used to determine equality. + * \return The end of the unique range [first, new_last). + * + * \tparam ForwardIterator1 is a model of Forward Iterator, + * and \p ForwardIterator1 is mutable, + * and \p ForwardIterator's \c value_type is a model of Equality Comparable. + * \tparam ForwardIterator2 is a model of Forward Iterator, + * and \p ForwardIterator2 is mutable. + * \tparam BinaryPredicate is a model of Binary Predicate. + * + * \pre The range [keys_first, keys_last) and the range [values_first, values_first + (keys_last - keys_first)) shall not overlap. + * + * The following code snippet demonstrates how to use \p unique_by_key to + * compact a sequence of key/value pairs to remove consecutive duplicates. + * + * \code + * #include + * ... 
+ * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // keys + * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // values + * + * thrust::pair<int*,int*> new_end; + * thrust::equal_to<int> binary_pred; + * new_end = thrust::unique_by_key(A, A + N, B, binary_pred); + * + * // The first four keys in A are now {1, 3, 2, 1} and new_end.first - A is 4. + * // The first four values in B are now {9, 8, 5, 3} and new_end.second - B is 4. + * \endcode + * + * \see unique + * \see unique_by_key_copy + * \see reduce_by_key + */ +template + thrust::pair + unique_by_key(ForwardIterator1 keys_first, + ForwardIterator1 keys_last, + ForwardIterator2 values_first, + BinaryPredicate binary_pred); + + +/*! \p unique_by_key_copy is a generalization of \p unique_copy to key-value pairs. + * For each group of consecutive keys in the range [keys_first, keys_last) + * that are equal, \p unique_by_key_copy copies the first element of the group to + * a range beginning with \c keys_result and the corresponding values from the range + * [values_first, values_first + (keys_last - keys_first)) are copied to a range + * beginning with \c values_result. + * + * This version of \p unique_by_key_copy uses \c operator== to test for equality and + * \c project1st to reduce values with equal keys. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first The beginning of the input key range. + * \param keys_last The end of the input key range. + * \param values_first The beginning of the input value range. + * \param keys_result The beginning of the output key range. + * \param values_result The beginning of the output value range. + * \return A pair of iterators at the end of the ranges [keys_result, keys_result_last) and [values_result, values_result_last). + * + * \tparam DerivedPolicy The name of the derived execution policy.
+ * \tparam InputIterator1 is a model of Input Iterator, + * \tparam InputIterator2 is a model of Input Iterator, + * \tparam OutputIterator1 is a model of Output Iterator and + * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. + * \tparam OutputIterator2 is a model of Output Iterator and + * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. + * + * \pre The input ranges shall not overlap either output range. + * + * The following code snippet demonstrates how to use \p unique_by_key_copy to + * compact a sequence of key/value pairs and with equal keys using the \p thrust::host execution policy + * for parallelization: + * + * \code + * #include + * #include + * ... + * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys + * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values + * int C[N]; // output keys + * int D[N]; // output values + * + * thrust::pair new_end; + * new_end = thrust::unique_by_key_copy(thrust::host, A, A + N, B, C, D); + * + * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4. + * // The first four values in D are now {9, 8, 5, 3} and new_end.second - D is 4. + * \endcode + * + * \see unique_copy + * \see unique_by_key + * \see reduce_by_key + */ +template + thrust::pair + unique_by_key_copy(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_result, + OutputIterator2 values_result); + + +/*! \p unique_by_key_copy is a generalization of \p unique_copy to key-value pairs. 
+ * For each group of consecutive keys in the range [keys_first, keys_last) + * that are equal, \p unique_by_key_copy copies the first element of the group to + * a range beginning with \c keys_result and the corresponding values from the range + * [values_first, values_first + (keys_last - keys_first)) are copied to a range + * beginning with \c values_result. + * + * This version of \p unique_by_key_copy uses \c operator== to test for equality and + * \c project1st to reduce values with equal keys. + * + * \param keys_first The beginning of the input key range. + * \param keys_last The end of the input key range. + * \param values_first The beginning of the input value range. + * \param keys_result The beginning of the output key range. + * \param values_result The beginning of the output value range. + * \return A pair of iterators at the end of the ranges [keys_result, keys_result_last) and [values_result, values_result_last). + * + * \tparam InputIterator1 is a model of Input Iterator. + * \tparam InputIterator2 is a model of Input Iterator. + * \tparam OutputIterator1 is a model of Output Iterator, + * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. + * \tparam OutputIterator2 is a model of Output Iterator, + * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. + * + * \pre The input ranges shall not overlap either output range. + * + * The following code snippet demonstrates how to use \p unique_by_key_copy to + * compact a sequence of key/value pairs by removing consecutive entries with equal keys. + * + * \code + * #include <thrust/unique.h> + * ... + * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys + * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values + * int C[N]; // output keys + * int D[N]; // output values + * + * thrust::pair<int*,int*> new_end; + * new_end = thrust::unique_by_key_copy(A, A + N, B, C, D); + * + * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
+ * // The first four values in D are now {9, 8, 5, 3} and new_end.second - D is 4. + * \endcode + * + * \see unique_copy + * \see unique_by_key + * \see reduce_by_key + */ +template + thrust::pair + unique_by_key_copy(InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_result, + OutputIterator2 values_result); + + +/*! \p unique_by_key_copy is a generalization of \p unique_copy to key-value pairs. + * For each group of consecutive keys in the range [keys_first, keys_last) + * that are equal, \p unique_by_key_copy copies the first element of the group to + * a range beginning with \c keys_result and the corresponding values from the range + * [values_first, values_first + (keys_last - keys_first)) are copied to a range + * beginning with \c values_result. + * + * This version of \p unique_by_key_copy uses the function object \c binary_pred + * to test for equality and \c project1st to reduce values with equal keys. + * + * The algorithm's execution is parallelized as determined by \p exec. + * + * \param exec The execution policy to use for parallelization. + * \param keys_first The beginning of the input key range. + * \param keys_last The end of the input key range. + * \param values_first The beginning of the input value range. + * \param keys_result The beginning of the output key range. + * \param values_result The beginning of the output value range. + * \param binary_pred The binary predicate used to determine equality. + * \return A pair of iterators at end of the ranges [keys_result, keys_result_last) and [values_result, values_result_last). + * + * \tparam DerivedPolicy The name of the derived execution policy. + * \tparam InputIterator1 is a model of Input Iterator, + * \tparam InputIterator2 is a model of Input Iterator, + * \tparam OutputIterator1 is a model of Output Iterator and + * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. 
+ * \tparam OutputIterator2 is a model of Output Iterator and + * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. + * \tparam BinaryPredicate is a model of Binary Predicate. + * + * \pre The input ranges shall not overlap either output range. + * + * The following code snippet demonstrates how to use \p unique_by_key_copy to + * compact a sequence of key/value pairs and with equal keys using the \p thrust::host execution policy for + * parallelization: + * + * \code + * #include + * #include + * ... + * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys + * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values + * int C[N]; // output keys + * int D[N]; // output values + * + * thrust::pair new_end; + * thrust::equal_to binary_pred; + * new_end = thrust::unique_by_key_copy(thrust::host, A, A + N, B, C, D, binary_pred); + * + * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4. + * // The first four values in D are now {9, 8, 5, 3} and new_end.second - D is 4. + * \endcode + * + * \see unique_copy + * \see unique_by_key + * \see reduce_by_key + */ +template + thrust::pair + unique_by_key_copy(const thrust::detail::execution_policy_base &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_result, + OutputIterator2 values_result, + BinaryPredicate binary_pred); + + +/*! \p unique_by_key_copy is a generalization of \p unique_copy to key-value pairs. + * For each group of consecutive keys in the range [keys_first, keys_last) + * that are equal, \p unique_by_key_copy copies the first element of the group to + * a range beginning with \c keys_result and the corresponding values from the range + * [values_first, values_first + (keys_last - keys_first)) are copied to a range + * beginning with \c values_result. 
+ * + * This version of \p unique_by_key_copy uses the function object \c binary_pred + * to test for equality and \c project1st to reduce values with equal keys. + * + * \param keys_first The beginning of the input key range. + * \param keys_last The end of the input key range. + * \param values_first The beginning of the input value range. + * \param keys_result The beginning of the output key range. + * \param values_result The beginning of the output value range. + * \param binary_pred The binary predicate used to determine equality. + * \return A pair of iterators at the end of the ranges [keys_result, keys_result_last) and [values_result, values_result_last). + * + * \tparam InputIterator1 is a model of Input Iterator. + * \tparam InputIterator2 is a model of Input Iterator. + * \tparam OutputIterator1 is a model of Output Iterator, + * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. + * \tparam OutputIterator2 is a model of Output Iterator, + * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. + * \tparam BinaryPredicate is a model of Binary Predicate. + * + * \pre The input ranges shall not overlap either output range. + * + * The following code snippet demonstrates how to use \p unique_by_key_copy to + * compact a sequence of key/value pairs by removing consecutive entries with equal keys. + * + * \code + * #include <thrust/unique.h> + * ... + * const int N = 7; + * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys + * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values + * int C[N]; // output keys + * int D[N]; // output values + * + * thrust::pair<int*,int*> new_end; + * thrust::equal_to<int> binary_pred; + * new_end = thrust::unique_by_key_copy(A, A + N, B, C, D, binary_pred); + * + * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4. + * // The first four values in D are now {9, 8, 5, 3} and new_end.second - D is 4.
+ * \endcode + * + * \see unique_copy + * \see unique_by_key + * \see reduce_by_key + */ +template + thrust::pair + unique_by_key_copy(InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_result, + OutputIterator2 values_result, + BinaryPredicate binary_pred); + + +/*! \} // end stream_compaction + */ + + +} // end namespace thrust + +#include + diff --git a/compat/thrust/version.h b/compat/thrust/version.h new file mode 100644 index 0000000..730997e --- /dev/null +++ b/compat/thrust/version.h @@ -0,0 +1,73 @@ +/* + * Copyright 2008-2012 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file version.h + * \brief Compile-time macros encoding Thrust release version + * + * is the only Thrust header that is guaranteed to + * change with every thrust release. + */ + +#pragma once + +#include + +// This is the only thrust header that is guaranteed to +// change with every thrust release. +// +// THRUST_VERSION % 100 is the sub-minor version +// THRUST_VERSION / 100 % 1000 is the minor version +// THRUST_VERSION / 100000 is the major version + +/*! \def THRUST_VERSION + * \brief The preprocessor macro \p THRUST_VERSION encodes the version + * number of the Thrust library. + * + * THRUST_VERSION % 100 is the sub-minor version. + * THRUST_VERSION / 100 % 1000 is the minor version. + * THRUST_VERSION / 100000 is the major version. + */ +#define THRUST_VERSION 100700 + +/*! 
\def THRUST_MAJOR_VERSION + * \brief The preprocessor macro \p THRUST_MAJOR_VERSION encodes the + * major version number of the Thrust library. + */ +#define THRUST_MAJOR_VERSION (THRUST_VERSION / 100000) + +/*! \def THRUST_MINOR_VERSION + * \brief The preprocessor macro \p THRUST_MINOR_VERSION encodes the + * minor version number of the Thrust library. + */ +#define THRUST_MINOR_VERSION (THRUST_VERSION / 100 % 1000) + +/*! \def THRUST_SUBMINOR_VERSION + * \brief The preprocessor macro \p THRUST_SUBMINOR_VERSION encodes the + * sub-minor version number of the Thrust library. + */ +#define THRUST_SUBMINOR_VERSION (THRUST_VERSION % 100) + +// Declare these namespaces here for the purpose of Doxygenating them + +/*! \namespace thrust + * \brief \p thrust is the top-level namespace which contains all Thrust + * functions and types. + */ +namespace thrust +{ + +} + diff --git a/compat/unistd.h b/compat/unistd.h new file mode 100644 index 0000000..193da66 --- /dev/null +++ b/compat/unistd.h @@ -0,0 +1,2 @@ +#pragma once +#include "getopt/getopt.h" \ No newline at end of file diff --git a/compile b/compile new file mode 100644 index 0000000..a49b6d0 --- /dev/null +++ b/compile @@ -0,0 +1,310 @@ +#! /bin/sh +# Wrapper for compilers which do not understand '-c -o'. + +scriptversion=2012-01-04.17; # UTC + +# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2009, 2010, 2012 Free +# Software Foundation, Inc. +# Written by Tom Tromey . +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to or send patches to +# . + +nl=' +' + +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent tools from complaining about whitespace usage. +IFS=" "" $nl" + +file_conv= + +# func_file_conv build_file lazy +# Convert a $build file to $host form and store it in $file +# Currently only supports Windows hosts. If the determined conversion +# type is listed in (the comma separated) LAZY, no conversion will +# take place. +func_file_conv () +{ + file=$1 + case $file in + / | /[!/]*) # absolute file, and not a UNC file + if test -z "$file_conv"; then + # lazily determine how to convert abs files + case `uname -s` in + MINGW*) + file_conv=mingw + ;; + CYGWIN*) + file_conv=cygwin + ;; + *) + file_conv=wine + ;; + esac + fi + case $file_conv/,$2, in + *,$file_conv,*) + ;; + mingw/*) + file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'` + ;; + cygwin/*) + file=`cygpath -m "$file" || echo "$file"` + ;; + wine/*) + file=`winepath -w "$file" || echo "$file"` + ;; + esac + ;; + esac +} + +# func_cl_wrapper cl arg... +# Adjust compile command to suit cl +func_cl_wrapper () +{ + # Assume a capable shell + lib_path= + shared=: + linker_opts= + for arg + do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. 
+ eat=1 + case $2 in + *.o | *.[oO][bB][jJ]) + func_file_conv "$2" + set x "$@" -Fo"$file" + shift + ;; + *) + func_file_conv "$2" + set x "$@" -Fe"$file" + shift + ;; + esac + ;; + -I*) + func_file_conv "${1#-I}" mingw + set x "$@" -I"$file" + shift + ;; + -l*) + lib=${1#-l} + found=no + save_IFS=$IFS + IFS=';' + for dir in $lib_path $LIB + do + IFS=$save_IFS + if $shared && test -f "$dir/$lib.dll.lib"; then + found=yes + set x "$@" "$dir/$lib.dll.lib" + break + fi + if test -f "$dir/$lib.lib"; then + found=yes + set x "$@" "$dir/$lib.lib" + break + fi + done + IFS=$save_IFS + + test "$found" != yes && set x "$@" "$lib.lib" + shift + ;; + -L*) + func_file_conv "${1#-L}" + if test -z "$lib_path"; then + lib_path=$file + else + lib_path="$lib_path;$file" + fi + linker_opts="$linker_opts -LIBPATH:$file" + ;; + -static) + shared=false + ;; + -Wl,*) + arg=${1#-Wl,} + save_ifs="$IFS"; IFS=',' + for flag in $arg; do + IFS="$save_ifs" + linker_opts="$linker_opts $flag" + done + IFS="$save_ifs" + ;; + -Xlinker) + eat=1 + linker_opts="$linker_opts $2" + ;; + -*) + set x "$@" "$1" + shift + ;; + *.cc | *.CC | *.cxx | *.CXX | *.[cC]++) + func_file_conv "$1" + set x "$@" -Tp"$file" + shift + ;; + *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO]) + func_file_conv "$1" mingw + set x "$@" "$file" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift + done + if test -n "$linker_opts"; then + linker_opts="-link$linker_opts" + fi + exec "$@" $linker_opts + exit 1 +} + +eat= + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: compile [--help] [--version] PROGRAM [ARGS] + +Wrapper for compilers which do not understand '-c -o'. +Remove '-o dest.o' from ARGS, run PROGRAM with the remaining +arguments, and rename the output as expected. + +If you are trying to build a whole package this is not the +right script to run: please start by reading the file 'INSTALL'. 
+ +Report bugs to <bug-automake@gnu.org>. +EOF + exit $? + ;; + -v | --v*) + echo "compile $scriptversion" + exit $? + ;; + cl | *[/\\]cl | cl.exe | *[/\\]cl.exe ) + func_cl_wrapper "$@" # Doesn't return... + ;; +esac + +ofile= +cfile= + +for arg +do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. + # So we strip '-o arg' only if arg is an object. + eat=1 + case $2 in + *.o | *.obj) + ofile=$2 + ;; + *) + set x "$@" -o "$2" + shift + ;; + esac + ;; + *.c) + cfile=$1 + set x "$@" "$1" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift +done + +if test -z "$ofile" || test -z "$cfile"; then + # If no '-o' option was seen then we might have been invoked from a + # pattern rule where we don't need one. That is ok -- this is a + # normal compilation that the losing compiler can handle. If no + # '.c' file was seen then we are probably linking. That is also + # ok. + exec "$@" +fi + +# Name of file we expect compiler to create. +cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'` + +# Create the lock directory. +# Note: use '[/\\:.-]' here to ensure that we don't use the same name +# that we are using for the .o file. Also, base the name on the expected +# object file name, since that is what matters with a parallel build. +lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d +while true; do + if mkdir "$lockdir" >/dev/null 2>&1; then + break + fi + sleep 1 +done +# FIXME: race condition here if user kills between mkdir and trap. +trap "rmdir '$lockdir'; exit 1" 1 2 15 + +# Run the compile. +"$@" +ret=$?
+ +if test -f "$cofile"; then + test "$cofile" = "$ofile" || mv "$cofile" "$ofile" +elif test -f "${cofile}bj"; then + test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile" +fi + +rmdir "$lockdir" +exit $ret + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC" +# time-stamp-end: "; # UTC" +# End: diff --git a/config.guess b/config.guess new file mode 100644 index 0000000..f32079a --- /dev/null +++ b/config.guess @@ -0,0 +1,1526 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 +# Free Software Foundation, Inc. + +timestamp='2008-01-23' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +# 02110-1301, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + + +# Originally written by Per Bothner . +# Please send patches to . Submit a context +# diff and a properly formatted ChangeLog entry. 
+# +# This script attempts to guess a canonical system name similar to +# config.sub. If it succeeds, it prints the system name on stdout, and +# exits with 0. Otherwise, it exits with 1. +# +# The plan is that this can be called by configure scripts if you +# don't specify an explicit build system type. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] + +Output the configuration name of the system \`$me' is run on. + +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to <config-patches@gnu.org>." + +version="\ +GNU config.guess ($timestamp) + +Originally written by Per Bothner. +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, +2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + * ) + break ;; + esac +done + +if test $# != 0; then + echo "$me: too many arguments$help" >&2 + exit 1 +fi + +trap 'exit 1' 1 2 15 + +# CC_FOR_BUILD -- compiler used by this script. Note that the use of a +# compiler to aid in system detection is discouraged as it requires +# temporary files to be created and, as you can see below, it is a +# headache to deal with in a portable fashion. + +# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still +# use `HOST_CC' if defined, but it is deprecated.
+ +# Portable tmp directory creation inspired by the Autoconf team. + +set_cc_for_build=' +trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; +trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; +: ${TMPDIR=/tmp} ; + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; +dummy=$tmp/dummy ; +tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; +case $CC_FOR_BUILD,$HOST_CC,$CC in + ,,) echo "int x;" > $dummy.c ; + for c in cc gcc c89 c99 ; do + if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then + CC_FOR_BUILD="$c"; break ; + fi ; + done ; + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found ; + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; +esac ; set_cc_for_build= ;' + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. +# (ghazi@noc.rutgers.edu 1994-08-24) +if (test -f /.attbin/uname) >/dev/null 2>&1 ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +# Note: order is significant - the case branches are not exclusive. + +case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. 
For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". + sysctl="sysctl -n hw.machine_arch" + UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ + /usr/sbin/$sysctl 2>/dev/null || echo unknown)` + case "${UNAME_MACHINE_ARCH}" in + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; + *) machine=${UNAME_MACHINE_ARCH}-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently, or will in the future. + case "${UNAME_MACHINE_ARCH}" in + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + eval $set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep __ELF__ >/dev/null + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? + os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. + case "${UNAME_VERSION}" in + Debian*) + release='-gnu' + ;; + *) + release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
+ echo "${machine}-${os}${release}" + exit ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} + exit ;; + *:ekkoBSD:*:*) + echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} + exit ;; + *:SolidBSD:*:*) + echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} + exit ;; + macppc:MirBSD:*:*) + echo powerpc-unknown-mirbsd${UNAME_RELEASE} + exit ;; + *:MirBSD:*:*) + echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} + exit ;; + alpha:OSF1:*:*) + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. + ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case "$ALPHA_CPU_TYPE" in + "EV4 (21064)") + UNAME_MACHINE="alpha" ;; + "EV4.5 (21064)") + UNAME_MACHINE="alpha" ;; + "LCA4 (21066/21068)") + UNAME_MACHINE="alpha" ;; + "EV5 (21164)") + UNAME_MACHINE="alphaev5" ;; + "EV5.6 (21164A)") + UNAME_MACHINE="alphaev56" ;; + "EV5.6 (21164PC)") + UNAME_MACHINE="alphapca56" ;; + "EV5.7 (21164PC)") + UNAME_MACHINE="alphapca57" ;; + "EV6 (21264)") + UNAME_MACHINE="alphaev6" ;; + "EV6.7 (21264A)") + UNAME_MACHINE="alphaev67" ;; + "EV6.8CB (21264C)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8AL (21264B)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8CX (21264D)") + UNAME_MACHINE="alphaev68" ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE="alphaev69" ;; + "EV7 (21364)") + UNAME_MACHINE="alphaev7" ;; + "EV7.9 (21364A)") + UNAME_MACHINE="alphaev79" ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. 
+ # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + exit ;; + Alpha\ *:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # Should we change UNAME_MACHINE based on the output of uname instead + # of the specific Alpha model? + echo alpha-pc-interix + exit ;; + 21064:Windows_NT:50:3) + echo alpha-dec-winnt3.5 + exit ;; + Amiga*:UNIX_System_V:4.0:*) + echo m68k-unknown-sysv4 + exit ;; + *:[Aa]miga[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-amigaos + exit ;; + *:[Mm]orph[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-morphos + exit ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; + *:OS400:*:*) + echo powerpc-ibm-os400 + exit ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix${UNAME_RELEASE} + exit ;; + arm:riscos:*:*|arm:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + echo hppa1.1-hitachi-hiuxmpp + exit ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. 
+ if test "`(/bin/universe) 2>/dev/null`" = att ; then + echo pyramid-pyramid-sysv3 + else + echo pyramid-pyramid-bsd + fi + exit ;; + NILE*:*:*:dcosx) + echo pyramid-pyramid-svr4 + exit ;; + DRS?6000:unix:4.0:6*) + echo sparc-icl-nx6 + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) echo sparc-icl-nx7; exit ;; + esac ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) + echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. + echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` + exit ;; + sun3*:SunOS:*:*) + echo m68k-sun-sunos${UNAME_RELEASE} + exit ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 + case "`/bin/arch`" in + sun3) + echo m68k-sun-sunos${UNAME_RELEASE} + ;; + sun4) + echo sparc-sun-sunos${UNAME_RELEASE} + ;; + esac + exit ;; + aushp:SunOS:*:*) + echo sparc-auspex-sunos${UNAME_RELEASE} + exit ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). 
Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. + atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + echo m68k-milan-mint${UNAME_RELEASE} + exit ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + echo m68k-hades-mint${UNAME_RELEASE} + exit ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + echo m68k-unknown-mint${UNAME_RELEASE} + exit ;; + m68k:machten:*:*) + echo m68k-apple-machten${UNAME_RELEASE} + exit ;; + powerpc:machten:*:*) + echo powerpc-apple-machten${UNAME_RELEASE} + exit ;; + RISC*:Mach:*:*) + echo mips-dec-mach_bsd4.3 + exit ;; + RISC*:ULTRIX:*:*) + echo mips-dec-ultrix${UNAME_RELEASE} + exit ;; + VAX*:ULTRIX*:*:*) + echo vax-dec-ultrix${UNAME_RELEASE} + exit ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + echo clipper-intergraph-clix${UNAME_RELEASE} + exit ;; + mips:*:*:UMIPS | mips:*:*:RISCos) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c +#ifdef __cplusplus +#include <stdio.h> /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && + dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && +
SYSTEM_NAME=`$dummy $dummyarg` && + { echo "$SYSTEM_NAME"; exit; } + echo mips-mips-riscos${UNAME_RELEASE} + exit ;; + Motorola:PowerMAX_OS:*:*) + echo powerpc-motorola-powermax + exit ;; + Motorola:*:4.3:PL8-*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:Power_UNIX:*:*) + echo powerpc-harris-powerunix + exit ;; + m88k:CX/UX:7*:*) + echo m88k-harris-cxux7 + exit ;; + m88k:*:4*:R4*) + echo m88k-motorola-sysv4 + exit ;; + m88k:*:3*:R3*) + echo m88k-motorola-sysv3 + exit ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] + then + if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ + [ ${TARGET_BINARY_INTERFACE}x = x ] + then + echo m88k-dg-dgux${UNAME_RELEASE} + else + echo m88k-dg-dguxbcs${UNAME_RELEASE} + fi + else + echo i586-dg-dgux${UNAME_RELEASE} + fi + exit ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + echo m88k-dolphin-sysv3 + exit ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + echo m88k-motorola-sysv3 + exit ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + echo m88k-tektronix-sysv3 + exit ;; + Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) + echo m68k-tektronix-bsd + exit ;; + *:IRIX*:*:*) + echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` + exit ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
+ echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + echo i386-ibm-aix + exit ;; + ia64:AIX:*:*) + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} + exit ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include <sys/systemcfg.h> + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` + then + echo "$SYSTEM_NAME" + else + echo rs6000-ibm-aix3.2.5 + fi + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + echo rs6000-ibm-aix3.2.4 + else + echo rs6000-ibm-aix3.2 + fi + exit ;; + *:AIX:*:[456]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${IBM_ARCH}-ibm-aix${IBM_REV} + exit ;; + *:AIX:*:*) + echo rs6000-ibm-aix + exit ;; + ibmrt:4.4BSD:*|romp-ibm:BSD:*) + echo romp-ibm-bsd4.4 + exit ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to + exit ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + echo rs6000-bull-bosx + exit ;; + DPX/2?00:B.O.S.:*:*) + echo m68k-bull-sysv3 + exit ;; + 9000/[34]??:4.3bsd:1.*:*) + echo m68k-hp-bsd + exit ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + echo m68k-hp-bsd4.4 + exit ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + case "${UNAME_MACHINE}" in + 9000/31? ) HP_ARCH=m68000 ;; + 9000/[34]??
) HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if [ -x /usr/bin/getconf ]; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "${sc_cpu_version}" in + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "${sc_kernel_bits}" in + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; + '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 + esac ;; + esac + fi + if [ "${HP_ARCH}" = "" ]; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + + #define _HPUX_SOURCE + #include <stdlib.h> + #include <unistd.h> + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; + esac + if [ ${HP_ARCH} = "hppa2.0w" ] + then + eval $set_cc_for_build + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code.
GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + grep __LP64__ >/dev/null + then + HP_ARCH="hppa2.0w" + else + HP_ARCH="hppa64" + fi + fi + echo ${HP_ARCH}-hp-hpux${HPUX_REV} + exit ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + echo ia64-hp-hpux${HPUX_REV} + exit ;; + 3050*:HI-UX:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include <unistd.h> + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. */ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + echo unknown-hitachi-hiuxwe2 + exit ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) + echo hppa1.1-hp-bsd + exit ;; + 9000/8??:4.3bsd:*:*) + echo hppa1.0-hp-bsd + exit ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) + echo hppa1.1-hp-osf + exit ;; + hp8??:OSF1:*:*) + echo hppa1.0-hp-osf + exit ;; + i*86:OSF1:*:*) + if [ -x /usr/sbin/sysversion ] ; then + echo ${UNAME_MACHINE}-unknown-osf1mk + else + echo ${UNAME_MACHINE}-unknown-osf1 + fi + exit ;; + parisc*:Lites*:*:*) + echo hppa1.1-hp-lites + exit ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd + exit ;; +
C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd + exit ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd + exit ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd + exit ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*[A-Z]90:*:*:*) + echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*TS:*:*:*) + echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + *:UNICOS/mp:*:*) + echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} + exit ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:BSD/OS:*:*) + echo 
${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:FreeBSD:*:*) + case ${UNAME_MACHINE} in + pc98) + echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + amd64) + echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + *) + echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + esac + exit ;; + i*:CYGWIN*:*) + echo ${UNAME_MACHINE}-pc-cygwin + exit ;; + *:MINGW*:*) + echo ${UNAME_MACHINE}-pc-mingw32 + exit ;; + i*:windows32*:*) + # uname -m includes "-pc" on this system. + echo ${UNAME_MACHINE}-mingw32 + exit ;; + i*:PW*:*) + echo ${UNAME_MACHINE}-pc-pw32 + exit ;; + *:Interix*:[3456]*) + case ${UNAME_MACHINE} in + x86) + echo i586-pc-interix${UNAME_RELEASE} + exit ;; + EM64T | authenticamd) + echo x86_64-unknown-interix${UNAME_RELEASE} + exit ;; + IA64) + echo ia64-unknown-interix${UNAME_RELEASE} + exit ;; + esac ;; + [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) + echo i${UNAME_MACHINE}-pc-mks + exit ;; + i*:Windows_NT*:* | Pentium*:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we + # UNAME_MACHINE based on the output of uname instead of i386? 
+ echo i586-pc-interix + exit ;; + i*:UWIN*:*) + echo ${UNAME_MACHINE}-pc-uwin + exit ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit ;; + p*:CYGWIN*:*) + echo powerpcle-unknown-cygwin + exit ;; + prep*:SunOS:5.*:*) + echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + *:GNU:*:*) + # the GNU system + echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu + exit ;; + i*86:Minix:*:*) + echo ${UNAME_MACHINE}-pc-minix + exit ;; + arm*:Linux:*:*) + eval $set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + echo ${UNAME_MACHINE}-unknown-linux-gnu + else + echo ${UNAME_MACHINE}-unknown-linux-gnueabi + fi + exit ;; + avr32*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + cris:Linux:*:*) + echo cris-axis-linux-gnu + exit ;; + crisv32:Linux:*:*) + echo crisv32-axis-linux-gnu + exit ;; + frv:Linux:*:*) + echo frv-unknown-linux-gnu + exit ;; + ia64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m32r*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m68*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + mips:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef mips + #undef mipsel + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=mipsel + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=mips + #else + CPU= + #endif + #endif +EOF + eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' + /^CPU/{ + s: ::g + p + }'`" + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; + mips64:Linux:*:*) + 
eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef mips64 + #undef mips64el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=mips64el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=mips64 + #else + CPU= + #endif + #endif +EOF + eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' + /^CPU/{ + s: ::g + p + }'`" + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; + or32:Linux:*:*) + echo or32-unknown-linux-gnu + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-gnu + exit ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-gnu + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null + if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi + echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) echo hppa1.1-unknown-linux-gnu ;; + PA8*) echo hppa2.0-unknown-linux-gnu ;; + *) echo hppa-unknown-linux-gnu ;; + esac + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-gnu + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo ${UNAME_MACHINE}-ibm-linux + exit ;; + sh64*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + sh*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + vax:Linux:*:*) + echo ${UNAME_MACHINE}-dec-linux-gnu + exit ;; + x86_64:Linux:*:*) + echo x86_64-unknown-linux-gnu + exit ;; + xtensa*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + i*86:Linux:*:*) + # The BFD linker knows what the default object file format is, so + # first see if it will tell us. cd to the root directory to prevent + # problems with other programs or directories called `ld' in the path. + # Set LC_ALL=C to ensure ld outputs messages in English. + ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \ + | sed -ne '/supported targets:/!d + s/[ ][ ]*/ /g + s/.*supported targets: *// + s/ .*// + p'` + case "$ld_supported_targets" in + elf32-i386) + TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" + ;; + a.out-i386-linux) + echo "${UNAME_MACHINE}-pc-linux-gnuaout" + exit ;; + coff-i386) + echo "${UNAME_MACHINE}-pc-linux-gnucoff" + exit ;; + "") + # Either a pre-BFD a.out linker (linux-gnuoldld) or + # one that does not give us useful --help. 
+ echo "${UNAME_MACHINE}-pc-linux-gnuoldld" + exit ;; + esac + # Determine whether the default compiler is a.out or elf + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + #ifdef __ELF__ + # ifdef __GLIBC__ + # if __GLIBC__ >= 2 + LIBC=gnu + # else + LIBC=gnulibc1 + # endif + # else + LIBC=gnulibc1 + # endif + #else + #if defined(__INTEL_COMPILER) || defined(__PGI) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) + LIBC=gnu + #else + LIBC=gnuaout + #endif + #endif + #ifdef __dietlibc__ + LIBC=dietlibc + #endif +EOF + eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' + /^LIBC/{ + s: ::g + p + }'`" + test x"${LIBC}" != x && { + echo "${UNAME_MACHINE}-pc-linux-${LIBC}" + exit + } + test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; } + ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. + echo i386-sequent-sysv4 + exit ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. + echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} + exit ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. 
+ echo ${UNAME_MACHINE}-pc-os2-emx + exit ;; + i*86:XTS-300:*:STOP) + echo ${UNAME_MACHINE}-unknown-stop + exit ;; + i*86:atheos:*:*) + echo ${UNAME_MACHINE}-unknown-atheos + exit ;; + i*86:syllable:*:*) + echo ${UNAME_MACHINE}-pc-syllable + exit ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) + echo i386-unknown-lynxos${UNAME_RELEASE} + exit ;; + i*86:*DOS:*:*) + echo ${UNAME_MACHINE}-pc-msdosdjgpp + exit ;; + i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) + UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} + else + echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} + fi + exit ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. + case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; + *Pent*|*Celeron) UNAME_MACHINE=i686 ;; + esac + echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + exit ;; + i*86:*:3.2:*) + if test -f /usr/options/cb.name; then + UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ + && UNAME_MACHINE=i586 + (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 + echo ${UNAME_MACHINE}-pc-sco$UNAME_REL + else + echo ${UNAME_MACHINE}-pc-sysv32 + fi + exit ;; + pc:*:*:*) + # Left here for compatibility: + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i386. 
+ echo i386-pc-msdosdjgpp + exit ;; + Intel:Mach:3*:*) + echo i386-pc-mach3 + exit ;; + paragon:*:*:*) + echo i860-intel-osf1 + exit ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered. + echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 + fi + exit ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + echo m68010-convergent-sysv + exit ;; + mc68k:UNIX:SYSTEM5:3.51m) + echo m68k-convergent-sysv + exit ;; + M680?0:D-NIX:5.3:*) + echo m68k-diab-dnix + exit ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + echo m68k-unknown-lynxos${UNAME_RELEASE} + exit ;; + mc68030:UNIX_System_V:4.*:*) + echo m68k-atari-sysv4 + exit ;; + TSUNAMI:LynxOS:2.*:*) + echo sparc-unknown-lynxos${UNAME_RELEASE} + exit ;; + rs6000:LynxOS:2.*:*) + echo rs6000-unknown-lynxos${UNAME_RELEASE} + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) + echo powerpc-unknown-lynxos${UNAME_RELEASE} + exit ;; + SM[BE]S:UNIX_SV:*:*) + echo mips-dde-sysv${UNAME_RELEASE} + exit ;; + RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + RM*:SINIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + 
*:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + echo ${UNAME_MACHINE}-sni-sysv4 + else + echo ns32k-sni-sysv + fi + exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says <Richard.M.Bartel@ccMail.Census.GOV> + echo i586-unisys-sysv4 + exit ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes <hewes@openmarket.com>. + # How about differentiating between stratus architectures? -djm + echo hppa1.1-stratus-sysv4 + exit ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + echo i860-stratus-sysv4 + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo ${UNAME_MACHINE}-stratus-vos + exit ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + echo hppa1.1-stratus-vos + exit ;; + mc68*:A/UX:*:*) + echo m68k-apple-aux${UNAME_RELEASE} + exit ;; + news*:NEWS-OS:6*:*) + echo mips-sony-newsos6 + exit ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then + echo mips-nec-sysv${UNAME_RELEASE} + else + echo mips-unknown-sysv${UNAME_RELEASE} + fi + exit ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible.
+ echo i586-pc-beos + exit ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux${UNAME_RELEASE} + exit ;; + SX-5:SUPER-UX:*:*) + echo sx5-nec-superux${UNAME_RELEASE} + exit ;; + SX-6:SUPER-UX:*:*) + echo sx6-nec-superux${UNAME_RELEASE} + exit ;; + SX-7:SUPER-UX:*:*) + echo sx7-nec-superux${UNAME_RELEASE} + exit ;; + SX-8:SUPER-UX:*:*) + echo sx8-nec-superux${UNAME_RELEASE} + exit ;; + SX-8R:SUPER-UX:*:*) + echo sx8r-nec-superux${UNAME_RELEASE} + exit ;; + Power*:Rhapsody:*:*) + echo powerpc-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Rhapsody:*:*) + echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown + case $UNAME_PROCESSOR in + unknown) UNAME_PROCESSOR=powerpc ;; + esac + echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} + exit ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = "x86"; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} + exit ;; + *:QNX:*:4*) + echo i386-pc-qnx + exit ;; + NSE-?:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk${UNAME_RELEASE} + exit ;; + NSR-?:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk${UNAME_RELEASE} + exit ;; + *:NonStop-UX:*:*) + echo mips-compaq-nonstopux + exit ;; + BS2000:POSIX*:*:*) + echo bs2000-siemens-sysv + exit ;; + DS/*:UNIX_System_V:*:*) + echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} + exit ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. 
+ if test "$cputype" = "386"; then + UNAME_MACHINE=i386 + else + UNAME_MACHINE="$cputype" + fi + echo ${UNAME_MACHINE}-unknown-plan9 + exit ;; + *:TOPS-10:*:*) + echo pdp10-unknown-tops10 + exit ;; + *:TENEX:*:*) + echo pdp10-unknown-tenex + exit ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + echo pdp10-dec-tops20 + exit ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + echo pdp10-xkl-tops20 + exit ;; + *:TOPS-20:*:*) + echo pdp10-unknown-tops20 + exit ;; + *:ITS:*:*) + echo pdp10-unknown-its + exit ;; + SEI:*:*:SEIUX) + echo mips-sei-seiux${UNAME_RELEASE} + exit ;; + *:DragonFly:*:*) + echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + exit ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case "${UNAME_MACHINE}" in + A*) echo alpha-dec-vms ; exit ;; + I*) echo ia64-dec-vms ; exit ;; + V*) echo vax-dec-vms ; exit ;; + esac ;; + *:XENIX:*:SysV) + echo i386-pc-xenix + exit ;; + i*86:skyos:*:*) + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' + exit ;; + i*86:rdos:*:*) + echo ${UNAME_MACHINE}-pc-rdos + exit ;; +esac + +#echo '(No uname command or uname output not recognized.)' 1>&2 +#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 + +eval $set_cc_for_build +cat >$dummy.c < +# include +#endif +main () +{ +#if defined (sony) +#if defined (MIPSEB) + /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, + I don't know.... 
*/ + printf ("mips-sony-bsd\n"); exit (0); +#else +#include + printf ("m68k-sony-newsos%s\n", +#ifdef NEWSOS4 + "4" +#else + "" +#endif + ); exit (0); +#endif +#endif + +#if defined (__arm) && defined (__acorn) && defined (__unix) + printf ("arm-acorn-riscix\n"); exit (0); +#endif + +#if defined (hp300) && !defined (hpux) + printf ("m68k-hp-bsd\n"); exit (0); +#endif + +#if defined (NeXT) +#if !defined (__ARCHITECTURE__) +#define __ARCHITECTURE__ "m68k" +#endif + int version; + version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); + exit (0); +#endif + +#if defined (MULTIMAX) || defined (n16) +#if defined (UMAXV) + printf ("ns32k-encore-sysv\n"); exit (0); +#else +#if defined (CMU) + printf ("ns32k-encore-mach\n"); exit (0); +#else + printf ("ns32k-encore-bsd\n"); exit (0); +#endif +#endif +#endif + +#if defined (__386BSD__) + printf ("i386-pc-bsd\n"); exit (0); +#endif + +#if defined (sequent) +#if defined (i386) + printf ("i386-sequent-dynix\n"); exit (0); +#endif +#if defined (ns32000) + printf ("ns32k-sequent-dynix\n"); exit (0); +#endif +#endif + +#if defined (_SEQUENT_) + struct utsname un; + + uname(&un); + + if (strncmp(un.version, "V2", 2) == 0) { + printf ("i386-sequent-ptx2\n"); exit (0); + } + if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ + printf ("i386-sequent-ptx1\n"); exit (0); + } + printf ("i386-sequent-ptx\n"); exit (0); + +#endif + +#if defined (vax) +# if !defined (ultrix) +# include +# if defined (BSD) +# if BSD == 43 + printf ("vax-dec-bsd4.3\n"); exit (0); +# else +# if BSD == 199006 + printf ("vax-dec-bsd4.3reno\n"); exit (0); +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# endif +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# else + printf ("vax-dec-ultrix\n"); exit (0); +# endif +#endif + +#if defined (alliant) && defined (i860) + printf ("i860-alliant-bsd\n"); exit (0); +#endif + + exit (1); +} +EOF + +$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + +# Apollos put the system type in the environment. + +test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } + +# Convex versions that predate uname can use getsysinfo(1) + +if [ -x /usr/convex/getsysinfo ] +then + case `getsysinfo -f cpu_type` in + c1*) + echo c1-convex-bsd + exit ;; + c2*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + c34*) + echo c34-convex-bsd + exit ;; + c38*) + echo c38-convex-bsd + exit ;; + c4*) + echo c4-convex-bsd + exit ;; + esac +fi + +cat >&2 < in order to provide the needed +information to handle your system. 
+ +config.guess timestamp = $timestamp + +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = ${UNAME_MACHINE} +UNAME_RELEASE = ${UNAME_RELEASE} +UNAME_SYSTEM = ${UNAME_SYSTEM} +UNAME_VERSION = ${UNAME_VERSION} +EOF + +exit 1 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/config.sub b/config.sub new file mode 100644 index 0000000..6759825 --- /dev/null +++ b/config.sub @@ -0,0 +1,1658 @@ +#! /bin/sh +# Configuration validation subroutine script. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 +# Free Software Foundation, Inc. + +timestamp='2008-01-16' + +# This file is (in principle) common to ALL GNU software. +# The presence of a machine in this file suggests that SOME GNU software +# can handle that machine. It does not imply ALL GNU software can. +# +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +# 02110-1301, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + + +# Please send patches to <config-patches@gnu.org>. Submit a context +# diff and a properly formatted ChangeLog entry. +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1. +# Otherwise, we print the canonical config type on stdout and succeed. + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. + +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] CPU-MFR-OPSYS + $0 [OPTION] ALIAS + +Canonicalize a configuration name.
+ +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to <config-patches@gnu.org>." + +version="\ +GNU config.sub ($timestamp) + +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, +2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + + *local*) + # First pass through any local machine types. + echo $1 + exit ;; + + * ) + break ;; + esac +done + +case $# in + 0) echo "$me: missing argument$help" >&2 + exit 1;; + 1) ;; + *) echo "$me: too many arguments$help" >&2 + exit 1;; +esac + +# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). +# Here we must recognize all the valid KERNEL-OS combinations.
+maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` +case $maybe_os in + nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \ + uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \ + storm-chaos* | os2-emx* | rtmk-nova*) + os=-$maybe_os + basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` + ;; + *) + basic_machine=`echo $1 | sed 's/-[^-]*$//'` + if [ $basic_machine != $1 ] + then os=`echo $1 | sed 's/.*-/-/'` + else os=; fi + ;; +esac + +### Let's recognize common machines as not being operating systems so +### that things like config.sub decstation-3100 work. We also +### recognize some manufacturers as not being operating systems, so we +### can provide default operating systems below. +case $os in + -sun*os*) + # Prevent following clause from handling this invalid input. + ;; + -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ + -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ + -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ + -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ + -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ + -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ + -apple | -axis | -knuth | -cray) + os= + basic_machine=$1 + ;; + -sim | -cisco | -oki | -wec | -winbond) + os= + basic_machine=$1 + ;; + -scout) + ;; + -wrs) + os=-vxworks + basic_machine=$1 + ;; + -chorusos*) + os=-chorusos + basic_machine=$1 + ;; + -chorusrdb) + os=-chorusrdb + basic_machine=$1 + ;; + -hiux*) + os=-hiuxwe2 + ;; + -sco6) + os=-sco5v6 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco5) + os=-sco3.2v5 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco4) + os=-sco3.2v4 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2.[4-9]*) + os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2v[4-9]*) + # 
Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco5v6*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco*) + os=-sco3.2v2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -udk*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -isc) + os=-isc2.2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -clix*) + basic_machine=clipper-intergraph + ;; + -isc*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -lynx*) + os=-lynxos + ;; + -ptx*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` + ;; + -windowsnt*) + os=`echo $os | sed -e 's/windowsnt/winnt/'` + ;; + -psos*) + os=-psos + ;; + -mint | -mint[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; +esac + +# Decode aliases for certain CPU-COMPANY combinations. +case $basic_machine in + # Recognize the basic CPU types without company name. + # Some are omitted here because they have special meanings below. 
+ 1750a | 580 \ + | a29k \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ + | am33_2.0 \ + | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ + | bfin \ + | c4x | clipper \ + | d10v | d30v | dlx | dsp16xx \ + | fido | fr30 | frv \ + | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | i370 | i860 | i960 | ia64 \ + | ip2k | iq2000 \ + | m32c | m32r | m32rle | m68000 | m68k | m88k \ + | maxq | mb | microblaze | mcore | mep \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64el \ + | mips64vr | mips64vrel \ + | mips64orion | mips64orionel \ + | mips64vr4100 | mips64vr4100el \ + | mips64vr4300 | mips64vr4300el \ + | mips64vr5000 | mips64vr5000el \ + | mips64vr5900 | mips64vr5900el \ + | mipsisa32 | mipsisa32el \ + | mipsisa32r2 | mipsisa32r2el \ + | mipsisa64 | mipsisa64el \ + | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64sb1 | mipsisa64sb1el \ + | mipsisa64sr71k | mipsisa64sr71kel \ + | mipstx39 | mipstx39el \ + | mn10200 | mn10300 \ + | mt \ + | msp430 \ + | nios | nios2 \ + | ns16k | ns32k \ + | or32 \ + | pdp10 | pdp11 | pj | pjl \ + | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \ + | pyramid \ + | score \ + | sh | sh[1234] | sh[24]a | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh64 | sh64le \ + | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ + | spu | strongarm \ + | tahoe | thumb | tic4x | tic80 | tron \ + | v850 | v850e \ + | we32k \ + | x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \ + | z8k) + basic_machine=$basic_machine-unknown + ;; + m6811 | m68hc11 | m6812 | m68hc12) + # Motorola 68HC11/12. 
+ basic_machine=$basic_machine-unknown + os=-none + ;; + m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) + ;; + ms1) + basic_machine=mt-unknown + ;; + + # We use `pc' rather than `unknown' + # because (1) that's what they normally are, and + # (2) the word "unknown" tends to confuse beginning users. + i*86 | x86_64) + basic_machine=$basic_machine-pc + ;; + # Object if more than one company name word. + *-*-*) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; + # Recognize the basic CPU types with company name. + 580-* \ + | a29k-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ + | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ + | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ + | avr-* | avr32-* \ + | bfin-* | bs2000-* \ + | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \ + | clipper-* | craynv-* | cydra-* \ + | d10v-* | d30v-* | dlx-* \ + | elxsi-* \ + | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ + | h8300-* | h8500-* \ + | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ + | i*86-* | i860-* | i960-* | ia64-* \ + | ip2k-* | iq2000-* \ + | m32c-* | m32r-* | m32rle-* \ + | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ + | m88110-* | m88k-* | maxq-* | mcore-* \ + | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ + | mips16-* \ + | mips64-* | mips64el-* \ + | mips64vr-* | mips64vrel-* \ + | mips64orion-* | mips64orionel-* \ + | mips64vr4100-* | mips64vr4100el-* \ + | mips64vr4300-* | mips64vr4300el-* \ + | mips64vr5000-* | mips64vr5000el-* \ + | mips64vr5900-* | mips64vr5900el-* \ + | mipsisa32-* | mipsisa32el-* \ + | mipsisa32r2-* | mipsisa32r2el-* \ + | mipsisa64-* | mipsisa64el-* \ + | mipsisa64r2-* | mipsisa64r2el-* \ + | mipsisa64sb1-* | mipsisa64sb1el-* \ + | mipsisa64sr71k-* | mipsisa64sr71kel-* \ + | mipstx39-* | mipstx39el-* \ + | mmix-* \ + | 
mt-* \ + | msp430-* \ + | nios-* | nios2-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ + | orion-* \ + | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ + | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ + | pyramid-* \ + | romp-* | rs6000-* \ + | sh-* | sh[1234]-* | sh[24]a-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ + | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ + | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ + | sparclite-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \ + | tahoe-* | thumb-* \ + | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ + | tron-* \ + | v850-* | v850e-* | vax-* \ + | we32k-* \ + | x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \ + | xstormy16-* | xtensa*-* \ + | ymp-* \ + | z8k-*) + ;; + # Recognize the basic CPU types without company name, with glob match. + xtensa*) + basic_machine=$basic_machine-unknown + ;; + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. 
+ 386bsd) + basic_machine=i386-unknown + os=-bsd + ;; + 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + basic_machine=m68000-att + ;; + 3b*) + basic_machine=we32k-att + ;; + a29khif) + basic_machine=a29k-amd + os=-udi + ;; + abacus) + basic_machine=abacus-unknown + ;; + adobe68k) + basic_machine=m68010-adobe + os=-scout + ;; + alliant | fx80) + basic_machine=fx80-alliant + ;; + altos | altos3068) + basic_machine=m68k-altos + ;; + am29k) + basic_machine=a29k-none + os=-bsd + ;; + amd64) + basic_machine=x86_64-pc + ;; + amd64-*) + basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + amdahl) + basic_machine=580-amdahl + os=-sysv + ;; + amiga | amiga-*) + basic_machine=m68k-unknown + ;; + amigaos | amigados) + basic_machine=m68k-unknown + os=-amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + os=-sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + os=-sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + os=-bsd + ;; + aux) + basic_machine=m68k-apple + os=-aux + ;; + balance) + basic_machine=ns32k-sequent + os=-dynix + ;; + blackfin) + basic_machine=bfin-unknown + os=-linux + ;; + blackfin-*) + basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + c90) + basic_machine=c90-cray + os=-unicos + ;; + convex-c1) + basic_machine=c1-convex + os=-bsd + ;; + convex-c2) + basic_machine=c2-convex + os=-bsd + ;; + convex-c32) + basic_machine=c32-convex + os=-bsd + ;; + convex-c34) + basic_machine=c34-convex + os=-bsd + ;; + convex-c38) + basic_machine=c38-convex + os=-bsd + ;; + cray | j90) + basic_machine=j90-cray + os=-unicos + ;; + craynv) + basic_machine=craynv-cray + os=-unicosmp + ;; + cr16) + basic_machine=cr16-unknown + os=-elf + ;; + crds | unos) + basic_machine=m68k-crds + ;; + crisv32 | crisv32-* | etraxfs*) + basic_machine=crisv32-axis + ;; + cris | cris-* | etrax*) + basic_machine=cris-axis + ;; + crx) + basic_machine=crx-unknown + os=-elf + ;; + da30 | da30-*) + basic_machine=m68k-da30 + ;; + 
decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) + basic_machine=mips-dec + ;; + decsystem10* | dec10*) + basic_machine=pdp10-dec + os=-tops10 + ;; + decsystem20* | dec20*) + basic_machine=pdp10-dec + os=-tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + basic_machine=m68k-motorola + ;; + delta88) + basic_machine=m88k-motorola + os=-sysv3 + ;; + djgpp) + basic_machine=i586-pc + os=-msdosdjgpp + ;; + dpx20 | dpx20-*) + basic_machine=rs6000-bull + os=-bosx + ;; + dpx2* | dpx2*-bull) + basic_machine=m68k-bull + os=-sysv3 + ;; + ebmon29k) + basic_machine=a29k-amd + os=-ebmon + ;; + elxsi) + basic_machine=elxsi-elxsi + os=-bsd + ;; + encore | umax | mmax) + basic_machine=ns32k-encore + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + os=-ose + ;; + fx2800) + basic_machine=i860-alliant + ;; + genix) + basic_machine=ns32k-ns + ;; + gmicro) + basic_machine=tron-gmicro + os=-sysv + ;; + go32) + basic_machine=i386-pc + os=-go32 + ;; + h3050r* | hiux*) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + h8300hms) + basic_machine=h8300-hitachi + os=-hms + ;; + h8300xray) + basic_machine=h8300-hitachi + os=-xray + ;; + h8500hms) + basic_machine=h8500-hitachi + os=-hms + ;; + harris) + basic_machine=m88k-harris + os=-sysv3 + ;; + hp300-*) + basic_machine=m68k-hp + ;; + hp300bsd) + basic_machine=m68k-hp + os=-bsd + ;; + hp300hpux) + basic_machine=m68k-hp + os=-hpux + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + basic_machine=m68000-hp + ;; + hp9k3[2-9][0-9]) + basic_machine=m68k-hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + basic_machine=hppa1.1-hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really 
hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hppa-next) + os=-nextstep3 + ;; + hppaosf) + basic_machine=hppa1.1-hp + os=-osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=-proelf + ;; + i370-ibm* | ibm*) + basic_machine=i370-ibm + ;; +# I'm not sure what "Sysv32" means. Should this be sysv3.2? + i*86v32) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv32 + ;; + i*86v4*) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv4 + ;; + i*86v) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv + ;; + i*86sol2) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-solaris2 + ;; + i386mach) + basic_machine=i386-mach + os=-mach + ;; + i386-vsta | vsta) + basic_machine=i386-unknown + os=-vsta + ;; + iris | iris4d) + basic_machine=mips-sgi + case $os in + -irix*) + ;; + *) + os=-irix4 + ;; + esac + ;; + isi68 | isi) + basic_machine=m68k-isi + os=-sysv + ;; + m68knommu) + basic_machine=m68k-unknown + os=-linux + ;; + m68knommu-*) + basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + m88k-omron*) + basic_machine=m88k-omron + ;; + magnum | m3230) + basic_machine=mips-mips + os=-sysv + ;; + merlin) + basic_machine=ns32k-utek + os=-sysv + ;; + mingw32) + basic_machine=i386-pc + os=-mingw32 + ;; + mingw32ce) + basic_machine=arm-unknown + os=-mingw32ce + ;; + miniframe) + basic_machine=m68000-convergent + ;; + *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; + mips3*-*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` + ;; + mips3*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown + ;; + monitor) + basic_machine=m68k-rom68k + os=-coff + ;; + morphos) + basic_machine=powerpc-unknown + os=-morphos + ;; + msdos) + basic_machine=i386-pc + os=-msdos + ;; + ms1-*) + basic_machine=`echo $basic_machine | sed -e 
's/ms1-/mt-/'` + ;; + mvs) + basic_machine=i370-ibm + os=-mvs + ;; + ncr3000) + basic_machine=i486-ncr + os=-sysv4 + ;; + netbsd386) + basic_machine=i386-unknown + os=-netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + os=-linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + os=-newsos + ;; + news1000) + basic_machine=m68030-sony + os=-newsos + ;; + news-3600 | risc-news) + basic_machine=mips-sony + os=-newsos + ;; + necv70) + basic_machine=v70-nec + os=-sysv + ;; + next | m*-next ) + basic_machine=m68k-next + case $os in + -nextstep* ) + ;; + -ns2*) + os=-nextstep2 + ;; + *) + os=-nextstep3 + ;; + esac + ;; + nh3000) + basic_machine=m68k-harris + os=-cxux + ;; + nh[45]000) + basic_machine=m88k-harris + os=-cxux + ;; + nindy960) + basic_machine=i960-intel + os=-nindy + ;; + mon960) + basic_machine=i960-intel + os=-mon960 + ;; + nonstopux) + basic_machine=mips-compaq + os=-nonstopux + ;; + np1) + basic_machine=np1-gould + ;; + nsr-tandem) + basic_machine=nsr-tandem + ;; + op50n-* | op60c-*) + basic_machine=hppa1.1-oki + os=-proelf + ;; + openrisc | openrisc-*) + basic_machine=or32-unknown + ;; + os400) + basic_machine=powerpc-ibm + os=-os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=-ose + ;; + os68k) + basic_machine=m68k-none + os=-os68k + ;; + pa-hitachi) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + paragon) + basic_machine=i860-intel + os=-osf + ;; + parisc) + basic_machine=hppa-unknown + os=-linux + ;; + parisc-*) + basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + pbd) + basic_machine=sparc-tti + ;; + pbb) + basic_machine=m68k-tti + ;; + pc532 | pc532-*) + basic_machine=ns32k-pc532 + ;; + pc98) + basic_machine=i386-pc + ;; + pc98-*) + basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium | p5 | k5 | k6 | nexgen | viac3) + basic_machine=i586-pc + ;; + pentiumpro | p6 | 6x86 | athlon | athlon_*) + basic_machine=i686-pc + ;; + pentiumii | pentium2 | 
pentiumiii | pentium3) + basic_machine=i686-pc + ;; + pentium4) + basic_machine=i786-pc + ;; + pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) + basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumpro-* | p6-* | 6x86-* | athlon-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium4-*) + basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pn) + basic_machine=pn-gould + ;; + power) basic_machine=power-ibm + ;; + ppc) basic_machine=powerpc-unknown + ;; + ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppcle | powerpclittle | ppc-le | powerpc-little) + basic_machine=powerpcle-unknown + ;; + ppcle-* | powerpclittle-*) + basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64) basic_machine=powerpc64-unknown + ;; + ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64le | powerpc64little | ppc64-le | powerpc64-little) + basic_machine=powerpc64le-unknown + ;; + ppc64le-* | powerpc64little-*) + basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ps2) + basic_machine=i386-ibm + ;; + pw32) + basic_machine=i586-unknown + os=-pw32 + ;; + rdos) + basic_machine=i386-pc + os=-rdos + ;; + rom68k) + basic_machine=m68k-rom68k + os=-coff + ;; + rm[46]00) + basic_machine=mips-siemens + ;; + rtpc | rtpc-*) + basic_machine=romp-ibm + ;; + s390 | s390-*) + basic_machine=s390-ibm + ;; + s390x | s390x-*) + basic_machine=s390x-ibm + ;; + sa29200) + basic_machine=a29k-amd + os=-udi + ;; + sb1) + basic_machine=mipsisa64sb1-unknown + ;; + sb1el) + basic_machine=mipsisa64sb1el-unknown + ;; + sde) + basic_machine=mipsisa32-sde + os=-elf + ;; + sei) + basic_machine=mips-sei + os=-seiux + ;; + sequent) + basic_machine=i386-sequent + ;; + sh) + basic_machine=sh-hitachi + os=-hms + ;; + sh5el) + 
basic_machine=sh5le-unknown + ;; + sh64) + basic_machine=sh64-unknown + ;; + sparclite-wrs | simso-wrs) + basic_machine=sparclite-wrs + os=-vxworks + ;; + sps7) + basic_machine=m68k-bull + os=-sysv2 + ;; + spur) + basic_machine=spur-unknown + ;; + st2000) + basic_machine=m68k-tandem + ;; + stratus) + basic_machine=i860-stratus + os=-sysv4 + ;; + sun2) + basic_machine=m68000-sun + ;; + sun2os3) + basic_machine=m68000-sun + os=-sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + os=-sunos4 + ;; + sun3os3) + basic_machine=m68k-sun + os=-sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + os=-sunos4 + ;; + sun4os3) + basic_machine=sparc-sun + os=-sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + os=-sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + os=-solaris2 + ;; + sun3 | sun3-*) + basic_machine=m68k-sun + ;; + sun4) + basic_machine=sparc-sun + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + ;; + sv1) + basic_machine=sv1-cray + os=-unicos + ;; + symmetry) + basic_machine=i386-sequent + os=-dynix + ;; + t3e) + basic_machine=alphaev5-cray + os=-unicos + ;; + t90) + basic_machine=t90-cray + os=-unicos + ;; + tic54x | c54x*) + basic_machine=tic54x-unknown + os=-coff + ;; + tic55x | c55x*) + basic_machine=tic55x-unknown + os=-coff + ;; + tic6x | c6x*) + basic_machine=tic6x-unknown + os=-coff + ;; + tile*) + basic_machine=tile-unknown + os=-linux-gnu + ;; + tx39) + basic_machine=mipstx39-unknown + ;; + tx39el) + basic_machine=mipstx39el-unknown + ;; + toad1) + basic_machine=pdp10-xkl + os=-tops20 + ;; + tower | tower-32) + basic_machine=m68k-ncr + ;; + tpf) + basic_machine=s390x-ibm + os=-tpf + ;; + udi29k) + basic_machine=a29k-amd + os=-udi + ;; + ultra3) + basic_machine=a29k-nyu + os=-sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + os=-none + ;; + vaxv) + basic_machine=vax-dec + os=-sysv + ;; + vms) + basic_machine=vax-dec + os=-vms + ;; + vpp*|vx|vx-*) + basic_machine=f301-fujitsu + ;; + vxworks960) + basic_machine=i960-wrs + os=-vxworks + ;; + 
vxworks68) + basic_machine=m68k-wrs + os=-vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + os=-vxworks + ;; + w65*) + basic_machine=w65-wdc + os=-none + ;; + w89k-*) + basic_machine=hppa1.1-winbond + os=-proelf + ;; + xbox) + basic_machine=i686-pc + os=-mingw32 + ;; + xps | xps100) + basic_machine=xps100-honeywell + ;; + ymp) + basic_machine=ymp-cray + os=-unicos + ;; + z8k-*-coff) + basic_machine=z8k-unknown + os=-sim + ;; + none) + basic_machine=none-none + os=-none + ;; + +# Here we handle the default manufacturer of certain CPU types. It is in +# some cases the only manufacturer, in others, it is the most popular. + w89k) + basic_machine=hppa1.1-winbond + ;; + op50n) + basic_machine=hppa1.1-oki + ;; + op60c) + basic_machine=hppa1.1-oki + ;; + romp) + basic_machine=romp-ibm + ;; + mmix) + basic_machine=mmix-knuth + ;; + rs6000) + basic_machine=rs6000-ibm + ;; + vax) + basic_machine=vax-dec + ;; + pdp10) + # there are many clones, so DEC is not a safe bet + basic_machine=pdp10-unknown + ;; + pdp11) + basic_machine=pdp11-dec + ;; + we32k) + basic_machine=we32k-att + ;; + sh[1234] | sh[24]a | sh[34]eb | sh[1234]le | sh[23]ele) + basic_machine=sh-unknown + ;; + sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) + basic_machine=sparc-sun + ;; + cydra) + basic_machine=cydra-cydrome + ;; + orion) + basic_machine=orion-highlevel + ;; + orion105) + basic_machine=clipper-highlevel + ;; + mac | mpw | mac-mpw) + basic_machine=m68k-apple + ;; + pmac | pmac-mpw) + basic_machine=powerpc-apple + ;; + *-unknown) + # Make sure to match an already-canonicalized machine name. + ;; + *) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. 
+case $basic_machine in + *-digital*) + basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` + ;; + *-commodore*) + basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if [ x"$os" != x"" ] +then +case $os in + # First match some system type aliases + # that might get confused with valid system types. + # -solaris* is a basic system type, with this one exception. + -solaris1 | -solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; + -solaris) + os=-solaris2 + ;; + -svr4*) + os=-sysv4 + ;; + -unixware*) + os=-sysv4.2uw + ;; + -gnu/linux*) + os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` + ;; + # First accept the basic system types. + # The portable systems comes first. + # Each alternative MUST END IN A *, to match a version number. + # -sysv* is not here because it comes later, after sysvr4. + -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \ + | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ + | -aos* \ + | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ + | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ + | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ + | -openbsd* | -solidbsd* \ + | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ + | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ + | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ + | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ + | -chorusos* | -chorusrdb* \ + | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \ + | -uxpv* | -beos* | -mpeix* | -udk* \ + | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ + | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ + | 
-storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ + | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ + | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ + | -skyos* | -haiku* | -rdos* | -toppers* | -drops*) + # Remember, each alternative MUST END IN *, to match a version number. + ;; + -qnx*) + case $basic_machine in + x86-* | i*86-*) + ;; + *) + os=-nto$os + ;; + esac + ;; + -nto-qnx*) + ;; + -nto*) + os=`echo $os | sed -e 's|nto|nto-qnx|'` + ;; + -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ + | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ + | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) + ;; + -mac*) + os=`echo $os | sed -e 's|mac|macos|'` + ;; + -linux-dietlibc) + os=-linux-dietlibc + ;; + -linux*) + os=`echo $os | sed -e 's|linux|linux-gnu|'` + ;; + -sunos5*) + os=`echo $os | sed -e 's|sunos5|solaris2|'` + ;; + -sunos6*) + os=`echo $os | sed -e 's|sunos6|solaris3|'` + ;; + -opened*) + os=-openedition + ;; + -os400*) + os=-os400 + ;; + -wince*) + os=-wince + ;; + -osfrose*) + os=-osfrose + ;; + -osf*) + os=-osf + ;; + -utek*) + os=-bsd + ;; + -dynix*) + os=-bsd + ;; + -acis*) + os=-aos + ;; + -atheos*) + os=-atheos + ;; + -syllable*) + os=-syllable + ;; + -386bsd) + os=-bsd + ;; + -ctix* | -uts*) + os=-sysv + ;; + -nova*) + os=-rtmk-nova + ;; + -ns2 ) + os=-nextstep2 + ;; + -nsk*) + os=-nsk + ;; + # Preserve the version number of sinix5. + -sinix5.*) + os=`echo $os | sed -e 's|sinix|sysv|'` + ;; + -sinix*) + os=-sysv4 + ;; + -tpf*) + os=-tpf + ;; + -triton*) + os=-sysv3 + ;; + -oss*) + os=-sysv3 + ;; + -svr4) + os=-sysv4 + ;; + -svr3) + os=-sysv3 + ;; + -sysvr4) + os=-sysv4 + ;; + # This must come after -sysvr4. 
+ -sysv*) + ;; + -ose*) + os=-ose + ;; + -es1800*) + os=-ose + ;; + -xenix) + os=-xenix + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + os=-mint + ;; + -aros*) + os=-aros + ;; + -kaos*) + os=-kaos + ;; + -zvmoe) + os=-zvmoe + ;; + -none) + ;; + *) + # Get rid of the `-' at the beginning of $os. + os=`echo $os | sed 's/[^-]*-//'` + echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 + exit 1 + ;; +esac +else + +# Here we handle the default operating systems that come with various machines. +# The value should be what the vendor currently ships out the door with their +# machine or put another way, the most popular os provided with the machine. + +# Note that if you're going to try to match "-MANUFACTURER" here (say, +# "-sun"), then you have to tell the case statement up towards the top +# that MANUFACTURER isn't an operating system. Otherwise, code above +# will signal an error saying that MANUFACTURER isn't an operating +# system, and we'll never get to this point. + +case $basic_machine in + score-*) + os=-elf + ;; + spu-*) + os=-elf + ;; + *-acorn) + os=-riscix1.2 + ;; + arm*-rebel) + os=-linux + ;; + arm*-semi) + os=-aout + ;; + c4x-* | tic4x-*) + os=-coff + ;; + # This must come before the *-dec entry. + pdp10-*) + os=-tops20 + ;; + pdp11-*) + os=-none + ;; + *-dec | vax-*) + os=-ultrix4.2 + ;; + m68*-apollo) + os=-domain + ;; + i386-sun) + os=-sunos4.0.2 + ;; + m68000-sun) + os=-sunos3 + # This also exists in the configure program, but was not the + # default. + # os=-sunos4 + ;; + m68*-cisco) + os=-aout + ;; + mep-*) + os=-elf + ;; + mips*-cisco) + os=-elf + ;; + mips*-*) + os=-elf + ;; + or32-*) + os=-coff + ;; + *-tti) # must be before sparc entry or we get the wrong os. 
+ os=-sysv3 + ;; + sparc-* | *-sun) + os=-sunos4.1.1 + ;; + *-be) + os=-beos + ;; + *-haiku) + os=-haiku + ;; + *-ibm) + os=-aix + ;; + *-knuth) + os=-mmixware + ;; + *-wec) + os=-proelf + ;; + *-winbond) + os=-proelf + ;; + *-oki) + os=-proelf + ;; + *-hp) + os=-hpux + ;; + *-hitachi) + os=-hiux + ;; + i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) + os=-sysv + ;; + *-cbm) + os=-amigaos + ;; + *-dg) + os=-dgux + ;; + *-dolphin) + os=-sysv3 + ;; + m68k-ccur) + os=-rtu + ;; + m88k-omron*) + os=-luna + ;; + *-next ) + os=-nextstep + ;; + *-sequent) + os=-ptx + ;; + *-crds) + os=-unos + ;; + *-ns) + os=-genix + ;; + i370-*) + os=-mvs + ;; + *-next) + os=-nextstep3 + ;; + *-gould) + os=-sysv + ;; + *-highlevel) + os=-bsd + ;; + *-encore) + os=-bsd + ;; + *-sgi) + os=-irix + ;; + *-siemens) + os=-sysv4 + ;; + *-masscomp) + os=-rtu + ;; + f30[01]-fujitsu | f700-fujitsu) + os=-uxpv + ;; + *-rom68k) + os=-coff + ;; + *-*bug) + os=-coff + ;; + *-apple) + os=-macos + ;; + *-atari*) + os=-mint + ;; + *) + os=-none + ;; +esac +fi + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. 
+vendor=unknown +case $basic_machine in + *-unknown) + case $os in + -riscix*) + vendor=acorn + ;; + -sunos*) + vendor=sun + ;; + -aix*) + vendor=ibm + ;; + -beos*) + vendor=be + ;; + -hpux*) + vendor=hp + ;; + -mpeix*) + vendor=hp + ;; + -hiux*) + vendor=hitachi + ;; + -unos*) + vendor=crds + ;; + -dgux*) + vendor=dg + ;; + -luna*) + vendor=omron + ;; + -genix*) + vendor=ns + ;; + -mvs* | -opened*) + vendor=ibm + ;; + -os400*) + vendor=ibm + ;; + -ptx*) + vendor=sequent + ;; + -tpf*) + vendor=ibm + ;; + -vxsim* | -vxworks* | -windiss*) + vendor=wrs + ;; + -aux*) + vendor=apple + ;; + -hms*) + vendor=hitachi + ;; + -mpw* | -macos*) + vendor=apple + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + vendor=atari + ;; + -vos*) + vendor=stratus + ;; + esac + basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` + ;; +esac + +echo $basic_machine$os +exit + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/configure b/configure new file mode 100755 index 0000000..30ecab4 --- /dev/null +++ b/configure @@ -0,0 +1,8127 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.69 for ccminer 2014.03.18. +# +# +# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. +# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. 
Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +# Use a proper internal environment variable to ensure we don't fall + # into an infinite loop, continuously re-executing ourselves. + if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then + _as_can_reexec=no; export _as_can_reexec; + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. 
+BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +as_fn_exit 255 + fi + # We don't want this to propagate to other subprocesses. + { _as_can_reexec=; unset _as_can_reexec;} +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : + +else + exitcode=1; echo positional parameters were not saved. 
+fi +test x\$exitcode = x0 || exit 1 +test -x / || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 +test \$(( 1 + 1 )) = 2 || exit 1" + if (eval "$as_required") 2>/dev/null; then : + as_have_required=yes +else + as_have_required=no +fi + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : + +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. + as_shell=$as_dir/$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : + CONFIG_SHELL=$as_shell as_have_required=yes + if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : + break 2 +fi +fi + done;; + esac + as_found=false +done +$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi; } +IFS=$as_save_IFS + + + if test "x$CONFIG_SHELL" != x; then : + export CONFIG_SHELL + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. 
+BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 +fi + + if test x$as_have_required = xno; then : + $as_echo "$0: This script requires a shell more modern than all" + $as_echo "$0: the shells that I found on your system." + if test x${ZSH_VERSION+set} = xset ; then + $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" + $as_echo "$0: be upgraded to zsh 4.3.4 or later." + else + $as_echo "$0: Please tell bug-autoconf@gnu.org about your system, +$0: including any error possibly output before this +$0: message. Then install a modern shell, or manually run +$0: the script under such a shell if you do have one." + fi + exit 1 +fi +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. 
+as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? 
-eq 1` + } +fi # as_fn_arith + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + + as_lineno_1=$LINENO as_lineno_1a=$LINENO + as_lineno_2=$LINENO as_lineno_2a=$LINENO + eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && + test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { + # Blame Lee E. McMahon (1931-1989) for sed's syntax. 
:-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + + # If we had to re-execute with $CONFIG_SHELL, we're ensured to have + # already done that, so ensure we don't try to do so again and fall + # in an infinite loop. This has already happened in practice. + _as_can_reexec=no; export _as_can_reexec + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 
2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +test -n "$DJDIR" || exec 7<&0 &1 + +# Name of the host. +# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= + +# Identity of this package. +PACKAGE_NAME='ccminer' +PACKAGE_TARNAME='ccminer' +PACKAGE_VERSION='2014.03.18' +PACKAGE_STRING='ccminer 2014.03.18' +PACKAGE_BUGREPORT='' +PACKAGE_URL='' + +ac_unique_file="cpu-miner.c" +# Factoring default headers for most tests. 
+ac_includes_default="\ +#include +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#ifdef HAVE_SYS_STAT_H +# include +#endif +#ifdef STDC_HEADERS +# include +# include +#else +# ifdef HAVE_STDLIB_H +# include +# endif +#endif +#ifdef HAVE_STRING_H +# if !defined STDC_HEADERS && defined HAVE_MEMORY_H +# include +# endif +# include +#endif +#ifdef HAVE_STRINGS_H +# include +#endif +#ifdef HAVE_INTTYPES_H +# include +#endif +#ifdef HAVE_STDINT_H +# include +#endif +#ifdef HAVE_UNISTD_H +# include +#endif" + +ac_subst_vars='am__EXEEXT_FALSE +am__EXEEXT_TRUE +LTLIBOBJS +LIBOBJS +NVCC +CUDA_LDFLAGS +CUDA_LIBS +CUDA_CFLAGS +WS2_LIBS +PTHREAD_LIBS +PTHREAD_FLAGS +JANSSON_LIBS +LIBCURL +LIBCURL_CPPFLAGS +_libcurl_config +ARCH_x86_64_FALSE +ARCH_x86_64_TRUE +ARCH_x86_FALSE +ARCH_x86_TRUE +HAVE_WINDOWS_FALSE +HAVE_WINDOWS_TRUE +WANT_JANSSON_FALSE +WANT_JANSSON_TRUE +ALLOCA +OPENMP_CFLAGS +am__fastdepCXX_FALSE +am__fastdepCXX_TRUE +CXXDEPMODE +ac_ct_CXX +CXXFLAGS +CXX +RANLIB +am__fastdepCCAS_FALSE +am__fastdepCCAS_TRUE +CCASDEPMODE +CCASFLAGS +CCAS +EGREP +GREP +CPP +am__fastdepCC_FALSE +am__fastdepCC_TRUE +CCDEPMODE +am__nodep +AMDEPBACKSLASH +AMDEP_FALSE +AMDEP_TRUE +am__quote +am__include +DEPDIR +OBJEXT +EXEEXT +ac_ct_CC +CPPFLAGS +LDFLAGS +CFLAGS +CC +MAINT +MAINTAINER_MODE_FALSE +MAINTAINER_MODE_TRUE +AM_BACKSLASH +AM_DEFAULT_VERBOSITY +AM_DEFAULT_V +AM_V +am__untar +am__tar +AMTAR +am__leading_dot +SET_MAKE +AWK +mkdir_p +MKDIR_P +INSTALL_STRIP_PROGRAM +STRIP +install_sh +MAKEINFO +AUTOHEADER +AUTOMAKE +AUTOCONF +ACLOCAL +VERSION +PACKAGE +CYGPATH_W +am__isrc +INSTALL_DATA +INSTALL_SCRIPT +INSTALL_PROGRAM +target_os +target_vendor +target_cpu +target +host_os +host_vendor +host_cpu +host +build_os +build_vendor +build_cpu +build +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +localstatedir +sharedstatedir +sysconfdir +datadir +datarootdir 
+libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_URL +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL' +ac_subst_files='' +ac_user_opts=' +enable_option_checking +enable_silent_rules +enable_maintainer_mode +enable_dependency_tracking +enable_openmp +with_libcurl +with_cuda +' + ac_precious_vars='build_alias +host_alias +target_alias +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS +CPP +CCAS +CCASFLAGS +CXX +CXXFLAGS +CCC' + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. 
+ if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *=) ac_optarg= ;; + *) ac_optarg=yes ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. + + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. 
+ with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | 
--program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + 
-sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) as_fn_error $? "unrecognized option: \`$ac_option' +Try \`$0 --help' for more information" + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + case $ac_envvar in #( + '' | [0-9]* | *[!_$as_cr_alnum]* ) + as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; + esac + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + as_fn_error $? "missing argument to $ac_option" +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; + *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. 
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + as_fn_error $? "working directory cannot be determined" +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + as_fn_error $? "pwd does not report name of working directory" + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures ccminer 2014.03.18 to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. 
+ +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking ...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/ccminer] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF + +Program names: + --program-prefix=PREFIX prepend PREFIX to installed program names + --program-suffix=SUFFIX append SUFFIX to installed program names + --program-transform-name=PROGRAM run sed PROGRAM on installed program names + +System types: + --build=BUILD configure for building on BUILD [guessed] + --host=HOST cross-compile to build programs to run on HOST [BUILD] + --target=TARGET configure for building compilers for TARGET [HOST] +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of ccminer 2014.03.18:";; + esac + cat <<\_ACEOF + +Optional Features: + --disable-option-checking ignore unrecognized --enable/--with options + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --enable-silent-rules less 
verbose build output (undo: "make V=1") + --disable-silent-rules verbose build output (undo: "make V=0") + --enable-maintainer-mode + enable make rules and dependencies not useful (and + sometimes confusing) to the casual installer + --enable-dependency-tracking + do not reject slow dependency extractors + --disable-dependency-tracking + speeds up one-time build + --disable-openmp do not use OpenMP + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-libcurl=PREFIX look for the curl library in PREFIX/lib and headers + in PREFIX/include + --with-cuda=PATH prefix where cuda is installed default=/usr/local/cuda + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L if you have libraries in a + nonstandard directory + LIBS libraries to pass to the linker, e.g. -l + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if + you have headers in a nonstandard directory + CPP C preprocessor + CCAS assembler compiler command (defaults to CC) + CCASFLAGS assembler compiler flags (defaults to CFLAGS) + CXX C++ compiler command + CXXFLAGS C++ compiler flags + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to the package provider. +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. 
+ ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. + if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +ccminer configure 2014.03.18 +generated by GNU Autoconf 2.69 + +Copyright (C) 2012 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## + +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. 
+ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_c_try_cpp LINENO +# ---------------------- +# Try to preprocess conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_cpp () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } > conftest.i && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! 
-s conftest.err + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_cpp + +# ac_fn_cxx_try_compile LINENO +# ---------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_cxx_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_cxx_try_compile + +# ac_fn_c_try_link LINENO +# ----------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? 
+ if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_link + +# ac_fn_c_try_run LINENO +# ---------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes +# that executables *can* be run. +ac_fn_c_try_run () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; }; then : + ac_retval=0 +else + $as_echo "$as_me: program exited with status $ac_status" >&5 + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=$ac_status +fi + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_run + +# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists, giving a warning if it cannot be compiled using +# the include files in INCLUDES and setting the cache variable VAR +# accordingly. +ac_fn_c_check_header_mongrel () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if eval \${$3+:} false; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +else + # Is the header compilable? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 +$as_echo_n "checking $2 usability... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_header_compiler=yes +else + ac_header_compiler=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 +$as_echo "$ac_header_compiler" >&6; } + +# Is the header present? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 +$as_echo_n "checking $2 presence... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include <$2> +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + ac_header_preproc=yes +else + ac_header_preproc=no +fi +rm -f conftest.err conftest.i conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 +$as_echo "$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( + yes:no: ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 +$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; + no:yes:* ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 +$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5 +$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 +$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 +$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; +esac + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... 
" >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=\$ac_header_compiler" +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_mongrel + +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile + +# ac_fn_c_check_decl LINENO SYMBOL VAR INCLUDES +# --------------------------------------------- +# Tests whether SYMBOL is declared in INCLUDES, setting cache variable VAR +# accordingly. +ac_fn_c_check_decl () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + as_decl_name=`echo $2|sed 's/ *(.*//'` + as_decl_use=`echo $2|sed -e 's/(/((/' -e 's/)/) 0&/' -e 's/,/) 0& (/g'` + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $as_decl_name is declared" >&5 +$as_echo_n "checking whether $as_decl_name is declared... 
" >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +#ifndef $as_decl_name +#ifdef __cplusplus + (void) $as_decl_use; +#else + (void) $as_decl_name; +#endif +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_decl + +# ac_fn_c_check_type LINENO TYPE VAR INCLUDES +# ------------------------------------------- +# Tests whether TYPE exists after having included INCLUDES, setting cache +# variable VAR accordingly. +ac_fn_c_check_type () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +if (sizeof ($2)) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +int +main () +{ +if (sizeof (($2))) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + eval "$3=yes" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_type + +# ac_fn_c_check_func LINENO FUNC VAR +# ---------------------------------- +# Tests whether FUNC exists, setting the cache variable VAR accordingly +ac_fn_c_check_func () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +/* Define $2 to an innocuous variant, in case declares $2. + For example, HP-UX 11i declares gettimeofday. */ +#define $2 innocuous_$2 + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $2 (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $2 + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $2 (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined __stub_$2 || defined __stub___$2 +choke me +#endif + +int +main () +{ +return $2 (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_func +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by ccminer $as_me 2014.03.18, which was +generated by GNU Autoconf 2.69. Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. ## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ $as_echo "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. +ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. 
+trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + $as_echo "## ---------------- ## +## Cache variables. ## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + $as_echo "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + $as_echo "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + $as_echo "## ----------- ## +## confdefs.h. 
## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + $as_echo "$as_me: caught signal $ac_signal" + $as_echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +$as_echo "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_URL "$PACKAGE_URL" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +ac_site_file1=NONE +ac_site_file2=NONE +if test -n "$CONFIG_SITE"; then + # We do not want a PATH search for config.site. 
+ case $CONFIG_SITE in #(( + -*) ac_site_file1=./$CONFIG_SITE;; + */*) ac_site_file1=$CONFIG_SITE;; + *) ac_site_file1=./$CONFIG_SITE;; + esac +elif test "x$prefix" != xNONE; then + ac_site_file1=$prefix/share/config.site + ac_site_file2=$prefix/etc/config.site +else + ac_site_file1=$ac_default_prefix/share/config.site + ac_site_file2=$ac_default_prefix/etc/config.site +fi +for ac_site_file in "$ac_site_file1" "$ac_site_file2" +do + test "x$ac_site_file" = xNONE && continue + if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +$as_echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" \ + || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See \`config.log' for more details" "$LINENO" 5; } + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +$as_echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +$as_echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. 
+ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. + ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. 
Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. ## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + +ac_aux_dir= +for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do + if test -f "$ac_dir/install-sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f "$ac_dir/install.sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + elif test -f "$ac_dir/shtool"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/shtool install -c" + break + fi +done +if test -z "$ac_aux_dir"; then + as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5 +fi + +# These three variables are undocumented and unsupported, +# and are intended to be withdrawn in a future Autoconf release. +# They can cause serious problems if a builder's source tree is in a directory +# whose full name contains unusual characters. +ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. +ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. +ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. + + +# Make sure we can run config.sub. 
+$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || + as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5 + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5 +$as_echo_n "checking build system type... " >&6; } +if ${ac_cv_build+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_build_alias=$build_alias +test "x$ac_build_alias" = x && + ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"` +test "x$ac_build_alias" = x && + as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5 +ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` || + as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5 + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 +$as_echo "$ac_cv_build" >&6; } +case $ac_cv_build in +*-*-*) ;; +*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;; +esac +build=$ac_cv_build +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_build +shift +build_cpu=$1 +build_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +build_os=$* +IFS=$ac_save_IFS +case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5 +$as_echo_n "checking host system type... " >&6; } +if ${ac_cv_host+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "x$host_alias" = x; then + ac_cv_host=$ac_cv_build +else + ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` || + as_fn_error $? "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5 +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5 +$as_echo "$ac_cv_host" >&6; } +case $ac_cv_host in +*-*-*) ;; +*) as_fn_error $? 
"invalid value of canonical host" "$LINENO" 5;; +esac +host=$ac_cv_host +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_host +shift +host_cpu=$1 +host_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +host_os=$* +IFS=$ac_save_IFS +case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking target system type" >&5 +$as_echo_n "checking target system type... " >&6; } +if ${ac_cv_target+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "x$target_alias" = x; then + ac_cv_target=$ac_cv_host +else + ac_cv_target=`$SHELL "$ac_aux_dir/config.sub" $target_alias` || + as_fn_error $? "$SHELL $ac_aux_dir/config.sub $target_alias failed" "$LINENO" 5 +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_target" >&5 +$as_echo "$ac_cv_target" >&6; } +case $ac_cv_target in +*-*-*) ;; +*) as_fn_error $? "invalid value of canonical target" "$LINENO" 5;; +esac +target=$ac_cv_target +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_target +shift +target_cpu=$1 +target_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +target_os=$* +IFS=$ac_save_IFS +case $target_os in *\ *) target_os=`echo "$target_os" | sed 's/ /-/g'`;; esac + + +# The aliases save the names the user supplied, while $host etc. +# will get canonicalized. +test -n "$target_alias" && + test "$program_prefix$program_suffix$program_transform_name" = \ + NONENONEs,x,x, && + program_prefix=${target_alias}- + + +am__api_version='1.13' + +# Find a good install program. We prefer a C program (faster), +# so one script is as good as another. 
But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AmigaOS /C/install, which installs bootblocks on floppy discs +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# OS/2's system install, which has a completely different semantic +# ./install, which can be erroneously created by make from ./install.sh. +# Reject install programs that cannot install multiple files. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 +$as_echo_n "checking for a BSD-compatible install... " >&6; } +if test -z "$INSTALL"; then +if ${ac_cv_path_install+:} false; then : + $as_echo_n "(cached) " >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + # Account for people who put trailing slashes in PATH elements. +case $as_dir/ in #(( + ./ | .// | /[cC]/* | \ + /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ + ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \ + /usr/ucb/* ) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. + for ac_prog in ginstall scoinst install; do + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then + if test $ac_prog = install && + grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. + : + elif test $ac_prog = install && + grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # program-specific install script used by HP pwplus--don't use. 
+ : + else + rm -rf conftest.one conftest.two conftest.dir + echo one > conftest.one + echo two > conftest.two + mkdir conftest.dir + if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" && + test -s conftest.one && test -s conftest.two && + test -s conftest.dir/conftest.one && + test -s conftest.dir/conftest.two + then + ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" + break 3 + fi + fi + fi + done + done + ;; +esac + + done +IFS=$as_save_IFS + +rm -rf conftest.one conftest.two conftest.dir + +fi + if test "${ac_cv_path_install+set}" = set; then + INSTALL=$ac_cv_path_install + else + # As a last resort, use the slow shell script. Don't cache a + # value for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + INSTALL=$ac_install_sh + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 +$as_echo "$INSTALL" >&6; } + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. +test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5 +$as_echo_n "checking whether build environment is sane... " >&6; } +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. +am_lf=' +' +case `pwd` in + *[\\\"\#\$\&\'\`$am_lf]*) + as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;; +esac +case $srcdir in + *[\\\"\#\$\&\'\`$am_lf\ \ ]*) + as_fn_error $? "unsafe srcdir value: '$srcdir'" "$LINENO" 5;; +esac + +# Do 'set' in a subshell so we don't clobber the current shell's +# arguments. 
Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + am_has_slept=no + for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$*" = "X"; then + # -L didn't work. + set X `ls -t "$srcdir/configure" conftest.file` + fi + if test "$*" != "X $srcdir/configure conftest.file" \ + && test "$*" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + as_fn_error $? "ls -t appears to fail. Make sure there is not a broken + alias in your environment" "$LINENO" 5 + fi + if test "$2" = conftest.file || test $am_try -eq 2; then + break + fi + # Just in case. + sleep 1 + am_has_slept=yes + done + test "$2" = conftest.file + ) +then + # Ok. + : +else + as_fn_error $? "newly created file is older than distributed files! +Check your system clock" "$LINENO" 5 +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +# If we didn't sleep, we still need to ensure time stamps of config.status and +# generated files are strictly newer. +am_sleep_pid= +if grep 'slept: no' conftest.file >/dev/null 2>&1; then + ( sleep 1 ) & + am_sleep_pid=$! +fi + +rm -f conftest.file + +test "$program_prefix" != NONE && + program_transform_name="s&^&$program_prefix&;$program_transform_name" +# Use a double $ so make ignores it. +test "$program_suffix" != NONE && + program_transform_name="s&\$&$program_suffix&;$program_transform_name" +# Double any \ or $. +# By default was `s,x,x', remove it if useless. 
+ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' +program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"` + +# expand $ac_aux_dir to an absolute path +am_aux_dir=`cd "$ac_aux_dir" && pwd` + +if test x"${MISSING+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; + *) + MISSING="\${SHELL} $am_aux_dir/missing" ;; + esac +fi +# Use eval to expand $SHELL +if eval "$MISSING --is-lightweight"; then + am_missing_run="$MISSING " +else + am_missing_run= + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5 +$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;} +fi + +# Only compute a default install_sh when the user has not supplied one; +# test whether the variable is set, not whether its value is "set". +if test x"${install_sh+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi + +# Installed binaries are usually stripped using 'strip' when the user +# runs "make install-strip". However 'strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the 'STRIP' environment variable to overrule this program. +if test "$cross_compiling" != no; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 +$as_echo "$STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_STRIP="strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 +$as_echo "$ac_ct_STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_STRIP" = x; then + STRIP=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + STRIP=$ac_ct_STRIP + fi +else + STRIP="$ac_cv_prog_STRIP" +fi + +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5 +$as_echo_n "checking for a thread-safe mkdir -p... " >&6; } +if test -z "$MKDIR_P"; then + if ${ac_cv_path_mkdir+:} false; then : + $as_echo_n "(cached) " >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in mkdir gmkdir; do + for ac_exec_ext in '' $ac_executable_extensions; do + as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue + case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #( + 'mkdir (GNU coreutils) '* | \ + 'mkdir (coreutils) '* | \ + 'mkdir (fileutils) '4.1*) + ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext + break 3;; + esac + done + done + done +IFS=$as_save_IFS + +fi + + test -d ./--version && rmdir ./--version + if test "${ac_cv_path_mkdir+set}" = set; then + MKDIR_P="$ac_cv_path_mkdir -p" + else + # As a last resort, use the slow shell script. 
Don't cache a + # value for MKDIR_P within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + MKDIR_P="$ac_install_sh -d" + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5 +$as_echo "$MKDIR_P" >&6; } + +for ac_prog in gawk mawk nawk awk +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_AWK+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$AWK"; then + ac_cv_prog_AWK="$AWK" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_AWK="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AWK=$ac_cv_prog_AWK +if test -n "$AWK"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5 +$as_echo "$AWK" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$AWK" && break +done + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5 +$as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; } +set x ${MAKE-make} +ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` +if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat >conftest.make <<\_ACEOF +SHELL = /bin/sh +all: + @echo '@@@%%%=$(MAKE)=@@@%%%' +_ACEOF +# GNU make sometimes prints "make[1]: Entering ...", which would confuse us. 
+case `${MAKE-make} -f conftest.make 2>/dev/null` in + *@@@%%%=?*=@@@%%%*) + eval ac_cv_prog_make_${ac_make}_set=yes;; + *) + eval ac_cv_prog_make_${ac_make}_set=no;; +esac +rm -f conftest.make +fi +if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + SET_MAKE= +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + SET_MAKE="MAKE=${MAKE-make}" +fi + +rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null + +# Check whether --enable-silent-rules was given. +if test "${enable_silent_rules+set}" = set; then : + enableval=$enable_silent_rules; +fi + +case $enable_silent_rules in # ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=1;; +esac +am_make=${MAKE-make} +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 +$as_echo_n "checking whether $am_make supports nested variables... " >&6; } +if ${am_cv_make_support_nested_variables+:} false; then : + $as_echo_n "(cached) " >&6 +else + if $as_echo 'TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 +$as_echo "$am_cv_make_support_nested_variables" >&6; } +if test $am_cv_make_support_nested_variables = yes; then + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AM_BACKSLASH='\' + +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." 
+ am__isrc=' -I$(srcdir)' + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5 + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi + + +# Define the identity of the package. + PACKAGE='ccminer' + VERSION='2014.03.18' + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE "$PACKAGE" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define VERSION "$VERSION" +_ACEOF + +# Some tools Automake needs. + +ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"} + + +AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"} + + +AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"} + + +AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"} + + +MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} + +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. For more background, see: +# +# +mkdir_p='$(MKDIR_P)' + +# We need awk for the "check" target. The system "awk" is bad on +# some platforms. +# Always define AMTAR for backward compatibility. Yes, it's still used +# in the wild :-( We should find a proper way to deprecate it ... +AMTAR='$${TAR-tar}' + + +# We'll loop over all known methods to create a tar archive until one works. +_am_tools='gnutar pax cpio none' + +am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -' + + + + + + +ac_config_headers="$ac_config_headers cpuminer-config.h" + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5 +$as_echo_n "checking whether to enable maintainer-specific portions of Makefiles... " >&6; } + # Check whether --enable-maintainer-mode was given. 
+if test "${enable_maintainer_mode+set}" = set; then : + enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval +else + USE_MAINTAINER_MODE=no +fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $USE_MAINTAINER_MODE" >&5 +$as_echo "$USE_MAINTAINER_MODE" >&6; } + if test $USE_MAINTAINER_MODE = yes; then + MAINTAINER_MODE_TRUE= + MAINTAINER_MODE_FALSE='#' +else + MAINTAINER_MODE_TRUE='#' + MAINTAINER_MODE_FALSE= +fi + + MAINT=$MAINTAINER_MODE_TRUE + + + +DEPDIR="${am__leading_dot}deps" + +ac_config_commands="$ac_config_commands depfiles" + + +am_make=${MAKE-make} +cat > confinc << 'END' +am__doit: + @echo this is the am__doit target +.PHONY: am__doit +END +# If we don't find an include directive, just comment out the code. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for style of include used by $am_make" >&5 +$as_echo_n "checking for style of include used by $am_make... " >&6; } +am__include="#" +am__quote= +_am_result=none +# First try GNU make style include. +echo "include confinc" > confmf +# Ignore all kinds of additional output from 'make'. +case `$am_make -s -f confmf 2> /dev/null` in #( +*the\ am__doit\ target*) + am__include=include + am__quote= + _am_result=GNU + ;; +esac +# Now try BSD make style include. +if test "$am__include" = "#"; then + echo '.include "confinc"' > confmf + case `$am_make -s -f confmf 2> /dev/null` in #( + *the\ am__doit\ target*) + am__include=.include + am__quote="\"" + _am_result=BSD + ;; + esac +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $_am_result" >&5 +$as_echo "$_am_result" >&6; } +rm -f confinc confmf + +# Check whether --enable-dependency-tracking was given. 
+if test "${enable_dependency_tracking+set}" = set; then : + enableval=$enable_dependency_tracking; +fi + +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi + if test "x$enable_dependency_tracking" != xno; then + AMDEP_TRUE= + AMDEP_FALSE='#' +else + AMDEP_TRUE='#' + AMDEP_FALSE= +fi + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi + + +test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... 
+ 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +$as_echo_n "checking whether the C compiler works... " >&6; } +ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. 
+for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + ac_file='' +fi +if test -z "$ac_file"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "C compiler cannot create executables +See \`config.log' for more details" "$LINENO" 5; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +$as_echo_n "checking for C compiler default output file name... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +$as_echo "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +$as_echo_n "checking for suffix of executables... 
" >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest conftest$ac_cv_exeext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +$as_echo "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +$as_echo_n "checking whether we are cross compiling... 
" >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +$as_echo "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +$as_echo_n "checking for suffix of object files... " >&6; } +if ${ac_cv_objext+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? 
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +$as_echo "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } +if ${ac_cv_c_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... 
" >&6; } +if ${ac_cv_prog_cc_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +else + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if ${ac_cv_prog_cc_c89+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. 
*/ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 
1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c89" != xno; then : + +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +depcc="$CC" am_compiler_list= + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +$as_echo_n "checking dependency style of $depcc... " >&6; } +if ${am_cv_CC_dependencies_compiler_type+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. 
For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CC_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. 
+ am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CC_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_CC_dependencies_compiler_type=none +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 +$as_echo "$am_cv_CC_dependencies_compiler_type" >&6; } +CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then + am__fastdepCC_TRUE= + am__fastdepCC_FALSE='#' +else + am__fastdepCC_TRUE='#' + am__fastdepCC_FALSE= +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C99" >&5 +$as_echo_n "checking for $CC option to accept ISO C99... " >&6; } +if ${ac_cv_prog_cc_c99+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c99=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#include +#include +#include + +// Check varargs macros. These examples are taken from C99 6.10.3.5. +#define debug(...) fprintf (stderr, __VA_ARGS__) +#define showlist(...) puts (#__VA_ARGS__) +#define report(test,...) ((test) ? puts (#test) : printf (__VA_ARGS__)) +static void +test_varargs_macros (void) +{ + int x = 1234; + int y = 5678; + debug ("Flag"); + debug ("X = %d\n", x); + showlist (The first, second, and third items.); + report (x>y, "x is %d but y is %d", x, y); +} + +// Check long long types. 
+#define BIG64 18446744073709551615ull +#define BIG32 4294967295ul +#define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0) +#if !BIG_OK + your preprocessor is broken; +#endif +#if BIG_OK +#else + your preprocessor is broken; +#endif +static long long int bignum = -9223372036854775807LL; +static unsigned long long int ubignum = BIG64; + +struct incomplete_array +{ + int datasize; + double data[]; +}; + +struct named_init { + int number; + const wchar_t *name; + double average; +}; + +typedef const char *ccp; + +static inline int +test_restrict (ccp restrict text) +{ + // See if C++-style comments work. + // Iterate through items via the restricted pointer. + // Also check for declarations in for loops. + for (unsigned int i = 0; *(text+i) != '\0'; ++i) + continue; + return 0; +} + +// Check varargs and va_copy. +static void +test_varargs (const char *format, ...) +{ + va_list args; + va_start (args, format); + va_list args_copy; + va_copy (args_copy, args); + + const char *str; + int number; + float fnumber; + + while (*format) + { + switch (*format++) + { + case 's': // string + str = va_arg (args_copy, const char *); + break; + case 'd': // int + number = va_arg (args_copy, int); + break; + case 'f': // float + fnumber = va_arg (args_copy, double); + break; + default: + break; + } + } + va_end (args_copy); + va_end (args); +} + +int +main () +{ + + // Check bool. + _Bool success = false; + + // Check restrict. + if (test_restrict ("String literal") == 0) + success = true; + char *restrict newvar = "Another string"; + + // Check varargs. + test_varargs ("s, d' f .", "string", 65, 34.234); + test_varargs_macros (); + + // Check flexible array members. + struct incomplete_array *ia = + malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10)); + ia->datasize = 10; + for (int i = 0; i < ia->datasize; ++i) + ia->data[i] = i * 1.234; + + // Check named initializers. 
+ struct named_init ni = { + .number = 34, + .name = L"Test wide string", + .average = 543.34343, + }; + + ni.number = 58; + + int dynamic_array[ni.number]; + dynamic_array[ni.number - 1] = 543; + + // work around unused variable warnings + return (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == 'x' + || dynamic_array[ni.number - 1] != 543); + + ; + return 0; +} +_ACEOF +for ac_arg in '' -std=gnu99 -std=c99 -c99 -AC99 -D_STDC_C99= -qlanglvl=extc99 +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c99=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c99" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c99" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c99" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 +$as_echo "$ac_cv_prog_cc_c99" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c99" != xno; then : + +fi + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 +$as_echo_n "checking how to run the C preprocessor... " >&6; } +# On Suns, sometimes $CPP names a directory. 
+if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if ${ac_cv_prog_CPP+:} false; then : + $as_echo_n "(cached) " >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 +$as_echo "$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. 
+ # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details" "$LINENO" 5; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +$as_echo_n "checking for grep that handles long lines and -e... " >&6; } +if ${ac_cv_path_GREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_GREP" || continue +# Check for GNU ac_path_GREP and select it if it is found. + # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +$as_echo "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +$as_echo_n "checking for egrep... 
" >&6; } +if ${ac_cv_path_EGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_EGREP" || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? 
"no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +$as_echo "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +if test $ac_cv_c_compiler_gnu = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC needs -traditional" >&5 +$as_echo_n "checking whether $CC needs -traditional... " >&6; } +if ${ac_cv_prog_gcc_traditional+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_pattern="Autoconf.*'x'" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +Autoconf TIOCGETP +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "$ac_pattern" >/dev/null 2>&1; then : + ac_cv_prog_gcc_traditional=yes +else + ac_cv_prog_gcc_traditional=no +fi +rm -f conftest* + + + if test $ac_cv_prog_gcc_traditional = no; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +Autoconf TCGETA +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "$ac_pattern" >/dev/null 2>&1; then : + ac_cv_prog_gcc_traditional=yes +fi +rm -f conftest* + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_gcc_traditional" >&5 +$as_echo "$ac_cv_prog_gcc_traditional" >&6; } + if test $ac_cv_prog_gcc_traditional = yes; then + CC="$CC -traditional" + fi +fi + +if test "x$CC" != xcc; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC and cc understand -c and -o together" >&5 +$as_echo_n "checking whether $CC and cc understand -c and -o together... " >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether cc understands -c and -o together" >&5 +$as_echo_n "checking whether cc understands -c and -o together... 
" >&6; } +fi +set dummy $CC; ac_cc=`$as_echo "$2" | + sed 's/[^a-zA-Z0-9_]/_/g;s/^[0-9]/_/'` +if eval \${ac_cv_prog_cc_${ac_cc}_c_o+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +# Make sure it works both with $CC and with simple cc. +# We do the test twice because some compilers refuse to overwrite an +# existing .o file with -o, though they will create one. +ac_try='$CC -c conftest.$ac_ext -o conftest2.$ac_objext >&5' +rm -f conftest2.* +if { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && + test -f conftest2.$ac_objext && { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; +then + eval ac_cv_prog_cc_${ac_cc}_c_o=yes + if test "x$CC" != xcc; then + # Test first that cc exists at all. + if { ac_try='cc -c conftest.$ac_ext >&5' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; }; then + ac_try='cc -c conftest.$ac_ext -o conftest2.$ac_objext >&5' + rm -f conftest2.* + if { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && + test -f conftest2.$ac_objext && { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; + then + # cc works too. + : + else + # cc exists but doesn't like -o. + eval ac_cv_prog_cc_${ac_cc}_c_o=no + fi + fi + fi +else + eval ac_cv_prog_cc_${ac_cc}_c_o=no +fi +rm -f core conftest* + +fi +if eval test \$ac_cv_prog_cc_${ac_cc}_c_o = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +$as_echo "#define NO_MINUS_C_MINUS_O 1" >>confdefs.h + +fi + +# FIXME: we rely on the cache variable name because +# there is no other way. +set dummy $CC +am_cc=`echo $2 | sed 's/[^a-zA-Z0-9_]/_/g;s/^[0-9]/_/'` +eval am_t=\$ac_cv_prog_cc_${am_cc}_c_o +if test "$am_t" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi + + +# By default we simply use the C compiler to build assembly code. 
+ +test "${CCAS+set}" = set || CCAS=$CC +test "${CCASFLAGS+set}" = set || CCASFLAGS=$CFLAGS + + + +depcc="$CCAS" am_compiler_list= + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +$as_echo_n "checking dependency style of $depcc... " >&6; } +if ${am_cv_CCAS_dependencies_compiler_type+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CCAS_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. 
+ : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. 
+ # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CCAS_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. + rm -rf conftest.dir +else + am_cv_CCAS_dependencies_compiler_type=none +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CCAS_dependencies_compiler_type" >&5 +$as_echo "$am_cv_CCAS_dependencies_compiler_type" >&6; } +CCASDEPMODE=depmode=$am_cv_CCAS_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CCAS_dependencies_compiler_type" = gcc3; then + am__fastdepCCAS_TRUE= + am__fastdepCCAS_FALSE='#' +else + am__fastdepCCAS_TRUE='#' + am__fastdepCCAS_FALSE= +fi + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +RANLIB=$ac_cv_prog_RANLIB +if test -n "$RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 +$as_echo "$RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_RANLIB"; then + ac_ct_RANLIB=$RANLIB + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_RANLIB"; then + ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_RANLIB="ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB +if test -n "$ac_ct_RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 +$as_echo "$ac_ct_RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_RANLIB" = x; then + RANLIB=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + RANLIB=$ac_ct_RANLIB + fi +else + RANLIB="$ac_cv_prog_RANLIB" +fi + +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu +if test -z "$CXX"; then + if test -n "$CCC"; then + CXX=$CCC + else + if test -n "$ac_tool_prefix"; then + for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CXX"; then + ac_cv_prog_CXX="$CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CXX="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CXX=$ac_cv_prog_CXX +if test -n "$CXX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5 +$as_echo "$CXX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CXX" && break + done +fi +if test -z "$CXX"; then + ac_ct_CXX=$CXX + for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CXX"; then + ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CXX="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CXX=$ac_cv_prog_ac_ct_CXX +if test -n "$ac_ct_CXX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CXX" >&5 +$as_echo "$ac_ct_CXX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CXX" && break +done + + if test "x$ac_ct_CXX" = x; then + CXX="g++" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CXX=$ac_ct_CXX + fi +fi + + fi +fi +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... + 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C++ compiler" >&5 +$as_echo_n "checking whether we are using the GNU C++ compiler... 
" >&6; } +if ${ac_cv_cxx_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_cxx_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu" >&5 +$as_echo "$ac_cv_cxx_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GXX=yes +else + GXX= +fi +ac_test_CXXFLAGS=${CXXFLAGS+set} +ac_save_CXXFLAGS=$CXXFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5 +$as_echo_n "checking whether $CXX accepts -g... " >&6; } +if ${ac_cv_prog_cxx_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_cxx_werror_flag=$ac_cxx_werror_flag + ac_cxx_werror_flag=yes + ac_cv_prog_cxx_g=no + CXXFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ac_cv_prog_cxx_g=yes +else + CXXFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + +else + ac_cxx_werror_flag=$ac_save_cxx_werror_flag + CXXFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ac_cv_prog_cxx_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_cxx_werror_flag=$ac_save_cxx_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g" >&5 +$as_echo "$ac_cv_prog_cxx_g" >&6; } +if test "$ac_test_CXXFLAGS" = set; then + CXXFLAGS=$ac_save_CXXFLAGS +elif test $ac_cv_prog_cxx_g = yes; then + if test "$GXX" = yes; then + CXXFLAGS="-g -O2" + else + CXXFLAGS="-g" + fi +else + if test "$GXX" = yes; then + CXXFLAGS="-O2" + else + CXXFLAGS= + fi +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +depcc="$CXX" am_compiler_list= + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +$as_echo_n "checking dependency style of $depcc... " >&6; } +if ${am_cv_CXX_dependencies_compiler_type+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. 
For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CXX_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. 
+ if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CXX_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_CXX_dependencies_compiler_type=none +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CXX_dependencies_compiler_type" >&5 +$as_echo "$am_cv_CXX_dependencies_compiler_type" >&6; } +CXXDEPMODE=depmode=$am_cv_CXX_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CXX_dependencies_compiler_type" = gcc3; then + am__fastdepCXX_TRUE= + am__fastdepCXX_FALSE='#' +else + am__fastdepCXX_TRUE='#' + am__fastdepCXX_FALSE= +fi + + + + OPENMP_CFLAGS= + # Check whether --enable-openmp was given. +if test "${enable_openmp+set}" = set; then : + enableval=$enable_openmp; +fi + + if test "$enable_openmp" != no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to support OpenMP" >&5 +$as_echo_n "checking for $CC option to support OpenMP... " >&6; } +if ${ac_cv_prog_c_openmp+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#ifndef _OPENMP + choke me +#endif +#include +int main () { return omp_get_num_threads (); } + +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_prog_c_openmp='none needed' +else + ac_cv_prog_c_openmp='unsupported' + for ac_option in -fopenmp -xopenmp -openmp -mp -omp -qsmp=omp -homp \ + -Popenmp --openmp; do + ac_save_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS $ac_option" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#ifndef _OPENMP + choke me +#endif +#include +int main () { return omp_get_num_threads (); } + +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_prog_c_openmp=$ac_option +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + CFLAGS=$ac_save_CFLAGS + if test "$ac_cv_prog_c_openmp" != unsupported; then + break + fi + done +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_c_openmp" >&5 +$as_echo "$ac_cv_prog_c_openmp" >&6; } + case $ac_cv_prog_c_openmp in #( + "none needed" | unsupported) + ;; #( + *) + OPENMP_CFLAGS=$ac_cv_prog_c_openmp ;; + esac + fi + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 +$as_echo_n "checking for ANSI C header files... " >&6; } +if ${ac_cv_header_stdc+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_stdc=yes +else + ac_cv_header_stdc=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then : + : +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + +else + ac_cv_header_stdc=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 +$as_echo "$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +$as_echo "#define STDC_HEADERS 1" >>confdefs.h + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. 
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default +" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_header in sys/endian.h sys/param.h syslog.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + +# sys/sysctl.h requires sys/types.h on FreeBSD +# sys/sysctl.h requires sys/param.h on OpenBSD +for ac_header in sys/sysctl.h +do : + ac_fn_c_check_header_compile "$LINENO" "sys/sysctl.h" "ac_cv_header_sys_sysctl_h" "#include +#ifdef HAVE_SYS_PARAM_H +#include +#endif + +" +if test "x$ac_cv_header_sys_sysctl_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_SYS_SYSCTL_H 1 +_ACEOF + +fi + +done + + +ac_fn_c_check_decl "$LINENO" "be32dec" "ac_cv_have_decl_be32dec" "$ac_includes_default +#ifdef HAVE_SYS_ENDIAN_H +#include +#endif + +" +if test "x$ac_cv_have_decl_be32dec" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_BE32DEC $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "le32dec" "ac_cv_have_decl_le32dec" "$ac_includes_default +#ifdef HAVE_SYS_ENDIAN_H +#include +#endif + +" +if test "x$ac_cv_have_decl_le32dec" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_LE32DEC $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "be32enc" "ac_cv_have_decl_be32enc" "$ac_includes_default +#ifdef HAVE_SYS_ENDIAN_H +#include +#endif + +" +if test 
"x$ac_cv_have_decl_be32enc" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_BE32ENC $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "le32enc" "ac_cv_have_decl_le32enc" "$ac_includes_default +#ifdef HAVE_SYS_ENDIAN_H +#include +#endif + +" +if test "x$ac_cv_have_decl_le32enc" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_LE32ENC $ac_have_decl +_ACEOF + + +ac_fn_c_check_type "$LINENO" "size_t" "ac_cv_type_size_t" "$ac_includes_default" +if test "x$ac_cv_type_size_t" = xyes; then : + +else + +cat >>confdefs.h <<_ACEOF +#define size_t unsigned int +_ACEOF + +fi + +# The Ultrix 4.2 mips builtin alloca declared by alloca.h only works +# for constant arguments. Useless! +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for working alloca.h" >&5 +$as_echo_n "checking for working alloca.h... " >&6; } +if ${ac_cv_working_alloca_h+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +char *p = (char *) alloca (2 * sizeof (int)); + if (p) return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_working_alloca_h=yes +else + ac_cv_working_alloca_h=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_working_alloca_h" >&5 +$as_echo "$ac_cv_working_alloca_h" >&6; } +if test $ac_cv_working_alloca_h = yes; then + +$as_echo "#define HAVE_ALLOCA_H 1" >>confdefs.h + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for alloca" >&5 +$as_echo_n "checking for alloca... " >&6; } +if ${ac_cv_func_alloca_works+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#ifdef __GNUC__ +# define alloca __builtin_alloca +#else +# ifdef _MSC_VER +# include +# define alloca _alloca +# else +# ifdef HAVE_ALLOCA_H +# include +# else +# ifdef _AIX + #pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +void *alloca (size_t); +# endif +# endif +# endif +# endif +#endif + +int +main () +{ +char *p = (char *) alloca (1); + if (p) return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_func_alloca_works=yes +else + ac_cv_func_alloca_works=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_alloca_works" >&5 +$as_echo "$ac_cv_func_alloca_works" >&6; } + +if test $ac_cv_func_alloca_works = yes; then + +$as_echo "#define HAVE_ALLOCA 1" >>confdefs.h + +else + # The SVR3 libPW and SVR4 libucb both contain incompatible functions +# that cause trouble. Some versions do not even contain alloca or +# contain a buggy version. If you still want to use their alloca, +# use ar to extract alloca.o from them instead of compiling alloca.c. + +ALLOCA=\${LIBOBJDIR}alloca.$ac_objext + +$as_echo "#define C_ALLOCA 1" >>confdefs.h + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether \`alloca.c' needs Cray hooks" >&5 +$as_echo_n "checking whether \`alloca.c' needs Cray hooks... " >&6; } +if ${ac_cv_os_cray+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#if defined CRAY && ! 
defined CRAY2 +webecray +#else +wenotbecray +#endif + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "webecray" >/dev/null 2>&1; then : + ac_cv_os_cray=yes +else + ac_cv_os_cray=no +fi +rm -f conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_os_cray" >&5 +$as_echo "$ac_cv_os_cray" >&6; } +if test $ac_cv_os_cray = yes; then + for ac_func in _getb67 GETB67 getb67; do + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : + +cat >>confdefs.h <<_ACEOF +#define CRAY_STACKSEG_END $ac_func +_ACEOF + + break +fi + + done +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking stack direction for C alloca" >&5 +$as_echo_n "checking stack direction for C alloca... " >&6; } +if ${ac_cv_c_stack_direction+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + ac_cv_c_stack_direction=0 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_includes_default +int +find_stack_direction (int *addr, int depth) +{ + int dir, dummy = 0; + if (! addr) + addr = &dummy; + *addr = addr < &dummy ? 1 : addr == &dummy ? 0 : -1; + dir = depth ? 
find_stack_direction (addr, depth - 1) : 0; + return dir + dummy; +} + +int +main (int argc, char **argv) +{ + return find_stack_direction (0, argc + !argv + 20) < 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + ac_cv_c_stack_direction=1 +else + ac_cv_c_stack_direction=-1 +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_stack_direction" >&5 +$as_echo "$ac_cv_c_stack_direction" >&6; } +cat >>confdefs.h <<_ACEOF +#define STACK_DIRECTION $ac_cv_c_stack_direction +_ACEOF + + +fi + +for ac_func in getopt_long +do : + ac_fn_c_check_func "$LINENO" "getopt_long" "ac_cv_func_getopt_long" +if test "x$ac_cv_func_getopt_long" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_GETOPT_LONG 1 +_ACEOF + +fi +done + + +case $target in + i*86-*-*) + have_x86=true + ;; + x86_64-*-*) + have_x86=true + have_x86_64=true + ;; + amd64-*-*) + have_x86=true + have_x86_64=true + ;; +esac + +PTHREAD_FLAGS="-pthread" +WS2_LIBS="" + +case $target in + *-*-mingw*) + have_win32=true + PTHREAD_FLAGS="" + WS2_LIBS="-lws2_32" + ;; +esac + +if test x$have_x86 = xtrue +then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we can compile AVX code" >&5 +$as_echo_n "checking whether we can compile AVX code... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +asm ("vmovdqa %ymm0, %ymm1"); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +$as_echo "#define USE_AVX 1" >>confdefs.h + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we can compile XOP code" >&5 +$as_echo_n "checking whether we can compile XOP code... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ +asm ("vprotd \$7, %xmm0, %xmm1"); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +$as_echo "#define USE_XOP 1" >>confdefs.h + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the XOP instruction set." >&5 +$as_echo "$as_me: WARNING: The assembler does not support the XOP instruction set." >&2;} + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we can compile AVX2 code" >&5 +$as_echo_n "checking whether we can compile AVX2 code... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +asm ("vpaddd %ymm0, %ymm1, %ymm2"); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +$as_echo "#define USE_AVX2 1" >>confdefs.h + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the AVX2 instruction set." >&5 +$as_echo "$as_me: WARNING: The assembler does not support the AVX2 instruction set." >&2;} + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the AVX instruction set." >&5 +$as_echo "$as_me: WARNING: The assembler does not support the AVX instruction set." >&2;} + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for json_loads in -ljansson" >&5 +$as_echo_n "checking for json_loads in -ljansson... 
" >&6; } +if ${ac_cv_lib_jansson_json_loads+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ljansson $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char json_loads (); +int +main () +{ +return json_loads (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_jansson_json_loads=yes +else + ac_cv_lib_jansson_json_loads=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_jansson_json_loads" >&5 +$as_echo "$ac_cv_lib_jansson_json_loads" >&6; } +if test "x$ac_cv_lib_jansson_json_loads" = xyes; then : + request_jansson=false +else + request_jansson=true +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5 +$as_echo_n "checking for pthread_create in -lpthread... " >&6; } +if ${ac_cv_lib_pthread_pthread_create+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpthread $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char pthread_create (); +int +main () +{ +return pthread_create (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_pthread_pthread_create=yes +else + ac_cv_lib_pthread_pthread_create=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_create" >&5 +$as_echo "$ac_cv_lib_pthread_pthread_create" >&6; } +if test "x$ac_cv_lib_pthread_pthread_create" = xyes; then : + PTHREAD_LIBS="-lpthread" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthreadGC2" >&5 +$as_echo_n "checking for pthread_create in -lpthreadGC2... " >&6; } +if ${ac_cv_lib_pthreadGC2_pthread_create+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpthreadGC2 $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char pthread_create (); +int +main () +{ +return pthread_create (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_pthreadGC2_pthread_create=yes +else + ac_cv_lib_pthreadGC2_pthread_create=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthreadGC2_pthread_create" >&5 +$as_echo "$ac_cv_lib_pthreadGC2_pthread_create" >&6; } +if test "x$ac_cv_lib_pthreadGC2_pthread_create" = xyes; then : + PTHREAD_LIBS="-lpthreadGC2" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthreadGC1" >&5 +$as_echo_n "checking for pthread_create in -lpthreadGC1... 
" >&6; } +if ${ac_cv_lib_pthreadGC1_pthread_create+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpthreadGC1 $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char pthread_create (); +int +main () +{ +return pthread_create (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_pthreadGC1_pthread_create=yes +else + ac_cv_lib_pthreadGC1_pthread_create=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthreadGC1_pthread_create" >&5 +$as_echo "$ac_cv_lib_pthreadGC1_pthread_create" >&6; } +if test "x$ac_cv_lib_pthreadGC1_pthread_create" = xyes; then : + PTHREAD_LIBS="-lpthreadGC1" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthreadGC" >&5 +$as_echo_n "checking for pthread_create in -lpthreadGC... " >&6; } +if ${ac_cv_lib_pthreadGC_pthread_create+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpthreadGC $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char pthread_create (); +int +main () +{ +return pthread_create (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_pthreadGC_pthread_create=yes +else + ac_cv_lib_pthreadGC_pthread_create=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthreadGC_pthread_create" >&5 +$as_echo "$ac_cv_lib_pthreadGC_pthread_create" >&6; } +if test "x$ac_cv_lib_pthreadGC_pthread_create" = xyes; then : + PTHREAD_LIBS="-lpthreadGC" + +fi + +fi + +fi + +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SSL_library_init in -lssl" >&5 +$as_echo_n "checking for SSL_library_init in -lssl... " >&6; } +if ${ac_cv_lib_ssl_SSL_library_init+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lssl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char SSL_library_init (); +int +main () +{ +return SSL_library_init (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_ssl_SSL_library_init=yes +else + ac_cv_lib_ssl_SSL_library_init=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ssl_SSL_library_init" >&5 +$as_echo "$ac_cv_lib_ssl_SSL_library_init" >&6; } +if test "x$ac_cv_lib_ssl_SSL_library_init" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBSSL 1 +_ACEOF + + LIBS="-lssl $LIBS" + +else + as_fn_error $? 
"OpenSSL library required" "$LINENO" 5 +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for EVP_DigestFinal_ex in -lcrypto" >&5 +$as_echo_n "checking for EVP_DigestFinal_ex in -lcrypto... " >&6; } +if ${ac_cv_lib_crypto_EVP_DigestFinal_ex+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcrypto $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char EVP_DigestFinal_ex (); +int +main () +{ +return EVP_DigestFinal_ex (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_crypto_EVP_DigestFinal_ex=yes +else + ac_cv_lib_crypto_EVP_DigestFinal_ex=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_crypto_EVP_DigestFinal_ex" >&5 +$as_echo "$ac_cv_lib_crypto_EVP_DigestFinal_ex" >&6; } +if test "x$ac_cv_lib_crypto_EVP_DigestFinal_ex" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBCRYPTO 1 +_ACEOF + + LIBS="-lcrypto $LIBS" + +else + as_fn_error $? 
"OpenSSL library required" "$LINENO" 5 +fi + + + if test x$request_jansson = xtrue; then + WANT_JANSSON_TRUE= + WANT_JANSSON_FALSE='#' +else + WANT_JANSSON_TRUE='#' + WANT_JANSSON_FALSE= +fi + + if test x$have_win32 = xtrue; then + HAVE_WINDOWS_TRUE= + HAVE_WINDOWS_FALSE='#' +else + HAVE_WINDOWS_TRUE='#' + HAVE_WINDOWS_FALSE= +fi + + if test x$have_x86 = xtrue; then + ARCH_x86_TRUE= + ARCH_x86_FALSE='#' +else + ARCH_x86_TRUE='#' + ARCH_x86_FALSE= +fi + + if test x$have_x86_64 = xtrue; then + ARCH_x86_64_TRUE= + ARCH_x86_64_FALSE='#' +else + ARCH_x86_64_TRUE='#' + ARCH_x86_64_FALSE= +fi + + +if test x$request_jansson = xtrue +then + JANSSON_LIBS="compat/jansson/libjansson.a" +else + JANSSON_LIBS=-ljansson +fi + + + + + + + + + + + + + + + + + + + + + + + + + + +# Check whether --with-libcurl was given. +if test "${with_libcurl+set}" = set; then : + withval=$with_libcurl; _libcurl_with=$withval +else + _libcurl_with=yes +fi + + + if test "$_libcurl_with" != "no" ; then + + for ac_prog in gawk mawk nawk awk +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_AWK+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$AWK"; then + ac_cv_prog_AWK="$AWK" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_AWK="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AWK=$ac_cv_prog_AWK +if test -n "$AWK"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5 +$as_echo "$AWK" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$AWK" && break +done + + + _libcurl_version_parse="eval $AWK '{split(\$NF,A,\".\"); X=256*256*A[1]+256*A[2]+A[3]; print X;}'" + + _libcurl_try_link=yes + + if test -d "$_libcurl_with" ; then + LIBCURL_CPPFLAGS="-I$withval/include" + _libcurl_ldflags="-L$withval/lib" + # Extract the first word of "curl-config", so it can be a program name with args. +set dummy curl-config; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path__libcurl_config+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $_libcurl_config in + [\\/]* | ?:[\\/]*) + ac_cv_path__libcurl_config="$_libcurl_config" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in "$withval/bin" +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path__libcurl_config="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +_libcurl_config=$ac_cv_path__libcurl_config +if test -n "$_libcurl_config"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $_libcurl_config" >&5 +$as_echo "$_libcurl_config" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + else + # Extract the first word of "curl-config", so it can be a program name with args. +set dummy curl-config; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path__libcurl_config+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $_libcurl_config in + [\\/]* | ?:[\\/]*) + ac_cv_path__libcurl_config="$_libcurl_config" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path__libcurl_config="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +_libcurl_config=$ac_cv_path__libcurl_config +if test -n "$_libcurl_config"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $_libcurl_config" >&5 +$as_echo "$_libcurl_config" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi + + if test x$_libcurl_config != "x" ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for the version of libcurl" >&5 +$as_echo_n "checking for the version of libcurl... 
" >&6; } +if ${libcurl_cv_lib_curl_version+:} false; then : + $as_echo_n "(cached) " >&6 +else + libcurl_cv_lib_curl_version=`$_libcurl_config --version | $AWK '{print $2}'` +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libcurl_cv_lib_curl_version" >&5 +$as_echo "$libcurl_cv_lib_curl_version" >&6; } + + _libcurl_version=`echo $libcurl_cv_lib_curl_version | $_libcurl_version_parse` + _libcurl_wanted=`echo 7.15.2 | $_libcurl_version_parse` + + if test $_libcurl_wanted -gt 0 ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libcurl >= version 7.15.2" >&5 +$as_echo_n "checking for libcurl >= version 7.15.2... " >&6; } +if ${libcurl_cv_lib_version_ok+:} false; then : + $as_echo_n "(cached) " >&6 +else + + if test $_libcurl_version -ge $_libcurl_wanted ; then + libcurl_cv_lib_version_ok=yes + else + libcurl_cv_lib_version_ok=no + fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libcurl_cv_lib_version_ok" >&5 +$as_echo "$libcurl_cv_lib_version_ok" >&6; } + fi + + if test $_libcurl_wanted -eq 0 || test x$libcurl_cv_lib_version_ok = xyes ; then + if test x"$LIBCURL_CPPFLAGS" = "x" ; then + LIBCURL_CPPFLAGS=`$_libcurl_config --cflags` + fi + if test x"$LIBCURL" = "x" ; then + LIBCURL=`$_libcurl_config --libs` + + # This is so silly, but Apple actually has a bug in their + # curl-config script. Fixed in Tiger, but there are still + # lots of Panther installs around. + case "${host}" in + powerpc-apple-darwin7*) + LIBCURL=`echo $LIBCURL | sed -e 's|-arch i386||g'` + ;; + esac + fi + + # All curl-config scripts support --feature + _libcurl_features=`$_libcurl_config --feature` + + # Is it modern enough to have --protocols? 
(7.12.4) + if test $_libcurl_version -ge 461828 ; then + _libcurl_protocols=`$_libcurl_config --protocols` + fi + else + _libcurl_try_link=no + fi + + unset _libcurl_wanted + fi + + if test $_libcurl_try_link = yes ; then + + # we didn't find curl-config, so let's see if the user-supplied + # link line (or failing that, "-lcurl") is enough. + LIBCURL=${LIBCURL-"$_libcurl_ldflags -lcurl"} + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether libcurl is usable" >&5 +$as_echo_n "checking whether libcurl is usable... " >&6; } +if ${libcurl_cv_lib_curl_usable+:} false; then : + $as_echo_n "(cached) " >&6 +else + + _libcurl_save_cppflags=$CPPFLAGS + CPPFLAGS="$LIBCURL_CPPFLAGS $CPPFLAGS" + _libcurl_save_libs=$LIBS + LIBS="$LIBCURL $LIBS" + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext # link probe: must include <curl/curl.h> so curl_easy_setopt and the CURLOPT_* names are declared +/* end confdefs.h. */ +#include <curl/curl.h> +int +main () +{ + +/* Try and use a few common options to force a failure if we are + missing symbols or can't link. */ +int x; +curl_easy_setopt(NULL,CURLOPT_URL,NULL); +x=CURL_ERROR_SIZE; +x=CURLOPT_WRITEFUNCTION; +x=CURLOPT_FILE; +x=CURLOPT_ERRORBUFFER; +x=CURLOPT_STDERR; +x=CURLOPT_VERBOSE; +if (x) ; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + libcurl_cv_lib_curl_usable=yes +else + libcurl_cv_lib_curl_usable=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + + CPPFLAGS=$_libcurl_save_cppflags + LIBS=$_libcurl_save_libs + unset _libcurl_save_cppflags + unset _libcurl_save_libs + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libcurl_cv_lib_curl_usable" >&5 +$as_echo "$libcurl_cv_lib_curl_usable" >&6; } + + if test $libcurl_cv_lib_curl_usable = yes ; then + + # Does curl_free() exist in this version of libcurl?
+ # If not, fake it with free() + + _libcurl_save_cppflags=$CPPFLAGS + CPPFLAGS="$CPPFLAGS $LIBCURL_CPPFLAGS" + _libcurl_save_libs=$LIBS + LIBS="$LIBS $LIBCURL" + + ac_fn_c_check_func "$LINENO" "curl_free" "ac_cv_func_curl_free" +if test "x$ac_cv_func_curl_free" = xyes; then : + +else + +$as_echo "#define curl_free free" >>confdefs.h + +fi + + + CPPFLAGS=$_libcurl_save_cppflags + LIBS=$_libcurl_save_libs + unset _libcurl_save_cppflags + unset _libcurl_save_libs + + +$as_echo "#define HAVE_LIBCURL 1" >>confdefs.h + + + + + for _libcurl_feature in $_libcurl_features ; do + cat >>confdefs.h <<_ACEOF +#define `$as_echo "libcurl_feature_$_libcurl_feature" | $as_tr_cpp` 1 +_ACEOF + + eval `$as_echo "libcurl_feature_$_libcurl_feature" | $as_tr_sh`=yes + done + + if test "x$_libcurl_protocols" = "x" ; then + + # We don't have --protocols, so just assume that all + # protocols are available + _libcurl_protocols="HTTP FTP FILE TELNET LDAP DICT TFTP" + + if test x$libcurl_feature_SSL = xyes ; then + _libcurl_protocols="$_libcurl_protocols HTTPS" + + # FTPS wasn't standards-compliant until version + # 7.11.0 (0x070b00 == 461568) + if test $_libcurl_version -ge 461568; then + _libcurl_protocols="$_libcurl_protocols FTPS" + fi + fi + + # RTSP, IMAP, POP3 and SMTP were added in + # 7.20.0 (0x071400 == 463872) + if test $_libcurl_version -ge 463872; then + _libcurl_protocols="$_libcurl_protocols RTSP IMAP POP3 SMTP" + fi + fi + + for _libcurl_protocol in $_libcurl_protocols ; do + cat >>confdefs.h <<_ACEOF +#define `$as_echo "libcurl_protocol_$_libcurl_protocol" | $as_tr_cpp` 1 +_ACEOF + + eval `$as_echo "libcurl_protocol_$_libcurl_protocol" | $as_tr_sh`=yes + done + else + unset LIBCURL + unset LIBCURL_CPPFLAGS + fi + fi + + unset _libcurl_try_link + unset _libcurl_version_parse + unset _libcurl_config + unset _libcurl_feature + unset _libcurl_features + unset _libcurl_protocol + unset _libcurl_protocols + unset _libcurl_version + unset _libcurl_ldflags + fi + + if test 
x$_libcurl_with = xno || test x$libcurl_cv_lib_curl_usable != xyes ; then + # This is the IF-NO path + as_fn_error $? "Missing required libcurl >= 7.15.2" "$LINENO" 5 + else + # This is the IF-YES path + : + fi + + unset _libcurl_with + + + + + + + +ac_config_files="$ac_config_files Makefile compat/Makefile compat/jansson/Makefile" + + +ARCH=`uname -m` +if test "x$ARCH" = "xx86_64"; # POSIX '=' instead of bash-only '==' (dash rejects '=='); quoted and x-prefixed so an empty uname result cannot break test +then + SUFFIX="64" +else + SUFFIX="" +fi + + +# Check whether --with-cuda was given. +if test "${with_cuda+set}" = set; then : + withval=$with_cuda; +fi + + +if test -n "$with_cuda" +then + CUDA_CFLAGS="-I$with_cuda/include" + CUDA_LIBS="-lcudart" + CUDA_LDFLAGS="-L$with_cuda/lib$SUFFIX" + NVCC="$with_cuda/bin/nvcc" +else + CUDA_CFLAGS="-I/usr/local/cuda/include" + CUDA_LIBS="-lcudart -static-libstdc++" + CUDA_LDFLAGS="-L/usr/local/cuda/lib$SUFFIX" + NVCC="nvcc" +fi + + + + + + + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars.
+( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + if test "x$cache_file" != "x/dev/null"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +$as_echo "$as_me: updating cache $cache_file" >&6;} + if test ! -f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi + else + { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. 
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +U= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" + as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5 +$as_echo_n "checking that generated files are newer than configure... " >&6; } + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. + wait $am_sleep_pid 2>/dev/null + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: result: done" >&5 +$as_echo "done" >&6; } + if test -n "$EXEEXT"; then + am__EXEEXT_TRUE= + am__EXEEXT_FALSE='#' +else + am__EXEEXT_TRUE='#' + am__EXEEXT_FALSE= +fi + +if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then + as_fn_error $? "conditional \"MAINTAINER_MODE\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then + as_fn_error $? "conditional \"AMDEP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${am__fastdepCCAS_TRUE}" && test -z "${am__fastdepCCAS_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCCAS\" was never defined. 
+Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${am__fastdepCXX_TRUE}" && test -z "${am__fastdepCXX_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCXX\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WANT_JANSSON_TRUE}" && test -z "${WANT_JANSSON_FALSE}"; then + as_fn_error $? "conditional \"WANT_JANSSON\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_WINDOWS_TRUE}" && test -z "${HAVE_WINDOWS_FALSE}"; then + as_fn_error $? "conditional \"HAVE_WINDOWS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${ARCH_x86_TRUE}" && test -z "${ARCH_x86_FALSE}"; then + as_fn_error $? "conditional \"ARCH_x86\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${ARCH_x86_64_TRUE}" && test -z "${ARCH_x86_64_FALSE}"; then + as_fn_error $? "conditional \"ARCH_x86_64\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi + +: "${CONFIG_STATUS=./config.status}" +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +as_write_fail=0 +cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 +## -------------------- ## +## M4sh Initialization. 
## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. 
+if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. 
Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? 
-eq 1` + } +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! 
-f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. 
## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by ccminer $as_me 2014.03.18, which was +generated by GNU Autoconf 2.69. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + +case $ac_config_headers in *" +"*) set x $ac_config_headers; shift; ac_config_headers=$*;; +esac + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" +config_headers="$ac_config_headers" +config_commands="$ac_config_commands" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +\`$as_me' instantiates files and other configuration actions +from templates according to the current configuration. Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... 
+ + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Configuration commands: +$config_commands + +Report bugs to the package provider." + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" +ac_cs_version="\\ +ccminer config.status 2014.03.18 +configured by $0, generated by GNU Autoconf 2.69, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2012 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +INSTALL='$INSTALL' +MKDIR_P='$MKDIR_P' +AWK='$AWK' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. 
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + $as_echo "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + $as_echo "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? "missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append CONFIG_HEADERS " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + as_fn_error $? "ambiguous option: \`$1' +Try \`$0 --help' for more information.";; + --help | --hel | -h ) + $as_echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? "unrecognized option: \`$1' +Try \`$0 --help' for more information." ;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. 
## +_ASBOX + $as_echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# +# INIT-COMMANDS +# +AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. +for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "cpuminer-config.h") CONFIG_HEADERS="$CONFIG_HEADERS cpuminer-config.h" ;; + "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; + "compat/Makefile") CONFIG_FILES="$CONFIG_FILES compat/Makefile" ;; + "compat/jansson/Makefile") CONFIG_FILES="$CONFIG_FILES compat/jansson/Makefile" ;; + + *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers + test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= ac_tmp= + trap 'exit_status=$? + : "${ac_tmp:=$tmp}" + { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. 
+ +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 +ac_tmp=$tmp + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. +if test -n "$CONFIG_FILES"; then + + +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. +# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. +if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$ac_tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). 
+if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + +# Set up the scripts for CONFIG_HEADERS section. +# No need to generate them if there are no CONFIG_HEADERS. +# This happens for instance with `./config.status Makefile'. +if test -n "$CONFIG_HEADERS"; then +cat >"$ac_tmp/defines.awk" <<\_ACAWK || +BEGIN { +_ACEOF + +# Transform confdefs.h into an awk script `defines.awk', embedded as +# here-document in config.status, that substitutes the proper values into +# config.h.in to produce config.h. + +# Create a delimiter string that does not exist in confdefs.h, to ease +# handling of long lines. +ac_delim='%!_!# ' +for ac_last_try in false false :; do + ac_tt=`sed -n "/$ac_delim/p" confdefs.h` + if test -z "$ac_tt"; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done + +# For the awk script, D is an array of macro values keyed by name, +# likewise P contains macro parameters if any. Preserve backslash +# newline sequences. 
+ +ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +sed -n ' +s/.\{148\}/&'"$ac_delim"'/g +t rset +:rset +s/^[ ]*#[ ]*define[ ][ ]*/ / +t def +d +:def +s/\\$// +t bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3"/p +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p +d +:bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3\\\\\\n"\\/p +t cont +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p +t cont +d +:cont +n +s/.\{148\}/&'"$ac_delim"'/g +t clear +:clear +s/\\$// +t bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/"/p +d +:bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p +b cont +' >$CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + for (key in D) D_is_set[key] = 1 + FS = "" +} +/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { + line = \$ 0 + split(line, arg, " ") + if (arg[1] == "#") { + defundef = arg[2] + mac1 = arg[3] + } else { + defundef = substr(arg[1], 2) + mac1 = arg[2] + } + split(mac1, mac2, "(") #) + macro = mac2[1] + prefix = substr(line, 1, index(line, defundef) - 1) + if (D_is_set[macro]) { + # Preserve the white space surrounding the "#". + print prefix "define", macro P[macro] D[macro] + next + } else { + # Replace #undef with comments. This is necessary, for example, + # in the case of _POSIX_SOURCE, which is predefined and required + # on some systems where configure will not decide to define it. + if (defundef == "undef") { + print "/*", prefix defundef, macro, "*/" + next + } + } +} +{ print } +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 +fi # test -n "$CONFIG_HEADERS" + + +eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :C $CONFIG_COMMANDS" +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? 
"invalid tag \`$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$ac_tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +$as_echo "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`$as_echo "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + + case $INSTALL in + [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; + *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; + esac + ac_MKDIR_P=$MKDIR_P + case $MKDIR_P in + [\\/$]* | ?:[\\/]* ) ;; + */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;; + esac +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. 
+ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +s&@INSTALL@&$ac_INSTALL&;t t +s&@MKDIR_P@&$ac_MKDIR_P&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ + >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$ac_tmp/stdin" + case $ac_file in + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + :H) + # + # CONFIG_HEADER + # + if test x"$ac_file" != x-; then + { + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" + } >"$ac_tmp/config.h" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +$as_echo "$as_me: $ac_file is unchanged" >&6;} + else + rm -f "$ac_file" + mv "$ac_tmp/config.h" "$ac_file" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + fi + else + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ + || as_fn_error $? "could not create -" "$LINENO" 5 + fi +# Compute "$ac_file"'s index in $config_headers. 
+_am_arg="$ac_file" +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" || +$as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$_am_arg" : 'X\(//\)[^/]' \| \ + X"$_am_arg" : 'X\(//\)$' \| \ + X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$_am_arg" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'`/stamp-h$_am_stamp_count + ;; + + :C) { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 +$as_echo "$as_me: executing $ac_file commands" >&6;} + ;; + esac + + + case $ac_file$ac_mode in + "depfiles":C) test x"$AMDEP_TRUE" != x"" || { + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + case $CONFIG_FILES in + *\'*) eval set x "$CONFIG_FILES" ;; + *) set x $CONFIG_FILES ;; + esac + shift + for mf + do + # Strip MF so we end up with the name of the file. + mf=`echo "$mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile or not. + # We used to match only the files named 'Makefile.in', but + # some people rename them; so instead we look at the file content. + # Grep'ing the first line is not enough: some people post-process + # each Makefile.in and add a new line on top of each file to say so. + # Grep'ing the whole file is not good either: AIX grep has a line + # limit of 2048, but all sed's we know have understand at least 4000. 
+ if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then + dirpart=`$as_dirname -- "$mf" || +$as_expr X"$mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$mf" : 'X\(//\)[^/]' \| \ + X"$mf" : 'X\(//\)$' \| \ + X"$mf" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$mf" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + else + continue + fi + # Extract the definition of DEPDIR, am__include, and am__quote + # from the Makefile without running 'make'. + DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` + test -z "$DEPDIR" && continue + am__include=`sed -n 's/^am__include = //p' < "$mf"` + test -z "$am__include" && continue + am__quote=`sed -n 's/^am__quote = //p' < "$mf"` + # Find all dependency output files, they are included files with + # $(DEPDIR) in their names. We invoke sed twice because it is the + # simplest approach to changing $(DEPDIR) to its actual value in the + # expansion. + for file in `sed -n " + s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ + sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do + # Make sure the directory exists. + test -f "$dirpart/$file" && continue + fdir=`$as_dirname -- "$file" || +$as_expr X"$file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$file" : 'X\(//\)[^/]' \| \ + X"$file" : 'X\(//\)$' \| \ + X"$file" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir=$dirpart/$fdir; as_fn_mkdir_p + # echo "creating $dirpart/$file" + echo '# dummy' > "$dirpart/$file" + done + done +} + ;; + + esac +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? 
"write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..52572fc --- /dev/null +++ b/configure.ac @@ -0,0 +1,164 @@ +AC_INIT([ccminer], [2014.03.18]) + +AC_PREREQ([2.59c]) +AC_CANONICAL_SYSTEM +AC_CONFIG_SRCDIR([cpu-miner.c]) +AM_INIT_AUTOMAKE([gnu]) +AC_CONFIG_HEADERS([cpuminer-config.h]) + +dnl Make sure anyone changing configure.ac/Makefile.am has a clue +AM_MAINTAINER_MODE + +dnl Checks for programs +AC_PROG_CC_C99 +AC_PROG_GCC_TRADITIONAL +AM_PROG_CC_C_O +AM_PROG_AS +AC_PROG_RANLIB +AC_PROG_CXX +AC_OPENMP + +dnl Checks for header files +AC_HEADER_STDC +AC_CHECK_HEADERS([sys/endian.h sys/param.h syslog.h]) +# sys/sysctl.h requires sys/types.h on FreeBSD +# sys/sysctl.h requires 
dnl configure.ac for ccminer.
dnl Detects the C/C++ toolchain, optional x86 SIMD assembler support,
dnl the pthread / jansson / OpenSSL / libcurl libraries and the CUDA
dnl toolkit location, then generates the Makefiles and cpuminer-config.h.
AC_INIT([ccminer], [2014.03.18])

AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
AC_CONFIG_SRCDIR([cpu-miner.c])
AM_INIT_AUTOMAKE([gnu])
AC_CONFIG_HEADERS([cpuminer-config.h])

dnl Make sure anyone changing configure.ac/Makefile.am has a clue
AM_MAINTAINER_MODE

dnl Checks for programs
AC_PROG_CC_C99
AC_PROG_GCC_TRADITIONAL
AM_PROG_CC_C_O
AM_PROG_AS
AC_PROG_RANLIB
AC_PROG_CXX
AC_OPENMP

dnl Checks for header files
AC_HEADER_STDC
AC_CHECK_HEADERS([sys/endian.h sys/param.h syslog.h])
# sys/sysctl.h requires sys/types.h on FreeBSD
# sys/sysctl.h requires sys/param.h on OpenBSD
AC_CHECK_HEADERS([sys/sysctl.h], [], [],
[#include <sys/types.h>
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif
])

AC_CHECK_DECLS([be32dec, le32dec, be32enc, le32enc], [], [],
[AC_INCLUDES_DEFAULT
#ifdef HAVE_SYS_ENDIAN_H
#include <sys/endian.h>
#endif
])

AC_FUNC_ALLOCA
AC_CHECK_FUNCS([getopt_long])

dnl Classify the target CPU so the x86 assembler probes below only run
dnl where they make sense.
case $target in
  i*86-*-*)
    have_x86=true
    ;;
  x86_64-*-*)
    have_x86=true
    have_x86_64=true
    ;;
  amd64-*-*)
    have_x86=true
    have_x86_64=true
    ;;
esac

PTHREAD_FLAGS="-pthread"
WS2_LIBS=""

dnl MinGW has no -pthread flag and needs Winsock linked explicitly.
case $target in
  *-*-mingw*)
    have_win32=true
    PTHREAD_FLAGS=""
    WS2_LIBS="-lws2_32"
    ;;
esac

dnl XOP and AVX2 are only probed when the assembler already accepts AVX.
if test x$have_x86 = xtrue
then
  AC_MSG_CHECKING(whether we can compile AVX code)
  AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("vmovdqa %ymm0, %ymm1");])],
    AC_DEFINE(USE_AVX, 1, [Define to 1 if AVX assembly is available.])
    AC_MSG_RESULT(yes)
    AC_MSG_CHECKING(whether we can compile XOP code)
    AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("vprotd \$7, %xmm0, %xmm1");])],
      AC_DEFINE(USE_XOP, 1, [Define to 1 if XOP assembly is available.])
      AC_MSG_RESULT(yes)
    ,
      AC_MSG_RESULT(no)
      AC_MSG_WARN([The assembler does not support the XOP instruction set.])
    )
    AC_MSG_CHECKING(whether we can compile AVX2 code)
    AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("vpaddd %ymm0, %ymm1, %ymm2");])],
      AC_DEFINE(USE_AVX2, 1, [Define to 1 if AVX2 assembly is available.])
      AC_MSG_RESULT(yes)
    ,
      AC_MSG_RESULT(no)
      AC_MSG_WARN([The assembler does not support the AVX2 instruction set.])
    )
  ,
    AC_MSG_RESULT(no)
    AC_MSG_WARN([The assembler does not support the AVX instruction set.])
  )
fi

dnl Fall back to the bundled jansson copy when no system library is found.
AC_CHECK_LIB(jansson, json_loads, request_jansson=false, request_jansson=true)
AC_CHECK_LIB([pthread], [pthread_create], PTHREAD_LIBS="-lpthread",
  AC_CHECK_LIB([pthreadGC2], [pthread_create], PTHREAD_LIBS="-lpthreadGC2",
    AC_CHECK_LIB([pthreadGC1], [pthread_create], PTHREAD_LIBS="-lpthreadGC1",
      AC_CHECK_LIB([pthreadGC], [pthread_create], PTHREAD_LIBS="-lpthreadGC"
))))

AC_CHECK_LIB([ssl],[SSL_library_init], [], [AC_MSG_ERROR([OpenSSL library required])])
AC_CHECK_LIB([crypto],[EVP_DigestFinal_ex], [], [AC_MSG_ERROR([OpenSSL library required])])

AM_CONDITIONAL([WANT_JANSSON], [test x$request_jansson = xtrue])
AM_CONDITIONAL([HAVE_WINDOWS], [test x$have_win32 = xtrue])
AM_CONDITIONAL([ARCH_x86], [test x$have_x86 = xtrue])
AM_CONDITIONAL([ARCH_x86_64], [test x$have_x86_64 = xtrue])

if test x$request_jansson = xtrue
then
	JANSSON_LIBS="compat/jansson/libjansson.a"
else
	JANSSON_LIBS=-ljansson
fi

LIBCURL_CHECK_CONFIG(, 7.15.2, ,
  [AC_MSG_ERROR([Missing required libcurl >= 7.15.2])])

AC_SUBST(JANSSON_LIBS)
AC_SUBST(PTHREAD_FLAGS)
AC_SUBST(PTHREAD_LIBS)
AC_SUBST(WS2_LIBS)

AC_CONFIG_FILES([
	Makefile
	compat/Makefile
	compat/jansson/Makefile
	])

dnl find out what version we are running
dnl NOTE: this inspects the build machine via uname, not the configured target.
dnl A plain quoted test is used on purpose: m4 strips one level of square
dnl brackets, so a double-bracket test would reach the shell as a single-bracket
dnl test with an unquoted operand and the non-POSIX double-equals operator.
ARCH=`uname -m`
if test "x$ARCH" = "xx86_64"
then
    SUFFIX="64"
else
    SUFFIX=""
fi

dnl Setup CUDA paths
AC_ARG_WITH([cuda],
   [  --with-cuda=PATH    prefix where cuda is installed [default=/usr/local/cuda]])

if test -n "$with_cuda"
then
   CUDA_CFLAGS="-I$with_cuda/include"
   CUDA_LIBS="-lcudart"
   CUDA_LDFLAGS="-L$with_cuda/lib$SUFFIX"
   NVCC="$with_cuda/bin/nvcc"
else
   CUDA_CFLAGS="-I/usr/local/cuda/include"
   CUDA_LIBS="-lcudart -static-libstdc++"
   CUDA_LDFLAGS="-L/usr/local/cuda/lib$SUFFIX"
   NVCC="nvcc"
fi
AC_SUBST(CUDA_CFLAGS)
AC_SUBST(CUDA_LIBS)
AC_SUBST(CUDA_LDFLAGS)
AC_SUBST(NVCC)

AC_SUBST(OPENMP_CFLAGS)

AC_OUTPUT
#ifdef __linux /* Linux specific policy and affinity management */
#include <sched.h>

/*
 * Lower the scheduling class of the caller: try SCHED_IDLE first and fall
 * back to SCHED_BATCH when SCHED_IDLE is unavailable or the call fails.
 * Failures are ignored; this is best effort only.
 */
static inline void drop_policy(void)
{
	struct sched_param param;

	param.sched_priority = 0;

#ifdef SCHED_IDLE
	/* Early return keeps this well-formed even when SCHED_BATCH is absent. */
	if (sched_setscheduler(0, SCHED_IDLE, &param) != -1)
		return;
#endif
#ifdef SCHED_BATCH
	sched_setscheduler(0, SCHED_BATCH, &param);
#endif
}

/*
 * Restrict the caller to the single CPU `cpu`.
 * `id` is accepted for interface compatibility and is not used here.
 * The result of sched_setaffinity() is ignored (best effort).
 */
static inline void affine_to_cpu(int id, int cpu)
{
	cpu_set_t set;

	(void)id;
	CPU_ZERO(&set);
	CPU_SET(cpu, &set);
	/* The size argument is the size of the mask itself, not of a pointer. */
	sched_setaffinity(0, sizeof(set), &set);
}
#elif defined(__FreeBSD__) /* FreeBSD specific policy and affinity management */
#include <sys/cpuset.h>

/* No scheduling policy change is performed on FreeBSD. */
static inline void drop_policy(void)
{
}

/*
 * Restrict the current thread (id -1 with CPU_WHICH_TID) to the single
 * CPU `cpu`. `id` is accepted for interface compatibility and is not used.
 */
static inline void affine_to_cpu(int id, int cpu)
{
	cpuset_t set;

	(void)id;
	CPU_ZERO(&set);
	CPU_SET(cpu, &set);
	cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(cpuset_t), &set);
}
#else
/* Other platforms: policy and affinity management are no-ops. */
static inline void drop_policy(void)
{
}

static inline void affine_to_cpu(int id, int cpu)
{
	(void)id;
	(void)cpu;
}
#endif
ALGO_FUGUE256, /* Fugue256 */ +} sha256_algos; + +static const char *algo_names[] = { + "scrypt", + "sha256d", + "heavy", + "fugue256" +}; + +bool opt_debug = false; +bool opt_protocol = false; +static bool opt_benchmark = false; +bool want_longpoll = true; +bool have_longpoll = false; +bool want_stratum = true; +bool have_stratum = false; +static bool submit_old = false; +bool use_syslog = false; +static bool opt_background = false; +static bool opt_quiet = false; +static int opt_retries = -1; +static int opt_fail_pause = 30; +int opt_timeout = 270; +static int opt_scantime = 5; +static json_t *opt_config; +static const bool opt_time = true; +static sha256_algos opt_algo = ALGO_HEAVY; +static int opt_n_threads; +bool opt_trust_pool = false; +uint16_t opt_vote = 9999; +static int num_processors; +static char *rpc_url; +static char *rpc_userpass; +static char *rpc_user, *rpc_pass; +char *opt_cert; +char *opt_proxy; +long opt_proxy_type; +struct thr_info *thr_info; +static int work_thr_id; +int longpoll_thr_id = -1; +int stratum_thr_id = -1; +struct work_restart *work_restart = NULL; +static struct stratum_ctx stratum; + +pthread_mutex_t applog_lock; +static pthread_mutex_t stats_lock; + +static unsigned long accepted_count = 0L; +static unsigned long rejected_count = 0L; +static double *thr_hashrates; + +#ifdef HAVE_GETOPT_LONG +#include +#else +struct option { + const char *name; + int has_arg; + int *flag; + int val; +}; +#endif + +static char const usage[] = "\ +Usage: " PROGRAM_NAME " [OPTIONS]\n\ +Options:\n\ + -a, --algo=ALGO specify the algorithm to use\n\ + scrypt scrypt(1024, 1, 1) (default)\n\ + sha256d SHA-256d\n\ + heavy Heavycoin hash\n\ + -v, --vote=VOTE block reward vote\n\ + -m, --trust-pool trust the max block reward vote (maxvote) sent by the pool\n\ + -o, --url=URL URL of mining server\n\ + -O, --userpass=U:P username:password pair for mining server\n\ + -u, --user=USERNAME username for mining server\n\ + -p, --pass=PASSWORD password for mining 
server\n\ + --cert=FILE certificate for mining server using SSL\n\ + -x, --proxy=[PROTOCOL://]HOST[:PORT] connect through a proxy\n\ + -t, --threads=N number of miner threads (default: number of processors)\n\ + -r, --retries=N number of times to retry if a network call fails\n\ + (default: retry indefinitely)\n\ + -R, --retry-pause=N time to pause between retries, in seconds (default: 30)\n\ + -T, --timeout=N network timeout, in seconds (default: 270)\n\ + -s, --scantime=N upper bound on time spent scanning current work when\n\ + long polling is unavailable, in seconds (default: 5)\n\ + --no-longpoll disable X-Long-Polling support\n\ + --no-stratum disable X-Stratum support\n\ + -q, --quiet disable per-thread hashmeter output\n\ + -D, --debug enable debug output\n\ + -P, --protocol-dump verbose dump of protocol-level activities\n" +#ifdef HAVE_SYSLOG_H +"\ + -S, --syslog use system log for output messages\n" +#endif +#ifndef WIN32 +"\ + -B, --background run the miner in the background\n" +#endif +"\ + --benchmark run in offline benchmark mode\n\ + -c, --config=FILE load a JSON-format configuration file\n\ + -V, --version display version information and exit\n\ + -h, --help display this help text and exit\n\ +"; + +static char const short_options[] = +#ifndef WIN32 + "B" +#endif +#ifdef HAVE_SYSLOG_H + "S" +#endif + "a:c:Dhp:Px:qr:R:s:t:T:o:u:O:Vmv:"; + +static struct option const options[] = { + { "algo", 1, NULL, 'a' }, +#ifndef WIN32 + { "background", 0, NULL, 'B' }, +#endif + { "benchmark", 0, NULL, 1005 }, + { "cert", 1, NULL, 1001 }, + { "config", 1, NULL, 'c' }, + { "debug", 0, NULL, 'D' }, + { "help", 0, NULL, 'h' }, + { "no-longpoll", 0, NULL, 1003 }, + { "no-stratum", 0, NULL, 1007 }, + { "pass", 1, NULL, 'p' }, + { "protocol-dump", 0, NULL, 'P' }, + { "proxy", 1, NULL, 'x' }, + { "quiet", 0, NULL, 'q' }, + { "retries", 1, NULL, 'r' }, + { "retry-pause", 1, NULL, 'R' }, + { "scantime", 1, NULL, 's' }, +#ifdef HAVE_SYSLOG_H + { "syslog", 0, NULL, 'S' }, 
+#endif + { "threads", 1, NULL, 't' }, + { "vote", 1, NULL, 'v' }, + { "trust-pool", 0, NULL, 'm' }, + { "timeout", 1, NULL, 'T' }, + { "url", 1, NULL, 'o' }, + { "user", 1, NULL, 'u' }, + { "userpass", 1, NULL, 'O' }, + { "version", 0, NULL, 'V' }, + { 0, 0, 0, 0 } +}; + +struct work { + uint32_t data[32]; + uint32_t target[8]; + uint32_t maxvote; + + char job_id[128]; + size_t xnonce2_len; + unsigned char xnonce2[32]; +}; + +static struct work g_work; +static time_t g_work_time; +static pthread_mutex_t g_work_lock; + +static bool jobj_binary(const json_t *obj, const char *key, + void *buf, size_t buflen) +{ + const char *hexstr; + json_t *tmp; + + tmp = json_object_get(obj, key); + if (unlikely(!tmp)) { + applog(LOG_ERR, "JSON key '%s' not found", key); + return false; + } + hexstr = json_string_value(tmp); + if (unlikely(!hexstr)) { + applog(LOG_ERR, "JSON key '%s' is not a string", key); + return false; + } + if (!hex2bin((unsigned char*)buf, hexstr, buflen)) + return false; + + return true; +} + +static bool work_decode(const json_t *val, struct work *work) +{ + int i; + + if (unlikely(!jobj_binary(val, "data", work->data, sizeof(work->data)))) { + applog(LOG_ERR, "JSON inval data"); + goto err_out; + } + if (unlikely(!jobj_binary(val, "target", work->target, sizeof(work->target)))) { + applog(LOG_ERR, "JSON inval target"); + goto err_out; + } + if (opt_algo == ALGO_HEAVY) { + if (unlikely(!jobj_binary(val, "maxvote", &work->maxvote, sizeof(work->maxvote)))) { + work->maxvote = 1024; + } + } else work->maxvote = 0; + + for (i = 0; i < ARRAY_SIZE(work->data); i++) + work->data[i] = le32dec(work->data + i); + for (i = 0; i < ARRAY_SIZE(work->target); i++) + work->target[i] = le32dec(work->target + i); + + return true; + +err_out: + return false; +} + +static void share_result(int result, const char *reason) +{ + char s[345]; + double hashrate; + int i; + + hashrate = 0.; + pthread_mutex_lock(&stats_lock); + for (i = 0; i < opt_n_threads; i++) + hashrate += 
thr_hashrates[i]; + result ? accepted_count++ : rejected_count++; + pthread_mutex_unlock(&stats_lock); + + sprintf(s, hashrate >= 1e6 ? "%.0f" : "%.2f", 1e-3 * hashrate); + applog(LOG_INFO, "accepted: %lu/%lu (%.2f%%), %s khash/s %s", + accepted_count, + accepted_count + rejected_count, + 100. * accepted_count / (accepted_count + rejected_count), + s, + result ? "(yay!!!)" : "(booooo)"); + + if (opt_debug && reason) + applog(LOG_DEBUG, "DEBUG: reject reason: %s", reason); +} + +static bool submit_upstream_work(CURL *curl, struct work *work) +{ + char *str = NULL; + json_t *val, *res, *reason; + char s[345]; + int i; + bool rc = false; + + /* pass if the previous hash is not the current previous hash */ + if (memcmp(work->data + 1, g_work.data + 1, 32)) { + if (opt_debug) + applog(LOG_DEBUG, "DEBUG: stale work detected, discarding"); + return true; + } + + if (have_stratum) { + uint32_t ntime, nonce; + uint16_t nvote; + char *ntimestr, *noncestr, *xnonce2str, *nvotestr; + + le32enc(&ntime, work->data[17]); + le32enc(&nonce, work->data[19]); + be16enc(&nvote, *((uint16_t*)&work->data[20])); + + ntimestr = bin2hex((const unsigned char *)(&ntime), 4); + noncestr = bin2hex((const unsigned char *)(&nonce), 4); + xnonce2str = bin2hex(work->xnonce2, work->xnonce2_len); + nvotestr = bin2hex((const unsigned char *)(&nvote), 2); + if (opt_algo == ALGO_HEAVY) { + sprintf(s, + "{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}", + rpc_user, work->job_id, xnonce2str, ntimestr, noncestr, nvotestr); + } else { + sprintf(s, + "{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}", + rpc_user, work->job_id, xnonce2str, ntimestr, noncestr); + } + free(ntimestr); + free(noncestr); + free(xnonce2str); + free(nvotestr); + + if (unlikely(!stratum_send_line(&stratum, s))) { + applog(LOG_ERR, "submit_upstream_work stratum_send_line failed"); + goto out; + } + } else { + + /* build hex string */ 
+ + if (opt_algo != ALGO_HEAVY) { + for (i = 0; i < ARRAY_SIZE(work->data); i++) + le32enc(work->data + i, work->data[i]); + } + str = bin2hex((unsigned char *)work->data, sizeof(work->data)); + if (unlikely(!str)) { + applog(LOG_ERR, "submit_upstream_work OOM"); + goto out; + } + + /* build JSON-RPC request */ + sprintf(s, + "{\"method\": \"getwork\", \"params\": [ \"%s\" ], \"id\":1}\r\n", + str); + + /* issue JSON-RPC request */ + val = json_rpc_call(curl, rpc_url, rpc_userpass, s, false, false, NULL); + if (unlikely(!val)) { + applog(LOG_ERR, "submit_upstream_work json_rpc_call failed"); + goto out; + } + + res = json_object_get(val, "result"); + reason = json_object_get(val, "reject-reason"); + share_result(json_is_true(res), reason ? json_string_value(reason) : NULL); + + json_decref(val); + } + + rc = true; + +out: + free(str); + return rc; +} + +static const char *rpc_req = + "{\"method\": \"getwork\", \"params\": [], \"id\":0}\r\n"; + +static bool get_upstream_work(CURL *curl, struct work *work) +{ + json_t *val; + bool rc; + struct timeval tv_start, tv_end, diff; + + gettimeofday(&tv_start, NULL); + val = json_rpc_call(curl, rpc_url, rpc_userpass, rpc_req, + want_longpoll, false, NULL); + gettimeofday(&tv_end, NULL); + + if (have_stratum) { + if (val) + json_decref(val); + return true; + } + + if (!val) + return false; + + rc = work_decode(json_object_get(val, "result"), work); + + if (opt_debug && rc) { + timeval_subtract(&diff, &tv_end, &tv_start); + applog(LOG_DEBUG, "DEBUG: got new work in %d ms", + diff.tv_sec * 1000 + diff.tv_usec / 1000); + } + + json_decref(val); + + return rc; +} + +static void workio_cmd_free(struct workio_cmd *wc) +{ + if (!wc) + return; + + switch (wc->cmd) { + case WC_SUBMIT_WORK: + free(wc->u.work); + break; + default: /* do nothing */ + break; + } + + memset(wc, 0, sizeof(*wc)); /* poison */ + free(wc); +} + +static bool workio_get_work(struct workio_cmd *wc, CURL *curl) +{ + struct work *ret_work; + int failures = 0; + + 
ret_work = (struct work*)calloc(1, sizeof(*ret_work)); + if (!ret_work) + return false; + + /* obtain new work from bitcoin via JSON-RPC */ + while (!get_upstream_work(curl, ret_work)) { + if (unlikely((opt_retries >= 0) && (++failures > opt_retries))) { + applog(LOG_ERR, "json_rpc_call failed, terminating workio thread"); + free(ret_work); + return false; + } + + /* pause, then restart work-request loop */ + applog(LOG_ERR, "json_rpc_call failed, retry after %d seconds", + opt_fail_pause); + sleep(opt_fail_pause); + } + + /* send work to requesting thread */ + if (!tq_push(wc->thr->q, ret_work)) + free(ret_work); + + return true; +} + +static bool workio_submit_work(struct workio_cmd *wc, CURL *curl) +{ + int failures = 0; + + /* submit solution to bitcoin via JSON-RPC */ + while (!submit_upstream_work(curl, wc->u.work)) { + if (unlikely((opt_retries >= 0) && (++failures > opt_retries))) { + applog(LOG_ERR, "...terminating workio thread"); + return false; + } + + /* pause, then restart work-request loop */ + applog(LOG_ERR, "...retry after %d seconds", + opt_fail_pause); + sleep(opt_fail_pause); + } + + return true; +} + +static void *workio_thread(void *userdata) +{ + struct thr_info *mythr = (struct thr_info*)userdata; + CURL *curl; + bool ok = true; + + curl = curl_easy_init(); + if (unlikely(!curl)) { + applog(LOG_ERR, "CURL initialization failed"); + return NULL; + } + + while (ok) { + struct workio_cmd *wc; + + /* wait for workio_cmd sent to us, on our queue */ + wc = (struct workio_cmd *)tq_pop(mythr->q, NULL); + if (!wc) { + ok = false; + break; + } + + /* process workio_cmd */ + switch (wc->cmd) { + case WC_GET_WORK: + ok = workio_get_work(wc, curl); + break; + case WC_SUBMIT_WORK: + ok = workio_submit_work(wc, curl); + break; + + default: /* should never happen */ + ok = false; + break; + } + + workio_cmd_free(wc); + } + + tq_freeze(mythr->q); + curl_easy_cleanup(curl); + + return NULL; +} + +static bool get_work(struct thr_info *thr, struct work *work) 
+{ + struct workio_cmd *wc; + struct work *work_heap; + + if (opt_benchmark) { + memset(work->data, 0x55, 76); + work->data[17] = swab32((uint32_t)time(NULL)); + memset(work->data + 19, 0x00, 52); + work->data[20] = 0x80000000; + work->data[31] = 0x00000280; + memset(work->target, 0x00, sizeof(work->target)); + return true; + } + + /* fill out work request message */ + wc = (struct workio_cmd *)calloc(1, sizeof(*wc)); + if (!wc) + return false; + + wc->cmd = WC_GET_WORK; + wc->thr = thr; + + /* send work request to workio thread */ + if (!tq_push(thr_info[work_thr_id].q, wc)) { + workio_cmd_free(wc); + return false; + } + + /* wait for response, a unit of work */ + work_heap = (struct work *)tq_pop(thr->q, NULL); + if (!work_heap) + return false; + + /* copy returned work into storage provided by caller */ + memcpy(work, work_heap, sizeof(*work)); + free(work_heap); + + return true; +} + +static bool submit_work(struct thr_info *thr, const struct work *work_in) +{ + struct workio_cmd *wc; + /* fill out work request message */ + wc = (struct workio_cmd *)calloc(1, sizeof(*wc)); + if (!wc) + return false; + + wc->u.work = (struct work *)malloc(sizeof(*work_in)); + if (!wc->u.work) + goto err_out; + + wc->cmd = WC_SUBMIT_WORK; + wc->thr = thr; + memcpy(wc->u.work, work_in, sizeof(*work_in)); + + /* send solution to workio thread */ + if (!tq_push(thr_info[work_thr_id].q, wc)) + goto err_out; + + return true; + +err_out: + workio_cmd_free(wc); + return false; +} + +static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work) +{ + unsigned char merkle_root[64]; + int i; + + pthread_mutex_lock(&sctx->work_lock); + + strcpy(work->job_id, sctx->job.job_id); + work->xnonce2_len = sctx->xnonce2_size; + memcpy(work->xnonce2, sctx->job.xnonce2, sctx->xnonce2_size); + + /* Generate merkle root */ + if (opt_algo == ALGO_HEAVY) + heavycoin_hash(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size); + else + sha256d(merkle_root, sctx->job.coinbase, 
(int)sctx->job.coinbase_size); + for (i = 0; i < sctx->job.merkle_count; i++) { + memcpy(merkle_root + 32, sctx->job.merkle[i], 32); + if (opt_algo == ALGO_HEAVY) + heavycoin_hash(merkle_root, merkle_root, 64); + else + sha256d(merkle_root, merkle_root, 64); + } + + /* Increment extranonce2 */ + for (i = 0; i < (int)sctx->xnonce2_size && !++sctx->job.xnonce2[i]; i++); + + /* Assemble block header */ + memset(work->data, 0, 128); + work->data[0] = le32dec(sctx->job.version); + for (i = 0; i < 8; i++) + work->data[1 + i] = le32dec((uint32_t *)sctx->job.prevhash + i); + for (i = 0; i < 8; i++) + work->data[9 + i] = be32dec((uint32_t *)merkle_root + i); + work->data[17] = le32dec(sctx->job.ntime); + work->data[18] = le32dec(sctx->job.nbits); + work->data[20] = 0x80000000; + work->data[31] = 0x00000280; + + // HeavyCoin + if (opt_algo == ALGO_HEAVY) { + uint16_t *ext; + work->maxvote = 1024; + ext = (uint16_t*)(&work->data[20]); + ext[0] = opt_vote; + ext[1] = be16dec(sctx->job.nreward); + + for (i = 0; i < 20; i++) + work->data[i] = be32dec((uint32_t *)&work->data[i]); + } + // + + pthread_mutex_unlock(&sctx->work_lock); + + if (opt_debug) { + char *xnonce2str = bin2hex(work->xnonce2, sctx->xnonce2_size); + applog(LOG_DEBUG, "DEBUG: job_id='%s' extranonce2=%s ntime=%08x", + work->job_id, xnonce2str, swab32(work->data[17])); + free(xnonce2str); + } + + if (opt_algo == ALGO_SCRYPT) + diff_to_target(work->target, sctx->job.diff / 65536.0); + else + diff_to_target(work->target, sctx->job.diff); +} + +static void *miner_thread(void *userdata) +{ + struct thr_info *mythr = (struct thr_info *)userdata; + int thr_id = mythr->id; + struct work work; + uint32_t max_nonce; + uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20; + unsigned char *scratchbuf = NULL; + char s[16]; + int i; + + memset(&work, 0, sizeof(work)); // prevent work from being used uninitialized + + /* Set worker threads to nice 19 and then preferentially to SCHED_IDLE + * and if that fails, 
then SCHED_BATCH. No need for this to be an + * error if it fails */ + if (!opt_benchmark) { + setpriority(PRIO_PROCESS, 0, 19); + drop_policy(); + } + + /* Cpu affinity only makes sense if the number of threads is a multiple + * of the number of CPUs */ + if (num_processors > 1 && opt_n_threads % num_processors == 0) { + if (!opt_quiet) + applog(LOG_INFO, "Binding thread %d to cpu %d", + thr_id, thr_id % num_processors); + affine_to_cpu(thr_id, thr_id % num_processors); + } + + if (opt_algo == ALGO_SCRYPT) + { + scratchbuf = scrypt_buffer_alloc(); + } + + while (1) { + unsigned long hashes_done; + struct timeval tv_start, tv_end, diff; + int64_t max64; + int rc; + + if (have_stratum) { + while (time(NULL) >= g_work_time + 120) + sleep(1); + pthread_mutex_lock(&g_work_lock); + if (work.data[19] >= end_nonce) + stratum_gen_work(&stratum, &g_work); + } else { + /* obtain new work from internal workio thread */ + pthread_mutex_lock(&g_work_lock); + if (!have_stratum && (!have_longpoll || + time(NULL) >= g_work_time + LP_SCANTIME*3/4 || + work.data[19] >= end_nonce)) { + if (unlikely(!get_work(mythr, &g_work))) { + applog(LOG_ERR, "work retrieval failed, exiting " + "mining thread %d", mythr->id); + pthread_mutex_unlock(&g_work_lock); + goto out; + } + g_work_time = have_stratum ? 0 : time(NULL); + } + if (have_stratum) { + pthread_mutex_unlock(&g_work_lock); + continue; + } + } + if (memcmp(work.data, g_work.data, 76)) { + memcpy(&work, &g_work, sizeof(struct work)); + work.data[19] = 0xffffffffU / opt_n_threads * thr_id; + } else + work.data[19]++; + pthread_mutex_unlock(&g_work_lock); + work_restart[thr_id].restart = 0; + + /* adjust max_nonce to meet target scan time */ + if (have_stratum) + max64 = LP_SCANTIME; + else + max64 = g_work_time + (have_longpoll ? LP_SCANTIME : opt_scantime) + - time(NULL); + max64 *= (int64_t)thr_hashrates[thr_id]; + if (max64 <= 0) + max64 = opt_algo == ALGO_SCRYPT ? 
0xfffLL : 0x1fffffLL; + if (work.data[19] + max64 > end_nonce) + max_nonce = end_nonce; + else + max_nonce = (uint32_t)(work.data[19] + max64); + + hashes_done = 0; + gettimeofday(&tv_start, NULL); + + /* scan nonces for a proof-of-work hash */ + switch (opt_algo) { + case ALGO_SCRYPT: + rc = scanhash_scrypt(thr_id, work.data, scratchbuf, work.target, + max_nonce, &hashes_done); + break; + + case ALGO_SHA256D: + rc = scanhash_sha256d(thr_id, work.data, work.target, + max_nonce, &hashes_done); + break; + + case ALGO_HEAVY: + rc = scanhash_heavy(thr_id, work.data, work.target, + max_nonce, &hashes_done, work.maxvote); + break; + + case ALGO_FUGUE256: + rc = scanhash_fugue256(thr_id, work.data, work.target, + max_nonce, &hashes_done); + break; + + default: + /* should never happen */ + goto out; + } + + /* record scanhash elapsed time */ + gettimeofday(&tv_end, NULL); + timeval_subtract(&diff, &tv_end, &tv_start); + if (diff.tv_usec || diff.tv_sec) { + pthread_mutex_lock(&stats_lock); + thr_hashrates[thr_id] = + hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec); + pthread_mutex_unlock(&stats_lock); + } + if (!opt_quiet) { + sprintf(s, thr_hashrates[thr_id] >= 1e6 ? "%.0f" : "%.2f", + 1e-3 * thr_hashrates[thr_id]); + applog(LOG_INFO, "thread %d: %lu hashes, %s khash/s", + thr_id, hashes_done, s); + } + if (opt_benchmark && thr_id == opt_n_threads - 1) { + double hashrate = 0.; + for (i = 0; i < opt_n_threads && thr_hashrates[i]; i++) + hashrate += thr_hashrates[i]; + if (i == opt_n_threads) { + sprintf(s, hashrate >= 1e6 ? 
"%.0f" : "%.2f", 1e-3 * hashrate); + applog(LOG_INFO, "Total: %s khash/s", s); + } + } + + /* if nonce found, submit work */ + if (rc && !opt_benchmark && !submit_work(mythr, &work)) + break; + } + +out: + tq_freeze(mythr->q); + + return NULL; +} + +static void restart_threads(void) +{ + int i; + + for (i = 0; i < opt_n_threads; i++) + work_restart[i].restart = 1; +} + +static void *longpoll_thread(void *userdata) +{ + struct thr_info *mythr = (struct thr_info *)userdata; + CURL *curl = NULL; + char *copy_start, *hdr_path = NULL, *lp_url = NULL; + bool need_slash = false; + + curl = curl_easy_init(); + if (unlikely(!curl)) { + applog(LOG_ERR, "CURL initialization failed"); + goto out; + } + +start: + hdr_path = (char*)tq_pop(mythr->q, NULL); + if (!hdr_path) + goto out; + + /* full URL */ + if (strstr(hdr_path, "://")) { + lp_url = hdr_path; + hdr_path = NULL; + } + + /* absolute path, on current server */ + else { + copy_start = (*hdr_path == '/') ? (hdr_path + 1) : hdr_path; + if (rpc_url[strlen(rpc_url) - 1] != '/') + need_slash = true; + + lp_url = (char*)malloc(strlen(rpc_url) + strlen(copy_start) + 2); + if (!lp_url) + goto out; + + sprintf(lp_url, "%s%s%s", rpc_url, need_slash ? "/" : "", copy_start); + } + + applog(LOG_INFO, "Long-polling activated for %s", lp_url); + + while (1) { + json_t *val, *soval; + int err; + + val = json_rpc_call(curl, lp_url, rpc_userpass, rpc_req, + false, true, &err); + if (have_stratum) { + if (val) + json_decref(val); + goto out; + } + if (likely(val)) { + applog(LOG_INFO, "LONGPOLL detected new block"); + soval = json_object_get(json_object_get(val, "result"), "submitold"); + submit_old = soval ? 
json_is_true(soval) : false; + pthread_mutex_lock(&g_work_lock); + if (work_decode(json_object_get(val, "result"), &g_work)) { + if (opt_debug) + applog(LOG_DEBUG, "DEBUG: got new work"); + time(&g_work_time); + restart_threads(); + } + pthread_mutex_unlock(&g_work_lock); + json_decref(val); + } else { + pthread_mutex_lock(&g_work_lock); + g_work_time -= LP_SCANTIME; + pthread_mutex_unlock(&g_work_lock); + if (err == CURLE_OPERATION_TIMEDOUT) { + restart_threads(); + } else { + have_longpoll = false; + restart_threads(); + free(hdr_path); + free(lp_url); + lp_url = NULL; + sleep(opt_fail_pause); + goto start; + } + } + } + +out: + free(hdr_path); + free(lp_url); + tq_freeze(mythr->q); + if (curl) + curl_easy_cleanup(curl); + + return NULL; +} + +static bool stratum_handle_response(char *buf) +{ + json_t *val, *err_val, *res_val, *id_val; + json_error_t err; + bool ret = false; + + val = JSON_LOADS(buf, &err); + if (!val) { + applog(LOG_INFO, "JSON decode failed(%d): %s", err.line, err.text); + goto out; + } + + res_val = json_object_get(val, "result"); + err_val = json_object_get(val, "error"); + id_val = json_object_get(val, "id"); + + if (!id_val || json_is_null(id_val) || !res_val) + goto out; + + share_result(json_is_true(res_val), + err_val ? 
json_string_value(json_array_get(err_val, 1)) : NULL); + + ret = true; +out: + if (val) + json_decref(val); + + return ret; +} + +static void *stratum_thread(void *userdata) +{ + struct thr_info *mythr = (struct thr_info *)userdata; + char *s; + + stratum.url = (char*)tq_pop(mythr->q, NULL); + if (!stratum.url) + goto out; + applog(LOG_INFO, "Starting Stratum on %s", stratum.url); + + while (1) { + int failures = 0; + + while (!stratum.curl) { + pthread_mutex_lock(&g_work_lock); + g_work_time = 0; + pthread_mutex_unlock(&g_work_lock); + restart_threads(); + + if (!stratum_connect(&stratum, stratum.url) || + !stratum_subscribe(&stratum) || + !stratum_authorize(&stratum, rpc_user, rpc_pass)) { + stratum_disconnect(&stratum); + if (opt_retries >= 0 && ++failures > opt_retries) { + applog(LOG_ERR, "...terminating workio thread"); + tq_push(thr_info[work_thr_id].q, NULL); + goto out; + } + applog(LOG_ERR, "...retry after %d seconds", opt_fail_pause); + sleep(opt_fail_pause); + } + } + + if (stratum.job.job_id && + (strcmp(stratum.job.job_id, g_work.job_id) || !g_work_time)) { + pthread_mutex_lock(&g_work_lock); + stratum_gen_work(&stratum, &g_work); + time(&g_work_time); + pthread_mutex_unlock(&g_work_lock); + if (stratum.job.clean) { + applog(LOG_INFO, "Stratum detected new block"); + restart_threads(); + } + } + + if (!stratum_socket_full(&stratum, 120)) { + applog(LOG_ERR, "Stratum connection timed out"); + s = NULL; + } else + s = stratum_recv_line(&stratum); + if (!s) { + stratum_disconnect(&stratum); + applog(LOG_ERR, "Stratum connection interrupted"); + continue; + } + if (!stratum_handle_method(&stratum, s)) + stratum_handle_response(s); + free(s); + } + +out: + return NULL; +} + +static void show_version_and_exit(void) +{ + printf("%s\n%s\n", PACKAGE_STRING, curl_version()); + exit(0); +} + +static void show_usage_and_exit(int status) +{ + if (status) + fprintf(stderr, "Try `" PROGRAM_NAME " --help' for more information.\n"); + else + printf(usage); + 
exit(status); +} + +static void parse_arg (int key, char *arg) +{ + char *p; + int v, i; + + switch(key) { + case 'a': + for (i = 0; i < ARRAY_SIZE(algo_names); i++) { + if (algo_names[i] && + !strcmp(arg, algo_names[i])) { + opt_algo = (sha256_algos)i; + break; + } + } + if (i == ARRAY_SIZE(algo_names)) + show_usage_and_exit(1); + break; + case 'B': + opt_background = true; + break; + case 'c': { + json_error_t err; + if (opt_config) + json_decref(opt_config); +#if JANSSON_VERSION_HEX >= 0x020000 + opt_config = json_load_file(arg, 0, &err); +#else + opt_config = json_load_file(arg, &err); +#endif + if (!json_is_object(opt_config)) { + applog(LOG_ERR, "JSON decode of %s failed", arg); + exit(1); + } + break; + } + case 'q': + opt_quiet = true; + break; + case 'D': + opt_debug = true; + break; + case 'p': + free(rpc_pass); + rpc_pass = strdup(arg); + break; + case 'P': + opt_protocol = true; + break; + case 'r': + v = atoi(arg); + if (v < -1 || v > 9999) /* sanity check */ + show_usage_and_exit(1); + opt_retries = v; + break; + case 'R': + v = atoi(arg); + if (v < 1 || v > 9999) /* sanity check */ + show_usage_and_exit(1); + opt_fail_pause = v; + break; + case 's': + v = atoi(arg); + if (v < 1 || v > 9999) /* sanity check */ + show_usage_and_exit(1); + opt_scantime = v; + break; + case 'T': + v = atoi(arg); + if (v < 1 || v > 99999) /* sanity check */ + show_usage_and_exit(1); + opt_timeout = v; + break; + case 't': + v = atoi(arg); + if (v < 1 || v > 9999) /* sanity check */ + show_usage_and_exit(1); + opt_n_threads = v; + break; + case 'v': + v = atoi(arg); + if (v < 0 || v > 1024) /* sanity check */ + show_usage_and_exit(1); + opt_vote = (uint16_t)v; + break; + case 'm': + opt_trust_pool = true; + break; + case 'u': + free(rpc_user); + rpc_user = strdup(arg); + break; + case 'o': /* --url */ + p = strstr(arg, "://"); + if (p) { + if (strncasecmp(arg, "http://", 7) && strncasecmp(arg, "https://", 8) && + strncasecmp(arg, "stratum+tcp://", 14)) + 
show_usage_and_exit(1); + free(rpc_url); + rpc_url = strdup(arg); + } else { + if (!strlen(arg) || *arg == '/') + show_usage_and_exit(1); + free(rpc_url); + rpc_url = (char*)malloc(strlen(arg) + 8); + sprintf(rpc_url, "http://%s", arg); + } + p = strrchr(rpc_url, '@'); + if (p) { + char *sp, *ap; + *p = '\0'; + ap = strstr(rpc_url, "://") + 3; + sp = strchr(ap, ':'); + if (sp) { + free(rpc_userpass); + rpc_userpass = strdup(ap); + free(rpc_user); + rpc_user = (char*)calloc(sp - ap + 1, 1); + strncpy(rpc_user, ap, sp - ap); + free(rpc_pass); + rpc_pass = strdup(sp + 1); + } else { + free(rpc_user); + rpc_user = strdup(ap); + } + memmove(ap, p + 1, strlen(p + 1) + 1); + } + have_stratum = !opt_benchmark && !strncasecmp(rpc_url, "stratum", 7); + break; + case 'O': /* --userpass */ + p = strchr(arg, ':'); + if (!p) + show_usage_and_exit(1); + free(rpc_userpass); + rpc_userpass = strdup(arg); + free(rpc_user); + rpc_user = (char*)calloc(p - arg + 1, 1); + strncpy(rpc_user, arg, p - arg); + free(rpc_pass); + rpc_pass = strdup(p + 1); + break; + case 'x': /* --proxy */ + if (!strncasecmp(arg, "socks4://", 9)) + opt_proxy_type = CURLPROXY_SOCKS4; + else if (!strncasecmp(arg, "socks5://", 9)) + opt_proxy_type = CURLPROXY_SOCKS5; +#if LIBCURL_VERSION_NUM >= 0x071200 + else if (!strncasecmp(arg, "socks4a://", 10)) + opt_proxy_type = CURLPROXY_SOCKS4A; + else if (!strncasecmp(arg, "socks5h://", 10)) + opt_proxy_type = CURLPROXY_SOCKS5_HOSTNAME; +#endif + else + opt_proxy_type = CURLPROXY_HTTP; + free(opt_proxy); + opt_proxy = strdup(arg); + break; + case 1001: + free(opt_cert); + opt_cert = strdup(arg); + break; + case 1005: + opt_benchmark = true; + want_longpoll = false; + want_stratum = false; + have_stratum = false; + break; + case 1003: + want_longpoll = false; + break; + case 1007: + want_stratum = false; + break; + case 'S': + use_syslog = true; + break; + case 'V': + show_version_and_exit(); + case 'h': + show_usage_and_exit(0); + default: + show_usage_and_exit(1); + } 
+} + +static void parse_config(void) +{ + int i; + json_t *val; + + if (!json_is_object(opt_config)) + return; + + for (i = 0; i < ARRAY_SIZE(options); i++) { + if (!options[i].name) + break; + if (!strcmp(options[i].name, "config")) + continue; + + val = json_object_get(opt_config, options[i].name); + if (!val) + continue; + + if (options[i].has_arg && json_is_string(val)) { + char *s = strdup(json_string_value(val)); + if (!s) + break; + parse_arg(options[i].val, s); + free(s); + } else if (!options[i].has_arg && json_is_true(val)) + parse_arg(options[i].val, ""); + else + applog(LOG_ERR, "JSON option %s invalid", + options[i].name); + } + + if (opt_algo == ALGO_HEAVY && opt_vote == 9999) { + fprintf(stderr, "Heavycoin hash requires block reward vote parameter (see --vote)\n"); + show_usage_and_exit(1); + } +} + +static void parse_cmdline(int argc, char *argv[]) +{ + int key; + + while (1) { +#if HAVE_GETOPT_LONG + key = getopt_long(argc, argv, short_options, options, NULL); +#else + key = getopt(argc, argv, short_options); +#endif + if (key < 0) + break; + + parse_arg(key, optarg); + } + if (optind < argc) { + fprintf(stderr, "%s: unsupported non-option argument '%s'\n", + argv[0], argv[optind]); + show_usage_and_exit(1); + } + + if (opt_algo == ALGO_HEAVY && opt_vote == 9999) { + fprintf(stderr, "%s: Heavycoin hash requires block reward vote parameter (see --vote)\n", + argv[0]); + show_usage_and_exit(1); + } + + parse_config(); +} + +#ifndef WIN32 +static void signal_handler(int sig) +{ + switch (sig) { + case SIGHUP: + applog(LOG_INFO, "SIGHUP received"); + break; + case SIGINT: + applog(LOG_INFO, "SIGINT received, exiting"); + exit(0); + break; + case SIGTERM: + applog(LOG_INFO, "SIGTERM received, exiting"); + exit(0); + break; + } +} +#endif + +#define PROGRAM_VERSION "0.1" +int main(int argc, char *argv[]) +{ + struct thr_info *thr; + long flags; + int i; + +#ifdef WIN32 + SYSTEM_INFO sysinfo; +#endif + + printf(" *** ccMiner for nVidia GPUs by Christian 
Buchner and Christian H. ***\n"); + printf("\t This is version "PROGRAM_VERSION" (beta)\n"); + printf("\t based on pooler-cpuminer 2.3.2 (c) 2010 Jeff Garzik, 2012 pooler\n"); + printf("\t based on pooler-cpuminer extension for HVC from\n\t https://github.com/heavycoin/cpuminer-heavycoin\n"); + printf("\t\t\tand\n\t http://hvc.1gh.com/\n"); + printf("\tCuda additions Copyright 2014 Christian Buchner, Christian H.\n"); + printf("\t LTC donation address: LKS1WDKGED647msBQfLBHV3Ls8sveGncnm\n"); + printf("\t BTC donation address: 16hJF5mceSojnTD3ZTUDqdRhDyPJzoRakM\n"); + printf("\t YAC donation address: Y87sptDEcpLkLeAuex6qZioDbvy1qXZEj4\n"); + + rpc_user = strdup(""); + rpc_pass = strdup(""); + + /* parse command line */ + parse_cmdline(argc, argv); + + if (!opt_benchmark && !rpc_url) { + fprintf(stderr, "%s: no URL supplied\n", argv[0]); + show_usage_and_exit(1); + } + + if (!rpc_userpass) { + rpc_userpass = (char*)malloc(strlen(rpc_user) + strlen(rpc_pass) + 2); + if (!rpc_userpass) + return 1; + sprintf(rpc_userpass, "%s:%s", rpc_user, rpc_pass); + } + + pthread_mutex_init(&applog_lock, NULL); + pthread_mutex_init(&stats_lock, NULL); + pthread_mutex_init(&g_work_lock, NULL); + pthread_mutex_init(&stratum.sock_lock, NULL); + pthread_mutex_init(&stratum.work_lock, NULL); + + flags = !opt_benchmark && strncmp(rpc_url, "https:", 6) + ? 
(CURL_GLOBAL_ALL & ~CURL_GLOBAL_SSL) + : CURL_GLOBAL_ALL; + if (curl_global_init(flags)) { + applog(LOG_ERR, "CURL initialization failed"); + return 1; + } + +#ifndef WIN32 + if (opt_background) { + i = fork(); + if (i < 0) exit(1); + if (i > 0) exit(0); + i = setsid(); + if (i < 0) + applog(LOG_ERR, "setsid() failed (errno = %d)", errno); + i = chdir("/"); + if (i < 0) + applog(LOG_ERR, "chdir() failed (errno = %d)", errno); + signal(SIGHUP, signal_handler); + signal(SIGINT, signal_handler); + signal(SIGTERM, signal_handler); + } +#endif + + num_processors = cuda_num_devices(); + if (num_processors == 0) + { + applog(LOG_ERR, "No CUDA devices found! terminating."); + exit(1); + } + if (!opt_n_threads) + opt_n_threads = num_processors; + +#ifdef HAVE_SYSLOG_H + if (use_syslog) + openlog("cpuminer", LOG_PID, LOG_USER); +#endif + + work_restart = (struct work_restart *)calloc(opt_n_threads, sizeof(*work_restart)); + if (!work_restart) + return 1; + + thr_info = (struct thr_info *)calloc(opt_n_threads + 3, sizeof(*thr)); + if (!thr_info) + return 1; + + thr_hashrates = (double *) calloc(opt_n_threads, sizeof(double)); + if (!thr_hashrates) + return 1; + + /* init workio thread info */ + work_thr_id = opt_n_threads; + thr = &thr_info[work_thr_id]; + thr->id = work_thr_id; + thr->q = tq_new(); + if (!thr->q) + return 1; + + /* start work I/O thread */ + if (pthread_create(&thr->pth, NULL, workio_thread, thr)) { + applog(LOG_ERR, "workio thread create failed"); + return 1; + } + + if (want_longpoll && !have_stratum) { + /* init longpoll thread info */ + longpoll_thr_id = opt_n_threads + 1; + thr = &thr_info[longpoll_thr_id]; + thr->id = longpoll_thr_id; + thr->q = tq_new(); + if (!thr->q) + return 1; + + /* start longpoll thread */ + if (unlikely(pthread_create(&thr->pth, NULL, longpoll_thread, thr))) { + applog(LOG_ERR, "longpoll thread create failed"); + return 1; + } + } + if (want_stratum) { + /* init stratum thread info */ + stratum_thr_id = opt_n_threads + 2; + thr 
= &thr_info[stratum_thr_id]; + thr->id = stratum_thr_id; + thr->q = tq_new(); + if (!thr->q) + return 1; + + /* start stratum thread */ + if (unlikely(pthread_create(&thr->pth, NULL, stratum_thread, thr))) { + applog(LOG_ERR, "stratum thread create failed"); + return 1; + } + + if (have_stratum) + tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url)); + } + + /* start mining threads */ + for (i = 0; i < opt_n_threads; i++) { + thr = &thr_info[i]; + + thr->id = i; + thr->q = tq_new(); + if (!thr->q) + return 1; + + if (unlikely(pthread_create(&thr->pth, NULL, miner_thread, thr))) { + applog(LOG_ERR, "thread %d create failed", i); + return 1; + } + } + + applog(LOG_INFO, "%d miner threads started, " + "using '%s' algorithm.", + opt_n_threads, + algo_names[opt_algo]); + + /* main loop - simply wait for workio thread to exit */ + pthread_join(thr_info[work_thr_id].pth, NULL); + + applog(LOG_INFO, "workio thread dead, exiting."); + + return 0; +} diff --git a/cpuminer-config.h b/cpuminer-config.h new file mode 100644 index 0000000..bdd09fe --- /dev/null +++ b/cpuminer-config.h @@ -0,0 +1,190 @@ +/* cpuminer-config.h.in. Generated from configure.ac by autoheader. */ + +/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP + systems. This function is required for `alloca.c' support on those systems. + */ +#undef CRAY_STACKSEG_END + +/* Define to 1 if using `alloca.c'. */ +#undef C_ALLOCA + +/* Define to 1 if you have `alloca', as a function or macro. */ +#undef HAVE_ALLOCA + +/* Define to 1 if you have and it should be used (not on Ultrix). + */ +#undef HAVE_ALLOCA_H + +/* Define to 1 if you have the declaration of `be32dec', and to 0 if you + don't. */ +#undef HAVE_DECL_BE32DEC + +/* Define to 1 if you have the declaration of `be32enc', and to 0 if you + don't. */ +#undef HAVE_DECL_BE32ENC + +/* Define to 1 if you have the declaration of `le32dec', and to 0 if you + don't. 
*/ +#undef HAVE_DECL_LE32DEC + +/* Define to 1 if you have the declaration of `le32enc', and to 0 if you + don't. */ +#undef HAVE_DECL_LE32ENC + +/* Define to 1 if you have the `getopt_long' function. */ +#define HAVE_GETOPT_LONG 1 + +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have a functional curl library. */ +#undef HAVE_LIBCURL + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the <syslog.h> header file. */ +#undef HAVE_SYSLOG_H + +/* Define to 1 if you have the <sys/endian.h> header file. */ +#undef HAVE_SYS_ENDIAN_H + +/* Define to 1 if you have the <sys/param.h> header file. */ +#undef HAVE_SYS_PARAM_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/sysctl.h> header file. */ +#undef HAVE_SYS_SYSCTL_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the <unistd.h> header file.
*/ +#undef HAVE_UNISTD_H + +/* Defined if libcurl supports AsynchDNS */ +#undef LIBCURL_FEATURE_ASYNCHDNS + +/* Defined if libcurl supports IDN */ +#undef LIBCURL_FEATURE_IDN + +/* Defined if libcurl supports IPv6 */ +#undef LIBCURL_FEATURE_IPV6 + +/* Defined if libcurl supports KRB4 */ +#undef LIBCURL_FEATURE_KRB4 + +/* Defined if libcurl supports libz */ +#undef LIBCURL_FEATURE_LIBZ + +/* Defined if libcurl supports NTLM */ +#undef LIBCURL_FEATURE_NTLM + +/* Defined if libcurl supports SSL */ +#undef LIBCURL_FEATURE_SSL + +/* Defined if libcurl supports SSPI */ +#undef LIBCURL_FEATURE_SSPI + +/* Defined if libcurl supports DICT */ +#undef LIBCURL_PROTOCOL_DICT + +/* Defined if libcurl supports FILE */ +#undef LIBCURL_PROTOCOL_FILE + +/* Defined if libcurl supports FTP */ +#undef LIBCURL_PROTOCOL_FTP + +/* Defined if libcurl supports FTPS */ +#undef LIBCURL_PROTOCOL_FTPS + +/* Defined if libcurl supports HTTP */ +#undef LIBCURL_PROTOCOL_HTTP + +/* Defined if libcurl supports HTTPS */ +#undef LIBCURL_PROTOCOL_HTTPS + +/* Defined if libcurl supports IMAP */ +#undef LIBCURL_PROTOCOL_IMAP + +/* Defined if libcurl supports LDAP */ +#undef LIBCURL_PROTOCOL_LDAP + +/* Defined if libcurl supports POP3 */ +#undef LIBCURL_PROTOCOL_POP3 + +/* Defined if libcurl supports RTSP */ +#undef LIBCURL_PROTOCOL_RTSP + +/* Defined if libcurl supports SMTP */ +#undef LIBCURL_PROTOCOL_SMTP + +/* Defined if libcurl supports TELNET */ +#undef LIBCURL_PROTOCOL_TELNET + +/* Defined if libcurl supports TFTP */ +#undef LIBCURL_PROTOCOL_TFTP + +/* Define to 1 if your C compiler doesn't accept -c and -o together. */ +#undef NO_MINUS_C_MINUS_O + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "ccminer" + +/* Define to the full name and version of this package. 
*/ +#define PACKAGE_STRING "ccminer 2014.03.18" + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "2014.03.18" + +/* If using the C implementation of alloca, define if you know the + direction of stack growth for your system; otherwise it will be + automatically deduced at runtime. + STACK_DIRECTION > 0 => grows toward higher addresses + STACK_DIRECTION < 0 => grows toward lower addresses + STACK_DIRECTION = 0 => direction of growth unknown */ +#undef STACK_DIRECTION + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Define to 1 if AVX assembly is available. */ +#undef USE_AVX + +/* Define to 1 if XOP assembly is available. */ +#undef USE_XOP + +/* Version number of package */ +#undef VERSION + +/* Define curl_free() as free() if our version of curl lacks curl_free. */ +#undef curl_free + +/* Define to `unsigned int' if <sys/types.h> does not define. */ +#undef size_t diff --git a/cpuminer-config.h.in b/cpuminer-config.h.in new file mode 100644 index 0000000..b8668be --- /dev/null +++ b/cpuminer-config.h.in @@ -0,0 +1,199 @@ +/* cpuminer-config.h.in. Generated from configure.ac by autoheader. */ + +/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP + systems. This function is required for `alloca.c' support on those systems. + */ +#undef CRAY_STACKSEG_END + +/* Define to 1 if using `alloca.c'. */ +#undef C_ALLOCA + +/* Define to 1 if you have `alloca', as a function or macro. */ +#undef HAVE_ALLOCA + +/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix). + */ +#undef HAVE_ALLOCA_H + +/* Define to 1 if you have the declaration of `be32dec', and to 0 if you + don't. */ +#undef HAVE_DECL_BE32DEC + +/* Define to 1 if you have the declaration of `be32enc', and to 0 if you + don't.
*/ +#undef HAVE_DECL_BE32ENC + +/* Define to 1 if you have the declaration of `le32dec', and to 0 if you + don't. */ +#undef HAVE_DECL_LE32DEC + +/* Define to 1 if you have the declaration of `le32enc', and to 0 if you + don't. */ +#undef HAVE_DECL_LE32ENC + +/* Define to 1 if you have the `getopt_long' function. */ +#undef HAVE_GETOPT_LONG + +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the `crypto' library (-lcrypto). */ +#undef HAVE_LIBCRYPTO + +/* Define to 1 if you have a functional curl library. */ +#undef HAVE_LIBCURL + +/* Define to 1 if you have the `ssl' library (-lssl). */ +#undef HAVE_LIBSSL + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the <syslog.h> header file. */ +#undef HAVE_SYSLOG_H + +/* Define to 1 if you have the <sys/endian.h> header file. */ +#undef HAVE_SYS_ENDIAN_H + +/* Define to 1 if you have the <sys/param.h> header file. */ +#undef HAVE_SYS_PARAM_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/sysctl.h> header file. */ +#undef HAVE_SYS_SYSCTL_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the <unistd.h> header file.
*/ +#undef HAVE_UNISTD_H + +/* Defined if libcurl supports AsynchDNS */ +#undef LIBCURL_FEATURE_ASYNCHDNS + +/* Defined if libcurl supports IDN */ +#undef LIBCURL_FEATURE_IDN + +/* Defined if libcurl supports IPv6 */ +#undef LIBCURL_FEATURE_IPV6 + +/* Defined if libcurl supports KRB4 */ +#undef LIBCURL_FEATURE_KRB4 + +/* Defined if libcurl supports libz */ +#undef LIBCURL_FEATURE_LIBZ + +/* Defined if libcurl supports NTLM */ +#undef LIBCURL_FEATURE_NTLM + +/* Defined if libcurl supports SSL */ +#undef LIBCURL_FEATURE_SSL + +/* Defined if libcurl supports SSPI */ +#undef LIBCURL_FEATURE_SSPI + +/* Defined if libcurl supports DICT */ +#undef LIBCURL_PROTOCOL_DICT + +/* Defined if libcurl supports FILE */ +#undef LIBCURL_PROTOCOL_FILE + +/* Defined if libcurl supports FTP */ +#undef LIBCURL_PROTOCOL_FTP + +/* Defined if libcurl supports FTPS */ +#undef LIBCURL_PROTOCOL_FTPS + +/* Defined if libcurl supports HTTP */ +#undef LIBCURL_PROTOCOL_HTTP + +/* Defined if libcurl supports HTTPS */ +#undef LIBCURL_PROTOCOL_HTTPS + +/* Defined if libcurl supports IMAP */ +#undef LIBCURL_PROTOCOL_IMAP + +/* Defined if libcurl supports LDAP */ +#undef LIBCURL_PROTOCOL_LDAP + +/* Defined if libcurl supports POP3 */ +#undef LIBCURL_PROTOCOL_POP3 + +/* Defined if libcurl supports RTSP */ +#undef LIBCURL_PROTOCOL_RTSP + +/* Defined if libcurl supports SMTP */ +#undef LIBCURL_PROTOCOL_SMTP + +/* Defined if libcurl supports TELNET */ +#undef LIBCURL_PROTOCOL_TELNET + +/* Defined if libcurl supports TFTP */ +#undef LIBCURL_PROTOCOL_TFTP + +/* Define to 1 if your C compiler doesn't accept -c and -o together. */ +#undef NO_MINUS_C_MINUS_O + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. 
*/ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* If using the C implementation of alloca, define if you know the + direction of stack growth for your system; otherwise it will be + automatically deduced at runtime. + STACK_DIRECTION > 0 => grows toward higher addresses + STACK_DIRECTION < 0 => grows toward lower addresses + STACK_DIRECTION = 0 => direction of growth unknown */ +#undef STACK_DIRECTION + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Define to 1 if AVX assembly is available. */ +#undef USE_AVX + +/* Define to 1 if AVX2 assembly is available. */ +#undef USE_AVX2 + +/* Define to 1 if XOP assembly is available. */ +#undef USE_XOP + +/* Version number of package */ +#undef VERSION + +/* Define curl_free() as free() if our version of curl lacks curl_free. */ +#undef curl_free + +/* Define to `unsigned int' if <sys/types.h> does not define.
*/ +#undef size_t diff --git a/cuda_blake512.cu b/cuda_blake512.cu new file mode 100644 index 0000000..325901d --- /dev/null +++ b/cuda_blake512.cu @@ -0,0 +1,308 @@ +/* This code is tailored to 84+32-byte input data (Heavycoin) */ +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + +#include +#include + +// TODO: replace the following definitions with a header later +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; + +// global memory for the Hefty hashes of all threads +extern uint32_t *d_heftyHashes[8]; +extern uint32_t *d_nonceVector[8]; + +// global memory for our results +uint32_t *d_hash5output[8]; + +// the message (116 bytes) with padding, for computation on the GPU +__constant__ uint64_t c_PaddedMessage[16]; // padded message (84+32 bytes + padding) + +// ---------------------------- BEGIN CUDA blake512 functions ------------------------------------ + +__constant__ uint8_t c_sigma[16][16]; + +const uint8_t host_sigma[16][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14,
1, 9 } +}; + +#define SWAP32(x) \ + ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | \ + (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu)) + +#define SWAP64(x) \ + ((uint64_t)((((uint64_t)(x) & 0xff00000000000000ULL) >> 56) | \ + (((uint64_t)(x) & 0x00ff000000000000ULL) >> 40) | \ + (((uint64_t)(x) & 0x0000ff0000000000ULL) >> 24) | \ + (((uint64_t)(x) & 0x000000ff00000000ULL) >> 8) | \ + (((uint64_t)(x) & 0x00000000ff000000ULL) << 8) | \ + (((uint64_t)(x) & 0x0000000000ff0000ULL) << 24) | \ + (((uint64_t)(x) & 0x000000000000ff00ULL) << 40) | \ + (((uint64_t)(x) & 0x00000000000000ffULL) << 56))) + +__constant__ uint64_t c_SecondRound[16]; + +const uint64_t host_SecondRound[16] = +{ + 0,0,0,0,0,0,0,0,0,0,0,0,0,SWAP64(1),0,SWAP64(0x3A0) +}; + +__constant__ uint64_t c_u512[16]; + +const uint64_t host_u512[16] = +{ + 0x243f6a8885a308d3ULL, 0x13198a2e03707344ULL, + 0xa4093822299f31d0ULL, 0x082efa98ec4e6c89ULL, + 0x452821e638d01377ULL, 0xbe5466cf34e90c6cULL, + 0xc0ac29b7c97c50ddULL, 0x3f84d5b5b5470917ULL, + 0x9216d5d98979fb1bULL, 0xd1310ba698dfb5acULL, + 0x2ffd72dbd01adfb7ULL, 0xb8e1afed6a267e96ULL, + 0xba7c9045f12c7f99ULL, 0x24a19947b3916cf7ULL, + 0x0801f2e2858efc16ULL, 0x636920d871574e69ULL +}; + + +#define ROTR(x,n) (((x)<<(64-n))|( (x)>>(n))) + +#define G(a,b,c,d,e) \ + v[a] += (m[sigma[i][e]] ^ u512[sigma[i][e+1]]) + v[b];\ + v[d] = ROTR( v[d] ^ v[a],32); \ + v[c] += v[d]; \ + v[b] = ROTR( v[b] ^ v[c],25); \ + v[a] += (m[sigma[i][e+1]] ^ u512[sigma[i][e]])+v[b]; \ + v[d] = ROTR( v[d] ^ v[a],16); \ + v[c] += v[d]; \ + v[b] = ROTR( v[b] ^ v[c],11); + +__device__ void blake512_compress( uint64_t *h, const uint64_t *block, int nullt, const uint8_t ((*sigma)[16]), const uint64_t *u512 ) +{ + uint64_t v[16], m[16], i; + +#pragma unroll 16 + for( i = 0; i < 16; ++i ) m[i] = SWAP64(block[i]); + +#pragma unroll 8 + for( i = 0; i < 8; ++i ) v[i] = h[i]; + + v[ 8] = u512[0]; + v[ 9] = u512[1]; + v[10] = u512[2]; + v[11] = u512[3]; + v[12] = u512[4]; + v[13] =
u512[5]; + v[14] = u512[6]; + v[15] = u512[7]; + + /* don't xor t when the block is only padding; NOTE(review): 928 is presumably 116 message bytes * 8 bits - confirm */ + if ( !nullt ) { + v[12] ^= 928; + v[13] ^= 928; + } + +#pragma unroll 16 + for( i = 0; i < 16; ++i ) + { + /* column step */ + G( 0, 4, 8, 12, 0 ); + G( 1, 5, 9, 13, 2 ); + G( 2, 6, 10, 14, 4 ); + G( 3, 7, 11, 15, 6 ); + /* diagonal step */ + G( 0, 5, 10, 15, 8 ); + G( 1, 6, 11, 12, 10 ); + G( 2, 7, 8, 13, 12 ); + G( 3, 4, 9, 14, 14 ); + } + +#pragma unroll 16 + for( i = 0; i < 16; ++i ) h[i % 8] ^= v[i]; +} + +// byte-order (endian) swap for 32-bit types +static __device__ uint32_t cuda_swab32(uint32_t x) +{ + return (((x << 24) & 0xff000000u) | ((x << 8) & 0x00ff0000u) + | ((x >> 8) & 0x0000ff00u) | ((x >> 24) & 0x000000ffu)); +} + +// byte-order (endian) swap for 64-bit types +static __device__ uint64_t cuda_swab64(uint64_t x) { + uint32_t h = (x >> 32); + uint32_t l = (x & 0xFFFFFFFFULL); + return (((uint64_t)cuda_swab32(l)) << 32) | ((uint64_t)cuda_swab32(h)); +} + +// extract the high word from a 64-bit value +static __device__ uint32_t HIWORD(const uint64_t &x) { +#if __CUDA_ARCH__ >= 130 + return (uint32_t)__double2hiint(__longlong_as_double(x)); +#else + return (uint32_t)(x >> 32); +#endif +} + +// replace the high word of a 64-bit value +static __device__ uint64_t REPLACE_HIWORD(const uint64_t &x, const uint32_t &y) { + return (x & 0xFFFFFFFFULL) | (((uint64_t)y) << 32ULL); +} + +// extract the low word from a 64-bit value +static __device__ uint32_t LOWORD(const uint64_t &x) { +#if __CUDA_ARCH__ >= 130 + return (uint32_t)__double2loint(__longlong_as_double(x)); +#else + return (uint32_t)(x & 0xFFFFFFFFULL); +#endif +} + +// replace the low word of a 64-bit value +static __device__ uint64_t REPLACE_LOWORD(const uint64_t &x, const uint32_t &y) { + return (x & 0xFFFFFFFF00000000ULL) | ((uint64_t)y); +} + +__global__ void blake512_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector) +{ + int thread =
(blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + // determine the current counter (the nonce for this thread) + //uint32_t nounce = startNounce + thread; + uint32_t nounce = nonceVector[thread]; + + // determine the index position of the hash in the hash buffers (Hefty1 and outputHash) + uint32_t hashPosition = nounce - startNounce; + + // prepare the state (initial values) + uint64_t h[8]; + h[0] = 0x6a09e667f3bcc908ULL; + h[1] = 0xbb67ae8584caa73bULL; + h[2] = 0x3c6ef372fe94f82bULL; + h[3] = 0xa54ff53a5f1d36f1ULL; + h[4] = 0x510e527fade682d1ULL; + h[5] = 0x9b05688c2b3e6c1fULL; + h[6] = 0x1f83d9abfb41bd6bULL; + h[7] = 0x5be0cd19137e2179ULL; + + // 128 bytes for the message + uint64_t buf[16]; + + // copy the message for the first round into the local buffer +#pragma unroll 16 + for (int i=0; i < 16; ++i) buf[i] = c_PaddedMessage[i]; + + // replace the nonce with the thread-specific one + buf[9] = REPLACE_HIWORD(buf[9], nounce); + + // insert the thread-specific Hefty1 hash + uint32_t *hefty = heftyHashes + 8 * hashPosition; + buf[10] = REPLACE_HIWORD(buf[10], hefty[0]); + buf[11] = REPLACE_LOWORD(buf[11], hefty[1]); + buf[11] = REPLACE_HIWORD(buf[11], hefty[2]); + buf[12] = REPLACE_LOWORD(buf[12], hefty[3]); + buf[12] = REPLACE_HIWORD(buf[12], hefty[4]); + buf[13] = REPLACE_LOWORD(buf[13], hefty[5]); + buf[13] = REPLACE_HIWORD(buf[13], hefty[6]); + buf[14] = REPLACE_LOWORD(buf[14], hefty[7]); + + // first round + blake512_compress( h, buf, 0, c_sigma, c_u512 ); + + // second round +#pragma unroll 16 + for (int i=0; i < 16; ++i) buf[i] = c_SecondRound[i]; + blake512_compress( h, buf, 1, c_sigma, c_u512 ); + + // write out the hash +#if 0 + // uses only 32-bit operations, provided the SM 1.3 double intrinsics are available + uint32_t *outHash = (uint32_t *)outputHash + 16 * hashPosition; +#pragma unroll 8 + for (int i=0; i < 8; ++i) { + outHash[2*i+0] = cuda_swab32( HIWORD(h[i]) ); + outHash[2*i+1] = cuda_swab32( LOWORD(h[i]) ); + } +#else + // this version also performs a few 64-bit
shifts + uint64_t *outHash = (uint64_t *)outputHash + 8 * hashPosition; +#pragma unroll 8 + for (int i=0; i < 8; ++i) outHash[i] = cuda_swab64( h[i] ); +#endif + } +} + + +// ---------------------------- END CUDA blake512 functions ------------------------------------ + +// setup functions +__host__ void blake512_cpu_init(int thr_id, int threads) +{ + // copy the hash tables into the __constant__ symbols on the GPU + cudaMemcpyToSymbol( c_sigma, + host_sigma, + sizeof(host_sigma), + 0, cudaMemcpyHostToDevice); + + cudaMemcpyToSymbol( c_u512, + host_u512, + sizeof(host_u512), + 0, cudaMemcpyHostToDevice); + + cudaMemcpyToSymbol( c_SecondRound, + host_SecondRound, + sizeof(host_SecondRound), + 0, cudaMemcpyHostToDevice); + + // allocate memory for all results; NOTE(review): the cudaMalloc result is not checked + cudaMalloc(&d_hash5output[thr_id], 16 * sizeof(uint32_t) * threads); +} + +__host__ void blake512_cpu_setBlock(void *pdata) + // data must be 84 bytes long! + // heftyHash is 32 bytes +{ + // prepare the padded message for the first round + unsigned char PaddedMessage[128]; + memcpy(PaddedMessage, pdata, 84); + memset(PaddedMessage+84, 0, 32); // fill in an empty (zeroed) Hefty hash + memset(PaddedMessage+116, 0, 12); + PaddedMessage[116] = 0x80; + + // upload the padded message (116 bytes of data plus padding, 16 * sizeof(uint64_t) bytes in total) for computation on the GPU + cudaMemcpyToSymbol( c_PaddedMessage, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); +} + + +__host__ void blake512_cpu_hash(int thr_id, int threads, uint32_t startNounce) +{ + const int threadsperblock = 128; + + // compute how many thread blocks we need + dim3 grid((threads + threadsperblock-1)/threadsperblock); + dim3 block(threadsperblock); + + // size of the dynamic shared memory region (would depend on the thread count; set to 0 here) + size_t shared_size = 0; + +// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size); + + blake512_gpu_hash<<>>(threads, startNounce, d_hash5output[thr_id], d_heftyHashes[thr_id],
d_nonceVector[thr_id]); +} diff --git a/cuda_blake512.h b/cuda_blake512.h new file mode 100644 index 0000000..b0cf201 --- /dev/null +++ b/cuda_blake512.h @@ -0,0 +1,8 @@ +#ifndef _CUDA_BLAKE512_H +#define _CUDA_BLAKE512_H + +void blake512_cpu_init(int thr_id, int threads); +void blake512_cpu_setBlock(void *pdata); +void blake512_cpu_hash(int thr_id, int threads, uint32_t startNounce); + +#endif diff --git a/cuda_combine.cu b/cuda_combine.cu new file mode 100644 index 0000000..2949765 --- /dev/null +++ b/cuda_combine.cu @@ -0,0 +1,151 @@ +/* This code is tailored to 84+32-byte input data (Heavycoin) */ +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + +// TODO: replace the following definitions with a header later +typedef unsigned int uint32_t; + +// global memory for our results +uint32_t *d_hashoutput[8]; + +extern uint32_t *d_hash2output[8]; +extern uint32_t *d_hash3output[8]; +extern uint32_t *d_hash4output[8]; +extern uint32_t *d_hash5output[8]; +extern uint32_t *d_nonceVector[8]; + +/* Combines top 64-bits from each hash into a single hash */ +static void __device__ combine_hashes(uint32_t *out, uint32_t *hash1, uint32_t *hash2, uint32_t *hash3, uint32_t *hash4) +{ + uint32_t lout[8]; // do the combining in a local array before writing to out + +#pragma unroll 8 + for (int i=0; i < 8; ++i) + lout[i] = 0; + + // the macro assembles one nibble from 4 bits, one taken from each of the four hashes +#define MIX(bits, mask, i) \ + lout[(255 - (bits+3))/32] <<= 4; \ + if ((hash1[i] & mask) != 0) lout[(255 - (bits+0))/32] |= 8; \ + if ((hash2[i] & mask) != 0) lout[(255 - (bits+1))/32] |= 4; \ + if ((hash3[i] & mask) != 0) lout[(255 - (bits+2))/32] |= 2; \ + if ((hash4[i] & mask) != 0) lout[(255 - (bits+3))/32] |= 1; \ + + /* Transpose first 64 bits of each hash into out */ + MIX( 0, 0x80000000, 7); + MIX( 4, 0x40000000, 7); + MIX( 8, 0x20000000, 7); + MIX( 12, 0x10000000, 7); + MIX( 16, 0x08000000, 7); + MIX( 20, 0x04000000, 7); + MIX( 24,
0x02000000, 7); + MIX( 28, 0x01000000, 7); + MIX( 32, 0x00800000, 7); + MIX( 36, 0x00400000, 7); + MIX( 40, 0x00200000, 7); + MIX( 44, 0x00100000, 7); + MIX( 48, 0x00080000, 7); + MIX( 52, 0x00040000, 7); + MIX( 56, 0x00020000, 7); + MIX( 60, 0x00010000, 7); + MIX( 64, 0x00008000, 7); + MIX( 68, 0x00004000, 7); + MIX( 72, 0x00002000, 7); + MIX( 76, 0x00001000, 7); + MIX( 80, 0x00000800, 7); + MIX( 84, 0x00000400, 7); + MIX( 88, 0x00000200, 7); + MIX( 92, 0x00000100, 7); + MIX( 96, 0x00000080, 7); + MIX(100, 0x00000040, 7); + MIX(104, 0x00000020, 7); + MIX(108, 0x00000010, 7); + MIX(112, 0x00000008, 7); + MIX(116, 0x00000004, 7); + MIX(120, 0x00000002, 7); + MIX(124, 0x00000001, 7); + + MIX(128, 0x80000000, 6); + MIX(132, 0x40000000, 6); + MIX(136, 0x20000000, 6); + MIX(140, 0x10000000, 6); + MIX(144, 0x08000000, 6); + MIX(148, 0x04000000, 6); + MIX(152, 0x02000000, 6); + MIX(156, 0x01000000, 6); + MIX(160, 0x00800000, 6); + MIX(164, 0x00400000, 6); + MIX(168, 0x00200000, 6); + MIX(172, 0x00100000, 6); + MIX(176, 0x00080000, 6); + MIX(180, 0x00040000, 6); + MIX(184, 0x00020000, 6); + MIX(188, 0x00010000, 6); + MIX(192, 0x00008000, 6); + MIX(196, 0x00004000, 6); + MIX(200, 0x00002000, 6); + MIX(204, 0x00001000, 6); + MIX(208, 0x00000800, 6); + MIX(212, 0x00000400, 6); + MIX(216, 0x00000200, 6); + MIX(220, 0x00000100, 6); + MIX(224, 0x00000080, 6); + MIX(228, 0x00000040, 6); + MIX(232, 0x00000020, 6); + MIX(236, 0x00000010, 6); + MIX(240, 0x00000008, 6); + MIX(244, 0x00000004, 6); + MIX(248, 0x00000002, 6); + MIX(252, 0x00000001, 6); + +#pragma unroll 8 + for (int i=0; i < 8; ++i) + out[i] = lout[i]; +} + +__global__ void combine_gpu_hash(int threads, uint32_t startNounce, uint32_t *out, uint32_t *hash2, uint32_t *hash3, uint32_t *hash4, uint32_t *hash5, uint32_t *nonceVector) +{ + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + uint32_t nounce = nonceVector[thread]; + uint32_t hashPosition = nounce - startNounce; + // The job
of the combine function has two parts. + // 1) compact the hashes into a small array + // 2) compute the combined value there + + // The compaction is achieved by still indexing the out array with "thread", + // while the other values are indexed by the nonce-derived hashPosition + + combine_hashes(&out[8 * thread], &hash2[8 * hashPosition], &hash3[16 * hashPosition], &hash4[16 * hashPosition], &hash5[16 * hashPosition]); + } +} + +// setup functions +__host__ void combine_cpu_init(int thr_id, int threads) +{ + // allocate memory for all results; NOTE(review): the cudaMalloc result is not checked + cudaMalloc(&d_hashoutput[thr_id], 8 * sizeof(uint32_t) * threads); +} + +void combine_cpu_hash(int thr_id, int threads, uint32_t startNounce, uint32_t *hash) +{ + // these copies are optional, since the hashes should already reside on the GPU by now + + const int threadsperblock = 128; + + // compute how many thread blocks we need + dim3 grid((threads + threadsperblock-1)/threadsperblock); + dim3 block(threadsperblock); + + // size of the dynamic shared memory region (would depend on the thread count; set to 0 here) + size_t shared_size = 0; + +// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size); + + combine_gpu_hash<<>>(threads, startNounce, d_hashoutput[thr_id], d_hash2output[thr_id], d_hash3output[thr_id], d_hash4output[thr_id], d_hash5output[thr_id], d_nonceVector[thr_id]); + + // since the hash evaluation still happens on the CPU, the results must be copied to the host in any case + cudaMemcpy(hash, d_hashoutput[thr_id], 8 * sizeof(uint32_t) * threads, cudaMemcpyDeviceToHost); +} diff --git a/cuda_combine.h b/cuda_combine.h new file mode 100644 index 0000000..ada3a21 --- /dev/null +++ b/cuda_combine.h @@ -0,0 +1,7 @@ +#ifndef _CUDA_COMBINE_H +#define _CUDA_COMBINE_H + +void combine_cpu_init(int thr_id, int threads); +void combine_cpu_hash(int thr_id, int threads, uint32_t startNounce, uint32_t *hash); + +#endif diff
--git a/cuda_fugue256.cu b/cuda_fugue256.cu new file mode 100644 index 0000000..0457130 --- /dev/null +++ b/cuda_fugue256.cu @@ -0,0 +1,782 @@ +#if 1 +/* Diese Funktion ist auf 84+32 Byte große Eingabedaten ausgerichtet (Heavycoin) */ +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + +#include +#include + +#include "sph_fugue.h" + +// heavy.cu +extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); + +// Folgende Definitionen später durch header ersetzen +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; + +// schon in sph_fugue.h definiert +//#define SPH_C32(x) ((uint32_t)(x ## U)) + +uint32_t *d_fugue256_hashoutput[8]; +uint32_t *d_resultNonce[8]; + +__constant__ uint32_t GPUstate[30]; // Single GPU +__constant__ uint32_t pTarget[8]; // Single GPU + +//__constant__ uint32_t mixtab0[256]; // 1K +//__constant__ uint32_t mixtab1[256]; // 1K +//__constant__ uint32_t mixtab2[256]; // 1K +//__constant__ uint32_t mixtab3[256]; // 1K + +texture mixTab0Tex; +texture mixTab1Tex; +texture mixTab2Tex; +texture mixTab3Tex; + +#define mixtab0(x) tex1Dfetch(mixTab0Tex, x) +#define mixtab1(x) tex1Dfetch(mixTab1Tex, x) +#define mixtab2(x) tex1Dfetch(mixTab2Tex, x) +#define mixtab3(x) tex1Dfetch(mixTab3Tex, x) + +/* TABELLEN */ +static const uint32_t mixtab0_cpu[] = { + SPH_C32(0x63633297), SPH_C32(0x7c7c6feb), SPH_C32(0x77775ec7), + SPH_C32(0x7b7b7af7), SPH_C32(0xf2f2e8e5), SPH_C32(0x6b6b0ab7), + SPH_C32(0x6f6f16a7), SPH_C32(0xc5c56d39), SPH_C32(0x303090c0), + SPH_C32(0x01010704), SPH_C32(0x67672e87), SPH_C32(0x2b2bd1ac), + SPH_C32(0xfefeccd5), SPH_C32(0xd7d71371), SPH_C32(0xabab7c9a), + SPH_C32(0x767659c3), SPH_C32(0xcaca4005), SPH_C32(0x8282a33e), + SPH_C32(0xc9c94909), SPH_C32(0x7d7d68ef), SPH_C32(0xfafad0c5), + SPH_C32(0x5959947f), SPH_C32(0x4747ce07), SPH_C32(0xf0f0e6ed), + SPH_C32(0xadad6e82), SPH_C32(0xd4d41a7d), SPH_C32(0xa2a243be), + SPH_C32(0xafaf608a), 
SPH_C32(0x9c9cf946), SPH_C32(0xa4a451a6), + SPH_C32(0x727245d3), SPH_C32(0xc0c0762d), SPH_C32(0xb7b728ea), + SPH_C32(0xfdfdc5d9), SPH_C32(0x9393d47a), SPH_C32(0x2626f298), + SPH_C32(0x363682d8), SPH_C32(0x3f3fbdfc), SPH_C32(0xf7f7f3f1), + SPH_C32(0xcccc521d), SPH_C32(0x34348cd0), SPH_C32(0xa5a556a2), + SPH_C32(0xe5e58db9), SPH_C32(0xf1f1e1e9), SPH_C32(0x71714cdf), + SPH_C32(0xd8d83e4d), SPH_C32(0x313197c4), SPH_C32(0x15156b54), + SPH_C32(0x04041c10), SPH_C32(0xc7c76331), SPH_C32(0x2323e98c), + SPH_C32(0xc3c37f21), SPH_C32(0x18184860), SPH_C32(0x9696cf6e), + SPH_C32(0x05051b14), SPH_C32(0x9a9aeb5e), SPH_C32(0x0707151c), + SPH_C32(0x12127e48), SPH_C32(0x8080ad36), SPH_C32(0xe2e298a5), + SPH_C32(0xebeba781), SPH_C32(0x2727f59c), SPH_C32(0xb2b233fe), + SPH_C32(0x757550cf), SPH_C32(0x09093f24), SPH_C32(0x8383a43a), + SPH_C32(0x2c2cc4b0), SPH_C32(0x1a1a4668), SPH_C32(0x1b1b416c), + SPH_C32(0x6e6e11a3), SPH_C32(0x5a5a9d73), SPH_C32(0xa0a04db6), + SPH_C32(0x5252a553), SPH_C32(0x3b3ba1ec), SPH_C32(0xd6d61475), + SPH_C32(0xb3b334fa), SPH_C32(0x2929dfa4), SPH_C32(0xe3e39fa1), + SPH_C32(0x2f2fcdbc), SPH_C32(0x8484b126), SPH_C32(0x5353a257), + SPH_C32(0xd1d10169), SPH_C32(0x00000000), SPH_C32(0xededb599), + SPH_C32(0x2020e080), SPH_C32(0xfcfcc2dd), SPH_C32(0xb1b13af2), + SPH_C32(0x5b5b9a77), SPH_C32(0x6a6a0db3), SPH_C32(0xcbcb4701), + SPH_C32(0xbebe17ce), SPH_C32(0x3939afe4), SPH_C32(0x4a4aed33), + SPH_C32(0x4c4cff2b), SPH_C32(0x5858937b), SPH_C32(0xcfcf5b11), + SPH_C32(0xd0d0066d), SPH_C32(0xefefbb91), SPH_C32(0xaaaa7b9e), + SPH_C32(0xfbfbd7c1), SPH_C32(0x4343d217), SPH_C32(0x4d4df82f), + SPH_C32(0x333399cc), SPH_C32(0x8585b622), SPH_C32(0x4545c00f), + SPH_C32(0xf9f9d9c9), SPH_C32(0x02020e08), SPH_C32(0x7f7f66e7), + SPH_C32(0x5050ab5b), SPH_C32(0x3c3cb4f0), SPH_C32(0x9f9ff04a), + SPH_C32(0xa8a87596), SPH_C32(0x5151ac5f), SPH_C32(0xa3a344ba), + SPH_C32(0x4040db1b), SPH_C32(0x8f8f800a), SPH_C32(0x9292d37e), + SPH_C32(0x9d9dfe42), SPH_C32(0x3838a8e0), SPH_C32(0xf5f5fdf9), + 
SPH_C32(0xbcbc19c6), SPH_C32(0xb6b62fee), SPH_C32(0xdada3045), + SPH_C32(0x2121e784), SPH_C32(0x10107040), SPH_C32(0xffffcbd1), + SPH_C32(0xf3f3efe1), SPH_C32(0xd2d20865), SPH_C32(0xcdcd5519), + SPH_C32(0x0c0c2430), SPH_C32(0x1313794c), SPH_C32(0xececb29d), + SPH_C32(0x5f5f8667), SPH_C32(0x9797c86a), SPH_C32(0x4444c70b), + SPH_C32(0x1717655c), SPH_C32(0xc4c46a3d), SPH_C32(0xa7a758aa), + SPH_C32(0x7e7e61e3), SPH_C32(0x3d3db3f4), SPH_C32(0x6464278b), + SPH_C32(0x5d5d886f), SPH_C32(0x19194f64), SPH_C32(0x737342d7), + SPH_C32(0x60603b9b), SPH_C32(0x8181aa32), SPH_C32(0x4f4ff627), + SPH_C32(0xdcdc225d), SPH_C32(0x2222ee88), SPH_C32(0x2a2ad6a8), + SPH_C32(0x9090dd76), SPH_C32(0x88889516), SPH_C32(0x4646c903), + SPH_C32(0xeeeebc95), SPH_C32(0xb8b805d6), SPH_C32(0x14146c50), + SPH_C32(0xdede2c55), SPH_C32(0x5e5e8163), SPH_C32(0x0b0b312c), + SPH_C32(0xdbdb3741), SPH_C32(0xe0e096ad), SPH_C32(0x32329ec8), + SPH_C32(0x3a3aa6e8), SPH_C32(0x0a0a3628), SPH_C32(0x4949e43f), + SPH_C32(0x06061218), SPH_C32(0x2424fc90), SPH_C32(0x5c5c8f6b), + SPH_C32(0xc2c27825), SPH_C32(0xd3d30f61), SPH_C32(0xacac6986), + SPH_C32(0x62623593), SPH_C32(0x9191da72), SPH_C32(0x9595c662), + SPH_C32(0xe4e48abd), SPH_C32(0x797974ff), SPH_C32(0xe7e783b1), + SPH_C32(0xc8c84e0d), SPH_C32(0x373785dc), SPH_C32(0x6d6d18af), + SPH_C32(0x8d8d8e02), SPH_C32(0xd5d51d79), SPH_C32(0x4e4ef123), + SPH_C32(0xa9a97292), SPH_C32(0x6c6c1fab), SPH_C32(0x5656b943), + SPH_C32(0xf4f4fafd), SPH_C32(0xeaeaa085), SPH_C32(0x6565208f), + SPH_C32(0x7a7a7df3), SPH_C32(0xaeae678e), SPH_C32(0x08083820), + SPH_C32(0xbaba0bde), SPH_C32(0x787873fb), SPH_C32(0x2525fb94), + SPH_C32(0x2e2ecab8), SPH_C32(0x1c1c5470), SPH_C32(0xa6a65fae), + SPH_C32(0xb4b421e6), SPH_C32(0xc6c66435), SPH_C32(0xe8e8ae8d), + SPH_C32(0xdddd2559), SPH_C32(0x747457cb), SPH_C32(0x1f1f5d7c), + SPH_C32(0x4b4bea37), SPH_C32(0xbdbd1ec2), SPH_C32(0x8b8b9c1a), + SPH_C32(0x8a8a9b1e), SPH_C32(0x70704bdb), SPH_C32(0x3e3ebaf8), + SPH_C32(0xb5b526e2), SPH_C32(0x66662983), 
SPH_C32(0x4848e33b), + SPH_C32(0x0303090c), SPH_C32(0xf6f6f4f5), SPH_C32(0x0e0e2a38), + SPH_C32(0x61613c9f), SPH_C32(0x35358bd4), SPH_C32(0x5757be47), + SPH_C32(0xb9b902d2), SPH_C32(0x8686bf2e), SPH_C32(0xc1c17129), + SPH_C32(0x1d1d5374), SPH_C32(0x9e9ef74e), SPH_C32(0xe1e191a9), + SPH_C32(0xf8f8decd), SPH_C32(0x9898e556), SPH_C32(0x11117744), + SPH_C32(0x696904bf), SPH_C32(0xd9d93949), SPH_C32(0x8e8e870e), + SPH_C32(0x9494c166), SPH_C32(0x9b9bec5a), SPH_C32(0x1e1e5a78), + SPH_C32(0x8787b82a), SPH_C32(0xe9e9a989), SPH_C32(0xcece5c15), + SPH_C32(0x5555b04f), SPH_C32(0x2828d8a0), SPH_C32(0xdfdf2b51), + SPH_C32(0x8c8c8906), SPH_C32(0xa1a14ab2), SPH_C32(0x89899212), + SPH_C32(0x0d0d2334), SPH_C32(0xbfbf10ca), SPH_C32(0xe6e684b5), + SPH_C32(0x4242d513), SPH_C32(0x686803bb), SPH_C32(0x4141dc1f), + SPH_C32(0x9999e252), SPH_C32(0x2d2dc3b4), SPH_C32(0x0f0f2d3c), + SPH_C32(0xb0b03df6), SPH_C32(0x5454b74b), SPH_C32(0xbbbb0cda), + SPH_C32(0x16166258) +}; + +static const uint32_t mixtab1_cpu[] = { + SPH_C32(0x97636332), SPH_C32(0xeb7c7c6f), SPH_C32(0xc777775e), + SPH_C32(0xf77b7b7a), SPH_C32(0xe5f2f2e8), SPH_C32(0xb76b6b0a), + SPH_C32(0xa76f6f16), SPH_C32(0x39c5c56d), SPH_C32(0xc0303090), + SPH_C32(0x04010107), SPH_C32(0x8767672e), SPH_C32(0xac2b2bd1), + SPH_C32(0xd5fefecc), SPH_C32(0x71d7d713), SPH_C32(0x9aabab7c), + SPH_C32(0xc3767659), SPH_C32(0x05caca40), SPH_C32(0x3e8282a3), + SPH_C32(0x09c9c949), SPH_C32(0xef7d7d68), SPH_C32(0xc5fafad0), + SPH_C32(0x7f595994), SPH_C32(0x074747ce), SPH_C32(0xedf0f0e6), + SPH_C32(0x82adad6e), SPH_C32(0x7dd4d41a), SPH_C32(0xbea2a243), + SPH_C32(0x8aafaf60), SPH_C32(0x469c9cf9), SPH_C32(0xa6a4a451), + SPH_C32(0xd3727245), SPH_C32(0x2dc0c076), SPH_C32(0xeab7b728), + SPH_C32(0xd9fdfdc5), SPH_C32(0x7a9393d4), SPH_C32(0x982626f2), + SPH_C32(0xd8363682), SPH_C32(0xfc3f3fbd), SPH_C32(0xf1f7f7f3), + SPH_C32(0x1dcccc52), SPH_C32(0xd034348c), SPH_C32(0xa2a5a556), + SPH_C32(0xb9e5e58d), SPH_C32(0xe9f1f1e1), SPH_C32(0xdf71714c), + SPH_C32(0x4dd8d83e), 
SPH_C32(0xc4313197), SPH_C32(0x5415156b), + SPH_C32(0x1004041c), SPH_C32(0x31c7c763), SPH_C32(0x8c2323e9), + SPH_C32(0x21c3c37f), SPH_C32(0x60181848), SPH_C32(0x6e9696cf), + SPH_C32(0x1405051b), SPH_C32(0x5e9a9aeb), SPH_C32(0x1c070715), + SPH_C32(0x4812127e), SPH_C32(0x368080ad), SPH_C32(0xa5e2e298), + SPH_C32(0x81ebeba7), SPH_C32(0x9c2727f5), SPH_C32(0xfeb2b233), + SPH_C32(0xcf757550), SPH_C32(0x2409093f), SPH_C32(0x3a8383a4), + SPH_C32(0xb02c2cc4), SPH_C32(0x681a1a46), SPH_C32(0x6c1b1b41), + SPH_C32(0xa36e6e11), SPH_C32(0x735a5a9d), SPH_C32(0xb6a0a04d), + SPH_C32(0x535252a5), SPH_C32(0xec3b3ba1), SPH_C32(0x75d6d614), + SPH_C32(0xfab3b334), SPH_C32(0xa42929df), SPH_C32(0xa1e3e39f), + SPH_C32(0xbc2f2fcd), SPH_C32(0x268484b1), SPH_C32(0x575353a2), + SPH_C32(0x69d1d101), SPH_C32(0x00000000), SPH_C32(0x99ededb5), + SPH_C32(0x802020e0), SPH_C32(0xddfcfcc2), SPH_C32(0xf2b1b13a), + SPH_C32(0x775b5b9a), SPH_C32(0xb36a6a0d), SPH_C32(0x01cbcb47), + SPH_C32(0xcebebe17), SPH_C32(0xe43939af), SPH_C32(0x334a4aed), + SPH_C32(0x2b4c4cff), SPH_C32(0x7b585893), SPH_C32(0x11cfcf5b), + SPH_C32(0x6dd0d006), SPH_C32(0x91efefbb), SPH_C32(0x9eaaaa7b), + SPH_C32(0xc1fbfbd7), SPH_C32(0x174343d2), SPH_C32(0x2f4d4df8), + SPH_C32(0xcc333399), SPH_C32(0x228585b6), SPH_C32(0x0f4545c0), + SPH_C32(0xc9f9f9d9), SPH_C32(0x0802020e), SPH_C32(0xe77f7f66), + SPH_C32(0x5b5050ab), SPH_C32(0xf03c3cb4), SPH_C32(0x4a9f9ff0), + SPH_C32(0x96a8a875), SPH_C32(0x5f5151ac), SPH_C32(0xbaa3a344), + SPH_C32(0x1b4040db), SPH_C32(0x0a8f8f80), SPH_C32(0x7e9292d3), + SPH_C32(0x429d9dfe), SPH_C32(0xe03838a8), SPH_C32(0xf9f5f5fd), + SPH_C32(0xc6bcbc19), SPH_C32(0xeeb6b62f), SPH_C32(0x45dada30), + SPH_C32(0x842121e7), SPH_C32(0x40101070), SPH_C32(0xd1ffffcb), + SPH_C32(0xe1f3f3ef), SPH_C32(0x65d2d208), SPH_C32(0x19cdcd55), + SPH_C32(0x300c0c24), SPH_C32(0x4c131379), SPH_C32(0x9dececb2), + SPH_C32(0x675f5f86), SPH_C32(0x6a9797c8), SPH_C32(0x0b4444c7), + SPH_C32(0x5c171765), SPH_C32(0x3dc4c46a), SPH_C32(0xaaa7a758), + 
SPH_C32(0xe37e7e61), SPH_C32(0xf43d3db3), SPH_C32(0x8b646427), + SPH_C32(0x6f5d5d88), SPH_C32(0x6419194f), SPH_C32(0xd7737342), + SPH_C32(0x9b60603b), SPH_C32(0x328181aa), SPH_C32(0x274f4ff6), + SPH_C32(0x5ddcdc22), SPH_C32(0x882222ee), SPH_C32(0xa82a2ad6), + SPH_C32(0x769090dd), SPH_C32(0x16888895), SPH_C32(0x034646c9), + SPH_C32(0x95eeeebc), SPH_C32(0xd6b8b805), SPH_C32(0x5014146c), + SPH_C32(0x55dede2c), SPH_C32(0x635e5e81), SPH_C32(0x2c0b0b31), + SPH_C32(0x41dbdb37), SPH_C32(0xade0e096), SPH_C32(0xc832329e), + SPH_C32(0xe83a3aa6), SPH_C32(0x280a0a36), SPH_C32(0x3f4949e4), + SPH_C32(0x18060612), SPH_C32(0x902424fc), SPH_C32(0x6b5c5c8f), + SPH_C32(0x25c2c278), SPH_C32(0x61d3d30f), SPH_C32(0x86acac69), + SPH_C32(0x93626235), SPH_C32(0x729191da), SPH_C32(0x629595c6), + SPH_C32(0xbde4e48a), SPH_C32(0xff797974), SPH_C32(0xb1e7e783), + SPH_C32(0x0dc8c84e), SPH_C32(0xdc373785), SPH_C32(0xaf6d6d18), + SPH_C32(0x028d8d8e), SPH_C32(0x79d5d51d), SPH_C32(0x234e4ef1), + SPH_C32(0x92a9a972), SPH_C32(0xab6c6c1f), SPH_C32(0x435656b9), + SPH_C32(0xfdf4f4fa), SPH_C32(0x85eaeaa0), SPH_C32(0x8f656520), + SPH_C32(0xf37a7a7d), SPH_C32(0x8eaeae67), SPH_C32(0x20080838), + SPH_C32(0xdebaba0b), SPH_C32(0xfb787873), SPH_C32(0x942525fb), + SPH_C32(0xb82e2eca), SPH_C32(0x701c1c54), SPH_C32(0xaea6a65f), + SPH_C32(0xe6b4b421), SPH_C32(0x35c6c664), SPH_C32(0x8de8e8ae), + SPH_C32(0x59dddd25), SPH_C32(0xcb747457), SPH_C32(0x7c1f1f5d), + SPH_C32(0x374b4bea), SPH_C32(0xc2bdbd1e), SPH_C32(0x1a8b8b9c), + SPH_C32(0x1e8a8a9b), SPH_C32(0xdb70704b), SPH_C32(0xf83e3eba), + SPH_C32(0xe2b5b526), SPH_C32(0x83666629), SPH_C32(0x3b4848e3), + SPH_C32(0x0c030309), SPH_C32(0xf5f6f6f4), SPH_C32(0x380e0e2a), + SPH_C32(0x9f61613c), SPH_C32(0xd435358b), SPH_C32(0x475757be), + SPH_C32(0xd2b9b902), SPH_C32(0x2e8686bf), SPH_C32(0x29c1c171), + SPH_C32(0x741d1d53), SPH_C32(0x4e9e9ef7), SPH_C32(0xa9e1e191), + SPH_C32(0xcdf8f8de), SPH_C32(0x569898e5), SPH_C32(0x44111177), + SPH_C32(0xbf696904), SPH_C32(0x49d9d939), 
SPH_C32(0x0e8e8e87), + SPH_C32(0x669494c1), SPH_C32(0x5a9b9bec), SPH_C32(0x781e1e5a), + SPH_C32(0x2a8787b8), SPH_C32(0x89e9e9a9), SPH_C32(0x15cece5c), + SPH_C32(0x4f5555b0), SPH_C32(0xa02828d8), SPH_C32(0x51dfdf2b), + SPH_C32(0x068c8c89), SPH_C32(0xb2a1a14a), SPH_C32(0x12898992), + SPH_C32(0x340d0d23), SPH_C32(0xcabfbf10), SPH_C32(0xb5e6e684), + SPH_C32(0x134242d5), SPH_C32(0xbb686803), SPH_C32(0x1f4141dc), + SPH_C32(0x529999e2), SPH_C32(0xb42d2dc3), SPH_C32(0x3c0f0f2d), + SPH_C32(0xf6b0b03d), SPH_C32(0x4b5454b7), SPH_C32(0xdabbbb0c), + SPH_C32(0x58161662) +}; + +static const uint32_t mixtab2_cpu[] = { + SPH_C32(0x32976363), SPH_C32(0x6feb7c7c), SPH_C32(0x5ec77777), + SPH_C32(0x7af77b7b), SPH_C32(0xe8e5f2f2), SPH_C32(0x0ab76b6b), + SPH_C32(0x16a76f6f), SPH_C32(0x6d39c5c5), SPH_C32(0x90c03030), + SPH_C32(0x07040101), SPH_C32(0x2e876767), SPH_C32(0xd1ac2b2b), + SPH_C32(0xccd5fefe), SPH_C32(0x1371d7d7), SPH_C32(0x7c9aabab), + SPH_C32(0x59c37676), SPH_C32(0x4005caca), SPH_C32(0xa33e8282), + SPH_C32(0x4909c9c9), SPH_C32(0x68ef7d7d), SPH_C32(0xd0c5fafa), + SPH_C32(0x947f5959), SPH_C32(0xce074747), SPH_C32(0xe6edf0f0), + SPH_C32(0x6e82adad), SPH_C32(0x1a7dd4d4), SPH_C32(0x43bea2a2), + SPH_C32(0x608aafaf), SPH_C32(0xf9469c9c), SPH_C32(0x51a6a4a4), + SPH_C32(0x45d37272), SPH_C32(0x762dc0c0), SPH_C32(0x28eab7b7), + SPH_C32(0xc5d9fdfd), SPH_C32(0xd47a9393), SPH_C32(0xf2982626), + SPH_C32(0x82d83636), SPH_C32(0xbdfc3f3f), SPH_C32(0xf3f1f7f7), + SPH_C32(0x521dcccc), SPH_C32(0x8cd03434), SPH_C32(0x56a2a5a5), + SPH_C32(0x8db9e5e5), SPH_C32(0xe1e9f1f1), SPH_C32(0x4cdf7171), + SPH_C32(0x3e4dd8d8), SPH_C32(0x97c43131), SPH_C32(0x6b541515), + SPH_C32(0x1c100404), SPH_C32(0x6331c7c7), SPH_C32(0xe98c2323), + SPH_C32(0x7f21c3c3), SPH_C32(0x48601818), SPH_C32(0xcf6e9696), + SPH_C32(0x1b140505), SPH_C32(0xeb5e9a9a), SPH_C32(0x151c0707), + SPH_C32(0x7e481212), SPH_C32(0xad368080), SPH_C32(0x98a5e2e2), + SPH_C32(0xa781ebeb), SPH_C32(0xf59c2727), SPH_C32(0x33feb2b2), + SPH_C32(0x50cf7575), 
SPH_C32(0x3f240909), SPH_C32(0xa43a8383), + SPH_C32(0xc4b02c2c), SPH_C32(0x46681a1a), SPH_C32(0x416c1b1b), + SPH_C32(0x11a36e6e), SPH_C32(0x9d735a5a), SPH_C32(0x4db6a0a0), + SPH_C32(0xa5535252), SPH_C32(0xa1ec3b3b), SPH_C32(0x1475d6d6), + SPH_C32(0x34fab3b3), SPH_C32(0xdfa42929), SPH_C32(0x9fa1e3e3), + SPH_C32(0xcdbc2f2f), SPH_C32(0xb1268484), SPH_C32(0xa2575353), + SPH_C32(0x0169d1d1), SPH_C32(0x00000000), SPH_C32(0xb599eded), + SPH_C32(0xe0802020), SPH_C32(0xc2ddfcfc), SPH_C32(0x3af2b1b1), + SPH_C32(0x9a775b5b), SPH_C32(0x0db36a6a), SPH_C32(0x4701cbcb), + SPH_C32(0x17cebebe), SPH_C32(0xafe43939), SPH_C32(0xed334a4a), + SPH_C32(0xff2b4c4c), SPH_C32(0x937b5858), SPH_C32(0x5b11cfcf), + SPH_C32(0x066dd0d0), SPH_C32(0xbb91efef), SPH_C32(0x7b9eaaaa), + SPH_C32(0xd7c1fbfb), SPH_C32(0xd2174343), SPH_C32(0xf82f4d4d), + SPH_C32(0x99cc3333), SPH_C32(0xb6228585), SPH_C32(0xc00f4545), + SPH_C32(0xd9c9f9f9), SPH_C32(0x0e080202), SPH_C32(0x66e77f7f), + SPH_C32(0xab5b5050), SPH_C32(0xb4f03c3c), SPH_C32(0xf04a9f9f), + SPH_C32(0x7596a8a8), SPH_C32(0xac5f5151), SPH_C32(0x44baa3a3), + SPH_C32(0xdb1b4040), SPH_C32(0x800a8f8f), SPH_C32(0xd37e9292), + SPH_C32(0xfe429d9d), SPH_C32(0xa8e03838), SPH_C32(0xfdf9f5f5), + SPH_C32(0x19c6bcbc), SPH_C32(0x2feeb6b6), SPH_C32(0x3045dada), + SPH_C32(0xe7842121), SPH_C32(0x70401010), SPH_C32(0xcbd1ffff), + SPH_C32(0xefe1f3f3), SPH_C32(0x0865d2d2), SPH_C32(0x5519cdcd), + SPH_C32(0x24300c0c), SPH_C32(0x794c1313), SPH_C32(0xb29decec), + SPH_C32(0x86675f5f), SPH_C32(0xc86a9797), SPH_C32(0xc70b4444), + SPH_C32(0x655c1717), SPH_C32(0x6a3dc4c4), SPH_C32(0x58aaa7a7), + SPH_C32(0x61e37e7e), SPH_C32(0xb3f43d3d), SPH_C32(0x278b6464), + SPH_C32(0x886f5d5d), SPH_C32(0x4f641919), SPH_C32(0x42d77373), + SPH_C32(0x3b9b6060), SPH_C32(0xaa328181), SPH_C32(0xf6274f4f), + SPH_C32(0x225ddcdc), SPH_C32(0xee882222), SPH_C32(0xd6a82a2a), + SPH_C32(0xdd769090), SPH_C32(0x95168888), SPH_C32(0xc9034646), + SPH_C32(0xbc95eeee), SPH_C32(0x05d6b8b8), SPH_C32(0x6c501414), + 
SPH_C32(0x2c55dede), SPH_C32(0x81635e5e), SPH_C32(0x312c0b0b), + SPH_C32(0x3741dbdb), SPH_C32(0x96ade0e0), SPH_C32(0x9ec83232), + SPH_C32(0xa6e83a3a), SPH_C32(0x36280a0a), SPH_C32(0xe43f4949), + SPH_C32(0x12180606), SPH_C32(0xfc902424), SPH_C32(0x8f6b5c5c), + SPH_C32(0x7825c2c2), SPH_C32(0x0f61d3d3), SPH_C32(0x6986acac), + SPH_C32(0x35936262), SPH_C32(0xda729191), SPH_C32(0xc6629595), + SPH_C32(0x8abde4e4), SPH_C32(0x74ff7979), SPH_C32(0x83b1e7e7), + SPH_C32(0x4e0dc8c8), SPH_C32(0x85dc3737), SPH_C32(0x18af6d6d), + SPH_C32(0x8e028d8d), SPH_C32(0x1d79d5d5), SPH_C32(0xf1234e4e), + SPH_C32(0x7292a9a9), SPH_C32(0x1fab6c6c), SPH_C32(0xb9435656), + SPH_C32(0xfafdf4f4), SPH_C32(0xa085eaea), SPH_C32(0x208f6565), + SPH_C32(0x7df37a7a), SPH_C32(0x678eaeae), SPH_C32(0x38200808), + SPH_C32(0x0bdebaba), SPH_C32(0x73fb7878), SPH_C32(0xfb942525), + SPH_C32(0xcab82e2e), SPH_C32(0x54701c1c), SPH_C32(0x5faea6a6), + SPH_C32(0x21e6b4b4), SPH_C32(0x6435c6c6), SPH_C32(0xae8de8e8), + SPH_C32(0x2559dddd), SPH_C32(0x57cb7474), SPH_C32(0x5d7c1f1f), + SPH_C32(0xea374b4b), SPH_C32(0x1ec2bdbd), SPH_C32(0x9c1a8b8b), + SPH_C32(0x9b1e8a8a), SPH_C32(0x4bdb7070), SPH_C32(0xbaf83e3e), + SPH_C32(0x26e2b5b5), SPH_C32(0x29836666), SPH_C32(0xe33b4848), + SPH_C32(0x090c0303), SPH_C32(0xf4f5f6f6), SPH_C32(0x2a380e0e), + SPH_C32(0x3c9f6161), SPH_C32(0x8bd43535), SPH_C32(0xbe475757), + SPH_C32(0x02d2b9b9), SPH_C32(0xbf2e8686), SPH_C32(0x7129c1c1), + SPH_C32(0x53741d1d), SPH_C32(0xf74e9e9e), SPH_C32(0x91a9e1e1), + SPH_C32(0xdecdf8f8), SPH_C32(0xe5569898), SPH_C32(0x77441111), + SPH_C32(0x04bf6969), SPH_C32(0x3949d9d9), SPH_C32(0x870e8e8e), + SPH_C32(0xc1669494), SPH_C32(0xec5a9b9b), SPH_C32(0x5a781e1e), + SPH_C32(0xb82a8787), SPH_C32(0xa989e9e9), SPH_C32(0x5c15cece), + SPH_C32(0xb04f5555), SPH_C32(0xd8a02828), SPH_C32(0x2b51dfdf), + SPH_C32(0x89068c8c), SPH_C32(0x4ab2a1a1), SPH_C32(0x92128989), + SPH_C32(0x23340d0d), SPH_C32(0x10cabfbf), SPH_C32(0x84b5e6e6), + SPH_C32(0xd5134242), SPH_C32(0x03bb6868), 
SPH_C32(0xdc1f4141), + SPH_C32(0xe2529999), SPH_C32(0xc3b42d2d), SPH_C32(0x2d3c0f0f), + SPH_C32(0x3df6b0b0), SPH_C32(0xb74b5454), SPH_C32(0x0cdabbbb), + SPH_C32(0x62581616) +}; + +static const uint32_t mixtab3_cpu[] = { + SPH_C32(0x63329763), SPH_C32(0x7c6feb7c), SPH_C32(0x775ec777), + SPH_C32(0x7b7af77b), SPH_C32(0xf2e8e5f2), SPH_C32(0x6b0ab76b), + SPH_C32(0x6f16a76f), SPH_C32(0xc56d39c5), SPH_C32(0x3090c030), + SPH_C32(0x01070401), SPH_C32(0x672e8767), SPH_C32(0x2bd1ac2b), + SPH_C32(0xfeccd5fe), SPH_C32(0xd71371d7), SPH_C32(0xab7c9aab), + SPH_C32(0x7659c376), SPH_C32(0xca4005ca), SPH_C32(0x82a33e82), + SPH_C32(0xc94909c9), SPH_C32(0x7d68ef7d), SPH_C32(0xfad0c5fa), + SPH_C32(0x59947f59), SPH_C32(0x47ce0747), SPH_C32(0xf0e6edf0), + SPH_C32(0xad6e82ad), SPH_C32(0xd41a7dd4), SPH_C32(0xa243bea2), + SPH_C32(0xaf608aaf), SPH_C32(0x9cf9469c), SPH_C32(0xa451a6a4), + SPH_C32(0x7245d372), SPH_C32(0xc0762dc0), SPH_C32(0xb728eab7), + SPH_C32(0xfdc5d9fd), SPH_C32(0x93d47a93), SPH_C32(0x26f29826), + SPH_C32(0x3682d836), SPH_C32(0x3fbdfc3f), SPH_C32(0xf7f3f1f7), + SPH_C32(0xcc521dcc), SPH_C32(0x348cd034), SPH_C32(0xa556a2a5), + SPH_C32(0xe58db9e5), SPH_C32(0xf1e1e9f1), SPH_C32(0x714cdf71), + SPH_C32(0xd83e4dd8), SPH_C32(0x3197c431), SPH_C32(0x156b5415), + SPH_C32(0x041c1004), SPH_C32(0xc76331c7), SPH_C32(0x23e98c23), + SPH_C32(0xc37f21c3), SPH_C32(0x18486018), SPH_C32(0x96cf6e96), + SPH_C32(0x051b1405), SPH_C32(0x9aeb5e9a), SPH_C32(0x07151c07), + SPH_C32(0x127e4812), SPH_C32(0x80ad3680), SPH_C32(0xe298a5e2), + SPH_C32(0xeba781eb), SPH_C32(0x27f59c27), SPH_C32(0xb233feb2), + SPH_C32(0x7550cf75), SPH_C32(0x093f2409), SPH_C32(0x83a43a83), + SPH_C32(0x2cc4b02c), SPH_C32(0x1a46681a), SPH_C32(0x1b416c1b), + SPH_C32(0x6e11a36e), SPH_C32(0x5a9d735a), SPH_C32(0xa04db6a0), + SPH_C32(0x52a55352), SPH_C32(0x3ba1ec3b), SPH_C32(0xd61475d6), + SPH_C32(0xb334fab3), SPH_C32(0x29dfa429), SPH_C32(0xe39fa1e3), + SPH_C32(0x2fcdbc2f), SPH_C32(0x84b12684), SPH_C32(0x53a25753), + SPH_C32(0xd10169d1), 
SPH_C32(0x00000000), SPH_C32(0xedb599ed), + SPH_C32(0x20e08020), SPH_C32(0xfcc2ddfc), SPH_C32(0xb13af2b1), + SPH_C32(0x5b9a775b), SPH_C32(0x6a0db36a), SPH_C32(0xcb4701cb), + SPH_C32(0xbe17cebe), SPH_C32(0x39afe439), SPH_C32(0x4aed334a), + SPH_C32(0x4cff2b4c), SPH_C32(0x58937b58), SPH_C32(0xcf5b11cf), + SPH_C32(0xd0066dd0), SPH_C32(0xefbb91ef), SPH_C32(0xaa7b9eaa), + SPH_C32(0xfbd7c1fb), SPH_C32(0x43d21743), SPH_C32(0x4df82f4d), + SPH_C32(0x3399cc33), SPH_C32(0x85b62285), SPH_C32(0x45c00f45), + SPH_C32(0xf9d9c9f9), SPH_C32(0x020e0802), SPH_C32(0x7f66e77f), + SPH_C32(0x50ab5b50), SPH_C32(0x3cb4f03c), SPH_C32(0x9ff04a9f), + SPH_C32(0xa87596a8), SPH_C32(0x51ac5f51), SPH_C32(0xa344baa3), + SPH_C32(0x40db1b40), SPH_C32(0x8f800a8f), SPH_C32(0x92d37e92), + SPH_C32(0x9dfe429d), SPH_C32(0x38a8e038), SPH_C32(0xf5fdf9f5), + SPH_C32(0xbc19c6bc), SPH_C32(0xb62feeb6), SPH_C32(0xda3045da), + SPH_C32(0x21e78421), SPH_C32(0x10704010), SPH_C32(0xffcbd1ff), + SPH_C32(0xf3efe1f3), SPH_C32(0xd20865d2), SPH_C32(0xcd5519cd), + SPH_C32(0x0c24300c), SPH_C32(0x13794c13), SPH_C32(0xecb29dec), + SPH_C32(0x5f86675f), SPH_C32(0x97c86a97), SPH_C32(0x44c70b44), + SPH_C32(0x17655c17), SPH_C32(0xc46a3dc4), SPH_C32(0xa758aaa7), + SPH_C32(0x7e61e37e), SPH_C32(0x3db3f43d), SPH_C32(0x64278b64), + SPH_C32(0x5d886f5d), SPH_C32(0x194f6419), SPH_C32(0x7342d773), + SPH_C32(0x603b9b60), SPH_C32(0x81aa3281), SPH_C32(0x4ff6274f), + SPH_C32(0xdc225ddc), SPH_C32(0x22ee8822), SPH_C32(0x2ad6a82a), + SPH_C32(0x90dd7690), SPH_C32(0x88951688), SPH_C32(0x46c90346), + SPH_C32(0xeebc95ee), SPH_C32(0xb805d6b8), SPH_C32(0x146c5014), + SPH_C32(0xde2c55de), SPH_C32(0x5e81635e), SPH_C32(0x0b312c0b), + SPH_C32(0xdb3741db), SPH_C32(0xe096ade0), SPH_C32(0x329ec832), + SPH_C32(0x3aa6e83a), SPH_C32(0x0a36280a), SPH_C32(0x49e43f49), + SPH_C32(0x06121806), SPH_C32(0x24fc9024), SPH_C32(0x5c8f6b5c), + SPH_C32(0xc27825c2), SPH_C32(0xd30f61d3), SPH_C32(0xac6986ac), + SPH_C32(0x62359362), SPH_C32(0x91da7291), SPH_C32(0x95c66295), + 
SPH_C32(0xe48abde4), SPH_C32(0x7974ff79), SPH_C32(0xe783b1e7), + SPH_C32(0xc84e0dc8), SPH_C32(0x3785dc37), SPH_C32(0x6d18af6d), + SPH_C32(0x8d8e028d), SPH_C32(0xd51d79d5), SPH_C32(0x4ef1234e), + SPH_C32(0xa97292a9), SPH_C32(0x6c1fab6c), SPH_C32(0x56b94356), + SPH_C32(0xf4fafdf4), SPH_C32(0xeaa085ea), SPH_C32(0x65208f65), + SPH_C32(0x7a7df37a), SPH_C32(0xae678eae), SPH_C32(0x08382008), + SPH_C32(0xba0bdeba), SPH_C32(0x7873fb78), SPH_C32(0x25fb9425), + SPH_C32(0x2ecab82e), SPH_C32(0x1c54701c), SPH_C32(0xa65faea6), + SPH_C32(0xb421e6b4), SPH_C32(0xc66435c6), SPH_C32(0xe8ae8de8), + SPH_C32(0xdd2559dd), SPH_C32(0x7457cb74), SPH_C32(0x1f5d7c1f), + SPH_C32(0x4bea374b), SPH_C32(0xbd1ec2bd), SPH_C32(0x8b9c1a8b), + SPH_C32(0x8a9b1e8a), SPH_C32(0x704bdb70), SPH_C32(0x3ebaf83e), + SPH_C32(0xb526e2b5), SPH_C32(0x66298366), SPH_C32(0x48e33b48), + SPH_C32(0x03090c03), SPH_C32(0xf6f4f5f6), SPH_C32(0x0e2a380e), + SPH_C32(0x613c9f61), SPH_C32(0x358bd435), SPH_C32(0x57be4757), + SPH_C32(0xb902d2b9), SPH_C32(0x86bf2e86), SPH_C32(0xc17129c1), + SPH_C32(0x1d53741d), SPH_C32(0x9ef74e9e), SPH_C32(0xe191a9e1), + SPH_C32(0xf8decdf8), SPH_C32(0x98e55698), SPH_C32(0x11774411), + SPH_C32(0x6904bf69), SPH_C32(0xd93949d9), SPH_C32(0x8e870e8e), + SPH_C32(0x94c16694), SPH_C32(0x9bec5a9b), SPH_C32(0x1e5a781e), + SPH_C32(0x87b82a87), SPH_C32(0xe9a989e9), SPH_C32(0xce5c15ce), + SPH_C32(0x55b04f55), SPH_C32(0x28d8a028), SPH_C32(0xdf2b51df), + SPH_C32(0x8c89068c), SPH_C32(0xa14ab2a1), SPH_C32(0x89921289), + SPH_C32(0x0d23340d), SPH_C32(0xbf10cabf), SPH_C32(0xe684b5e6), + SPH_C32(0x42d51342), SPH_C32(0x6803bb68), SPH_C32(0x41dc1f41), + SPH_C32(0x99e25299), SPH_C32(0x2dc3b42d), SPH_C32(0x0f2d3c0f), + SPH_C32(0xb03df6b0), SPH_C32(0x54b74b54), SPH_C32(0xbb0cdabb), + SPH_C32(0x16625816) +}; + +#define TIX2(q, x00, x01, x08, x10, x24) { \ + x10 ^= x00; \ + x00 = (q); \ + x08 ^= x00; \ + x01 ^= x24; \ + } + +#define TIX3(q, x00, x01, x04, x08, x16, x27, x30) { \ + x16 ^= x00; \ + x00 = (q); \ + x08 ^= x00; \ 
+    x01 ^= x27; \
+    x04 ^= x30; \
+    }
+
+#define TIX4(q, x00, x01, x04, x07, x08, x22, x24, x27, x30) { /* input step: fold x00 into x22, load word q into x00, xor it into x08, then xor x24/x27/x30 into x01/x04/x07 */ \
+    x22 ^= x00; \
+    x00 = (q); \
+    x08 ^= x00; \
+    x01 ^= x24; \
+    x04 ^= x27; \
+    x07 ^= x30; \
+    }
+
+#define CMIX30(x00, x01, x02, x04, x05, x06, x15, x16, x17) { /* xors x04,x05,x06 into x00,x01,x02 and again into x15,x16,x17 */ \
+    x00 ^= x04; \
+    x01 ^= x05; \
+    x02 ^= x06; \
+    x15 ^= x04; \
+    x16 ^= x05; \
+    x17 ^= x06; \
+    }
+
+#define CMIX36(x00, x01, x02, x04, x05, x06, x18, x19, x20) { /* same as CMIX30 but the second target triple is x18,x19,x20 */ \
+    x00 ^= x04; \
+    x01 ^= x05; \
+    x02 ^= x06; \
+    x18 ^= x04; \
+    x19 ^= x05; \
+    x20 ^= x06; \
+    }
+
+#define SMIX(x0, x1, x2, x3) { /* table-driven mix of four words, updated in place: cN = xor of all four table lookups of word xN; rN = xor of the mixtabN lookups of every word except xN */ \
+    uint32_t c0 = 0; \
+    uint32_t c1 = 0; \
+    uint32_t c2 = 0; \
+    uint32_t c3 = 0; \
+    uint32_t r0 = 0; \
+    uint32_t r1 = 0; \
+    uint32_t r2 = 0; \
+    uint32_t r3 = 0; \
+    uint32_t tmp; \
+    tmp = mixtab0(x0 >> 24); \
+    c0 ^= tmp; \
+    tmp = mixtab1((x0 >> 16) & 0xFF); \
+    c0 ^= tmp; \
+    r1 ^= tmp; \
+    tmp = mixtab2((x0 >> 8) & 0xFF); \
+    c0 ^= tmp; \
+    r2 ^= tmp; \
+    tmp = mixtab3(x0 & 0xFF); \
+    c0 ^= tmp; \
+    r3 ^= tmp; \
+    tmp = mixtab0(x1 >> 24); \
+    c1 ^= tmp; \
+    r0 ^= tmp; \
+    tmp = mixtab1((x1 >> 16) & 0xFF); \
+    c1 ^= tmp; \
+    tmp = mixtab2((x1 >> 8) & 0xFF); \
+    c1 ^= tmp; \
+    r2 ^= tmp; \
+    tmp = mixtab3(x1 & 0xFF); \
+    c1 ^= tmp; \
+    r3 ^= tmp; \
+    tmp = mixtab0(x2 >> 24); \
+    c2 ^= tmp; \
+    r0 ^= tmp; \
+    tmp = mixtab1((x2 >> 16) & 0xFF); \
+    c2 ^= tmp; \
+    r1 ^= tmp; \
+    tmp = mixtab2((x2 >> 8) & 0xFF); \
+    c2 ^= tmp; \
+    tmp = mixtab3(x2 & 0xFF); \
+    c2 ^= tmp; \
+    r3 ^= tmp; \
+    tmp = mixtab0(x3 >> 24); \
+    c3 ^= tmp; \
+    r0 ^= tmp; \
+    tmp = mixtab1((x3 >> 16) & 0xFF); \
+    c3 ^= tmp; \
+    r1 ^= tmp; \
+    tmp = mixtab2((x3 >> 8) & 0xFF); \
+    c3 ^= tmp; \
+    r2 ^= tmp; \
+    tmp = mixtab3(x3 & 0xFF); \
+    c3 ^= tmp; \
+    x0 = ((c0 ^ r0) & SPH_C32(0xFF000000)) /* each output word is assembled byte by byte from a different (cN, shifted rN) pair */ \
+        | ((c1 ^ r1) & SPH_C32(0x00FF0000)) \
+        | ((c2 ^ r2) & SPH_C32(0x0000FF00)) \
+        | ((c3 ^ r3) & SPH_C32(0x000000FF)); \
+    x1 = ((c1 ^ (r0 << 8)) & SPH_C32(0xFF000000)) \
+        | ((c2 ^ (r1 << 8)) & SPH_C32(0x00FF0000)) \
+        | ((c3 ^ (r2 << 8)) &
SPH_C32(0x0000FF00)) \
+        | ((c0 ^ (r3 >> 24)) & SPH_C32(0x000000FF)); \
+    x2 = ((c2 ^ (r0 << 16)) & SPH_C32(0xFF000000)) \
+        | ((c3 ^ (r1 << 16)) & SPH_C32(0x00FF0000)) \
+        | ((c0 ^ (r2 >> 16)) & SPH_C32(0x0000FF00)) \
+        | ((c1 ^ (r3 >> 16)) & SPH_C32(0x000000FF)); \
+    x3 = ((c3 ^ (r0 << 24)) & SPH_C32(0xFF000000)) \
+        | ((c0 ^ (r1 >> 8)) & SPH_C32(0x00FF0000)) \
+        | ((c1 ^ (r2 >> 8)) & SPH_C32(0x0000FF00)) \
+        | ((c2 ^ (r3 >> 8)) & SPH_C32(0x000000FF)); \
+    /* */ \
+    }
+
+/*
+ * Shorthand for the words of a local state array named `sc`.
+ * NOTE: fugue256_gpu_hash declares sc[30], so only S00..S29 are valid there;
+ * S30..S35 would index past the end of that array.
+ */
+#define S00 (sc[ 0])
+#define S01 (sc[ 1])
+#define S02 (sc[ 2])
+#define S03 (sc[ 3])
+#define S04 (sc[ 4])
+#define S05 (sc[ 5])
+#define S06 (sc[ 6])
+#define S07 (sc[ 7])
+#define S08 (sc[ 8])
+#define S09 (sc[ 9])
+#define S10 (sc[10])
+#define S11 (sc[11])
+#define S12 (sc[12])
+#define S13 (sc[13])
+#define S14 (sc[14])
+#define S15 (sc[15])
+#define S16 (sc[16])
+#define S17 (sc[17])
+#define S18 (sc[18])
+#define S19 (sc[19])
+#define S20 (sc[20])
+#define S21 (sc[21])
+#define S22 (sc[22])
+#define S23 (sc[23])
+#define S24 (sc[24])
+#define S25 (sc[25])
+#define S26 (sc[26])
+#define S27 (sc[27])
+#define S28 (sc[28])
+#define S29 (sc[29])
+#define S30 (sc[30])
+#define S31 (sc[31])
+#define S32 (sc[32])
+#define S33 (sc[33])
+#define S34 (sc[34])
+#define S35 (sc[35])
+
+/*
+ * Reverse the byte order of a 32-bit value.
+ * The argument is expanded four times, so it must have no side effects.
+ */
+#define SWAB32(x) ( (((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24) )
+/* GPU functions */
+
+/*
+ * Finishes a Fugue-256 hash for one nonce per thread and reports a nonce
+ * whose hash does not exceed the target.
+ *
+ * Each thread with index < threads copies the 30-word state from GPUstate,
+ * absorbs the byte-swapped nonce (startNounce + thread index) followed by the
+ * words 0 and 0x280, runs the closing rounds, and compares the resulting
+ * 8-word hash with pTarget.
+ *
+ *   thr_id      not used by the kernel body
+ *   threads     number of nonces to test in this launch
+ *   startNounce nonce tested by thread 0
+ *   outputHash  not used; the hash itself is never written out
+ *   resNounce   resNounce[0] is lowered to the smallest byte-swapped nonce
+ *               among the threads that met the target; the caller must set
+ *               it to 0xFFFFFFFF beforehand (fugue256_cpu_setBlock does so)
+ */
+__global__ void fugue256_gpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce)
+{
+    int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+    if (thread < threads)
+    {
+        /* Take the state and process the last bytes (the nonce) */
+        uint32_t sc[30];
+
+        #pragma unroll 30
+        for(int i=0;i<30;i++)
+            sc[i] = GPUstate[i];
+
+        uint32_t nounce = startNounce + thread; // original author's note: still to be determined
+        uint32_t q;
+
+
+        // At byte 80 the sub-rounds run in the order 4-0-1 (hard-coded here)
+
+        // Sub-round 4
+        q = SWAB32(nounce);
+        TIX2(q, S06, S07, S14, S16, S00);
+        CMIX30(S03, S04, S05, S07, S08, S09, S18, S19, S20);
+        SMIX(S03, S04, S05, S06);
+        CMIX30(S00, S01, S02, S04, S05, S06, S15, S16, S17);
+        SMIX(S00, S01, S02, S03);
+
+        // Sub-round 0
+        q = 0;
+        TIX2(q, S00, S01, S08, S10, S24);
+        CMIX30(S27, S28, S29, S01, S02, S03, S12, S13, S14);
+        SMIX(S27, S28, S29, S00);
+        CMIX30(S24, S25, S26, S28, S29, S00, S09, S10, S11);
+        SMIX(S24, S25, S26, S27);
+
+        // Sub-round 1
+        // 0x280 = 640 = 80 * 8; presumably the message length in bits - confirm.
+        q = 0x280; // original author's note: hopefully the right way round...
+        TIX2(q, S24, S25, S02, S04, S18);
+        CMIX30(S21, S22, S23, S25, S26, S27, S06, S07, S08);
+        SMIX(S21, S22, S23, S24);
+        CMIX30(S18, S19, S20, S22, S23, S24, S03, S04, S05);
+        SMIX(S18, S19, S20, S21);
+
+        // End of rounds
+        // rms = 12, i.e. 30 - 12 = 18
+
+        #pragma unroll 10
+        for(int i=0;i<10;i++)
+        {
+            // ROR(3, 30): rotate the 30-word state by 3 positions
+            uint32_t tmp[3];
+            #pragma unroll 3
+            for(int k=0;k<3;k++)
+                tmp[k] = sc[27+k];
+            #pragma unroll 27
+            for(int k=26;k>=0;k--)
+                sc[k+3] = sc[k];
+            #pragma unroll 3
+            for(int k=0;k<3;k++)
+                sc[k] = tmp[k];
+
+
+            CMIX30(sc[18], sc[19], sc[20], sc[22], sc[23], sc[24], sc[3], sc[4], sc[5]);
+            SMIX(sc[18], sc[19], sc[20], sc[21]);
+        }
+
+        #pragma unroll 13
+        for(int i=0;i<13;i++)
+        {
+            sc[22] ^= sc[18];
+            sc[3] ^= sc[18];
+
+            // ROR(15, 30); BEGIN
+            uint32_t tmp1[15];
+            #pragma unroll 15
+            for(int k=0;k<15;k++)
+                tmp1[k] = sc[15+k];
+            #pragma unroll 15
+            for(int k=14;k>=0;k--)
+                sc[k+15] = sc[k];
+            #pragma unroll 15
+            for(int k=0;k<15;k++)
+                sc[k] = tmp1[k];
+            // ROR(15, 30); END
+
+            SMIX(sc[18], sc[19], sc[20], sc[21]);
+            sc[22] ^= sc[18];
+            sc[4] ^= sc[18];
+
+            // ROR(14, 30); BEGIN
+            uint32_t tmp2[14];
+            #pragma unroll 14
+            for(int k=0;k<14;k++)
+                tmp2[k] = sc[16+k];
+            #pragma unroll 16
+            for(int k=15;k>=0;k--)
+                sc[k+14] = sc[k];
+            #pragma unroll 14
+            for(int k=0;k<14;k++)
+                sc[k] = tmp2[k];
+            // ROR(14, 30); END
+
+            SMIX(sc[18], sc[19], sc[20], sc[21]);
+        }
+
+        sc[22] ^= sc[18];
+        sc[3] ^= sc[18];
+
+        // Assemble the hash: words 0-3 from sc[19..22], words 4-7 from
+        // sc[3..6], each byte-swapped.
+        uint32_t hash[8];
+        #pragma unroll 4
+        for(int i=0;i<4;i++)
+            hash[i] = SWAB32(sc[19+i]);
+
+        #pragma unroll 4
+        for(int i=0;i<4;i++)
+            hash[i+4] = SWAB32(sc[3+i]);
+
+        // Compare with the target starting at word 7; the first differing
+        // word decides. Equal in all eight words counts as a hit.
+        bool rc = true;
+        for (int i = 7; i >= 0; i--) {
+            if (hash[i] > pTarget[i]) {
+                rc = false;
+                break;
+            }
+            if (hash[i] < pTarget[i]) {
+                break;
+            }
+        }
+
+        if (rc)
+        {
+            // Several threads can hit the target in the same launch. A plain
+            // compare-then-store would race, so keep the minimum atomically.
+            atomicMin(&resNounce[0], SWAB32(nounce));
+        }
+    }
+}
+
+/*
+ * Declares `unsigned int *texmem` in the enclosing scope, allocates texsize
+ * bytes on the device, copies texsource into it and binds it to the texture
+ * reference texname (unnormalized coordinates, point filtering, clamped
+ * addressing). CUDA return codes are not checked.
+ */
+#define texDef(texname, texmem, texsource, texsize) \
+    unsigned int *texmem; \
+    cudaMalloc(&texmem, texsize); \
+    cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
+    texname.normalized = 0; \
+    texname.filterMode = cudaFilterModePoint; \
+    texname.addressMode[0] = cudaAddressModeClamp; \
+    { cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
+      cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); }
+
+
+/*
+ * One-time setup for mining thread thr_id: selects CUDA device thr_id,
+ * uploads the four 256-entry mix tables as textures and allocates the
+ * per-thread output buffer (8 words per GPU thread) and the result nonce.
+ * CUDA return codes are not checked.
+ */
+void fugue256_cpu_init(int thr_id, int threads)
+{
+    cudaSetDevice(thr_id);
+
+    // Copy the hash tables into GPU memory
+    texDef(mixTab0Tex, mixTab0m, mixtab0_cpu, sizeof(uint32_t)*256);
+    texDef(mixTab1Tex, mixTab1m, mixtab1_cpu, sizeof(uint32_t)*256);
+    texDef(mixTab2Tex, mixTab2m, mixtab2_cpu, sizeof(uint32_t)*256);
+    texDef(mixTab3Tex, mixTab3m, mixtab3_cpu, sizeof(uint32_t)*256);
+    // Allocate memory for all results
+    cudaMalloc(&d_fugue256_hashoutput[thr_id], 8 * sizeof(uint32_t) * threads);
+    cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
+}
+
+__host__ void
fugue256_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
+{
+    // Prepare a new block on the CPU: run the sph Fugue-256 context over 80
+    // bytes of `data`, then upload the 30-word state and the 8-word target to
+    // the device symbols GPUstate and pTarget.
+    sph_fugue256_context ctx_fugue_const;
+    sph_fugue256_init(&ctx_fugue_const);
+    sph_fugue256 (&ctx_fugue_const, data, 80); // save the state
+
+    cudaMemcpyToSymbol( GPUstate,
+                        ctx_fugue_const.S,
+                        sizeof(uint32_t) * 30 );
+
+    cudaMemcpyToSymbol( pTarget,
+                        pTargetIn,
+                        sizeof(uint32_t) * 8 );
+
+    // Reset the result nonce to 0xFFFFFFFF, the "nothing found" value the
+    // kernel lowers when a thread meets the target.
+    cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
+}
+
+/*
+ * Launches fugue256_gpu_hash over `threads` nonces starting at startNounce,
+ * waits for completion and copies the one-word result nonce to *nounce.
+ * outputHashes is not used: the hashes are never copied back to the host.
+ */
+__host__ void fugue256_cpu_hash(int thr_id, int threads, int startNounce, void *outputHashes, uint32_t *nounce)
+{
+    const int threadsperblock = 512;
+
+    // compute how many thread blocks we need (rounded up)
+    dim3 grid((threads + threadsperblock-1)/threadsperblock);
+    dim3 block(threadsperblock);
+
+    // size of the dynamic shared memory region; the kernel uses none
+    size_t shared_size = 0;
+
+    fugue256_gpu_hash<<<grid, block, shared_size>>>(thr_id, threads, startNounce, d_fugue256_hashoutput[thr_id], d_resultNonce[thr_id]);
+
+    // Strategic sleep command to lower the CPU load
+    MyStreamSynchronize(NULL, 0, thr_id);
+
+    cudaMemcpy(nounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
+}
+
+#endif
diff --git a/cuda_fugue256.h b/cuda_fugue256.h
new file mode 100644
index 0000000..bb864fb
--- /dev/null
+++ b/cuda_fugue256.h
@@ -0,0 +1,8 @@
+#ifndef CUDA_FUGUE256_H
+#define CUDA_FUGUE256_H
+
+void fugue256_cpu_hash(int thr_id, int threads, int startNounce, void *outputHashes, uint32_t *nounce);
+void fugue256_cpu_setBlock(int thr_id, void *data, void *pTargetIn);
+void fugue256_cpu_init(int thr_id, int threads);
+
+#endif /* CUDA_FUGUE256_H */
diff --git a/cuda_groestl512.cu b/cuda_groestl512.cu
new file mode 100644
index 0000000..1c1dce9
--- /dev/null
+++ b/cuda_groestl512.cu
@@ -0,0 +1,837 @@
+/*
Diese Funktion ist auf 84+32-Byte große Eingabedaten ausgerichtet (Heavycoin) */ +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + +#include +#include + +#define USE_SHARED 0 +#define W_ALIGNMENT 65 + +// Folgende Definitionen später durch header ersetzen +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; + +// globaler Speicher für alle HeftyHashes aller Threads +extern uint32_t *d_heftyHashes[8]; +extern uint32_t *d_nonceVector[8]; + +// globaler Speicher für unsere Ergebnisse +uint32_t *d_hash4output[8]; + +__constant__ uint32_t groestl_gpu_state[32]; +__constant__ uint32_t groestl_gpu_msg[32]; + +#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) + +#define PC32up(j, r) ((uint32_t)((j) + (r))) +#define PC32dn(j, r) 0 +#define QC32up(j, r) 0xFFFFFFFF +#define QC32dn(j, r) (((uint32_t)(r) << 24) ^ SPH_T32(~((uint32_t)(j) << 24))) + +#define B32_0(x) ((x) & 0xFF) +#define B32_1(x) (((x) >> 8) & 0xFF) +#define B32_2(x) (((x) >> 16) & 0xFF) +#define B32_3(x) ((x) >> 24) + +#define SPH_C32(x) ((uint32_t)(x ## U)) +#define C32e(x) ((SPH_C32(x) >> 24) \ + | ((SPH_C32(x) >> 8) & SPH_C32(0x0000FF00)) \ + | ((SPH_C32(x) << 8) & SPH_C32(0x00FF0000)) \ + | ((SPH_C32(x) << 24) & SPH_C32(0xFF000000))) + +#define T0up(x) tex1Dfetch(t0up, x) +#define T0dn(x) tex1Dfetch(t0dn, x) +#define T1up(x) tex1Dfetch(t1up, x) +#define T1dn(x) tex1Dfetch(t1dn, x) +#define T2up(x) tex1Dfetch(t2up, x) +#define T2dn(x) tex1Dfetch(t2dn, x) +#define T3up(x) tex1Dfetch(t3up, x) +#define T3dn(x) tex1Dfetch(t3dn, x) + +texture t0up; +texture t0dn; +texture t1up; +texture t1dn; +texture t2up; +texture t2dn; +texture t3up; +texture t3dn; + +static const uint32_t T0up_cpu[] = { + C32e(0xc632f4a5), C32e(0xf86f9784), C32e(0xee5eb099), C32e(0xf67a8c8d), + C32e(0xffe8170d), C32e(0xd60adcbd), C32e(0xde16c8b1), C32e(0x916dfc54), + C32e(0x6090f050), C32e(0x02070503), C32e(0xce2ee0a9), C32e(0x56d1877d), + C32e(0xe7cc2b19), 
C32e(0xb513a662), C32e(0x4d7c31e6), C32e(0xec59b59a), + C32e(0x8f40cf45), C32e(0x1fa3bc9d), C32e(0x8949c040), C32e(0xfa689287), + C32e(0xefd03f15), C32e(0xb29426eb), C32e(0x8ece40c9), C32e(0xfbe61d0b), + C32e(0x416e2fec), C32e(0xb31aa967), C32e(0x5f431cfd), C32e(0x456025ea), + C32e(0x23f9dabf), C32e(0x535102f7), C32e(0xe445a196), C32e(0x9b76ed5b), + C32e(0x75285dc2), C32e(0xe1c5241c), C32e(0x3dd4e9ae), C32e(0x4cf2be6a), + C32e(0x6c82ee5a), C32e(0x7ebdc341), C32e(0xf5f30602), C32e(0x8352d14f), + C32e(0x688ce45c), C32e(0x515607f4), C32e(0xd18d5c34), C32e(0xf9e11808), + C32e(0xe24cae93), C32e(0xab3e9573), C32e(0x6297f553), C32e(0x2a6b413f), + C32e(0x081c140c), C32e(0x9563f652), C32e(0x46e9af65), C32e(0x9d7fe25e), + C32e(0x30487828), C32e(0x37cff8a1), C32e(0x0a1b110f), C32e(0x2febc4b5), + C32e(0x0e151b09), C32e(0x247e5a36), C32e(0x1badb69b), C32e(0xdf98473d), + C32e(0xcda76a26), C32e(0x4ef5bb69), C32e(0x7f334ccd), C32e(0xea50ba9f), + C32e(0x123f2d1b), C32e(0x1da4b99e), C32e(0x58c49c74), C32e(0x3446722e), + C32e(0x3641772d), C32e(0xdc11cdb2), C32e(0xb49d29ee), C32e(0x5b4d16fb), + C32e(0xa4a501f6), C32e(0x76a1d74d), C32e(0xb714a361), C32e(0x7d3449ce), + C32e(0x52df8d7b), C32e(0xdd9f423e), C32e(0x5ecd9371), C32e(0x13b1a297), + C32e(0xa6a204f5), C32e(0xb901b868), C32e(0x00000000), C32e(0xc1b5742c), + C32e(0x40e0a060), C32e(0xe3c2211f), C32e(0x793a43c8), C32e(0xb69a2ced), + C32e(0xd40dd9be), C32e(0x8d47ca46), C32e(0x671770d9), C32e(0x72afdd4b), + C32e(0x94ed79de), C32e(0x98ff67d4), C32e(0xb09323e8), C32e(0x855bde4a), + C32e(0xbb06bd6b), C32e(0xc5bb7e2a), C32e(0x4f7b34e5), C32e(0xedd73a16), + C32e(0x86d254c5), C32e(0x9af862d7), C32e(0x6699ff55), C32e(0x11b6a794), + C32e(0x8ac04acf), C32e(0xe9d93010), C32e(0x040e0a06), C32e(0xfe669881), + C32e(0xa0ab0bf0), C32e(0x78b4cc44), C32e(0x25f0d5ba), C32e(0x4b753ee3), + C32e(0xa2ac0ef3), C32e(0x5d4419fe), C32e(0x80db5bc0), C32e(0x0580858a), + C32e(0x3fd3ecad), C32e(0x21fedfbc), C32e(0x70a8d848), C32e(0xf1fd0c04), + C32e(0x63197adf), 
C32e(0x772f58c1), C32e(0xaf309f75), C32e(0x42e7a563), + C32e(0x20705030), C32e(0xe5cb2e1a), C32e(0xfdef120e), C32e(0xbf08b76d), + C32e(0x8155d44c), C32e(0x18243c14), C32e(0x26795f35), C32e(0xc3b2712f), + C32e(0xbe8638e1), C32e(0x35c8fda2), C32e(0x88c74fcc), C32e(0x2e654b39), + C32e(0x936af957), C32e(0x55580df2), C32e(0xfc619d82), C32e(0x7ab3c947), + C32e(0xc827efac), C32e(0xba8832e7), C32e(0x324f7d2b), C32e(0xe642a495), + C32e(0xc03bfba0), C32e(0x19aab398), C32e(0x9ef668d1), C32e(0xa322817f), + C32e(0x44eeaa66), C32e(0x54d6827e), C32e(0x3bdde6ab), C32e(0x0b959e83), + C32e(0x8cc945ca), C32e(0xc7bc7b29), C32e(0x6b056ed3), C32e(0x286c443c), + C32e(0xa72c8b79), C32e(0xbc813de2), C32e(0x1631271d), C32e(0xad379a76), + C32e(0xdb964d3b), C32e(0x649efa56), C32e(0x74a6d24e), C32e(0x1436221e), + C32e(0x92e476db), C32e(0x0c121e0a), C32e(0x48fcb46c), C32e(0xb88f37e4), + C32e(0x9f78e75d), C32e(0xbd0fb26e), C32e(0x43692aef), C32e(0xc435f1a6), + C32e(0x39dae3a8), C32e(0x31c6f7a4), C32e(0xd38a5937), C32e(0xf274868b), + C32e(0xd5835632), C32e(0x8b4ec543), C32e(0x6e85eb59), C32e(0xda18c2b7), + C32e(0x018e8f8c), C32e(0xb11dac64), C32e(0x9cf16dd2), C32e(0x49723be0), + C32e(0xd81fc7b4), C32e(0xacb915fa), C32e(0xf3fa0907), C32e(0xcfa06f25), + C32e(0xca20eaaf), C32e(0xf47d898e), C32e(0x476720e9), C32e(0x10382818), + C32e(0x6f0b64d5), C32e(0xf0738388), C32e(0x4afbb16f), C32e(0x5cca9672), + C32e(0x38546c24), C32e(0x575f08f1), C32e(0x732152c7), C32e(0x9764f351), + C32e(0xcbae6523), C32e(0xa125847c), C32e(0xe857bf9c), C32e(0x3e5d6321), + C32e(0x96ea7cdd), C32e(0x611e7fdc), C32e(0x0d9c9186), C32e(0x0f9b9485), + C32e(0xe04bab90), C32e(0x7cbac642), C32e(0x712657c4), C32e(0xcc29e5aa), + C32e(0x90e373d8), C32e(0x06090f05), C32e(0xf7f40301), C32e(0x1c2a3612), + C32e(0xc23cfea3), C32e(0x6a8be15f), C32e(0xaebe10f9), C32e(0x69026bd0), + C32e(0x17bfa891), C32e(0x9971e858), C32e(0x3a536927), C32e(0x27f7d0b9), + C32e(0xd9914838), C32e(0xebde3513), C32e(0x2be5ceb3), C32e(0x22775533), + C32e(0xd204d6bb), 
C32e(0xa9399070), C32e(0x07878089), C32e(0x33c1f2a7), + C32e(0x2decc1b6), C32e(0x3c5a6622), C32e(0x15b8ad92), C32e(0xc9a96020), + C32e(0x875cdb49), C32e(0xaab01aff), C32e(0x50d88878), C32e(0xa52b8e7a), + C32e(0x03898a8f), C32e(0x594a13f8), C32e(0x09929b80), C32e(0x1a233917), + C32e(0x651075da), C32e(0xd7845331), C32e(0x84d551c6), C32e(0xd003d3b8), + C32e(0x82dc5ec3), C32e(0x29e2cbb0), C32e(0x5ac39977), C32e(0x1e2d3311), + C32e(0x7b3d46cb), C32e(0xa8b71ffc), C32e(0x6d0c61d6), C32e(0x2c624e3a) +}; + +static const uint32_t T0dn_cpu[] = { + C32e(0xf497a5c6), C32e(0x97eb84f8), C32e(0xb0c799ee), C32e(0x8cf78df6), + C32e(0x17e50dff), C32e(0xdcb7bdd6), C32e(0xc8a7b1de), C32e(0xfc395491), + C32e(0xf0c05060), C32e(0x05040302), C32e(0xe087a9ce), C32e(0x87ac7d56), + C32e(0x2bd519e7), C32e(0xa67162b5), C32e(0x319ae64d), C32e(0xb5c39aec), + C32e(0xcf05458f), C32e(0xbc3e9d1f), C32e(0xc0094089), C32e(0x92ef87fa), + C32e(0x3fc515ef), C32e(0x267febb2), C32e(0x4007c98e), C32e(0x1ded0bfb), + C32e(0x2f82ec41), C32e(0xa97d67b3), C32e(0x1cbefd5f), C32e(0x258aea45), + C32e(0xda46bf23), C32e(0x02a6f753), C32e(0xa1d396e4), C32e(0xed2d5b9b), + C32e(0x5deac275), C32e(0x24d91ce1), C32e(0xe97aae3d), C32e(0xbe986a4c), + C32e(0xeed85a6c), C32e(0xc3fc417e), C32e(0x06f102f5), C32e(0xd11d4f83), + C32e(0xe4d05c68), C32e(0x07a2f451), C32e(0x5cb934d1), C32e(0x18e908f9), + C32e(0xaedf93e2), C32e(0x954d73ab), C32e(0xf5c45362), C32e(0x41543f2a), + C32e(0x14100c08), C32e(0xf6315295), C32e(0xaf8c6546), C32e(0xe2215e9d), + C32e(0x78602830), C32e(0xf86ea137), C32e(0x11140f0a), C32e(0xc45eb52f), + C32e(0x1b1c090e), C32e(0x5a483624), C32e(0xb6369b1b), C32e(0x47a53ddf), + C32e(0x6a8126cd), C32e(0xbb9c694e), C32e(0x4cfecd7f), C32e(0xbacf9fea), + C32e(0x2d241b12), C32e(0xb93a9e1d), C32e(0x9cb07458), C32e(0x72682e34), + C32e(0x776c2d36), C32e(0xcda3b2dc), C32e(0x2973eeb4), C32e(0x16b6fb5b), + C32e(0x0153f6a4), C32e(0xd7ec4d76), C32e(0xa37561b7), C32e(0x49face7d), + C32e(0x8da47b52), C32e(0x42a13edd), 
C32e(0x93bc715e), C32e(0xa2269713), + C32e(0x0457f5a6), C32e(0xb86968b9), C32e(0x00000000), C32e(0x74992cc1), + C32e(0xa0806040), C32e(0x21dd1fe3), C32e(0x43f2c879), C32e(0x2c77edb6), + C32e(0xd9b3bed4), C32e(0xca01468d), C32e(0x70ced967), C32e(0xdde44b72), + C32e(0x7933de94), C32e(0x672bd498), C32e(0x237be8b0), C32e(0xde114a85), + C32e(0xbd6d6bbb), C32e(0x7e912ac5), C32e(0x349ee54f), C32e(0x3ac116ed), + C32e(0x5417c586), C32e(0x622fd79a), C32e(0xffcc5566), C32e(0xa7229411), + C32e(0x4a0fcf8a), C32e(0x30c910e9), C32e(0x0a080604), C32e(0x98e781fe), + C32e(0x0b5bf0a0), C32e(0xccf04478), C32e(0xd54aba25), C32e(0x3e96e34b), + C32e(0x0e5ff3a2), C32e(0x19bafe5d), C32e(0x5b1bc080), C32e(0x850a8a05), + C32e(0xec7ead3f), C32e(0xdf42bc21), C32e(0xd8e04870), C32e(0x0cf904f1), + C32e(0x7ac6df63), C32e(0x58eec177), C32e(0x9f4575af), C32e(0xa5846342), + C32e(0x50403020), C32e(0x2ed11ae5), C32e(0x12e10efd), C32e(0xb7656dbf), + C32e(0xd4194c81), C32e(0x3c301418), C32e(0x5f4c3526), C32e(0x719d2fc3), + C32e(0x3867e1be), C32e(0xfd6aa235), C32e(0x4f0bcc88), C32e(0x4b5c392e), + C32e(0xf93d5793), C32e(0x0daaf255), C32e(0x9de382fc), C32e(0xc9f4477a), + C32e(0xef8bacc8), C32e(0x326fe7ba), C32e(0x7d642b32), C32e(0xa4d795e6), + C32e(0xfb9ba0c0), C32e(0xb3329819), C32e(0x6827d19e), C32e(0x815d7fa3), + C32e(0xaa886644), C32e(0x82a87e54), C32e(0xe676ab3b), C32e(0x9e16830b), + C32e(0x4503ca8c), C32e(0x7b9529c7), C32e(0x6ed6d36b), C32e(0x44503c28), + C32e(0x8b5579a7), C32e(0x3d63e2bc), C32e(0x272c1d16), C32e(0x9a4176ad), + C32e(0x4dad3bdb), C32e(0xfac85664), C32e(0xd2e84e74), C32e(0x22281e14), + C32e(0x763fdb92), C32e(0x1e180a0c), C32e(0xb4906c48), C32e(0x376be4b8), + C32e(0xe7255d9f), C32e(0xb2616ebd), C32e(0x2a86ef43), C32e(0xf193a6c4), + C32e(0xe372a839), C32e(0xf762a431), C32e(0x59bd37d3), C32e(0x86ff8bf2), + C32e(0x56b132d5), C32e(0xc50d438b), C32e(0xebdc596e), C32e(0xc2afb7da), + C32e(0x8f028c01), C32e(0xac7964b1), C32e(0x6d23d29c), C32e(0x3b92e049), + C32e(0xc7abb4d8), C32e(0x1543faac), 
C32e(0x09fd07f3), C32e(0x6f8525cf), + C32e(0xea8fafca), C32e(0x89f38ef4), C32e(0x208ee947), C32e(0x28201810), + C32e(0x64ded56f), C32e(0x83fb88f0), C32e(0xb1946f4a), C32e(0x96b8725c), + C32e(0x6c702438), C32e(0x08aef157), C32e(0x52e6c773), C32e(0xf3355197), + C32e(0x658d23cb), C32e(0x84597ca1), C32e(0xbfcb9ce8), C32e(0x637c213e), + C32e(0x7c37dd96), C32e(0x7fc2dc61), C32e(0x911a860d), C32e(0x941e850f), + C32e(0xabdb90e0), C32e(0xc6f8427c), C32e(0x57e2c471), C32e(0xe583aacc), + C32e(0x733bd890), C32e(0x0f0c0506), C32e(0x03f501f7), C32e(0x3638121c), + C32e(0xfe9fa3c2), C32e(0xe1d45f6a), C32e(0x1047f9ae), C32e(0x6bd2d069), + C32e(0xa82e9117), C32e(0xe8295899), C32e(0x6974273a), C32e(0xd04eb927), + C32e(0x48a938d9), C32e(0x35cd13eb), C32e(0xce56b32b), C32e(0x55443322), + C32e(0xd6bfbbd2), C32e(0x904970a9), C32e(0x800e8907), C32e(0xf266a733), + C32e(0xc15ab62d), C32e(0x6678223c), C32e(0xad2a9215), C32e(0x608920c9), + C32e(0xdb154987), C32e(0x1a4fffaa), C32e(0x88a07850), C32e(0x8e517aa5), + C32e(0x8a068f03), C32e(0x13b2f859), C32e(0x9b128009), C32e(0x3934171a), + C32e(0x75cada65), C32e(0x53b531d7), C32e(0x5113c684), C32e(0xd3bbb8d0), + C32e(0x5e1fc382), C32e(0xcb52b029), C32e(0x99b4775a), C32e(0x333c111e), + C32e(0x46f6cb7b), C32e(0x1f4bfca8), C32e(0x61dad66d), C32e(0x4e583a2c) +}; + +static const uint32_t T1up_cpu[] = { + C32e(0xc6c632f4), C32e(0xf8f86f97), C32e(0xeeee5eb0), C32e(0xf6f67a8c), + C32e(0xffffe817), C32e(0xd6d60adc), C32e(0xdede16c8), C32e(0x91916dfc), + C32e(0x606090f0), C32e(0x02020705), C32e(0xcece2ee0), C32e(0x5656d187), + C32e(0xe7e7cc2b), C32e(0xb5b513a6), C32e(0x4d4d7c31), C32e(0xecec59b5), + C32e(0x8f8f40cf), C32e(0x1f1fa3bc), C32e(0x898949c0), C32e(0xfafa6892), + C32e(0xefefd03f), C32e(0xb2b29426), C32e(0x8e8ece40), C32e(0xfbfbe61d), + C32e(0x41416e2f), C32e(0xb3b31aa9), C32e(0x5f5f431c), C32e(0x45456025), + C32e(0x2323f9da), C32e(0x53535102), C32e(0xe4e445a1), C32e(0x9b9b76ed), + C32e(0x7575285d), C32e(0xe1e1c524), C32e(0x3d3dd4e9), 
C32e(0x4c4cf2be), + C32e(0x6c6c82ee), C32e(0x7e7ebdc3), C32e(0xf5f5f306), C32e(0x838352d1), + C32e(0x68688ce4), C32e(0x51515607), C32e(0xd1d18d5c), C32e(0xf9f9e118), + C32e(0xe2e24cae), C32e(0xabab3e95), C32e(0x626297f5), C32e(0x2a2a6b41), + C32e(0x08081c14), C32e(0x959563f6), C32e(0x4646e9af), C32e(0x9d9d7fe2), + C32e(0x30304878), C32e(0x3737cff8), C32e(0x0a0a1b11), C32e(0x2f2febc4), + C32e(0x0e0e151b), C32e(0x24247e5a), C32e(0x1b1badb6), C32e(0xdfdf9847), + C32e(0xcdcda76a), C32e(0x4e4ef5bb), C32e(0x7f7f334c), C32e(0xeaea50ba), + C32e(0x12123f2d), C32e(0x1d1da4b9), C32e(0x5858c49c), C32e(0x34344672), + C32e(0x36364177), C32e(0xdcdc11cd), C32e(0xb4b49d29), C32e(0x5b5b4d16), + C32e(0xa4a4a501), C32e(0x7676a1d7), C32e(0xb7b714a3), C32e(0x7d7d3449), + C32e(0x5252df8d), C32e(0xdddd9f42), C32e(0x5e5ecd93), C32e(0x1313b1a2), + C32e(0xa6a6a204), C32e(0xb9b901b8), C32e(0x00000000), C32e(0xc1c1b574), + C32e(0x4040e0a0), C32e(0xe3e3c221), C32e(0x79793a43), C32e(0xb6b69a2c), + C32e(0xd4d40dd9), C32e(0x8d8d47ca), C32e(0x67671770), C32e(0x7272afdd), + C32e(0x9494ed79), C32e(0x9898ff67), C32e(0xb0b09323), C32e(0x85855bde), + C32e(0xbbbb06bd), C32e(0xc5c5bb7e), C32e(0x4f4f7b34), C32e(0xededd73a), + C32e(0x8686d254), C32e(0x9a9af862), C32e(0x666699ff), C32e(0x1111b6a7), + C32e(0x8a8ac04a), C32e(0xe9e9d930), C32e(0x04040e0a), C32e(0xfefe6698), + C32e(0xa0a0ab0b), C32e(0x7878b4cc), C32e(0x2525f0d5), C32e(0x4b4b753e), + C32e(0xa2a2ac0e), C32e(0x5d5d4419), C32e(0x8080db5b), C32e(0x05058085), + C32e(0x3f3fd3ec), C32e(0x2121fedf), C32e(0x7070a8d8), C32e(0xf1f1fd0c), + C32e(0x6363197a), C32e(0x77772f58), C32e(0xafaf309f), C32e(0x4242e7a5), + C32e(0x20207050), C32e(0xe5e5cb2e), C32e(0xfdfdef12), C32e(0xbfbf08b7), + C32e(0x818155d4), C32e(0x1818243c), C32e(0x2626795f), C32e(0xc3c3b271), + C32e(0xbebe8638), C32e(0x3535c8fd), C32e(0x8888c74f), C32e(0x2e2e654b), + C32e(0x93936af9), C32e(0x5555580d), C32e(0xfcfc619d), C32e(0x7a7ab3c9), + C32e(0xc8c827ef), C32e(0xbaba8832), C32e(0x32324f7d), 
C32e(0xe6e642a4), + C32e(0xc0c03bfb), C32e(0x1919aab3), C32e(0x9e9ef668), C32e(0xa3a32281), + C32e(0x4444eeaa), C32e(0x5454d682), C32e(0x3b3bdde6), C32e(0x0b0b959e), + C32e(0x8c8cc945), C32e(0xc7c7bc7b), C32e(0x6b6b056e), C32e(0x28286c44), + C32e(0xa7a72c8b), C32e(0xbcbc813d), C32e(0x16163127), C32e(0xadad379a), + C32e(0xdbdb964d), C32e(0x64649efa), C32e(0x7474a6d2), C32e(0x14143622), + C32e(0x9292e476), C32e(0x0c0c121e), C32e(0x4848fcb4), C32e(0xb8b88f37), + C32e(0x9f9f78e7), C32e(0xbdbd0fb2), C32e(0x4343692a), C32e(0xc4c435f1), + C32e(0x3939dae3), C32e(0x3131c6f7), C32e(0xd3d38a59), C32e(0xf2f27486), + C32e(0xd5d58356), C32e(0x8b8b4ec5), C32e(0x6e6e85eb), C32e(0xdada18c2), + C32e(0x01018e8f), C32e(0xb1b11dac), C32e(0x9c9cf16d), C32e(0x4949723b), + C32e(0xd8d81fc7), C32e(0xacacb915), C32e(0xf3f3fa09), C32e(0xcfcfa06f), + C32e(0xcaca20ea), C32e(0xf4f47d89), C32e(0x47476720), C32e(0x10103828), + C32e(0x6f6f0b64), C32e(0xf0f07383), C32e(0x4a4afbb1), C32e(0x5c5cca96), + C32e(0x3838546c), C32e(0x57575f08), C32e(0x73732152), C32e(0x979764f3), + C32e(0xcbcbae65), C32e(0xa1a12584), C32e(0xe8e857bf), C32e(0x3e3e5d63), + C32e(0x9696ea7c), C32e(0x61611e7f), C32e(0x0d0d9c91), C32e(0x0f0f9b94), + C32e(0xe0e04bab), C32e(0x7c7cbac6), C32e(0x71712657), C32e(0xcccc29e5), + C32e(0x9090e373), C32e(0x0606090f), C32e(0xf7f7f403), C32e(0x1c1c2a36), + C32e(0xc2c23cfe), C32e(0x6a6a8be1), C32e(0xaeaebe10), C32e(0x6969026b), + C32e(0x1717bfa8), C32e(0x999971e8), C32e(0x3a3a5369), C32e(0x2727f7d0), + C32e(0xd9d99148), C32e(0xebebde35), C32e(0x2b2be5ce), C32e(0x22227755), + C32e(0xd2d204d6), C32e(0xa9a93990), C32e(0x07078780), C32e(0x3333c1f2), + C32e(0x2d2decc1), C32e(0x3c3c5a66), C32e(0x1515b8ad), C32e(0xc9c9a960), + C32e(0x87875cdb), C32e(0xaaaab01a), C32e(0x5050d888), C32e(0xa5a52b8e), + C32e(0x0303898a), C32e(0x59594a13), C32e(0x0909929b), C32e(0x1a1a2339), + C32e(0x65651075), C32e(0xd7d78453), C32e(0x8484d551), C32e(0xd0d003d3), + C32e(0x8282dc5e), C32e(0x2929e2cb), C32e(0x5a5ac399), 
C32e(0x1e1e2d33), + C32e(0x7b7b3d46), C32e(0xa8a8b71f), C32e(0x6d6d0c61), C32e(0x2c2c624e) +}; + +static const uint32_t T1dn_cpu[] = { + C32e(0xa5f497a5), C32e(0x8497eb84), C32e(0x99b0c799), C32e(0x8d8cf78d), + C32e(0x0d17e50d), C32e(0xbddcb7bd), C32e(0xb1c8a7b1), C32e(0x54fc3954), + C32e(0x50f0c050), C32e(0x03050403), C32e(0xa9e087a9), C32e(0x7d87ac7d), + C32e(0x192bd519), C32e(0x62a67162), C32e(0xe6319ae6), C32e(0x9ab5c39a), + C32e(0x45cf0545), C32e(0x9dbc3e9d), C32e(0x40c00940), C32e(0x8792ef87), + C32e(0x153fc515), C32e(0xeb267feb), C32e(0xc94007c9), C32e(0x0b1ded0b), + C32e(0xec2f82ec), C32e(0x67a97d67), C32e(0xfd1cbefd), C32e(0xea258aea), + C32e(0xbfda46bf), C32e(0xf702a6f7), C32e(0x96a1d396), C32e(0x5bed2d5b), + C32e(0xc25deac2), C32e(0x1c24d91c), C32e(0xaee97aae), C32e(0x6abe986a), + C32e(0x5aeed85a), C32e(0x41c3fc41), C32e(0x0206f102), C32e(0x4fd11d4f), + C32e(0x5ce4d05c), C32e(0xf407a2f4), C32e(0x345cb934), C32e(0x0818e908), + C32e(0x93aedf93), C32e(0x73954d73), C32e(0x53f5c453), C32e(0x3f41543f), + C32e(0x0c14100c), C32e(0x52f63152), C32e(0x65af8c65), C32e(0x5ee2215e), + C32e(0x28786028), C32e(0xa1f86ea1), C32e(0x0f11140f), C32e(0xb5c45eb5), + C32e(0x091b1c09), C32e(0x365a4836), C32e(0x9bb6369b), C32e(0x3d47a53d), + C32e(0x266a8126), C32e(0x69bb9c69), C32e(0xcd4cfecd), C32e(0x9fbacf9f), + C32e(0x1b2d241b), C32e(0x9eb93a9e), C32e(0x749cb074), C32e(0x2e72682e), + C32e(0x2d776c2d), C32e(0xb2cda3b2), C32e(0xee2973ee), C32e(0xfb16b6fb), + C32e(0xf60153f6), C32e(0x4dd7ec4d), C32e(0x61a37561), C32e(0xce49face), + C32e(0x7b8da47b), C32e(0x3e42a13e), C32e(0x7193bc71), C32e(0x97a22697), + C32e(0xf50457f5), C32e(0x68b86968), C32e(0x00000000), C32e(0x2c74992c), + C32e(0x60a08060), C32e(0x1f21dd1f), C32e(0xc843f2c8), C32e(0xed2c77ed), + C32e(0xbed9b3be), C32e(0x46ca0146), C32e(0xd970ced9), C32e(0x4bdde44b), + C32e(0xde7933de), C32e(0xd4672bd4), C32e(0xe8237be8), C32e(0x4ade114a), + C32e(0x6bbd6d6b), C32e(0x2a7e912a), C32e(0xe5349ee5), C32e(0x163ac116), + 
C32e(0xc55417c5), C32e(0xd7622fd7), C32e(0x55ffcc55), C32e(0x94a72294), + C32e(0xcf4a0fcf), C32e(0x1030c910), C32e(0x060a0806), C32e(0x8198e781), + C32e(0xf00b5bf0), C32e(0x44ccf044), C32e(0xbad54aba), C32e(0xe33e96e3), + C32e(0xf30e5ff3), C32e(0xfe19bafe), C32e(0xc05b1bc0), C32e(0x8a850a8a), + C32e(0xadec7ead), C32e(0xbcdf42bc), C32e(0x48d8e048), C32e(0x040cf904), + C32e(0xdf7ac6df), C32e(0xc158eec1), C32e(0x759f4575), C32e(0x63a58463), + C32e(0x30504030), C32e(0x1a2ed11a), C32e(0x0e12e10e), C32e(0x6db7656d), + C32e(0x4cd4194c), C32e(0x143c3014), C32e(0x355f4c35), C32e(0x2f719d2f), + C32e(0xe13867e1), C32e(0xa2fd6aa2), C32e(0xcc4f0bcc), C32e(0x394b5c39), + C32e(0x57f93d57), C32e(0xf20daaf2), C32e(0x829de382), C32e(0x47c9f447), + C32e(0xacef8bac), C32e(0xe7326fe7), C32e(0x2b7d642b), C32e(0x95a4d795), + C32e(0xa0fb9ba0), C32e(0x98b33298), C32e(0xd16827d1), C32e(0x7f815d7f), + C32e(0x66aa8866), C32e(0x7e82a87e), C32e(0xabe676ab), C32e(0x839e1683), + C32e(0xca4503ca), C32e(0x297b9529), C32e(0xd36ed6d3), C32e(0x3c44503c), + C32e(0x798b5579), C32e(0xe23d63e2), C32e(0x1d272c1d), C32e(0x769a4176), + C32e(0x3b4dad3b), C32e(0x56fac856), C32e(0x4ed2e84e), C32e(0x1e22281e), + C32e(0xdb763fdb), C32e(0x0a1e180a), C32e(0x6cb4906c), C32e(0xe4376be4), + C32e(0x5de7255d), C32e(0x6eb2616e), C32e(0xef2a86ef), C32e(0xa6f193a6), + C32e(0xa8e372a8), C32e(0xa4f762a4), C32e(0x3759bd37), C32e(0x8b86ff8b), + C32e(0x3256b132), C32e(0x43c50d43), C32e(0x59ebdc59), C32e(0xb7c2afb7), + C32e(0x8c8f028c), C32e(0x64ac7964), C32e(0xd26d23d2), C32e(0xe03b92e0), + C32e(0xb4c7abb4), C32e(0xfa1543fa), C32e(0x0709fd07), C32e(0x256f8525), + C32e(0xafea8faf), C32e(0x8e89f38e), C32e(0xe9208ee9), C32e(0x18282018), + C32e(0xd564ded5), C32e(0x8883fb88), C32e(0x6fb1946f), C32e(0x7296b872), + C32e(0x246c7024), C32e(0xf108aef1), C32e(0xc752e6c7), C32e(0x51f33551), + C32e(0x23658d23), C32e(0x7c84597c), C32e(0x9cbfcb9c), C32e(0x21637c21), + C32e(0xdd7c37dd), C32e(0xdc7fc2dc), C32e(0x86911a86), C32e(0x85941e85), + 
C32e(0x90abdb90), C32e(0x42c6f842), C32e(0xc457e2c4), C32e(0xaae583aa), + C32e(0xd8733bd8), C32e(0x050f0c05), C32e(0x0103f501), C32e(0x12363812), + C32e(0xa3fe9fa3), C32e(0x5fe1d45f), C32e(0xf91047f9), C32e(0xd06bd2d0), + C32e(0x91a82e91), C32e(0x58e82958), C32e(0x27697427), C32e(0xb9d04eb9), + C32e(0x3848a938), C32e(0x1335cd13), C32e(0xb3ce56b3), C32e(0x33554433), + C32e(0xbbd6bfbb), C32e(0x70904970), C32e(0x89800e89), C32e(0xa7f266a7), + C32e(0xb6c15ab6), C32e(0x22667822), C32e(0x92ad2a92), C32e(0x20608920), + C32e(0x49db1549), C32e(0xff1a4fff), C32e(0x7888a078), C32e(0x7a8e517a), + C32e(0x8f8a068f), C32e(0xf813b2f8), C32e(0x809b1280), C32e(0x17393417), + C32e(0xda75cada), C32e(0x3153b531), C32e(0xc65113c6), C32e(0xb8d3bbb8), + C32e(0xc35e1fc3), C32e(0xb0cb52b0), C32e(0x7799b477), C32e(0x11333c11), + C32e(0xcb46f6cb), C32e(0xfc1f4bfc), C32e(0xd661dad6), C32e(0x3a4e583a) +}; + +static const uint32_t T2up_cpu[] = { + C32e(0xa5c6c632), C32e(0x84f8f86f), C32e(0x99eeee5e), C32e(0x8df6f67a), + C32e(0x0dffffe8), C32e(0xbdd6d60a), C32e(0xb1dede16), C32e(0x5491916d), + C32e(0x50606090), C32e(0x03020207), C32e(0xa9cece2e), C32e(0x7d5656d1), + C32e(0x19e7e7cc), C32e(0x62b5b513), C32e(0xe64d4d7c), C32e(0x9aecec59), + C32e(0x458f8f40), C32e(0x9d1f1fa3), C32e(0x40898949), C32e(0x87fafa68), + C32e(0x15efefd0), C32e(0xebb2b294), C32e(0xc98e8ece), C32e(0x0bfbfbe6), + C32e(0xec41416e), C32e(0x67b3b31a), C32e(0xfd5f5f43), C32e(0xea454560), + C32e(0xbf2323f9), C32e(0xf7535351), C32e(0x96e4e445), C32e(0x5b9b9b76), + C32e(0xc2757528), C32e(0x1ce1e1c5), C32e(0xae3d3dd4), C32e(0x6a4c4cf2), + C32e(0x5a6c6c82), C32e(0x417e7ebd), C32e(0x02f5f5f3), C32e(0x4f838352), + C32e(0x5c68688c), C32e(0xf4515156), C32e(0x34d1d18d), C32e(0x08f9f9e1), + C32e(0x93e2e24c), C32e(0x73abab3e), C32e(0x53626297), C32e(0x3f2a2a6b), + C32e(0x0c08081c), C32e(0x52959563), C32e(0x654646e9), C32e(0x5e9d9d7f), + C32e(0x28303048), C32e(0xa13737cf), C32e(0x0f0a0a1b), C32e(0xb52f2feb), + C32e(0x090e0e15), 
C32e(0x3624247e), C32e(0x9b1b1bad), C32e(0x3ddfdf98), + C32e(0x26cdcda7), C32e(0x694e4ef5), C32e(0xcd7f7f33), C32e(0x9feaea50), + C32e(0x1b12123f), C32e(0x9e1d1da4), C32e(0x745858c4), C32e(0x2e343446), + C32e(0x2d363641), C32e(0xb2dcdc11), C32e(0xeeb4b49d), C32e(0xfb5b5b4d), + C32e(0xf6a4a4a5), C32e(0x4d7676a1), C32e(0x61b7b714), C32e(0xce7d7d34), + C32e(0x7b5252df), C32e(0x3edddd9f), C32e(0x715e5ecd), C32e(0x971313b1), + C32e(0xf5a6a6a2), C32e(0x68b9b901), C32e(0x00000000), C32e(0x2cc1c1b5), + C32e(0x604040e0), C32e(0x1fe3e3c2), C32e(0xc879793a), C32e(0xedb6b69a), + C32e(0xbed4d40d), C32e(0x468d8d47), C32e(0xd9676717), C32e(0x4b7272af), + C32e(0xde9494ed), C32e(0xd49898ff), C32e(0xe8b0b093), C32e(0x4a85855b), + C32e(0x6bbbbb06), C32e(0x2ac5c5bb), C32e(0xe54f4f7b), C32e(0x16ededd7), + C32e(0xc58686d2), C32e(0xd79a9af8), C32e(0x55666699), C32e(0x941111b6), + C32e(0xcf8a8ac0), C32e(0x10e9e9d9), C32e(0x0604040e), C32e(0x81fefe66), + C32e(0xf0a0a0ab), C32e(0x447878b4), C32e(0xba2525f0), C32e(0xe34b4b75), + C32e(0xf3a2a2ac), C32e(0xfe5d5d44), C32e(0xc08080db), C32e(0x8a050580), + C32e(0xad3f3fd3), C32e(0xbc2121fe), C32e(0x487070a8), C32e(0x04f1f1fd), + C32e(0xdf636319), C32e(0xc177772f), C32e(0x75afaf30), C32e(0x634242e7), + C32e(0x30202070), C32e(0x1ae5e5cb), C32e(0x0efdfdef), C32e(0x6dbfbf08), + C32e(0x4c818155), C32e(0x14181824), C32e(0x35262679), C32e(0x2fc3c3b2), + C32e(0xe1bebe86), C32e(0xa23535c8), C32e(0xcc8888c7), C32e(0x392e2e65), + C32e(0x5793936a), C32e(0xf2555558), C32e(0x82fcfc61), C32e(0x477a7ab3), + C32e(0xacc8c827), C32e(0xe7baba88), C32e(0x2b32324f), C32e(0x95e6e642), + C32e(0xa0c0c03b), C32e(0x981919aa), C32e(0xd19e9ef6), C32e(0x7fa3a322), + C32e(0x664444ee), C32e(0x7e5454d6), C32e(0xab3b3bdd), C32e(0x830b0b95), + C32e(0xca8c8cc9), C32e(0x29c7c7bc), C32e(0xd36b6b05), C32e(0x3c28286c), + C32e(0x79a7a72c), C32e(0xe2bcbc81), C32e(0x1d161631), C32e(0x76adad37), + C32e(0x3bdbdb96), C32e(0x5664649e), C32e(0x4e7474a6), C32e(0x1e141436), + C32e(0xdb9292e4), 
C32e(0x0a0c0c12), C32e(0x6c4848fc), C32e(0xe4b8b88f), + C32e(0x5d9f9f78), C32e(0x6ebdbd0f), C32e(0xef434369), C32e(0xa6c4c435), + C32e(0xa83939da), C32e(0xa43131c6), C32e(0x37d3d38a), C32e(0x8bf2f274), + C32e(0x32d5d583), C32e(0x438b8b4e), C32e(0x596e6e85), C32e(0xb7dada18), + C32e(0x8c01018e), C32e(0x64b1b11d), C32e(0xd29c9cf1), C32e(0xe0494972), + C32e(0xb4d8d81f), C32e(0xfaacacb9), C32e(0x07f3f3fa), C32e(0x25cfcfa0), + C32e(0xafcaca20), C32e(0x8ef4f47d), C32e(0xe9474767), C32e(0x18101038), + C32e(0xd56f6f0b), C32e(0x88f0f073), C32e(0x6f4a4afb), C32e(0x725c5cca), + C32e(0x24383854), C32e(0xf157575f), C32e(0xc7737321), C32e(0x51979764), + C32e(0x23cbcbae), C32e(0x7ca1a125), C32e(0x9ce8e857), C32e(0x213e3e5d), + C32e(0xdd9696ea), C32e(0xdc61611e), C32e(0x860d0d9c), C32e(0x850f0f9b), + C32e(0x90e0e04b), C32e(0x427c7cba), C32e(0xc4717126), C32e(0xaacccc29), + C32e(0xd89090e3), C32e(0x05060609), C32e(0x01f7f7f4), C32e(0x121c1c2a), + C32e(0xa3c2c23c), C32e(0x5f6a6a8b), C32e(0xf9aeaebe), C32e(0xd0696902), + C32e(0x911717bf), C32e(0x58999971), C32e(0x273a3a53), C32e(0xb92727f7), + C32e(0x38d9d991), C32e(0x13ebebde), C32e(0xb32b2be5), C32e(0x33222277), + C32e(0xbbd2d204), C32e(0x70a9a939), C32e(0x89070787), C32e(0xa73333c1), + C32e(0xb62d2dec), C32e(0x223c3c5a), C32e(0x921515b8), C32e(0x20c9c9a9), + C32e(0x4987875c), C32e(0xffaaaab0), C32e(0x785050d8), C32e(0x7aa5a52b), + C32e(0x8f030389), C32e(0xf859594a), C32e(0x80090992), C32e(0x171a1a23), + C32e(0xda656510), C32e(0x31d7d784), C32e(0xc68484d5), C32e(0xb8d0d003), + C32e(0xc38282dc), C32e(0xb02929e2), C32e(0x775a5ac3), C32e(0x111e1e2d), + C32e(0xcb7b7b3d), C32e(0xfca8a8b7), C32e(0xd66d6d0c), C32e(0x3a2c2c62) +}; + +static const uint32_t T2dn_cpu[] = { + C32e(0xf4a5f497), C32e(0x978497eb), C32e(0xb099b0c7), C32e(0x8c8d8cf7), + C32e(0x170d17e5), C32e(0xdcbddcb7), C32e(0xc8b1c8a7), C32e(0xfc54fc39), + C32e(0xf050f0c0), C32e(0x05030504), C32e(0xe0a9e087), C32e(0x877d87ac), + C32e(0x2b192bd5), C32e(0xa662a671), 
C32e(0x31e6319a), C32e(0xb59ab5c3), + C32e(0xcf45cf05), C32e(0xbc9dbc3e), C32e(0xc040c009), C32e(0x928792ef), + C32e(0x3f153fc5), C32e(0x26eb267f), C32e(0x40c94007), C32e(0x1d0b1ded), + C32e(0x2fec2f82), C32e(0xa967a97d), C32e(0x1cfd1cbe), C32e(0x25ea258a), + C32e(0xdabfda46), C32e(0x02f702a6), C32e(0xa196a1d3), C32e(0xed5bed2d), + C32e(0x5dc25dea), C32e(0x241c24d9), C32e(0xe9aee97a), C32e(0xbe6abe98), + C32e(0xee5aeed8), C32e(0xc341c3fc), C32e(0x060206f1), C32e(0xd14fd11d), + C32e(0xe45ce4d0), C32e(0x07f407a2), C32e(0x5c345cb9), C32e(0x180818e9), + C32e(0xae93aedf), C32e(0x9573954d), C32e(0xf553f5c4), C32e(0x413f4154), + C32e(0x140c1410), C32e(0xf652f631), C32e(0xaf65af8c), C32e(0xe25ee221), + C32e(0x78287860), C32e(0xf8a1f86e), C32e(0x110f1114), C32e(0xc4b5c45e), + C32e(0x1b091b1c), C32e(0x5a365a48), C32e(0xb69bb636), C32e(0x473d47a5), + C32e(0x6a266a81), C32e(0xbb69bb9c), C32e(0x4ccd4cfe), C32e(0xba9fbacf), + C32e(0x2d1b2d24), C32e(0xb99eb93a), C32e(0x9c749cb0), C32e(0x722e7268), + C32e(0x772d776c), C32e(0xcdb2cda3), C32e(0x29ee2973), C32e(0x16fb16b6), + C32e(0x01f60153), C32e(0xd74dd7ec), C32e(0xa361a375), C32e(0x49ce49fa), + C32e(0x8d7b8da4), C32e(0x423e42a1), C32e(0x937193bc), C32e(0xa297a226), + C32e(0x04f50457), C32e(0xb868b869), C32e(0x00000000), C32e(0x742c7499), + C32e(0xa060a080), C32e(0x211f21dd), C32e(0x43c843f2), C32e(0x2ced2c77), + C32e(0xd9bed9b3), C32e(0xca46ca01), C32e(0x70d970ce), C32e(0xdd4bdde4), + C32e(0x79de7933), C32e(0x67d4672b), C32e(0x23e8237b), C32e(0xde4ade11), + C32e(0xbd6bbd6d), C32e(0x7e2a7e91), C32e(0x34e5349e), C32e(0x3a163ac1), + C32e(0x54c55417), C32e(0x62d7622f), C32e(0xff55ffcc), C32e(0xa794a722), + C32e(0x4acf4a0f), C32e(0x301030c9), C32e(0x0a060a08), C32e(0x988198e7), + C32e(0x0bf00b5b), C32e(0xcc44ccf0), C32e(0xd5bad54a), C32e(0x3ee33e96), + C32e(0x0ef30e5f), C32e(0x19fe19ba), C32e(0x5bc05b1b), C32e(0x858a850a), + C32e(0xecadec7e), C32e(0xdfbcdf42), C32e(0xd848d8e0), C32e(0x0c040cf9), + C32e(0x7adf7ac6), C32e(0x58c158ee), 
C32e(0x9f759f45), C32e(0xa563a584), + C32e(0x50305040), C32e(0x2e1a2ed1), C32e(0x120e12e1), C32e(0xb76db765), + C32e(0xd44cd419), C32e(0x3c143c30), C32e(0x5f355f4c), C32e(0x712f719d), + C32e(0x38e13867), C32e(0xfda2fd6a), C32e(0x4fcc4f0b), C32e(0x4b394b5c), + C32e(0xf957f93d), C32e(0x0df20daa), C32e(0x9d829de3), C32e(0xc947c9f4), + C32e(0xefacef8b), C32e(0x32e7326f), C32e(0x7d2b7d64), C32e(0xa495a4d7), + C32e(0xfba0fb9b), C32e(0xb398b332), C32e(0x68d16827), C32e(0x817f815d), + C32e(0xaa66aa88), C32e(0x827e82a8), C32e(0xe6abe676), C32e(0x9e839e16), + C32e(0x45ca4503), C32e(0x7b297b95), C32e(0x6ed36ed6), C32e(0x443c4450), + C32e(0x8b798b55), C32e(0x3de23d63), C32e(0x271d272c), C32e(0x9a769a41), + C32e(0x4d3b4dad), C32e(0xfa56fac8), C32e(0xd24ed2e8), C32e(0x221e2228), + C32e(0x76db763f), C32e(0x1e0a1e18), C32e(0xb46cb490), C32e(0x37e4376b), + C32e(0xe75de725), C32e(0xb26eb261), C32e(0x2aef2a86), C32e(0xf1a6f193), + C32e(0xe3a8e372), C32e(0xf7a4f762), C32e(0x593759bd), C32e(0x868b86ff), + C32e(0x563256b1), C32e(0xc543c50d), C32e(0xeb59ebdc), C32e(0xc2b7c2af), + C32e(0x8f8c8f02), C32e(0xac64ac79), C32e(0x6dd26d23), C32e(0x3be03b92), + C32e(0xc7b4c7ab), C32e(0x15fa1543), C32e(0x090709fd), C32e(0x6f256f85), + C32e(0xeaafea8f), C32e(0x898e89f3), C32e(0x20e9208e), C32e(0x28182820), + C32e(0x64d564de), C32e(0x838883fb), C32e(0xb16fb194), C32e(0x967296b8), + C32e(0x6c246c70), C32e(0x08f108ae), C32e(0x52c752e6), C32e(0xf351f335), + C32e(0x6523658d), C32e(0x847c8459), C32e(0xbf9cbfcb), C32e(0x6321637c), + C32e(0x7cdd7c37), C32e(0x7fdc7fc2), C32e(0x9186911a), C32e(0x9485941e), + C32e(0xab90abdb), C32e(0xc642c6f8), C32e(0x57c457e2), C32e(0xe5aae583), + C32e(0x73d8733b), C32e(0x0f050f0c), C32e(0x030103f5), C32e(0x36123638), + C32e(0xfea3fe9f), C32e(0xe15fe1d4), C32e(0x10f91047), C32e(0x6bd06bd2), + C32e(0xa891a82e), C32e(0xe858e829), C32e(0x69276974), C32e(0xd0b9d04e), + C32e(0x483848a9), C32e(0x351335cd), C32e(0xceb3ce56), C32e(0x55335544), + C32e(0xd6bbd6bf), C32e(0x90709049), 
C32e(0x8089800e), C32e(0xf2a7f266), + C32e(0xc1b6c15a), C32e(0x66226678), C32e(0xad92ad2a), C32e(0x60206089), + C32e(0xdb49db15), C32e(0x1aff1a4f), C32e(0x887888a0), C32e(0x8e7a8e51), + C32e(0x8a8f8a06), C32e(0x13f813b2), C32e(0x9b809b12), C32e(0x39173934), + C32e(0x75da75ca), C32e(0x533153b5), C32e(0x51c65113), C32e(0xd3b8d3bb), + C32e(0x5ec35e1f), C32e(0xcbb0cb52), C32e(0x997799b4), C32e(0x3311333c), + C32e(0x46cb46f6), C32e(0x1ffc1f4b), C32e(0x61d661da), C32e(0x4e3a4e58) +}; + +static const uint32_t T3up_cpu[] = { + C32e(0x97a5c6c6), C32e(0xeb84f8f8), C32e(0xc799eeee), C32e(0xf78df6f6), + C32e(0xe50dffff), C32e(0xb7bdd6d6), C32e(0xa7b1dede), C32e(0x39549191), + C32e(0xc0506060), C32e(0x04030202), C32e(0x87a9cece), C32e(0xac7d5656), + C32e(0xd519e7e7), C32e(0x7162b5b5), C32e(0x9ae64d4d), C32e(0xc39aecec), + C32e(0x05458f8f), C32e(0x3e9d1f1f), C32e(0x09408989), C32e(0xef87fafa), + C32e(0xc515efef), C32e(0x7febb2b2), C32e(0x07c98e8e), C32e(0xed0bfbfb), + C32e(0x82ec4141), C32e(0x7d67b3b3), C32e(0xbefd5f5f), C32e(0x8aea4545), + C32e(0x46bf2323), C32e(0xa6f75353), C32e(0xd396e4e4), C32e(0x2d5b9b9b), + C32e(0xeac27575), C32e(0xd91ce1e1), C32e(0x7aae3d3d), C32e(0x986a4c4c), + C32e(0xd85a6c6c), C32e(0xfc417e7e), C32e(0xf102f5f5), C32e(0x1d4f8383), + C32e(0xd05c6868), C32e(0xa2f45151), C32e(0xb934d1d1), C32e(0xe908f9f9), + C32e(0xdf93e2e2), C32e(0x4d73abab), C32e(0xc4536262), C32e(0x543f2a2a), + C32e(0x100c0808), C32e(0x31529595), C32e(0x8c654646), C32e(0x215e9d9d), + C32e(0x60283030), C32e(0x6ea13737), C32e(0x140f0a0a), C32e(0x5eb52f2f), + C32e(0x1c090e0e), C32e(0x48362424), C32e(0x369b1b1b), C32e(0xa53ddfdf), + C32e(0x8126cdcd), C32e(0x9c694e4e), C32e(0xfecd7f7f), C32e(0xcf9feaea), + C32e(0x241b1212), C32e(0x3a9e1d1d), C32e(0xb0745858), C32e(0x682e3434), + C32e(0x6c2d3636), C32e(0xa3b2dcdc), C32e(0x73eeb4b4), C32e(0xb6fb5b5b), + C32e(0x53f6a4a4), C32e(0xec4d7676), C32e(0x7561b7b7), C32e(0xface7d7d), + C32e(0xa47b5252), C32e(0xa13edddd), C32e(0xbc715e5e), 
C32e(0x26971313), + C32e(0x57f5a6a6), C32e(0x6968b9b9), C32e(0x00000000), C32e(0x992cc1c1), + C32e(0x80604040), C32e(0xdd1fe3e3), C32e(0xf2c87979), C32e(0x77edb6b6), + C32e(0xb3bed4d4), C32e(0x01468d8d), C32e(0xced96767), C32e(0xe44b7272), + C32e(0x33de9494), C32e(0x2bd49898), C32e(0x7be8b0b0), C32e(0x114a8585), + C32e(0x6d6bbbbb), C32e(0x912ac5c5), C32e(0x9ee54f4f), C32e(0xc116eded), + C32e(0x17c58686), C32e(0x2fd79a9a), C32e(0xcc556666), C32e(0x22941111), + C32e(0x0fcf8a8a), C32e(0xc910e9e9), C32e(0x08060404), C32e(0xe781fefe), + C32e(0x5bf0a0a0), C32e(0xf0447878), C32e(0x4aba2525), C32e(0x96e34b4b), + C32e(0x5ff3a2a2), C32e(0xbafe5d5d), C32e(0x1bc08080), C32e(0x0a8a0505), + C32e(0x7ead3f3f), C32e(0x42bc2121), C32e(0xe0487070), C32e(0xf904f1f1), + C32e(0xc6df6363), C32e(0xeec17777), C32e(0x4575afaf), C32e(0x84634242), + C32e(0x40302020), C32e(0xd11ae5e5), C32e(0xe10efdfd), C32e(0x656dbfbf), + C32e(0x194c8181), C32e(0x30141818), C32e(0x4c352626), C32e(0x9d2fc3c3), + C32e(0x67e1bebe), C32e(0x6aa23535), C32e(0x0bcc8888), C32e(0x5c392e2e), + C32e(0x3d579393), C32e(0xaaf25555), C32e(0xe382fcfc), C32e(0xf4477a7a), + C32e(0x8bacc8c8), C32e(0x6fe7baba), C32e(0x642b3232), C32e(0xd795e6e6), + C32e(0x9ba0c0c0), C32e(0x32981919), C32e(0x27d19e9e), C32e(0x5d7fa3a3), + C32e(0x88664444), C32e(0xa87e5454), C32e(0x76ab3b3b), C32e(0x16830b0b), + C32e(0x03ca8c8c), C32e(0x9529c7c7), C32e(0xd6d36b6b), C32e(0x503c2828), + C32e(0x5579a7a7), C32e(0x63e2bcbc), C32e(0x2c1d1616), C32e(0x4176adad), + C32e(0xad3bdbdb), C32e(0xc8566464), C32e(0xe84e7474), C32e(0x281e1414), + C32e(0x3fdb9292), C32e(0x180a0c0c), C32e(0x906c4848), C32e(0x6be4b8b8), + C32e(0x255d9f9f), C32e(0x616ebdbd), C32e(0x86ef4343), C32e(0x93a6c4c4), + C32e(0x72a83939), C32e(0x62a43131), C32e(0xbd37d3d3), C32e(0xff8bf2f2), + C32e(0xb132d5d5), C32e(0x0d438b8b), C32e(0xdc596e6e), C32e(0xafb7dada), + C32e(0x028c0101), C32e(0x7964b1b1), C32e(0x23d29c9c), C32e(0x92e04949), + C32e(0xabb4d8d8), C32e(0x43faacac), C32e(0xfd07f3f3), 
C32e(0x8525cfcf), + C32e(0x8fafcaca), C32e(0xf38ef4f4), C32e(0x8ee94747), C32e(0x20181010), + C32e(0xded56f6f), C32e(0xfb88f0f0), C32e(0x946f4a4a), C32e(0xb8725c5c), + C32e(0x70243838), C32e(0xaef15757), C32e(0xe6c77373), C32e(0x35519797), + C32e(0x8d23cbcb), C32e(0x597ca1a1), C32e(0xcb9ce8e8), C32e(0x7c213e3e), + C32e(0x37dd9696), C32e(0xc2dc6161), C32e(0x1a860d0d), C32e(0x1e850f0f), + C32e(0xdb90e0e0), C32e(0xf8427c7c), C32e(0xe2c47171), C32e(0x83aacccc), + C32e(0x3bd89090), C32e(0x0c050606), C32e(0xf501f7f7), C32e(0x38121c1c), + C32e(0x9fa3c2c2), C32e(0xd45f6a6a), C32e(0x47f9aeae), C32e(0xd2d06969), + C32e(0x2e911717), C32e(0x29589999), C32e(0x74273a3a), C32e(0x4eb92727), + C32e(0xa938d9d9), C32e(0xcd13ebeb), C32e(0x56b32b2b), C32e(0x44332222), + C32e(0xbfbbd2d2), C32e(0x4970a9a9), C32e(0x0e890707), C32e(0x66a73333), + C32e(0x5ab62d2d), C32e(0x78223c3c), C32e(0x2a921515), C32e(0x8920c9c9), + C32e(0x15498787), C32e(0x4fffaaaa), C32e(0xa0785050), C32e(0x517aa5a5), + C32e(0x068f0303), C32e(0xb2f85959), C32e(0x12800909), C32e(0x34171a1a), + C32e(0xcada6565), C32e(0xb531d7d7), C32e(0x13c68484), C32e(0xbbb8d0d0), + C32e(0x1fc38282), C32e(0x52b02929), C32e(0xb4775a5a), C32e(0x3c111e1e), + C32e(0xf6cb7b7b), C32e(0x4bfca8a8), C32e(0xdad66d6d), C32e(0x583a2c2c) +}; + +static const uint32_t T3dn_cpu[] = { + C32e(0x32f4a5f4), C32e(0x6f978497), C32e(0x5eb099b0), C32e(0x7a8c8d8c), + C32e(0xe8170d17), C32e(0x0adcbddc), C32e(0x16c8b1c8), C32e(0x6dfc54fc), + C32e(0x90f050f0), C32e(0x07050305), C32e(0x2ee0a9e0), C32e(0xd1877d87), + C32e(0xcc2b192b), C32e(0x13a662a6), C32e(0x7c31e631), C32e(0x59b59ab5), + C32e(0x40cf45cf), C32e(0xa3bc9dbc), C32e(0x49c040c0), C32e(0x68928792), + C32e(0xd03f153f), C32e(0x9426eb26), C32e(0xce40c940), C32e(0xe61d0b1d), + C32e(0x6e2fec2f), C32e(0x1aa967a9), C32e(0x431cfd1c), C32e(0x6025ea25), + C32e(0xf9dabfda), C32e(0x5102f702), C32e(0x45a196a1), C32e(0x76ed5bed), + C32e(0x285dc25d), C32e(0xc5241c24), C32e(0xd4e9aee9), C32e(0xf2be6abe), + 
C32e(0x82ee5aee), C32e(0xbdc341c3), C32e(0xf3060206), C32e(0x52d14fd1), + C32e(0x8ce45ce4), C32e(0x5607f407), C32e(0x8d5c345c), C32e(0xe1180818), + C32e(0x4cae93ae), C32e(0x3e957395), C32e(0x97f553f5), C32e(0x6b413f41), + C32e(0x1c140c14), C32e(0x63f652f6), C32e(0xe9af65af), C32e(0x7fe25ee2), + C32e(0x48782878), C32e(0xcff8a1f8), C32e(0x1b110f11), C32e(0xebc4b5c4), + C32e(0x151b091b), C32e(0x7e5a365a), C32e(0xadb69bb6), C32e(0x98473d47), + C32e(0xa76a266a), C32e(0xf5bb69bb), C32e(0x334ccd4c), C32e(0x50ba9fba), + C32e(0x3f2d1b2d), C32e(0xa4b99eb9), C32e(0xc49c749c), C32e(0x46722e72), + C32e(0x41772d77), C32e(0x11cdb2cd), C32e(0x9d29ee29), C32e(0x4d16fb16), + C32e(0xa501f601), C32e(0xa1d74dd7), C32e(0x14a361a3), C32e(0x3449ce49), + C32e(0xdf8d7b8d), C32e(0x9f423e42), C32e(0xcd937193), C32e(0xb1a297a2), + C32e(0xa204f504), C32e(0x01b868b8), C32e(0x00000000), C32e(0xb5742c74), + C32e(0xe0a060a0), C32e(0xc2211f21), C32e(0x3a43c843), C32e(0x9a2ced2c), + C32e(0x0dd9bed9), C32e(0x47ca46ca), C32e(0x1770d970), C32e(0xafdd4bdd), + C32e(0xed79de79), C32e(0xff67d467), C32e(0x9323e823), C32e(0x5bde4ade), + C32e(0x06bd6bbd), C32e(0xbb7e2a7e), C32e(0x7b34e534), C32e(0xd73a163a), + C32e(0xd254c554), C32e(0xf862d762), C32e(0x99ff55ff), C32e(0xb6a794a7), + C32e(0xc04acf4a), C32e(0xd9301030), C32e(0x0e0a060a), C32e(0x66988198), + C32e(0xab0bf00b), C32e(0xb4cc44cc), C32e(0xf0d5bad5), C32e(0x753ee33e), + C32e(0xac0ef30e), C32e(0x4419fe19), C32e(0xdb5bc05b), C32e(0x80858a85), + C32e(0xd3ecadec), C32e(0xfedfbcdf), C32e(0xa8d848d8), C32e(0xfd0c040c), + C32e(0x197adf7a), C32e(0x2f58c158), C32e(0x309f759f), C32e(0xe7a563a5), + C32e(0x70503050), C32e(0xcb2e1a2e), C32e(0xef120e12), C32e(0x08b76db7), + C32e(0x55d44cd4), C32e(0x243c143c), C32e(0x795f355f), C32e(0xb2712f71), + C32e(0x8638e138), C32e(0xc8fda2fd), C32e(0xc74fcc4f), C32e(0x654b394b), + C32e(0x6af957f9), C32e(0x580df20d), C32e(0x619d829d), C32e(0xb3c947c9), + C32e(0x27efacef), C32e(0x8832e732), C32e(0x4f7d2b7d), C32e(0x42a495a4), + 
C32e(0x3bfba0fb), C32e(0xaab398b3), C32e(0xf668d168), C32e(0x22817f81), + C32e(0xeeaa66aa), C32e(0xd6827e82), C32e(0xdde6abe6), C32e(0x959e839e), + C32e(0xc945ca45), C32e(0xbc7b297b), C32e(0x056ed36e), C32e(0x6c443c44), + C32e(0x2c8b798b), C32e(0x813de23d), C32e(0x31271d27), C32e(0x379a769a), + C32e(0x964d3b4d), C32e(0x9efa56fa), C32e(0xa6d24ed2), C32e(0x36221e22), + C32e(0xe476db76), C32e(0x121e0a1e), C32e(0xfcb46cb4), C32e(0x8f37e437), + C32e(0x78e75de7), C32e(0x0fb26eb2), C32e(0x692aef2a), C32e(0x35f1a6f1), + C32e(0xdae3a8e3), C32e(0xc6f7a4f7), C32e(0x8a593759), C32e(0x74868b86), + C32e(0x83563256), C32e(0x4ec543c5), C32e(0x85eb59eb), C32e(0x18c2b7c2), + C32e(0x8e8f8c8f), C32e(0x1dac64ac), C32e(0xf16dd26d), C32e(0x723be03b), + C32e(0x1fc7b4c7), C32e(0xb915fa15), C32e(0xfa090709), C32e(0xa06f256f), + C32e(0x20eaafea), C32e(0x7d898e89), C32e(0x6720e920), C32e(0x38281828), + C32e(0x0b64d564), C32e(0x73838883), C32e(0xfbb16fb1), C32e(0xca967296), + C32e(0x546c246c), C32e(0x5f08f108), C32e(0x2152c752), C32e(0x64f351f3), + C32e(0xae652365), C32e(0x25847c84), C32e(0x57bf9cbf), C32e(0x5d632163), + C32e(0xea7cdd7c), C32e(0x1e7fdc7f), C32e(0x9c918691), C32e(0x9b948594), + C32e(0x4bab90ab), C32e(0xbac642c6), C32e(0x2657c457), C32e(0x29e5aae5), + C32e(0xe373d873), C32e(0x090f050f), C32e(0xf4030103), C32e(0x2a361236), + C32e(0x3cfea3fe), C32e(0x8be15fe1), C32e(0xbe10f910), C32e(0x026bd06b), + C32e(0xbfa891a8), C32e(0x71e858e8), C32e(0x53692769), C32e(0xf7d0b9d0), + C32e(0x91483848), C32e(0xde351335), C32e(0xe5ceb3ce), C32e(0x77553355), + C32e(0x04d6bbd6), C32e(0x39907090), C32e(0x87808980), C32e(0xc1f2a7f2), + C32e(0xecc1b6c1), C32e(0x5a662266), C32e(0xb8ad92ad), C32e(0xa9602060), + C32e(0x5cdb49db), C32e(0xb01aff1a), C32e(0xd8887888), C32e(0x2b8e7a8e), + C32e(0x898a8f8a), C32e(0x4a13f813), C32e(0x929b809b), C32e(0x23391739), + C32e(0x1075da75), C32e(0x84533153), C32e(0xd551c651), C32e(0x03d3b8d3), + C32e(0xdc5ec35e), C32e(0xe2cbb0cb), C32e(0xc3997799), C32e(0x2d331133), + 
C32e(0x3d46cb46), C32e(0xb71ffc1f), C32e(0x0c61d661), C32e(0x624e3a4e) +}; + +__device__ void groestl512_perm_P(uint32_t *a) +{ + uint32_t t[32]; + +//#pragma unroll 14 + for(int r=0;r<14;r++) + { +#pragma unroll 16 + for(int k=0;k<16;k++) + { + a[(k*2)+0] ^= PC32up(k * 0x10, r); + //a[(k<<1)+1] ^= PC32dn(k * 0x10, r); + } + + // RBTT +#pragma unroll 16 + for(int k=0;k<32;k+=2) + { + t[k + 0] = T0up( B32_0(a[k & 0x1f]) ) ^ + T1up( B32_1(a[(k + 2) & 0x1f]) ) ^ + T2up( B32_2(a[(k + 4) & 0x1f]) ) ^ + T3up( B32_3(a[(k + 6) & 0x1f]) ) ^ + T0dn( B32_0(a[(k + 9) & 0x1f]) ) ^ + T1dn( B32_1(a[(k + 11) & 0x1f]) ) ^ + T2dn( B32_2(a[(k + 13) & 0x1f]) ) ^ + T3dn( B32_3(a[(k + 23) & 0x1f]) ); + + t[k + 1] = T0dn( B32_0(a[k & 0x1f]) ) ^ + T1dn( B32_1(a[(k + 2) & 0x1f]) ) ^ + T2dn( B32_2(a[(k + 4) & 0x1f]) ) ^ + T3dn( B32_3(a[(k + 6) & 0x1f]) ) ^ + T0up( B32_0(a[(k + 9) & 0x1f]) ) ^ + T1up( B32_1(a[(k + 11) & 0x1f]) ) ^ + T2up( B32_2(a[(k + 13) & 0x1f]) ) ^ + T3up( B32_3(a[(k + 23) & 0x1f]) ); + } +#pragma unroll 32 + for(int k=0;k<32;k++) + a[k] = t[k]; + } +} + +__device__ void groestl512_perm_Q(uint32_t *a) +{ +//#pragma unroll 14 + for(int r=0;r<14;r++) + { + uint32_t t[32]; + +#pragma unroll 16 + for(int k=0;k<16;k++) + { + a[(k*2)+0] ^= QC32up(k * 0x10, r); + a[(k*2)+1] ^= QC32dn(k * 0x10, r); + } + + // RBTT +#pragma unroll 16 + for(int k=0;k<32;k+=2) + { + t[k + 0] = T0up( B32_0(a[(k + 2) & 0x1f]) ) ^ + T1up( B32_1(a[(k + 6) & 0x1f]) ) ^ + T2up( B32_2(a[(k + 10) & 0x1f]) ) ^ + T3up( B32_3(a[(k + 22) & 0x1f]) ) ^ + T0dn( B32_0(a[(k + 1) & 0x1f]) ) ^ + T1dn( B32_1(a[(k + 5) & 0x1f]) ) ^ + T2dn( B32_2(a[(k + 9) & 0x1f]) ) ^ + T3dn( B32_3(a[(k + 13) & 0x1f]) ); + + t[k + 1] = T0dn( B32_0(a[(k + 2) & 0x1f]) ) ^ + T1dn( B32_1(a[(k + 6) & 0x1f]) ) ^ + T2dn( B32_2(a[(k + 10) & 0x1f]) ) ^ + T3dn( B32_3(a[(k + 22) & 0x1f]) ) ^ + T0up( B32_0(a[(k + 1) & 0x1f]) ) ^ + T1up( B32_1(a[(k + 5) & 0x1f]) ) ^ + T2up( B32_2(a[(k + 9) & 0x1f]) ) ^ + T3up( B32_3(a[(k + 13) & 0x1f]) ); + } 
+#pragma unroll 32 + for(int k=0;k<32;k++) + a[k] = t[k]; + } +} + +__global__ void groestl512_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector) +{ + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { +#if USE_SHARED + extern __shared__ unsigned char s[]; + uint32_t offset = W_ALIGNMENT * sizeof(uint32_t) * threadIdx.x; + uint32_t *message = (uint32_t*)(&s[offset + 0]); // 128 Byte + uint32_t *state = (uint32_t*)(&s[offset + 128]); // 128 Byte +#else + uint32_t message[32]; + uint32_t state[32]; +#endif + + // lese message ein & verknüpfe diese mit dem hash1 von hefty1 + // lese den state ein + +#pragma unroll 32 + for(int k=0;k<32;k++) + { + state[k] = groestl_gpu_state[k]; + message[k] = groestl_gpu_msg[k]; + } + + uint32_t nounce = nonceVector[thread]; + // nounce setzen + //message[19] = startNounce + thread; + message[19] = nounce; + + uint32_t hashPosition = nounce - startNounce; + + // den richtigen Hefty1 Hash holen +// memcpy(&message[21], &heftyHashes[8 * hashPosition], sizeof(uint32_t) * 8); + uint32_t *heftyHash = &heftyHashes[8 * hashPosition]; +#pragma unroll 8 + for (int k=0; k<8; ++k) + message[21+k] = heftyHash[k]; + + uint32_t g[32]; +#pragma unroll 32 + for(int u=0;u<32;u++) + g[u] = message[u] ^ state[u]; + + // Perm + groestl512_perm_P(g); + groestl512_perm_Q(message); + +#pragma unroll 32 + for(int u=0;u<32;u++) + { + state[u] ^= g[u] ^ message[u]; + g[u] = state[u]; + } + + groestl512_perm_P(g); + +#pragma unroll 32 + for(int u=0;u<32;u++) + state[u] ^= g[u]; + + // kopiere Ergebnis +#pragma unroll 16 + for(int k=0;k<16;k++) + ((uint32_t*)outputHash)[16*hashPosition+k] = state[k + 16]; + } +} + +#define texDef(texname, texmem, texsource, texsize) \ + unsigned int *texmem; \ + cudaMalloc(&texmem, texsize); \ + cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \ + texname.normalized = 0; \ + texname.filterMode = cudaFilterModePoint; \ + 
texname.addressMode[0] = cudaAddressModeClamp; \ + { cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); \ + cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); } \ + +// Setup-Funktionen +__host__ void groestl512_cpu_init(int thr_id, int threads) +{ + // Texturen mit obigem Makro initialisieren + texDef(t0up, d_T0up, T0up_cpu, sizeof(uint32_t)*256); + texDef(t0dn, d_T0dn, T0dn_cpu, sizeof(uint32_t)*256); + texDef(t1up, d_T1up, T1up_cpu, sizeof(uint32_t)*256); + texDef(t1dn, d_T1dn, T1dn_cpu, sizeof(uint32_t)*256); + texDef(t2up, d_T2up, T2up_cpu, sizeof(uint32_t)*256); + texDef(t2dn, d_T2dn, T2dn_cpu, sizeof(uint32_t)*256); + texDef(t3up, d_T3up, T3up_cpu, sizeof(uint32_t)*256); + texDef(t3dn, d_T3dn, T3dn_cpu, sizeof(uint32_t)*256); + + // Speicher für alle Ergebnisse belegen + cudaMalloc(&d_hash4output[thr_id], 16 * sizeof(uint32_t) * threads); +} + +__host__ void groestl512_cpu_setBlock(void *data) + // data muss 84-Byte haben! + // heftyHash hat 32-Byte +{ + // Nachricht expandieren und setzen + uint32_t msgBlock[32]; + + memset(msgBlock, 0, sizeof(uint32_t) * 32); + memcpy(&msgBlock[0], data, 84); + + // Erweitere die Nachricht auf den Nachrichtenblock (padding) + // Unsere Nachricht hat 116 Byte + msgBlock[29] = 0x80; + msgBlock[31] = 0x01000000; + + // groestl512 braucht hierfür keinen CPU-Code (die einzige Runde wird + // auf der GPU ausgeführt) + + // setze register + uint32_t groestl_state_init[32]; + memset(groestl_state_init, 0, sizeof(uint32_t) * 32); + groestl_state_init[31] = 0x20000; + + // state speichern + cudaMemcpyToSymbol( groestl_gpu_state, + groestl_state_init, + 128); + + // Blockheader setzen (korrekte Nonce und Hefty Hash fehlen da drin noch) + cudaMemcpyToSymbol( groestl_gpu_msg, + msgBlock, + 128); +} + +__host__ void groestl512_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy) +{ + // Hefty1 Hashes kopieren (eigentlich nur zum debuggen) + if (copy) + cudaMemcpy( d_heftyHashes[thr_id], 
heftyHashes, 8 * sizeof(uint32_t) * threads, cudaMemcpyHostToDevice ); +} + +__host__ void groestl512_cpu_hash(int thr_id, int threads, uint32_t startNounce) +{ + const int threadsperblock = 128; + + // berechne wie viele Thread Blocks wir brauchen + dim3 grid((threads + threadsperblock-1)/threadsperblock); + dim3 block(threadsperblock); + + // Größe des dynamischen Shared Memory Bereichs (abhängig von der Threadanzahl) +#if USE_SHARED + size_t shared_size = W_ALIGNMENT*sizeof(uint32_t)*threadsperblock; // ein uint32_t eingefügt gegen Bank Konflikte +#else + size_t shared_size = 0; +#endif + +// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size); + + groestl512_gpu_hash<<>>(threads, startNounce, d_hash4output[thr_id], d_heftyHashes[thr_id], d_nonceVector[thr_id]); +} diff --git a/cuda_groestl512.h b/cuda_groestl512.h new file mode 100644 index 0000000..bbeee40 --- /dev/null +++ b/cuda_groestl512.h @@ -0,0 +1,9 @@ +#ifndef _CUDA_GROESTL512_H +#define _CUDA_GROESTL512_H + +void groestl512_cpu_init(int thr_id, int threads); +void groestl512_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy); +void groestl512_cpu_setBlock(void *data); +void groestl512_cpu_hash(int thr_id, int threads, uint32_t startNounce); + +#endif \ No newline at end of file diff --git a/cuda_hefty1.cu b/cuda_hefty1.cu new file mode 100644 index 0000000..c90d15d --- /dev/null +++ b/cuda_hefty1.cu @@ -0,0 +1,401 @@ +/* Diese Funktion ist auf 84-Byte große Eingabedaten ausgerichtet (Heavycoin) */ +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + +#include +#include + +#define USE_SHARED 0 +#define W_ALIGNMENT 65 + +// Folgende Definitionen später durch header ersetzen +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; + +// globaler Speicher für alle HeftyHashes aller Threads +uint32_t *d_heftyHashes[8]; + +/* Hash-Tabellen */ 
+__constant__ uint32_t hefty_gpu_constantTable[64]; + +// muss expandiert werden +__constant__ uint32_t hefty_gpu_blockHeader[16]; // 2x512 Bit Message +__constant__ uint32_t hefty_gpu_register[8]; +__constant__ uint32_t hefty_gpu_sponge[4]; + +uint32_t hefty_cpu_hashTable[] = { 0x6a09e667UL, + 0xbb67ae85UL, + 0x3c6ef372UL, + 0xa54ff53aUL, + 0x510e527fUL, + 0x9b05688cUL, + 0x1f83d9abUL, + 0x5be0cd19UL }; +uint32_t hefty_cpu_constantTable[] = { + 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, + 0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, + 0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL, + 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL, + 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, + 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, + 0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, + 0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL, + 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL, + 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, + 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, + 0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, + 0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL, + 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL, + 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, + 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL +}; + +#define S(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +#define R(x, n) ((x) >> (n)) +#define Ch(x, y, z) ((x & (y ^ z)) ^ z) +#define Maj(x, y, z) ((x & (y | z)) | (y & z)) +#define S0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22)) +#define S1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25)) +#define s0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3)) +#define s1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10)) + +#define SWAB32(x) ( ((x & 0x000000FF) << 24) | ((x & 0x0000FF00) << 8) | ((x & 0x00FF0000) >> 8) | ((x & 0xFF000000) >> 24) ) + +// uint8_t +#define smoosh4(x) ( ((x)>>4) ^ ((x) & 0x0F) ) +__host__ __forceinline__ __device__ uint8_t smoosh2(uint32_t x) 
+{ + uint16_t w = (x >> 16) ^ (x & 0xffff); + uint8_t n = smoosh4( (uint8_t)( (w >> 8) ^ (w & 0xFF) ) ); + return (n >> 2) ^ (n & 0x03); +} +// 4 auf einmal +#define smoosh4Quad(x) ( (((x)>>4) ^ (x)) & 0x0F0F0F0F ) +#define getByte(x,y) ( ((x) >> (y)) & 0xFF ) + +__host__ __device__ void Mangle(uint32_t *inp) +{ + uint32_t r = smoosh4Quad(inp[0]); + //uint8_t r0 = smoosh4( (uint8_t)(inp[0] >> 24) ); + //uint8_t r1 = smoosh4( (uint8_t)(inp[0] >> 16) ); + //uint8_t r2 = smoosh4( (uint8_t)(inp[0] >> 8) ); + //uint8_t r3 = smoosh4( (uint8_t)(inp[0] & 0xFF) ); + + inp[1] = inp[1] ^ S(inp[0], getByte(r, 24)); + + switch (smoosh2(inp[1])) { + case 0: inp[2] ^= S(inp[0], 1 + getByte(r,24)); break; + case 1: inp[2] += S(~inp[0], 1 + getByte(r,16)); break; + case 2: inp[2] &= S(~inp[0], 1 + getByte(r,8)); break; + case 3: inp[2] ^= S(inp[0], 1 + getByte(r,0)); break; + } + + uint32_t tmp = smoosh2(inp[1] ^ inp[2]); + switch (tmp) { + case 0: inp[3] ^= S(inp[0], 2 + getByte(r,24)); break; + case 1: inp[3] += S(~inp[0], 2 + getByte(r,16)); break; + case 2: inp[3] &= S(~inp[0], 2 + getByte(r,8)); break; + case 3: inp[3] ^= S(inp[0], 2 + getByte(r,0)); break; + } + + inp[0] ^= (inp[1] ^ inp[2]) + inp[3]; +} + +__host__ __forceinline__ __device__ void Absorb(uint32_t *inp, uint32_t x) +{ + inp[0] ^= x; + Mangle(inp); +} + +__host__ __forceinline__ __device__ uint32_t Squeeze(uint32_t *inp) +{ + uint32_t y = inp[0]; + Mangle(inp); + return y; +} + +__host__ __forceinline__ __device__ uint32_t Br(uint32_t *sponge, uint32_t x) +{ + uint32_t r = Squeeze(sponge); + + //uint8_t r0 = r >> 8; + uint8_t r1 = r & 0xFF; + uint32_t y = 1 << ((r >> 8) & 0x1F); + + //uint32_t retVal; + //retVal = x; + + uint32_t resArr[4]; + resArr[0] = x; + resArr[1] = x & ~y; + resArr[2] = x | y; + resArr[3] = x ^ y; + return resArr[r1 & 0x03]; + + /* + switch(r1 & 0x03) + { + case 0: + break; + case 1: + retVal = x & ~y; + break; + case 2: + retVal = x | y; + break; + case 3: + retVal = x ^ y; + break; + } 
+ return retVal; + */ +} + +__forceinline__ __device__ void hefty_gpu_round(uint32_t *regs, uint32_t W, uint32_t K, uint32_t *sponge) +{ + uint32_t tmpBr; + + uint32_t brG = Br(sponge, regs[6]); + uint32_t brF = Br(sponge, regs[5]); + uint32_t tmp1 = Ch(regs[4], brF, brG) + regs[7] + W + K; + uint32_t brE = Br(sponge, regs[4]); + uint32_t tmp2 = tmp1 + S1(brE); + uint32_t brC = Br(sponge, regs[2]); + uint32_t brB = Br(sponge, regs[1]); + uint32_t brA = Br(sponge, regs[0]); + uint32_t tmp3 = Maj(brA, brB, brC); + tmpBr = Br(sponge, regs[0]); + uint32_t tmp4 = tmp3 + S0(tmpBr); + tmpBr = Br(sponge, tmp2); + + #pragma unroll 7 + for (int k=6; k >= 0; k--) regs[k+1] = regs[k]; + regs[0] = tmp2 + tmp4; + regs[4] += tmpBr; +} + +__host__ void hefty_cpu_round(uint32_t *regs, uint32_t W, uint32_t K, uint32_t *sponge) +{ + uint32_t tmpBr; + + uint32_t brG = Br(sponge, regs[6]); + uint32_t brF = Br(sponge, regs[5]); + uint32_t tmp1 = Ch(regs[4], brF, brG) + regs[7] + W + K; + uint32_t brE = Br(sponge, regs[4]); + uint32_t tmp2 = tmp1 + S1(brE); + uint32_t brC = Br(sponge, regs[2]); + uint32_t brB = Br(sponge, regs[1]); + uint32_t brA = Br(sponge, regs[0]); + uint32_t tmp3 = Maj(brA, brB, brC); + tmpBr = Br(sponge, regs[0]); + uint32_t tmp4 = tmp3 + S0(tmpBr); + tmpBr = Br(sponge, tmp2); + + for (int k=6; k >= 0; k--) regs[k+1] = regs[k]; + regs[0] = tmp2 + tmp4; + regs[4] += tmpBr; +} + +// Die Hash-Funktion +__global__ void hefty_gpu_hash(int threads, uint32_t startNounce, void *outputHash) +{ + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + // bestimme den aktuellen Zähler + uint32_t nounce = startNounce + thread; + + // jeder thread in diesem Block bekommt sein eigenes W Array im Shared memory +#if USE_SHARED + extern __shared__ unsigned char s[]; + uint32_t *W = (uint32_t *)(&s[W_ALIGNMENT * sizeof(uint32_t) * threadIdx.x]); +#else + // reduktion von 256 byte auf 128 byte + uint32_t W1[16]; + uint32_t W2[16]; +#endif + + // 
Initialisiere die register a bis h mit der Hash-Tabelle + uint32_t regs[8]; + uint32_t hash[8]; + uint32_t sponge[4]; + +#pragma unroll 4 + for(int k=0; k < 4; k++) + sponge[k] = hefty_gpu_sponge[k]; + + // pre +#pragma unroll 8 + for (int k=0; k < 8; k++) + { + regs[k] = hefty_gpu_register[k]; + hash[k] = regs[k]; + } + + //memcpy(W, &hefty_gpu_blockHeader[0], sizeof(uint32_t) * 16); // verbleibende 20 bytes aus Block 2 plus padding +#pragma unroll 16 + for(int k=0;k<16;k++) + W1[k] = hefty_gpu_blockHeader[k]; + W1[3] = SWAB32(nounce); + + + // 2. Runde +#pragma unroll 16 + for(int j=0;j<16;j++) + Absorb(sponge, W1[j] ^ hefty_gpu_constantTable[j]); + +// Progress W1 (Bytes 0...63) +#pragma unroll 16 + for(int j=0;j<16;j++) + { + Absorb(sponge, regs[3] ^ regs[7]); + hefty_gpu_round(regs, W1[j], hefty_gpu_constantTable[j], sponge); + } + +// Progress W2 (Bytes 64...127) then W3 (Bytes 128...191) ... + +#pragma unroll 3 + for(int k=0;k<3;k++) + { + #pragma unroll 2 + for(int j=0;j<2;j++) + W2[j] = s1(W1[14+j]) + W1[9+j] + s0(W1[1+j]) + W1[j]; + #pragma unroll 5 + for(int j=2;j<7;j++) + W2[j] = s1(W2[j-2]) + W1[9+j] + s0(W1[1+j]) + W1[j]; + + #pragma unroll 8 + for(int j=7;j<15;j++) + W2[j] = s1(W2[j-2]) + W2[j-7] + s0(W1[1+j]) + W1[j]; + + W2[15] = s1(W2[13]) + W2[8] + s0(W2[0]) + W1[15]; + + #pragma unroll 16 + for(int j=0;j<16;j++) + { + Absorb(sponge, regs[3] + regs[7]); + hefty_gpu_round(regs, W2[j], hefty_gpu_constantTable[j + 16 * (k+1)], sponge); + } + #pragma unroll 16 + for(int j=0;j<16;j++) + W1[j] = W2[j]; + } + + +#pragma unroll 8 + for(int k=0;k<8;k++) + hash[k] += regs[k]; + +#pragma unroll 8 + for(int k=0;k<8;k++) + ((uint32_t*)outputHash)[8*thread+k] = SWAB32(hash[k]); + } +} + +// Setup-Funktionen +__host__ void hefty_cpu_init(int thr_id, int threads) +{ + cudaSetDevice(thr_id); + + // Kopiere die Hash-Tabellen in den GPU-Speicher + cudaMemcpyToSymbol( hefty_gpu_constantTable, + hefty_cpu_constantTable, + sizeof(uint32_t) * 64 ); + + // Speicher für 
alle Hefty1 hashes belegen + cudaMalloc(&d_heftyHashes[thr_id], 8 * sizeof(uint32_t) * threads); +} + +__host__ void hefty_cpu_setBlock(int thr_id, int threads, void *data) + // data muss 84-Byte haben! +{ + // Nachricht expandieren und setzen + uint32_t msgBlock[32]; + + memset(msgBlock, 0, sizeof(uint32_t) * 32); + memcpy(&msgBlock[0], data, 84); + msgBlock[21] |= 0x80; + msgBlock[31] = 672; // bitlen + + for(int i=0;i<31;i++) // Byteorder drehen + msgBlock[i] = SWAB32(msgBlock[i]); + + // die erste Runde wird auf der CPU durchgeführt, da diese für + // alle Threads gleich ist. Der Hash wird dann an die Threads + // übergeben + + // Erstelle expandierten Block W + uint32_t W[64]; + memcpy(W, &msgBlock[0], sizeof(uint32_t) * 16); + for(int j=16;j<64;j++) + W[j] = s1(W[j-2]) + W[j-7] + s0(W[j-15]) + W[j-16]; + + // Initialisiere die register a bis h mit der Hash-Tabelle + uint32_t regs[8]; + uint32_t hash[8]; + uint32_t sponge[4]; + + // pre + memset(sponge, 0, sizeof(uint32_t) * 4); + for (int k=0; k < 8; k++) + { + regs[k] = hefty_cpu_hashTable[k]; + hash[k] = regs[k]; + } + + // 1. 
Runde + for(int j=0;j<16;j++) + Absorb(sponge, W[j] ^ hefty_cpu_constantTable[j]); + + for(int j=0;j<16;j++) + { + Absorb(sponge, regs[3] ^ regs[7]); + hefty_cpu_round(regs, W[j], hefty_cpu_constantTable[j], sponge); + } + + for(int j=16;j<64;j++) + { + Absorb(sponge, regs[3] + regs[7]); + hefty_cpu_round(regs, W[j], hefty_cpu_constantTable[j], sponge); + } + + for(int k=0;k<8;k++) + hash[k] += regs[k]; + + // sponge speichern + + cudaMemcpyToSymbol( hefty_gpu_sponge, + sponge, + sizeof(uint32_t) * 4 ); + // hash speichern + cudaMemcpyToSymbol( hefty_gpu_register, + hash, + sizeof(uint32_t) * 8 ); + + // Blockheader setzen (korrekte Nonce fehlt da drin noch) + cudaMemcpyToSymbol( hefty_gpu_blockHeader, + &msgBlock[16], + 64); +} + +__host__ void hefty_cpu_hash(int thr_id, int threads, int startNounce) +{ + const int threadsperblock = 128; + + // berechne wie viele Thread Blocks wir brauchen + dim3 grid((threads + threadsperblock-1)/threadsperblock); + dim3 block(threadsperblock); + + // Größe des dynamischen Shared Memory Bereichs (abhängig von der Threadanzahl) +#if USE_SHARED + size_t shared_size = W_ALIGNMENT*sizeof(uint32_t)*threadsperblock; // ein uint32_t eingefügt gegen Bank Konflikte +#else + size_t shared_size = 0; +#endif + +// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size); + + hefty_gpu_hash<<>>(threads, startNounce, (void*)d_heftyHashes[thr_id]); +} diff --git a/cuda_hefty1.h b/cuda_hefty1.h new file mode 100644 index 0000000..08b1844 --- /dev/null +++ b/cuda_hefty1.h @@ -0,0 +1,8 @@ +#ifndef _CUDA_HEFTY1_H +#define _CUDA_HEFTY1_H + +void hefty_cpu_hash(int thr_id, int threads, int startNounce); +void hefty_cpu_setBlock(int thr_id, int threads, void *data); +void hefty_cpu_init(int thr_id, int threads); + +#endif \ No newline at end of file diff --git a/cuda_keccak512.cu b/cuda_keccak512.cu new file mode 100644 index 0000000..66dddaf --- /dev/null +++ b/cuda_keccak512.cu @@ -0,0 
+1,274 @@ +/* Diese Funktion ist auf 84+32-Byte große Eingabedaten ausgerichtet (Heavycoin) */ +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + +#include +#include + +// Folgende Definitionen später durch header ersetzen +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; + +// globaler Speicher für alle HeftyHashes aller Threads +extern uint32_t *d_heftyHashes[8]; +extern uint32_t *d_nonceVector[8]; + +// globaler Speicher für unsere Ergebnisse +uint32_t *d_hash3output[8]; + +// der Keccak512 State nach der ersten Runde (72 Bytes) +__constant__ uint64_t c_State[25]; + +// die Message (72 Bytes) für die zweite Runde auf der GPU +__constant__ uint32_t c_PaddedMessage2[18]; // 44 bytes of remaining message (Nonce at offset 4) plus padding + +// ---------------------------- BEGIN CUDA keccak512 functions ------------------------------------ + +#define ROTL64(a,b) (((a) << (b)) | ((a) >> (64 - b))) + +#define U32TO64_LE(p) \ + (((uint64_t)(*p)) | (((uint64_t)(*(p + 1))) << 32)) + +#define U64TO32_LE(p, v) \ + *p = (uint32_t)((v)); *(p+1) = (uint32_t)((v) >> 32); + +static __device__ void mycpy72(uint32_t *d, const uint32_t *s) { +#pragma unroll 18 + for (int k=0; k < 18; ++k) d[k] = s[k]; +} + +static __device__ void mycpy32(uint32_t *d, const uint32_t *s) { +#pragma unroll 8 + for (int k=0; k < 8; ++k) d[k] = s[k]; +} + +typedef struct keccak_hash_state_t { + uint64_t state[25]; // 25*2 + uint32_t buffer[72/4]; // 72 +} keccak_hash_state; + +__device__ void statecopy(uint64_t *d, uint64_t *s) +{ +#pragma unroll 25 + for (int i=0; i < 25; ++i) + d[i] = s[i]; +} + + +static const uint64_t host_keccak_round_constants[24] = { + 0x0000000000000001ull, 0x0000000000008082ull, + 0x800000000000808aull, 0x8000000080008000ull, + 0x000000000000808bull, 0x0000000080000001ull, + 0x8000000080008081ull, 0x8000000000008009ull, + 0x000000000000008aull, 0x0000000000000088ull, + 0x0000000080008009ull, 
0x000000008000000aull, + 0x000000008000808bull, 0x800000000000008bull, + 0x8000000000008089ull, 0x8000000000008003ull, + 0x8000000000008002ull, 0x8000000000000080ull, + 0x000000000000800aull, 0x800000008000000aull, + 0x8000000080008081ull, 0x8000000000008080ull, + 0x0000000080000001ull, 0x8000000080008008ull +}; + +__constant__ uint64_t c_keccak_round_constants[24]; + +__host__ __device__ void +keccak_block(uint64_t *s, const uint32_t *in, const uint64_t *keccak_round_constants) { + size_t i; + uint64_t t[5], u[5], v, w; + + /* absorb input */ +#pragma unroll 9 + for (i = 0; i < 72 / 8; i++, in += 2) + s[i] ^= U32TO64_LE(in); + + for (i = 0; i < 24; i++) { + /* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */ + t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20]; + t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21]; + t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22]; + t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23]; + t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24]; + + /* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */ + u[0] = t[4] ^ ROTL64(t[1], 1); + u[1] = t[0] ^ ROTL64(t[2], 1); + u[2] = t[1] ^ ROTL64(t[3], 1); + u[3] = t[2] ^ ROTL64(t[4], 1); + u[4] = t[3] ^ ROTL64(t[0], 1); + + /* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */ + s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0]; + s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1]; + s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2]; + s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3]; + s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4]; + + /* rho pi: b[..] = rotl(a[..], ..) 
*/ + v = s[ 1]; + s[ 1] = ROTL64(s[ 6], 44); + s[ 6] = ROTL64(s[ 9], 20); + s[ 9] = ROTL64(s[22], 61); + s[22] = ROTL64(s[14], 39); + s[14] = ROTL64(s[20], 18); + s[20] = ROTL64(s[ 2], 62); + s[ 2] = ROTL64(s[12], 43); + s[12] = ROTL64(s[13], 25); + s[13] = ROTL64(s[19], 8); + s[19] = ROTL64(s[23], 56); + s[23] = ROTL64(s[15], 41); + s[15] = ROTL64(s[ 4], 27); + s[ 4] = ROTL64(s[24], 14); + s[24] = ROTL64(s[21], 2); + s[21] = ROTL64(s[ 8], 55); + s[ 8] = ROTL64(s[16], 45); + s[16] = ROTL64(s[ 5], 36); + s[ 5] = ROTL64(s[ 3], 28); + s[ 3] = ROTL64(s[18], 21); + s[18] = ROTL64(s[17], 15); + s[17] = ROTL64(s[11], 10); + s[11] = ROTL64(s[ 7], 6); + s[ 7] = ROTL64(s[10], 3); + s[10] = ROTL64( v, 1); + + /* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */ + v = s[ 0]; w = s[ 1]; s[ 0] ^= (~w) & s[ 2]; s[ 1] ^= (~s[ 2]) & s[ 3]; s[ 2] ^= (~s[ 3]) & s[ 4]; s[ 3] ^= (~s[ 4]) & v; s[ 4] ^= (~v) & w; + v = s[ 5]; w = s[ 6]; s[ 5] ^= (~w) & s[ 7]; s[ 6] ^= (~s[ 7]) & s[ 8]; s[ 7] ^= (~s[ 8]) & s[ 9]; s[ 8] ^= (~s[ 9]) & v; s[ 9] ^= (~v) & w; + v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w; + v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w; + v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w; + + /* iota: a[0,0] ^= round constant */ + s[0] ^= keccak_round_constants[i]; + } +} + +// Die Hash-Funktion +__global__ void keccak512_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector) +{ + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + // bestimme den aktuellen Zähler + //uint32_t nounce = startNounce + thread; + uint32_t nounce = nonceVector[thread]; + + // Index-Position des Hashes in den Hash Puffern bestimmen (Hefty1 und 
outputHash) + uint32_t hashPosition = nounce - startNounce; + + // erstmal den State der ersten Runde holen + uint64_t keccak_gpu_state[25]; +#pragma unroll 25 + for (int i=0; i < 25; ++i) + keccak_gpu_state[i] = c_State[i]; + + // Message2 in den Puffer holen + uint32_t msgBlock[18]; + mycpy72(msgBlock, c_PaddedMessage2); + + // die individuelle Nonce einsetzen + msgBlock[1] = nounce; + + // den individuellen Hefty1 Hash einsetzen + mycpy32(&msgBlock[3], &heftyHashes[8 * hashPosition]); + + // den Block einmal gut durchschütteln + keccak_block(keccak_gpu_state, msgBlock, c_keccak_round_constants); + + // das Hash erzeugen + uint32_t hash[16]; + +#pragma unroll 8 + for (size_t i = 0; i < 64; i += 8) { + U64TO32_LE((&hash[i/4]), keccak_gpu_state[i / 8]); + } + + + // und ins Global Memory rausschreiben +#pragma unroll 16 + for(int k=0;k<16;k++) + ((uint32_t*)outputHash)[16*hashPosition+k] = hash[k]; + } +} + +// ---------------------------- END CUDA keccak512 functions ------------------------------------ + +// Setup-Funktionen +__host__ void keccak512_cpu_init(int thr_id, int threads) +{ + // Kopiere die Hash-Tabellen in den GPU-Speicher + cudaMemcpyToSymbol( c_keccak_round_constants, + host_keccak_round_constants, + sizeof(host_keccak_round_constants), + 0, cudaMemcpyHostToDevice); + + // Speicher für alle Ergebnisse belegen + cudaMalloc(&d_hash3output[thr_id], 16 * sizeof(uint32_t) * threads); +} + +// ----------------BEGIN keccak512 CPU version from scrypt-jane code -------------------- + +#define SCRYPT_HASH_DIGEST_SIZE 64 +#define SCRYPT_KECCAK_F 1600 +#define SCRYPT_KECCAK_C (SCRYPT_HASH_DIGEST_SIZE * 8 * 2) /* 1024 */ +#define SCRYPT_KECCAK_R (SCRYPT_KECCAK_F - SCRYPT_KECCAK_C) /* 576 */ +#define SCRYPT_HASH_BLOCK_SIZE (SCRYPT_KECCAK_R / 8) /* 72 */ + +// --------------- END keccak512 CPU version from scrypt-jane code -------------------- + +__host__ void keccak512_cpu_setBlock(void *data) + // data muss 84-Byte haben! 
+ // heftyHash hat 32-Byte +{ + // CH + // state init + uint64_t keccak_cpu_state[25]; + memset(keccak_cpu_state, 0, 200); + + // keccak hat 72-Byte blöcke, d.h. in unserem Fall zwei Blöcke + // zu jeweils + uint32_t msgBlock[18]; + memset(msgBlock, 0, 18 * sizeof(uint32_t)); + + // kopiere die Daten rein (aber nur alles nach Bit 72) + memcpy(&msgBlock[0], &((uint8_t*)data)[72], 12); + + // Nachricht abschließen + msgBlock[11] = 0x01; + msgBlock[17] = 0x80000000; + + // erste Runde + keccak_block((uint64_t*)&keccak_cpu_state, (const uint32_t*)data, host_keccak_round_constants); + + // Message 2 ins Constant Memory kopieren (die variable Nonce und + // der Hefty1 Anteil muss aber auf der GPU erst noch ersetzt werden) + cudaMemcpyToSymbol( c_PaddedMessage2, msgBlock, 18*sizeof(uint32_t), 0, cudaMemcpyHostToDevice ); + + // state kopieren + cudaMemcpyToSymbol( c_State, keccak_cpu_state, 25*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); +} + +__host__ void keccak512_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy) +{ + // Hefty1 Hashes kopieren + if (copy) cudaMemcpy( d_heftyHashes[thr_id], heftyHashes, 8 * sizeof(uint32_t) * threads, cudaMemcpyHostToDevice ); + //else cudaThreadSynchronize(); +} + +__host__ void keccak512_cpu_hash(int thr_id, int threads, uint32_t startNounce) +{ + const int threadsperblock = 128; + + // berechne wie viele Thread Blocks wir brauchen + dim3 grid((threads + threadsperblock-1)/threadsperblock); + dim3 block(threadsperblock); + + // Größe des dynamischen Shared Memory Bereichs (abhängig von der Threadanzahl) + size_t shared_size = 0; + +// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size); + + keccak512_gpu_hash<<>>(threads, startNounce, d_hash3output[thr_id], d_heftyHashes[thr_id], d_nonceVector[thr_id]); +} diff --git a/cuda_keccak512.h b/cuda_keccak512.h new file mode 100644 index 0000000..abd4741 --- /dev/null +++ b/cuda_keccak512.h @@ -0,0 
+1,9 @@ +#ifndef _CUDA_KECCAK512_H +#define _CUDA_KECCAK512_H + +void keccak512_cpu_init(int thr_id, int threads); +void keccak512_cpu_setBlock(void *data); +void keccak512_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy); +void keccak512_cpu_hash(int thr_id, int threads, uint32_t startNounce); + +#endif diff --git a/cuda_sha256.cu b/cuda_sha256.cu new file mode 100644 index 0000000..a68f849 --- /dev/null +++ b/cuda_sha256.cu @@ -0,0 +1,274 @@ +/* Diese Funktion ist auf 84+32 Byte große Eingabedaten ausgerichtet (Heavycoin) */ +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + +#include +#include + +#define W_ALIGNMENT 65 + +// Folgende Definitionen später durch header ersetzen +typedef unsigned int uint32_t; + +// globaler Speicher für alle HeftyHashes aller Threads +extern uint32_t *d_heftyHashes[8]; +extern uint32_t *d_nonceVector[8]; + +// globaler Speicher für unsere Ergebnisse +uint32_t *d_hash2output[8]; + + +/* Hash-Tabellen */ +__constant__ uint32_t sha256_gpu_constantTable[64]; + +// muss expandiert werden +__constant__ uint32_t sha256_gpu_blockHeader[16]; // 2x512 Bit Message +__constant__ uint32_t sha256_gpu_register[8]; + +uint32_t sha256_cpu_hashTable[] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 }; +uint32_t sha256_cpu_constantTable[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 
0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, +}; + +#define S(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +#define R(x, n) ((x) >> (n)) +#define Ch(x, y, z) ((x & (y ^ z)) ^ z) +#define Maj(x, y, z) ((x & (y | z)) | (y & z)) +#define S0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22)) +#define S1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25)) +#define s0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3)) +#define s1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10)) + +#define SWAB32(x) ( ((x & 0x000000FF) << 24) | ((x & 0x0000FF00) << 8) | ((x & 0x00FF0000) >> 8) | ((x & 0xFF000000) >> 24) ) + +// Die Hash-Funktion +__global__ void sha256_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector) +{ + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + // bestimme den aktuellen Zähler + uint32_t nounce = startNounce + thread; + nonceVector[thread] = nounce; + + // jeder thread in diesem Block bekommt sein eigenes W Array im Shared memory + //extern __shared__ unsigned char s[]; + //uint32_t *W = (uint32_t *)(&s[W_ALIGNMENT * sizeof(uint32_t) * threadIdx.x]); + uint32_t W1[16]; + uint32_t W2[16]; + + // Initialisiere die register a bis h mit der Hash-Tabelle + uint32_t regs[8]; + uint32_t hash[8]; + + // pre +#pragma unroll 8 + for (int k=0; k < 8; k++) + { + regs[k] = sha256_gpu_register[k]; + hash[k] = regs[k]; + } + + // 2. 
Runde + //memcpy(W, &sha256_gpu_blockHeader[0], sizeof(uint32_t) * 16); // TODO: aufsplitten in zwei Teilblöcke + //memcpy(&W[5], &heftyHashes[8 * (blockDim.x * blockIdx.x + threadIdx.x)], sizeof(uint32_t) * 8); // den richtigen Hefty1 Hash holen +#pragma unroll 16 + for(int k=0;k<16;k++) + W1[k] = sha256_gpu_blockHeader[k]; + + uint32_t offset = 8 * (blockDim.x * blockIdx.x + threadIdx.x); +#pragma unroll 8 + for(int k=0;k<8;k++) + W1[5+k] = heftyHashes[offset + k]; + + +#pragma unroll 8 + for (int i=5; i <5+8; ++i) W1[i] = SWAB32(W1[i]); // die Hefty1 Hashes brauchen eine Drehung ;) + W1[3] = SWAB32(nounce); + +// Progress W1 +#pragma unroll 16 + for(int j=0;j<16;j++) + { + uint32_t T1, T2; + T1 = regs[7] + S1(regs[4]) + Ch(regs[4], regs[5], regs[6]) + sha256_gpu_constantTable[j] + W1[j]; + T2 = S0(regs[0]) + Maj(regs[0], regs[1], regs[2]); + + #pragma unroll 7 + for (int k=6; k >= 0; k--) regs[k+1] = regs[k]; + regs[0] = T1 + T2; + regs[4] += T1; + } + +// Progress W2...W3 +#pragma unroll 3 + for(int k=0;k<3;k++) + { + #pragma unroll 2 + for(int j=0;j<2;j++) + W2[j] = s1(W1[14+j]) + W1[9+j] + s0(W1[1+j]) + W1[j]; + #pragma unroll 5 + for(int j=2;j<7;j++) + W2[j] = s1(W2[j-2]) + W1[9+j] + s0(W1[1+j]) + W1[j]; + + #pragma unroll 8 + for(int j=7;j<15;j++) + W2[j] = s1(W2[j-2]) + W2[j-7] + s0(W1[1+j]) + W1[j]; + + W2[15] = s1(W2[13]) + W2[8] + s0(W2[0]) + W1[15]; + + // Rundenfunktion + #pragma unroll 16 + for(int j=0;j<16;j++) + { + uint32_t T1, T2; + T1 = regs[7] + S1(regs[4]) + Ch(regs[4], regs[5], regs[6]) + sha256_gpu_constantTable[j + 16 * (k+1)] + W2[j]; + T2 = S0(regs[0]) + Maj(regs[0], regs[1], regs[2]); + + #pragma unroll 7 + for (int l=6; l >= 0; l--) regs[l+1] = regs[l]; + regs[0] = T1 + T2; + regs[4] += T1; + } + + #pragma unroll 16 + for(int j=0;j<16;j++) + W1[j] = W2[j]; + } + +/* + for(int j=16;j<64;j++) + W[j] = s1(W[j-2]) + W[j-7] + s0(W[j-15]) + W[j-16]; + +#pragma unroll 64 + for(int j=0;j<64;j++) + { + uint32_t T1, T2; + T1 = regs[7] + 
S1(regs[4]) + Ch(regs[4], regs[5], regs[6]) + sha256_gpu_constantTable[j] + W[j]; + T2 = S0(regs[0]) + Maj(regs[0], regs[1], regs[2]); + + #pragma unroll 7 + for (int k=6; k >= 0; k--) regs[k+1] = regs[k]; + regs[0] = T1 + T2; + regs[4] += T1; + } +*/ +#pragma unroll 8 + for(int k=0;k<8;k++) + hash[k] += regs[k]; + +#pragma unroll 8 + for(int k=0;k<8;k++) + ((uint32_t*)outputHash)[8*thread+k] = SWAB32(hash[k]); + } +} + +// Setup-Funktionen +__host__ void sha256_cpu_init(int thr_id, int threads) +{ + // Kopiere die Hash-Tabellen in den GPU-Speicher + cudaMemcpyToSymbol( sha256_gpu_constantTable, + sha256_cpu_constantTable, + sizeof(uint32_t) * 64 ); + + // Speicher für alle Ergebnisse belegen + cudaMalloc(&d_hash2output[thr_id], 8 * sizeof(uint32_t) * threads); +} + +__host__ void sha256_cpu_setBlock(void *data) + // data muss 84-Byte haben! + // heftyHash hat 32-Byte +{ + // Nachricht expandieren und setzen + uint32_t msgBlock[32]; + + memset(msgBlock, 0, sizeof(uint32_t) * 32); + memcpy(&msgBlock[0], data, 84); + memset(&msgBlock[21], 0, 32); // vorläufig Nullen anstatt der Hefty1 Hashes einfüllen + msgBlock[29] |= 0x80; + msgBlock[31] = 928; // bitlen + + for(int i=0;i<31;i++) // Byteorder drehen + msgBlock[i] = SWAB32(msgBlock[i]); + + // die erste Runde wird auf der CPU durchgeführt, da diese für + // alle Threads gleich ist. Der Hash wird dann an die Threads + // übergeben + uint32_t W[64]; + + // Erstelle expandierten Block W + memcpy(W, &msgBlock[0], sizeof(uint32_t) * 16); + for(int j=16;j<64;j++) + W[j] = s1(W[j-2]) + W[j-7] + s0(W[j-15]) + W[j-16]; + + // Initialisiere die register a bis h mit der Hash-Tabelle + uint32_t regs[8]; + uint32_t hash[8]; + + // pre + for (int k=0; k < 8; k++) + { + regs[k] = sha256_cpu_hashTable[k]; + hash[k] = regs[k]; + } + + // 1. 
Runde + for(int j=0;j<64;j++) + { + uint32_t T1, T2; + T1 = regs[7] + S1(regs[4]) + Ch(regs[4], regs[5], regs[6]) + sha256_cpu_constantTable[j] + W[j]; + T2 = S0(regs[0]) + Maj(regs[0], regs[1], regs[2]); + + //#pragma unroll 7 + for (int k=6; k >= 0; k--) regs[k+1] = regs[k]; + // sollte mal noch durch memmov ersetzt werden! +// memcpy(®s[1], ®s[0], sizeof(uint32_t) * 7); + regs[0] = T1 + T2; + regs[4] += T1; + } + + for(int k=0;k<8;k++) + hash[k] += regs[k]; + + // hash speichern + cudaMemcpyToSymbol( sha256_gpu_register, + hash, + sizeof(uint32_t) * 8 ); + + // Blockheader setzen (korrekte Nonce und Hefty Hash fehlen da drin noch) + cudaMemcpyToSymbol( sha256_gpu_blockHeader, + &msgBlock[16], + 64); +} + +__host__ void sha256_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy) +{ + // Hefty1 Hashes kopieren + if (copy) cudaMemcpy( d_heftyHashes[thr_id], heftyHashes, 8 * sizeof(uint32_t) * threads, cudaMemcpyHostToDevice ); + //else cudaThreadSynchronize(); +} + +__host__ void sha256_cpu_hash(int thr_id, int threads, int startNounce) +{ + const int threadsperblock = 128; + + // berechne wie viele Thread Blocks wir brauchen + dim3 grid((threads + threadsperblock-1)/threadsperblock); + dim3 block(threadsperblock); + + // Größe des dynamischen Shared Memory Bereichs (abhängig von der Threadanzahl) + //size_t shared_size = W_ALIGNMENT*sizeof(uint32_t)*threadsperblock; // ein uint32_t eingefügt gegen Bank Konflikte + size_t shared_size = 0; + +// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size); + + sha256_gpu_hash<<>>(threads, startNounce, d_hash2output[thr_id], d_heftyHashes[thr_id], d_nonceVector[thr_id]); +} diff --git a/cuda_sha256.h b/cuda_sha256.h new file mode 100644 index 0000000..ff03bf5 --- /dev/null +++ b/cuda_sha256.h @@ -0,0 +1,8 @@ +#ifndef _CUDA_SHA256_H +#define _CUDA_SHA256_H + +void sha256_cpu_init(int thr_id, int threads); +void sha256_cpu_setBlock(void 
*data); +void sha256_cpu_hash(int thr_id, int threads, int startNounce); +void sha256_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy); +#endif diff --git a/depcomp b/depcomp new file mode 100644 index 0000000..bd0ac08 --- /dev/null +++ b/depcomp @@ -0,0 +1,688 @@ +#! /bin/sh +# depcomp - compile a program generating dependencies as side-effects + +scriptversion=2011-12-04.11; # UTC + +# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007, 2009, 2010, +# 2011 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Originally written by Alexandre Oliva . + +case $1 in + '') + echo "$0: No command. Try \`$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: depcomp [--help] [--version] PROGRAM [ARGS] + +Run PROGRAMS ARGS to compile a file, generating dependencies +as side-effects. + +Environment variables: + depmode Dependency tracking mode. + source Source file read by `PROGRAMS ARGS'. + object Object file output by `PROGRAMS ARGS'. + DEPDIR directory where to store dependencies. + depfile Dependency file to output. 
+ tmpdepfile Temporary file to use when outputting dependencies. + libtool Whether libtool is used (yes/no). + +Report bugs to . +EOF + exit $? + ;; + -v | --v*) + echo "depcomp $scriptversion" + exit $? + ;; +esac + +if test -z "$depmode" || test -z "$source" || test -z "$object"; then + echo "depcomp: Variables source, object and depmode must be set" 1>&2 + exit 1 +fi + +# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. +depfile=${depfile-`echo "$object" | + sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} +tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} + +rm -f "$tmpdepfile" + +# Some modes work just like other modes, but use different flags. We +# parameterize here, but still list the modes in the big case below, +# to make depend.m4 easier to write. Note that we *cannot* use a case +# here, because this file can only contain one case statement. +if test "$depmode" = hp; then + # HP compiler uses -M and no extra arg. + gccflag=-M + depmode=gcc +fi + +if test "$depmode" = dashXmstdout; then + # This is just like dashmstdout with a different argument. + dashmflag=-xM + depmode=dashmstdout +fi + +cygpath_u="cygpath -u -f -" +if test "$depmode" = msvcmsys; then + # This is just like msvisualcpp but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvisualcpp +fi + +if test "$depmode" = msvc7msys; then + # This is just like msvc7 but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvc7 +fi + +case "$depmode" in +gcc3) +## gcc 3 implements dependency tracking that does exactly what +## we want. Yay! Note: for some reason libtool 1.4 doesn't like +## it if -MD -MP comes after the -MF stuff. Hmm. 
+## Unfortunately, FreeBSD c89 acceptance of flags depends upon +## the command line argument order; so add the flags where they +## appear in depend2.am. Note that the slowdown incurred here +## affects only configure: in makefiles, %FASTDEP% shortcuts this. + for arg + do + case $arg in + -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;; + *) set fnord "$@" "$arg" ;; + esac + shift # fnord + shift # $arg + done + "$@" + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + mv "$tmpdepfile" "$depfile" + ;; + +gcc) +## There are various ways to get dependency output from gcc. Here's +## why we pick this rather obscure method: +## - Don't want to use -MD because we'd like the dependencies to end +## up in a subdir. Having to rename by hand is ugly. +## (We might end up doing this anyway to support other compilers.) +## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like +## -MM, not -M (despite what the docs say). +## - Using -M directly means running the compiler twice (even worse +## than renaming). + if test -z "$gccflag"; then + gccflag=-MD, + fi + "$@" -Wp,"$gccflag$tmpdepfile" + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz +## The second -e expression handles DOS-style file names with drive letters. + sed -e 's/^[^:]*: / /' \ + -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" +## This next piece of magic avoids the `deleted header file' problem. +## The problem is that when a header file which appears in a .P file +## is deleted, the dependency causes make to die (because there is +## typically no way to rebuild the header). We avoid this by adding +## dummy dependencies for each header file. Too bad gcc doesn't do +## this for us directly. + tr ' ' ' +' < "$tmpdepfile" | +## Some versions of gcc put a space before the `:'. 
On the theory +## that the space means something, we add a space to the output as +## well. hp depmode also adds that space, but also prefixes the VPATH +## to the object. Take care to not repeat it in the output. +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +sgi) + if test "$libtool" = yes; then + "$@" "-Wp,-MDupdate,$tmpdepfile" + else + "$@" -MDupdate "$tmpdepfile" + fi + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + + if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files + echo "$object : \\" > "$depfile" + + # Clip off the initial element (the dependent). Don't try to be + # clever and replace this with sed code, as IRIX sed won't handle + # lines with more than a fixed number of characters (4096 in + # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; + # the IRIX cc adds comments like `#:fec' to the end of the + # dependency line. + tr ' ' ' +' < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \ + tr ' +' ' ' >> "$depfile" + echo >> "$depfile" + + # The second pass generates a dummy entry for each header file. + tr ' ' ' +' < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ + >> "$depfile" + else + # The sourcefile does not contain any dependencies, so just + # store a dummy comment line, to avoid errors with the Makefile + # "include basename.Plo" scheme. + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" + ;; + +aix) + # The C for AIX Compiler uses -M and outputs the dependencies + # in a .u file. 
In older versions, this file always lives in the + # current directory. Also, the AIX compiler puts `$object:' at the + # start of each line; $object doesn't have directory information. + # Version 6 uses the directory in both cases. + dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` + test "x$dir" = "x$object" && dir= + base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.u + tmpdepfile2=$base.u + tmpdepfile3=$dir.libs/$base.u + "$@" -Wc,-M + else + tmpdepfile1=$dir$base.u + tmpdepfile2=$dir$base.u + tmpdepfile3=$dir$base.u + "$@" -M + fi + stat=$? + + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + if test -f "$tmpdepfile"; then + # Each line is of the form `foo.o: dependent.h'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile" + # That's a tab and a space in the []. + sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile" + else + # The sourcefile does not contain any dependencies, so just + # store a dummy comment line, to avoid errors with the Makefile + # "include basename.Plo" scheme. + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" + ;; + +icc) + # Intel's C compiler understands `-MD -MF file'. However on + # icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c + # ICC 7.0 will fill foo.d with something like + # foo.o: sub/foo.c + # foo.o: sub/foo.h + # which is wrong. We want: + # sub/foo.o: sub/foo.c + # sub/foo.o: sub/foo.h + # sub/foo.c: + # sub/foo.h: + # ICC 7.1 will output + # foo.o: sub/foo.c sub/foo.h + # and will wrap long lines using \ : + # foo.o: sub/foo.c ... \ + # sub/foo.h ... \ + # ... + + "$@" -MD -MF "$tmpdepfile" + stat=$? 
+ if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each line is of the form `foo.o: dependent.h', + # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" | + sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp2) + # The "hp" stanza above does not work with aCC (C++) and HP's ia64 + # compilers, which have integrated preprocessors. The correct option + # to use with these is +Maked; it writes dependencies to a file named + # 'foo.d', which lands next to the object file, wherever that + # happens to be. + # Much of this is similar to the tru64 case; see comments there. + dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` + test "x$dir" = "x$object" && dir= + base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir.libs/$base.d + "$@" -Wc,+Maked + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + "$@" +Maked + fi + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile1" "$tmpdepfile2" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" + do + test -f "$tmpdepfile" && break + done + if test -f "$tmpdepfile"; then + sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile" + # Add `dependent.h:' lines. + sed -ne '2,${ + s/^ *// + s/ \\*$// + s/$/:/ + p + }' "$tmpdepfile" >> "$depfile" + else + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" "$tmpdepfile2" + ;; + +tru64) + # The Tru64 compiler uses -MD to generate dependencies as a side + # effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'. 
+ # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put + # dependencies in `foo.d' instead, so we check for that too. + # Subdirectories are respected. + dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` + test "x$dir" = "x$object" && dir= + base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` + + if test "$libtool" = yes; then + # With Tru64 cc, shared objects can also be used to make a + # static library. This mechanism is used in libtool 1.4 series to + # handle both shared and static libraries in a single compilation. + # With libtool 1.4, dependencies were output in $dir.libs/$base.lo.d. + # + # With libtool 1.5 this exception was removed, and libtool now + # generates 2 separate objects for the 2 libraries. These two + # compilations output dependencies in $dir.libs/$base.o.d and + # in $dir$base.o.d. We have to check for both files, because + # one of the two compilations can be disabled. We should prefer + # $dir$base.o.d over $dir.libs/$base.o.d because the latter is + # automatically cleaned when .libs/ is deleted, while ignoring + # the former would cause a distcleancheck panic. + tmpdepfile1=$dir.libs/$base.lo.d # libtool 1.4 + tmpdepfile2=$dir$base.o.d # libtool 1.5 + tmpdepfile3=$dir.libs/$base.o.d # libtool 1.5 + tmpdepfile4=$dir.libs/$base.d # Compaq CCC V6.2-504 + "$@" -Wc,-MD + else + tmpdepfile1=$dir$base.o.d + tmpdepfile2=$dir$base.d + tmpdepfile3=$dir$base.d + tmpdepfile4=$dir$base.d + "$@" -MD + fi + + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4" + do + test -f "$tmpdepfile" && break + done + if test -f "$tmpdepfile"; then + sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile" + # That's a tab and a space in the []. 
+ sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile" + else + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" + ;; + +msvc7) + if test "$libtool" = yes; then + showIncludes=-Wc,-showIncludes + else + showIncludes=-showIncludes + fi + "$@" $showIncludes > "$tmpdepfile" + stat=$? + grep -v '^Note: including file: ' "$tmpdepfile" + if test "$stat" = 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The first sed program below extracts the file names and escapes + # backslashes for cygpath. The second sed program outputs the file + # name when reading, but also accumulates all include files in the + # hold buffer in order to output them again at the end. This only + # works with sed implementations that can handle large buffers. + sed < "$tmpdepfile" -n ' +/^Note: including file: *\(.*\)/ { + s//\1/ + s/\\/\\\\/g + p +}' | $cygpath_u | sort -u | sed -n ' +s/ /\\ /g +s/\(.*\)/ \1 \\/p +s/.\(.*\) \\/\1:/ +H +$ { + s/.*/ / + G + p +}' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvc7msys) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +#nosideeffect) + # This comment above is used by automake to tell side-effect + # dependency tracking mechanisms from slower ones. + +dashmstdout) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove `-o $object'. 
+ IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + test -z "$dashmflag" && dashmflag=-M + # Require at least two characters before searching for `:' + # in the target name. This is to cope with DOS-style filenames: + # a dependency such as `c:/foo/bar' could be seen as target `c' otherwise. + "$@" $dashmflag | + sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + tr ' ' ' +' < "$tmpdepfile" | \ +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +dashXmstdout) + # This case only exists to satisfy depend.m4. It is never actually + # run, as this mode is specially recognized in the preamble. + exit 1 + ;; + +makedepend) + "$@" || exit $? + # Remove any Libtool call + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + # X makedepend + shift + cleared=no eat=no + for arg + do + case $cleared in + no) + set ""; shift + cleared=yes ;; + esac + if test $eat = yes; then + eat=no + continue + fi + case "$arg" in + -D*|-I*) + set fnord "$@" "$arg"; shift ;; + # Strip any option that makedepend may not understand. Remove + # the object too, otherwise makedepend will parse it as a source file. + -arch) + eat=yes ;; + -*|$object) + ;; + *) + set fnord "$@" "$arg"; shift ;; + esac + done + obj_suffix=`echo "$object" | sed 's/^.*\././'` + touch "$tmpdepfile" + ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" + rm -f "$depfile" + # makedepend may prepend the VPATH from the source file name to the object. + # No need to regex-escape $object, excess matching of '.' is harmless. 
+ sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile" + sed '1,2d' "$tmpdepfile" | tr ' ' ' +' | \ +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" "$tmpdepfile".bak + ;; + +cpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove `-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + "$@" -E | + sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' | + sed '$ s: \\$::' > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + cat < "$tmpdepfile" >> "$depfile" + sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvisualcpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. 
+ if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + IFS=" " + for arg + do + case "$arg" in + -o) + shift + ;; + $object) + shift + ;; + "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") + set fnord "$@" + shift + shift + ;; + *) + set fnord "$@" "$arg" + shift + shift + ;; + esac + done + "$@" -E 2>/dev/null | + sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile" + echo " " >> "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvcmsys) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +none) + exec "$@" + ;; + +*) + echo "Unknown depmode $depmode" 1>&2 + exit 1 + ;; +esac + +exit 0 + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC" +# time-stamp-end: "; # UTC" +# End: diff --git a/elist.h b/elist.h new file mode 100644 index 0000000..431472f --- /dev/null +++ b/elist.h @@ -0,0 +1,251 @@ +#ifndef _LINUX_LIST_H +#define _LINUX_LIST_H + +/* + * Simple doubly linked list implementation. + * + * Some of the internal functions ("__xxx") are useful when + * manipulating whole lists rather than single entries, as + * sometimes we already know the next/prev entries and we can + * generate better code by using them directly rather than + * using the generic single-entry routines. 
+ */ + +struct list_head { + struct list_head *next, *prev; +}; + +#define LIST_HEAD_INIT(name) { &(name), &(name) } + +#define LIST_HEAD(name) \ + struct list_head name = LIST_HEAD_INIT(name) + +#define INIT_LIST_HEAD(ptr) do { \ + (ptr)->next = (ptr); (ptr)->prev = (ptr); \ +} while (0) + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static __inline void __list_add(struct list_head *lnew, + struct list_head *prev, + struct list_head *next) +{ + next->prev = lnew; + lnew->next = next; + lnew->prev = prev; + prev->next = lnew; +} + +/** + * list_add - add a new entry + * @lnew: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + */ +static __inline void list_add(struct list_head *lnew, struct list_head *head) +{ + __list_add(lnew, head, head->next); +} + +/** + * list_add_tail - add a new entry + * @lnew: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + */ +static __inline void list_add_tail(struct list_head *lnew, struct list_head *head) +{ + __list_add(lnew, head->prev, head); +} + +/* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static __inline void __list_del(struct list_head *prev, struct list_head *next) +{ + next->prev = prev; + prev->next = next; +} + +/** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty on entry does not return true after this; the entry's next/prev links are reset to null pointers. 
+ */ +static __inline void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->next = (struct list_head *) 0; + entry->prev = (struct list_head *) 0; +} + +/** + * list_del_init - deletes entry from list and reinitializes it. + * @entry: the element to delete from the list. + */ +static __inline void list_del_init(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + INIT_LIST_HEAD(entry); +} + +/** + * list_move - delete from one list and add as another's head + * @list: the entry to move + * @head: the head that will precede our entry + */ +static __inline void list_move(struct list_head *list, struct list_head *head) +{ + __list_del(list->prev, list->next); + list_add(list, head); +} + +/** + * list_move_tail - delete from one list and add as another's tail + * @list: the entry to move + * @head: the head that will follow our entry + */ +static __inline void list_move_tail(struct list_head *list, + struct list_head *head) +{ + __list_del(list->prev, list->next); + list_add_tail(list, head); +} + +/** + * list_empty - tests whether a list is empty (returns nonzero if so) + * @head: the list to test; it is only read, so const lists are accepted. + */ +static __inline int list_empty(const struct list_head *head) +{ + return head->next == head; +} + +static __inline void __list_splice(struct list_head *list, + struct list_head *head) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + struct list_head *at = head->next; + + first->prev = head; + head->next = first; + + last->next = at; + at->prev = last; +} + +/** + * list_splice - join two lists + * @list: the new list to add. + * @head: the place to add it in the first list. + */ +static __inline void list_splice(struct list_head *list, struct list_head *head) +{ + if (!list_empty(list)) + __list_splice(list, head); +} + +/** + * list_splice_init - join two lists and reinitialise the emptied list. + * @list: the new list to add. + * @head: the place to add it in the first list. 
+ * + * The list at @list is reinitialised + */ +static __inline void list_splice_init(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) { + __list_splice(list, head); + INIT_LIST_HEAD(list); + } +} + +/** + * list_entry - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head member within the struct. + */ +#define list_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) + +/** + * list_for_each - iterate over a list + * @pos: the &struct list_head to use as a loop counter. + * @head: the head for your list. + */ +#define list_for_each(pos, head) \ + for ((pos) = (head)->next; (pos) != (head); \ + (pos) = (pos)->next) +/** + * list_for_each_prev - iterate over a list backwards + * @pos: the &struct list_head to use as a loop counter. + * @head: the head for your list. + */ +#define list_for_each_prev(pos, head) \ + for ((pos) = (head)->prev; (pos) != (head); \ + (pos) = (pos)->prev) + +/** + * list_for_each_safe - iterate over a list safe against removal of list entry + * @pos: the &struct list_head to use as a loop counter. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. + */ +#define list_for_each_safe(pos, n, head) \ + for ((pos) = (head)->next, (n) = (pos)->next; (pos) != (head); \ + (pos) = (n), (n) = (pos)->next) + +/** + * list_for_each_entry - iterate over list of given type + * @pos: the tpos * to use as a loop counter. + * @head: the head for your list. + * @member: the name of the list_head member within tpos (the entry struct type). + */ +#define list_for_each_entry(pos, head, member, tpos) \ + for ((pos) = list_entry((head)->next, tpos, member); \ + &(pos)->member != (head); \ + (pos) = list_entry((pos)->member.next, tpos, member)) + +/** + * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @pos: the type * to use as a loop counter. 
+ * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * @tpos: the struct type of @pos; @tn: the struct type of @n. + */ +#define list_for_each_entry_safe(pos, n, head, member, tpos, tn) \ + for ((pos) = list_entry((head)->next, tpos, member), \ + (n) = list_entry((pos)->member.next, tn, member); \ + &(pos)->member != (head); \ + (pos) = (n), (n) = list_entry((n)->member.next, tn, member)) + +/** + * list_for_each_entry_continue - iterate over list of given type + * continuing after existing point + * @pos: the type * to use as a loop counter. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * @tpos: the struct type @pos points to. + */ +#define list_for_each_entry_continue(pos, head, member, tpos) \ + for ((pos) = list_entry((pos)->member.next, tpos, member); \ + &(pos)->member != (head); \ + (pos) = list_entry((pos)->member.next, tpos, member)) + +#endif diff --git a/files.txt b/files.txt new file mode 100644 index 0000000..2b7db9d --- /dev/null +++ b/files.txt @@ -0,0 +1,30 @@ +blake512.cu +blake.c +combine.cu +compat.h +cpu-miner.c +cpuminer-config.h +cuda_blake512.h +cuda_combine.h +cuda_groestl512.h +cuda_hefty1.h +cuda_keccak512.h +cuda_sha256.h +elist.h +groestl512.cu +groestl.c +heavy.c +hefty1.c +hefty1.cu +hefty1.h +keccak512.cu +keccak.c +miner.h +scrypt.c +sha256.cu +sha2.c +sph_blake.h +sph_groestl.h +sph_keccak.h +sph_types.h +util.c diff --git a/fugue.c b/fugue.c new file mode 100644 index 0000000..85767c9 --- /dev/null +++ b/fugue.c @@ -0,0 +1,1208 @@ +#include <stddef.h> +#include <string.h> + +#include "sph_fugue.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +#ifdef _MSC_VER +#pragma warning (disable: 4146) +#endif + +static const sph_u32 IV224[] = { + SPH_C32(0xf4c9120d), SPH_C32(0x6286f757), SPH_C32(0xee39e01c), + SPH_C32(0xe074e3cb), SPH_C32(0xa1127c62), SPH_C32(0x9a43d215), + SPH_C32(0xbd8d679a) +}; + +static const sph_u32 IV256[] = { + SPH_C32(0xe952bdde), SPH_C32(0x6671135f), 
SPH_C32(0xe0d4f668), + SPH_C32(0xd2b0b594), SPH_C32(0xf96c621d), SPH_C32(0xfbf929de), + SPH_C32(0x9149e899), SPH_C32(0x34f8c248) +}; + +static const sph_u32 IV384[] = { + SPH_C32(0xaa61ec0d), SPH_C32(0x31252e1f), SPH_C32(0xa01db4c7), + SPH_C32(0x00600985), SPH_C32(0x215ef44a), SPH_C32(0x741b5e9c), + SPH_C32(0xfa693e9a), SPH_C32(0x473eb040), SPH_C32(0xe502ae8a), + SPH_C32(0xa99c25e0), SPH_C32(0xbc95517c), SPH_C32(0x5c1095a1) +}; + +static const sph_u32 IV512[] = { + SPH_C32(0x8807a57e), SPH_C32(0xe616af75), SPH_C32(0xc5d3e4db), + SPH_C32(0xac9ab027), SPH_C32(0xd915f117), SPH_C32(0xb6eecc54), + SPH_C32(0x06e8020b), SPH_C32(0x4a92efd1), SPH_C32(0xaac6e2c9), + SPH_C32(0xddb21398), SPH_C32(0xcae65838), SPH_C32(0x437f203f), + SPH_C32(0x25ea78e7), SPH_C32(0x951fddd6), SPH_C32(0xda6ed11d), + SPH_C32(0xe13e3567) +}; + +static const sph_u32 mixtab0[] = { + SPH_C32(0x63633297), SPH_C32(0x7c7c6feb), SPH_C32(0x77775ec7), + SPH_C32(0x7b7b7af7), SPH_C32(0xf2f2e8e5), SPH_C32(0x6b6b0ab7), + SPH_C32(0x6f6f16a7), SPH_C32(0xc5c56d39), SPH_C32(0x303090c0), + SPH_C32(0x01010704), SPH_C32(0x67672e87), SPH_C32(0x2b2bd1ac), + SPH_C32(0xfefeccd5), SPH_C32(0xd7d71371), SPH_C32(0xabab7c9a), + SPH_C32(0x767659c3), SPH_C32(0xcaca4005), SPH_C32(0x8282a33e), + SPH_C32(0xc9c94909), SPH_C32(0x7d7d68ef), SPH_C32(0xfafad0c5), + SPH_C32(0x5959947f), SPH_C32(0x4747ce07), SPH_C32(0xf0f0e6ed), + SPH_C32(0xadad6e82), SPH_C32(0xd4d41a7d), SPH_C32(0xa2a243be), + SPH_C32(0xafaf608a), SPH_C32(0x9c9cf946), SPH_C32(0xa4a451a6), + SPH_C32(0x727245d3), SPH_C32(0xc0c0762d), SPH_C32(0xb7b728ea), + SPH_C32(0xfdfdc5d9), SPH_C32(0x9393d47a), SPH_C32(0x2626f298), + SPH_C32(0x363682d8), SPH_C32(0x3f3fbdfc), SPH_C32(0xf7f7f3f1), + SPH_C32(0xcccc521d), SPH_C32(0x34348cd0), SPH_C32(0xa5a556a2), + SPH_C32(0xe5e58db9), SPH_C32(0xf1f1e1e9), SPH_C32(0x71714cdf), + SPH_C32(0xd8d83e4d), SPH_C32(0x313197c4), SPH_C32(0x15156b54), + SPH_C32(0x04041c10), SPH_C32(0xc7c76331), SPH_C32(0x2323e98c), + SPH_C32(0xc3c37f21), 
SPH_C32(0x18184860), SPH_C32(0x9696cf6e), + SPH_C32(0x05051b14), SPH_C32(0x9a9aeb5e), SPH_C32(0x0707151c), + SPH_C32(0x12127e48), SPH_C32(0x8080ad36), SPH_C32(0xe2e298a5), + SPH_C32(0xebeba781), SPH_C32(0x2727f59c), SPH_C32(0xb2b233fe), + SPH_C32(0x757550cf), SPH_C32(0x09093f24), SPH_C32(0x8383a43a), + SPH_C32(0x2c2cc4b0), SPH_C32(0x1a1a4668), SPH_C32(0x1b1b416c), + SPH_C32(0x6e6e11a3), SPH_C32(0x5a5a9d73), SPH_C32(0xa0a04db6), + SPH_C32(0x5252a553), SPH_C32(0x3b3ba1ec), SPH_C32(0xd6d61475), + SPH_C32(0xb3b334fa), SPH_C32(0x2929dfa4), SPH_C32(0xe3e39fa1), + SPH_C32(0x2f2fcdbc), SPH_C32(0x8484b126), SPH_C32(0x5353a257), + SPH_C32(0xd1d10169), SPH_C32(0x00000000), SPH_C32(0xededb599), + SPH_C32(0x2020e080), SPH_C32(0xfcfcc2dd), SPH_C32(0xb1b13af2), + SPH_C32(0x5b5b9a77), SPH_C32(0x6a6a0db3), SPH_C32(0xcbcb4701), + SPH_C32(0xbebe17ce), SPH_C32(0x3939afe4), SPH_C32(0x4a4aed33), + SPH_C32(0x4c4cff2b), SPH_C32(0x5858937b), SPH_C32(0xcfcf5b11), + SPH_C32(0xd0d0066d), SPH_C32(0xefefbb91), SPH_C32(0xaaaa7b9e), + SPH_C32(0xfbfbd7c1), SPH_C32(0x4343d217), SPH_C32(0x4d4df82f), + SPH_C32(0x333399cc), SPH_C32(0x8585b622), SPH_C32(0x4545c00f), + SPH_C32(0xf9f9d9c9), SPH_C32(0x02020e08), SPH_C32(0x7f7f66e7), + SPH_C32(0x5050ab5b), SPH_C32(0x3c3cb4f0), SPH_C32(0x9f9ff04a), + SPH_C32(0xa8a87596), SPH_C32(0x5151ac5f), SPH_C32(0xa3a344ba), + SPH_C32(0x4040db1b), SPH_C32(0x8f8f800a), SPH_C32(0x9292d37e), + SPH_C32(0x9d9dfe42), SPH_C32(0x3838a8e0), SPH_C32(0xf5f5fdf9), + SPH_C32(0xbcbc19c6), SPH_C32(0xb6b62fee), SPH_C32(0xdada3045), + SPH_C32(0x2121e784), SPH_C32(0x10107040), SPH_C32(0xffffcbd1), + SPH_C32(0xf3f3efe1), SPH_C32(0xd2d20865), SPH_C32(0xcdcd5519), + SPH_C32(0x0c0c2430), SPH_C32(0x1313794c), SPH_C32(0xececb29d), + SPH_C32(0x5f5f8667), SPH_C32(0x9797c86a), SPH_C32(0x4444c70b), + SPH_C32(0x1717655c), SPH_C32(0xc4c46a3d), SPH_C32(0xa7a758aa), + SPH_C32(0x7e7e61e3), SPH_C32(0x3d3db3f4), SPH_C32(0x6464278b), + SPH_C32(0x5d5d886f), SPH_C32(0x19194f64), SPH_C32(0x737342d7), + 
SPH_C32(0x60603b9b), SPH_C32(0x8181aa32), SPH_C32(0x4f4ff627), + SPH_C32(0xdcdc225d), SPH_C32(0x2222ee88), SPH_C32(0x2a2ad6a8), + SPH_C32(0x9090dd76), SPH_C32(0x88889516), SPH_C32(0x4646c903), + SPH_C32(0xeeeebc95), SPH_C32(0xb8b805d6), SPH_C32(0x14146c50), + SPH_C32(0xdede2c55), SPH_C32(0x5e5e8163), SPH_C32(0x0b0b312c), + SPH_C32(0xdbdb3741), SPH_C32(0xe0e096ad), SPH_C32(0x32329ec8), + SPH_C32(0x3a3aa6e8), SPH_C32(0x0a0a3628), SPH_C32(0x4949e43f), + SPH_C32(0x06061218), SPH_C32(0x2424fc90), SPH_C32(0x5c5c8f6b), + SPH_C32(0xc2c27825), SPH_C32(0xd3d30f61), SPH_C32(0xacac6986), + SPH_C32(0x62623593), SPH_C32(0x9191da72), SPH_C32(0x9595c662), + SPH_C32(0xe4e48abd), SPH_C32(0x797974ff), SPH_C32(0xe7e783b1), + SPH_C32(0xc8c84e0d), SPH_C32(0x373785dc), SPH_C32(0x6d6d18af), + SPH_C32(0x8d8d8e02), SPH_C32(0xd5d51d79), SPH_C32(0x4e4ef123), + SPH_C32(0xa9a97292), SPH_C32(0x6c6c1fab), SPH_C32(0x5656b943), + SPH_C32(0xf4f4fafd), SPH_C32(0xeaeaa085), SPH_C32(0x6565208f), + SPH_C32(0x7a7a7df3), SPH_C32(0xaeae678e), SPH_C32(0x08083820), + SPH_C32(0xbaba0bde), SPH_C32(0x787873fb), SPH_C32(0x2525fb94), + SPH_C32(0x2e2ecab8), SPH_C32(0x1c1c5470), SPH_C32(0xa6a65fae), + SPH_C32(0xb4b421e6), SPH_C32(0xc6c66435), SPH_C32(0xe8e8ae8d), + SPH_C32(0xdddd2559), SPH_C32(0x747457cb), SPH_C32(0x1f1f5d7c), + SPH_C32(0x4b4bea37), SPH_C32(0xbdbd1ec2), SPH_C32(0x8b8b9c1a), + SPH_C32(0x8a8a9b1e), SPH_C32(0x70704bdb), SPH_C32(0x3e3ebaf8), + SPH_C32(0xb5b526e2), SPH_C32(0x66662983), SPH_C32(0x4848e33b), + SPH_C32(0x0303090c), SPH_C32(0xf6f6f4f5), SPH_C32(0x0e0e2a38), + SPH_C32(0x61613c9f), SPH_C32(0x35358bd4), SPH_C32(0x5757be47), + SPH_C32(0xb9b902d2), SPH_C32(0x8686bf2e), SPH_C32(0xc1c17129), + SPH_C32(0x1d1d5374), SPH_C32(0x9e9ef74e), SPH_C32(0xe1e191a9), + SPH_C32(0xf8f8decd), SPH_C32(0x9898e556), SPH_C32(0x11117744), + SPH_C32(0x696904bf), SPH_C32(0xd9d93949), SPH_C32(0x8e8e870e), + SPH_C32(0x9494c166), SPH_C32(0x9b9bec5a), SPH_C32(0x1e1e5a78), + SPH_C32(0x8787b82a), SPH_C32(0xe9e9a989), 
SPH_C32(0xcece5c15), + SPH_C32(0x5555b04f), SPH_C32(0x2828d8a0), SPH_C32(0xdfdf2b51), + SPH_C32(0x8c8c8906), SPH_C32(0xa1a14ab2), SPH_C32(0x89899212), + SPH_C32(0x0d0d2334), SPH_C32(0xbfbf10ca), SPH_C32(0xe6e684b5), + SPH_C32(0x4242d513), SPH_C32(0x686803bb), SPH_C32(0x4141dc1f), + SPH_C32(0x9999e252), SPH_C32(0x2d2dc3b4), SPH_C32(0x0f0f2d3c), + SPH_C32(0xb0b03df6), SPH_C32(0x5454b74b), SPH_C32(0xbbbb0cda), + SPH_C32(0x16166258) +}; + +static const sph_u32 mixtab1[] = { + SPH_C32(0x97636332), SPH_C32(0xeb7c7c6f), SPH_C32(0xc777775e), + SPH_C32(0xf77b7b7a), SPH_C32(0xe5f2f2e8), SPH_C32(0xb76b6b0a), + SPH_C32(0xa76f6f16), SPH_C32(0x39c5c56d), SPH_C32(0xc0303090), + SPH_C32(0x04010107), SPH_C32(0x8767672e), SPH_C32(0xac2b2bd1), + SPH_C32(0xd5fefecc), SPH_C32(0x71d7d713), SPH_C32(0x9aabab7c), + SPH_C32(0xc3767659), SPH_C32(0x05caca40), SPH_C32(0x3e8282a3), + SPH_C32(0x09c9c949), SPH_C32(0xef7d7d68), SPH_C32(0xc5fafad0), + SPH_C32(0x7f595994), SPH_C32(0x074747ce), SPH_C32(0xedf0f0e6), + SPH_C32(0x82adad6e), SPH_C32(0x7dd4d41a), SPH_C32(0xbea2a243), + SPH_C32(0x8aafaf60), SPH_C32(0x469c9cf9), SPH_C32(0xa6a4a451), + SPH_C32(0xd3727245), SPH_C32(0x2dc0c076), SPH_C32(0xeab7b728), + SPH_C32(0xd9fdfdc5), SPH_C32(0x7a9393d4), SPH_C32(0x982626f2), + SPH_C32(0xd8363682), SPH_C32(0xfc3f3fbd), SPH_C32(0xf1f7f7f3), + SPH_C32(0x1dcccc52), SPH_C32(0xd034348c), SPH_C32(0xa2a5a556), + SPH_C32(0xb9e5e58d), SPH_C32(0xe9f1f1e1), SPH_C32(0xdf71714c), + SPH_C32(0x4dd8d83e), SPH_C32(0xc4313197), SPH_C32(0x5415156b), + SPH_C32(0x1004041c), SPH_C32(0x31c7c763), SPH_C32(0x8c2323e9), + SPH_C32(0x21c3c37f), SPH_C32(0x60181848), SPH_C32(0x6e9696cf), + SPH_C32(0x1405051b), SPH_C32(0x5e9a9aeb), SPH_C32(0x1c070715), + SPH_C32(0x4812127e), SPH_C32(0x368080ad), SPH_C32(0xa5e2e298), + SPH_C32(0x81ebeba7), SPH_C32(0x9c2727f5), SPH_C32(0xfeb2b233), + SPH_C32(0xcf757550), SPH_C32(0x2409093f), SPH_C32(0x3a8383a4), + SPH_C32(0xb02c2cc4), SPH_C32(0x681a1a46), SPH_C32(0x6c1b1b41), + SPH_C32(0xa36e6e11), 
SPH_C32(0x735a5a9d), SPH_C32(0xb6a0a04d), + SPH_C32(0x535252a5), SPH_C32(0xec3b3ba1), SPH_C32(0x75d6d614), + SPH_C32(0xfab3b334), SPH_C32(0xa42929df), SPH_C32(0xa1e3e39f), + SPH_C32(0xbc2f2fcd), SPH_C32(0x268484b1), SPH_C32(0x575353a2), + SPH_C32(0x69d1d101), SPH_C32(0x00000000), SPH_C32(0x99ededb5), + SPH_C32(0x802020e0), SPH_C32(0xddfcfcc2), SPH_C32(0xf2b1b13a), + SPH_C32(0x775b5b9a), SPH_C32(0xb36a6a0d), SPH_C32(0x01cbcb47), + SPH_C32(0xcebebe17), SPH_C32(0xe43939af), SPH_C32(0x334a4aed), + SPH_C32(0x2b4c4cff), SPH_C32(0x7b585893), SPH_C32(0x11cfcf5b), + SPH_C32(0x6dd0d006), SPH_C32(0x91efefbb), SPH_C32(0x9eaaaa7b), + SPH_C32(0xc1fbfbd7), SPH_C32(0x174343d2), SPH_C32(0x2f4d4df8), + SPH_C32(0xcc333399), SPH_C32(0x228585b6), SPH_C32(0x0f4545c0), + SPH_C32(0xc9f9f9d9), SPH_C32(0x0802020e), SPH_C32(0xe77f7f66), + SPH_C32(0x5b5050ab), SPH_C32(0xf03c3cb4), SPH_C32(0x4a9f9ff0), + SPH_C32(0x96a8a875), SPH_C32(0x5f5151ac), SPH_C32(0xbaa3a344), + SPH_C32(0x1b4040db), SPH_C32(0x0a8f8f80), SPH_C32(0x7e9292d3), + SPH_C32(0x429d9dfe), SPH_C32(0xe03838a8), SPH_C32(0xf9f5f5fd), + SPH_C32(0xc6bcbc19), SPH_C32(0xeeb6b62f), SPH_C32(0x45dada30), + SPH_C32(0x842121e7), SPH_C32(0x40101070), SPH_C32(0xd1ffffcb), + SPH_C32(0xe1f3f3ef), SPH_C32(0x65d2d208), SPH_C32(0x19cdcd55), + SPH_C32(0x300c0c24), SPH_C32(0x4c131379), SPH_C32(0x9dececb2), + SPH_C32(0x675f5f86), SPH_C32(0x6a9797c8), SPH_C32(0x0b4444c7), + SPH_C32(0x5c171765), SPH_C32(0x3dc4c46a), SPH_C32(0xaaa7a758), + SPH_C32(0xe37e7e61), SPH_C32(0xf43d3db3), SPH_C32(0x8b646427), + SPH_C32(0x6f5d5d88), SPH_C32(0x6419194f), SPH_C32(0xd7737342), + SPH_C32(0x9b60603b), SPH_C32(0x328181aa), SPH_C32(0x274f4ff6), + SPH_C32(0x5ddcdc22), SPH_C32(0x882222ee), SPH_C32(0xa82a2ad6), + SPH_C32(0x769090dd), SPH_C32(0x16888895), SPH_C32(0x034646c9), + SPH_C32(0x95eeeebc), SPH_C32(0xd6b8b805), SPH_C32(0x5014146c), + SPH_C32(0x55dede2c), SPH_C32(0x635e5e81), SPH_C32(0x2c0b0b31), + SPH_C32(0x41dbdb37), SPH_C32(0xade0e096), SPH_C32(0xc832329e), + 
SPH_C32(0xe83a3aa6), SPH_C32(0x280a0a36), SPH_C32(0x3f4949e4), + SPH_C32(0x18060612), SPH_C32(0x902424fc), SPH_C32(0x6b5c5c8f), + SPH_C32(0x25c2c278), SPH_C32(0x61d3d30f), SPH_C32(0x86acac69), + SPH_C32(0x93626235), SPH_C32(0x729191da), SPH_C32(0x629595c6), + SPH_C32(0xbde4e48a), SPH_C32(0xff797974), SPH_C32(0xb1e7e783), + SPH_C32(0x0dc8c84e), SPH_C32(0xdc373785), SPH_C32(0xaf6d6d18), + SPH_C32(0x028d8d8e), SPH_C32(0x79d5d51d), SPH_C32(0x234e4ef1), + SPH_C32(0x92a9a972), SPH_C32(0xab6c6c1f), SPH_C32(0x435656b9), + SPH_C32(0xfdf4f4fa), SPH_C32(0x85eaeaa0), SPH_C32(0x8f656520), + SPH_C32(0xf37a7a7d), SPH_C32(0x8eaeae67), SPH_C32(0x20080838), + SPH_C32(0xdebaba0b), SPH_C32(0xfb787873), SPH_C32(0x942525fb), + SPH_C32(0xb82e2eca), SPH_C32(0x701c1c54), SPH_C32(0xaea6a65f), + SPH_C32(0xe6b4b421), SPH_C32(0x35c6c664), SPH_C32(0x8de8e8ae), + SPH_C32(0x59dddd25), SPH_C32(0xcb747457), SPH_C32(0x7c1f1f5d), + SPH_C32(0x374b4bea), SPH_C32(0xc2bdbd1e), SPH_C32(0x1a8b8b9c), + SPH_C32(0x1e8a8a9b), SPH_C32(0xdb70704b), SPH_C32(0xf83e3eba), + SPH_C32(0xe2b5b526), SPH_C32(0x83666629), SPH_C32(0x3b4848e3), + SPH_C32(0x0c030309), SPH_C32(0xf5f6f6f4), SPH_C32(0x380e0e2a), + SPH_C32(0x9f61613c), SPH_C32(0xd435358b), SPH_C32(0x475757be), + SPH_C32(0xd2b9b902), SPH_C32(0x2e8686bf), SPH_C32(0x29c1c171), + SPH_C32(0x741d1d53), SPH_C32(0x4e9e9ef7), SPH_C32(0xa9e1e191), + SPH_C32(0xcdf8f8de), SPH_C32(0x569898e5), SPH_C32(0x44111177), + SPH_C32(0xbf696904), SPH_C32(0x49d9d939), SPH_C32(0x0e8e8e87), + SPH_C32(0x669494c1), SPH_C32(0x5a9b9bec), SPH_C32(0x781e1e5a), + SPH_C32(0x2a8787b8), SPH_C32(0x89e9e9a9), SPH_C32(0x15cece5c), + SPH_C32(0x4f5555b0), SPH_C32(0xa02828d8), SPH_C32(0x51dfdf2b), + SPH_C32(0x068c8c89), SPH_C32(0xb2a1a14a), SPH_C32(0x12898992), + SPH_C32(0x340d0d23), SPH_C32(0xcabfbf10), SPH_C32(0xb5e6e684), + SPH_C32(0x134242d5), SPH_C32(0xbb686803), SPH_C32(0x1f4141dc), + SPH_C32(0x529999e2), SPH_C32(0xb42d2dc3), SPH_C32(0x3c0f0f2d), + SPH_C32(0xf6b0b03d), SPH_C32(0x4b5454b7), 
SPH_C32(0xdabbbb0c), + SPH_C32(0x58161662) +}; + /* mixtab2: lookup table that SMIX indexes with (x >> 8) & 0xFF of each input word. NOTE(review): the entries visible here look like the matching mixtab1 entries rotated right by 8 bits (e.g. 0x97636332 -> 0x32976363) -- confirm for the whole table. */ +static const sph_u32 mixtab2[] = { + SPH_C32(0x32976363), SPH_C32(0x6feb7c7c), SPH_C32(0x5ec77777), + SPH_C32(0x7af77b7b), SPH_C32(0xe8e5f2f2), SPH_C32(0x0ab76b6b), + SPH_C32(0x16a76f6f), SPH_C32(0x6d39c5c5), SPH_C32(0x90c03030), + SPH_C32(0x07040101), SPH_C32(0x2e876767), SPH_C32(0xd1ac2b2b), + SPH_C32(0xccd5fefe), SPH_C32(0x1371d7d7), SPH_C32(0x7c9aabab), + SPH_C32(0x59c37676), SPH_C32(0x4005caca), SPH_C32(0xa33e8282), + SPH_C32(0x4909c9c9), SPH_C32(0x68ef7d7d), SPH_C32(0xd0c5fafa), + SPH_C32(0x947f5959), SPH_C32(0xce074747), SPH_C32(0xe6edf0f0), + SPH_C32(0x6e82adad), SPH_C32(0x1a7dd4d4), SPH_C32(0x43bea2a2), + SPH_C32(0x608aafaf), SPH_C32(0xf9469c9c), SPH_C32(0x51a6a4a4), + SPH_C32(0x45d37272), SPH_C32(0x762dc0c0), SPH_C32(0x28eab7b7), + SPH_C32(0xc5d9fdfd), SPH_C32(0xd47a9393), SPH_C32(0xf2982626), + SPH_C32(0x82d83636), SPH_C32(0xbdfc3f3f), SPH_C32(0xf3f1f7f7), + SPH_C32(0x521dcccc), SPH_C32(0x8cd03434), SPH_C32(0x56a2a5a5), + SPH_C32(0x8db9e5e5), SPH_C32(0xe1e9f1f1), SPH_C32(0x4cdf7171), + SPH_C32(0x3e4dd8d8), SPH_C32(0x97c43131), SPH_C32(0x6b541515), + SPH_C32(0x1c100404), SPH_C32(0x6331c7c7), SPH_C32(0xe98c2323), + SPH_C32(0x7f21c3c3), SPH_C32(0x48601818), SPH_C32(0xcf6e9696), + SPH_C32(0x1b140505), SPH_C32(0xeb5e9a9a), SPH_C32(0x151c0707), + SPH_C32(0x7e481212), SPH_C32(0xad368080), SPH_C32(0x98a5e2e2), + SPH_C32(0xa781ebeb), SPH_C32(0xf59c2727), SPH_C32(0x33feb2b2), + SPH_C32(0x50cf7575), SPH_C32(0x3f240909), SPH_C32(0xa43a8383), + SPH_C32(0xc4b02c2c), SPH_C32(0x46681a1a), SPH_C32(0x416c1b1b), + SPH_C32(0x11a36e6e), SPH_C32(0x9d735a5a), SPH_C32(0x4db6a0a0), + SPH_C32(0xa5535252), SPH_C32(0xa1ec3b3b), SPH_C32(0x1475d6d6), + SPH_C32(0x34fab3b3), SPH_C32(0xdfa42929), SPH_C32(0x9fa1e3e3), + SPH_C32(0xcdbc2f2f), SPH_C32(0xb1268484), SPH_C32(0xa2575353), + SPH_C32(0x0169d1d1), SPH_C32(0x00000000), SPH_C32(0xb599eded), + SPH_C32(0xe0802020), SPH_C32(0xc2ddfcfc), SPH_C32(0x3af2b1b1), + SPH_C32(0x9a775b5b), 
SPH_C32(0x0db36a6a), SPH_C32(0x4701cbcb), + SPH_C32(0x17cebebe), SPH_C32(0xafe43939), SPH_C32(0xed334a4a), + SPH_C32(0xff2b4c4c), SPH_C32(0x937b5858), SPH_C32(0x5b11cfcf), + SPH_C32(0x066dd0d0), SPH_C32(0xbb91efef), SPH_C32(0x7b9eaaaa), + SPH_C32(0xd7c1fbfb), SPH_C32(0xd2174343), SPH_C32(0xf82f4d4d), + SPH_C32(0x99cc3333), SPH_C32(0xb6228585), SPH_C32(0xc00f4545), + SPH_C32(0xd9c9f9f9), SPH_C32(0x0e080202), SPH_C32(0x66e77f7f), + SPH_C32(0xab5b5050), SPH_C32(0xb4f03c3c), SPH_C32(0xf04a9f9f), + SPH_C32(0x7596a8a8), SPH_C32(0xac5f5151), SPH_C32(0x44baa3a3), + SPH_C32(0xdb1b4040), SPH_C32(0x800a8f8f), SPH_C32(0xd37e9292), + SPH_C32(0xfe429d9d), SPH_C32(0xa8e03838), SPH_C32(0xfdf9f5f5), + SPH_C32(0x19c6bcbc), SPH_C32(0x2feeb6b6), SPH_C32(0x3045dada), + SPH_C32(0xe7842121), SPH_C32(0x70401010), SPH_C32(0xcbd1ffff), + SPH_C32(0xefe1f3f3), SPH_C32(0x0865d2d2), SPH_C32(0x5519cdcd), + SPH_C32(0x24300c0c), SPH_C32(0x794c1313), SPH_C32(0xb29decec), + SPH_C32(0x86675f5f), SPH_C32(0xc86a9797), SPH_C32(0xc70b4444), + SPH_C32(0x655c1717), SPH_C32(0x6a3dc4c4), SPH_C32(0x58aaa7a7), + SPH_C32(0x61e37e7e), SPH_C32(0xb3f43d3d), SPH_C32(0x278b6464), + SPH_C32(0x886f5d5d), SPH_C32(0x4f641919), SPH_C32(0x42d77373), + SPH_C32(0x3b9b6060), SPH_C32(0xaa328181), SPH_C32(0xf6274f4f), + SPH_C32(0x225ddcdc), SPH_C32(0xee882222), SPH_C32(0xd6a82a2a), + SPH_C32(0xdd769090), SPH_C32(0x95168888), SPH_C32(0xc9034646), + SPH_C32(0xbc95eeee), SPH_C32(0x05d6b8b8), SPH_C32(0x6c501414), + SPH_C32(0x2c55dede), SPH_C32(0x81635e5e), SPH_C32(0x312c0b0b), + SPH_C32(0x3741dbdb), SPH_C32(0x96ade0e0), SPH_C32(0x9ec83232), + SPH_C32(0xa6e83a3a), SPH_C32(0x36280a0a), SPH_C32(0xe43f4949), + SPH_C32(0x12180606), SPH_C32(0xfc902424), SPH_C32(0x8f6b5c5c), + SPH_C32(0x7825c2c2), SPH_C32(0x0f61d3d3), SPH_C32(0x6986acac), + SPH_C32(0x35936262), SPH_C32(0xda729191), SPH_C32(0xc6629595), + SPH_C32(0x8abde4e4), SPH_C32(0x74ff7979), SPH_C32(0x83b1e7e7), + SPH_C32(0x4e0dc8c8), SPH_C32(0x85dc3737), SPH_C32(0x18af6d6d), + 
SPH_C32(0x8e028d8d), SPH_C32(0x1d79d5d5), SPH_C32(0xf1234e4e), + SPH_C32(0x7292a9a9), SPH_C32(0x1fab6c6c), SPH_C32(0xb9435656), + SPH_C32(0xfafdf4f4), SPH_C32(0xa085eaea), SPH_C32(0x208f6565), + SPH_C32(0x7df37a7a), SPH_C32(0x678eaeae), SPH_C32(0x38200808), + SPH_C32(0x0bdebaba), SPH_C32(0x73fb7878), SPH_C32(0xfb942525), + SPH_C32(0xcab82e2e), SPH_C32(0x54701c1c), SPH_C32(0x5faea6a6), + SPH_C32(0x21e6b4b4), SPH_C32(0x6435c6c6), SPH_C32(0xae8de8e8), + SPH_C32(0x2559dddd), SPH_C32(0x57cb7474), SPH_C32(0x5d7c1f1f), + SPH_C32(0xea374b4b), SPH_C32(0x1ec2bdbd), SPH_C32(0x9c1a8b8b), + SPH_C32(0x9b1e8a8a), SPH_C32(0x4bdb7070), SPH_C32(0xbaf83e3e), + SPH_C32(0x26e2b5b5), SPH_C32(0x29836666), SPH_C32(0xe33b4848), + SPH_C32(0x090c0303), SPH_C32(0xf4f5f6f6), SPH_C32(0x2a380e0e), + SPH_C32(0x3c9f6161), SPH_C32(0x8bd43535), SPH_C32(0xbe475757), + SPH_C32(0x02d2b9b9), SPH_C32(0xbf2e8686), SPH_C32(0x7129c1c1), + SPH_C32(0x53741d1d), SPH_C32(0xf74e9e9e), SPH_C32(0x91a9e1e1), + SPH_C32(0xdecdf8f8), SPH_C32(0xe5569898), SPH_C32(0x77441111), + SPH_C32(0x04bf6969), SPH_C32(0x3949d9d9), SPH_C32(0x870e8e8e), + SPH_C32(0xc1669494), SPH_C32(0xec5a9b9b), SPH_C32(0x5a781e1e), + SPH_C32(0xb82a8787), SPH_C32(0xa989e9e9), SPH_C32(0x5c15cece), + SPH_C32(0xb04f5555), SPH_C32(0xd8a02828), SPH_C32(0x2b51dfdf), + SPH_C32(0x89068c8c), SPH_C32(0x4ab2a1a1), SPH_C32(0x92128989), + SPH_C32(0x23340d0d), SPH_C32(0x10cabfbf), SPH_C32(0x84b5e6e6), + SPH_C32(0xd5134242), SPH_C32(0x03bb6868), SPH_C32(0xdc1f4141), + SPH_C32(0xe2529999), SPH_C32(0xc3b42d2d), SPH_C32(0x2d3c0f0f), + SPH_C32(0x3df6b0b0), SPH_C32(0xb74b5454), SPH_C32(0x0cdabbbb), + SPH_C32(0x62581616) +}; + /* mixtab3: lookup table that SMIX indexes with x & 0xFF of each input word. NOTE(review): the entries visible here look like the matching mixtab2 entries rotated right by 8 bits (e.g. 0x62581616 -> 0x16625816) -- confirm for the whole table. */ +static const sph_u32 mixtab3[] = { + SPH_C32(0x63329763), SPH_C32(0x7c6feb7c), SPH_C32(0x775ec777), + SPH_C32(0x7b7af77b), SPH_C32(0xf2e8e5f2), SPH_C32(0x6b0ab76b), + SPH_C32(0x6f16a76f), SPH_C32(0xc56d39c5), SPH_C32(0x3090c030), + SPH_C32(0x01070401), SPH_C32(0x672e8767), SPH_C32(0x2bd1ac2b), + SPH_C32(0xfeccd5fe), SPH_C32(0xd71371d7), 
SPH_C32(0xab7c9aab), + SPH_C32(0x7659c376), SPH_C32(0xca4005ca), SPH_C32(0x82a33e82), + SPH_C32(0xc94909c9), SPH_C32(0x7d68ef7d), SPH_C32(0xfad0c5fa), + SPH_C32(0x59947f59), SPH_C32(0x47ce0747), SPH_C32(0xf0e6edf0), + SPH_C32(0xad6e82ad), SPH_C32(0xd41a7dd4), SPH_C32(0xa243bea2), + SPH_C32(0xaf608aaf), SPH_C32(0x9cf9469c), SPH_C32(0xa451a6a4), + SPH_C32(0x7245d372), SPH_C32(0xc0762dc0), SPH_C32(0xb728eab7), + SPH_C32(0xfdc5d9fd), SPH_C32(0x93d47a93), SPH_C32(0x26f29826), + SPH_C32(0x3682d836), SPH_C32(0x3fbdfc3f), SPH_C32(0xf7f3f1f7), + SPH_C32(0xcc521dcc), SPH_C32(0x348cd034), SPH_C32(0xa556a2a5), + SPH_C32(0xe58db9e5), SPH_C32(0xf1e1e9f1), SPH_C32(0x714cdf71), + SPH_C32(0xd83e4dd8), SPH_C32(0x3197c431), SPH_C32(0x156b5415), + SPH_C32(0x041c1004), SPH_C32(0xc76331c7), SPH_C32(0x23e98c23), + SPH_C32(0xc37f21c3), SPH_C32(0x18486018), SPH_C32(0x96cf6e96), + SPH_C32(0x051b1405), SPH_C32(0x9aeb5e9a), SPH_C32(0x07151c07), + SPH_C32(0x127e4812), SPH_C32(0x80ad3680), SPH_C32(0xe298a5e2), + SPH_C32(0xeba781eb), SPH_C32(0x27f59c27), SPH_C32(0xb233feb2), + SPH_C32(0x7550cf75), SPH_C32(0x093f2409), SPH_C32(0x83a43a83), + SPH_C32(0x2cc4b02c), SPH_C32(0x1a46681a), SPH_C32(0x1b416c1b), + SPH_C32(0x6e11a36e), SPH_C32(0x5a9d735a), SPH_C32(0xa04db6a0), + SPH_C32(0x52a55352), SPH_C32(0x3ba1ec3b), SPH_C32(0xd61475d6), + SPH_C32(0xb334fab3), SPH_C32(0x29dfa429), SPH_C32(0xe39fa1e3), + SPH_C32(0x2fcdbc2f), SPH_C32(0x84b12684), SPH_C32(0x53a25753), + SPH_C32(0xd10169d1), SPH_C32(0x00000000), SPH_C32(0xedb599ed), + SPH_C32(0x20e08020), SPH_C32(0xfcc2ddfc), SPH_C32(0xb13af2b1), + SPH_C32(0x5b9a775b), SPH_C32(0x6a0db36a), SPH_C32(0xcb4701cb), + SPH_C32(0xbe17cebe), SPH_C32(0x39afe439), SPH_C32(0x4aed334a), + SPH_C32(0x4cff2b4c), SPH_C32(0x58937b58), SPH_C32(0xcf5b11cf), + SPH_C32(0xd0066dd0), SPH_C32(0xefbb91ef), SPH_C32(0xaa7b9eaa), + SPH_C32(0xfbd7c1fb), SPH_C32(0x43d21743), SPH_C32(0x4df82f4d), + SPH_C32(0x3399cc33), SPH_C32(0x85b62285), SPH_C32(0x45c00f45), + SPH_C32(0xf9d9c9f9), 
SPH_C32(0x020e0802), SPH_C32(0x7f66e77f), + SPH_C32(0x50ab5b50), SPH_C32(0x3cb4f03c), SPH_C32(0x9ff04a9f), + SPH_C32(0xa87596a8), SPH_C32(0x51ac5f51), SPH_C32(0xa344baa3), + SPH_C32(0x40db1b40), SPH_C32(0x8f800a8f), SPH_C32(0x92d37e92), + SPH_C32(0x9dfe429d), SPH_C32(0x38a8e038), SPH_C32(0xf5fdf9f5), + SPH_C32(0xbc19c6bc), SPH_C32(0xb62feeb6), SPH_C32(0xda3045da), + SPH_C32(0x21e78421), SPH_C32(0x10704010), SPH_C32(0xffcbd1ff), + SPH_C32(0xf3efe1f3), SPH_C32(0xd20865d2), SPH_C32(0xcd5519cd), + SPH_C32(0x0c24300c), SPH_C32(0x13794c13), SPH_C32(0xecb29dec), + SPH_C32(0x5f86675f), SPH_C32(0x97c86a97), SPH_C32(0x44c70b44), + SPH_C32(0x17655c17), SPH_C32(0xc46a3dc4), SPH_C32(0xa758aaa7), + SPH_C32(0x7e61e37e), SPH_C32(0x3db3f43d), SPH_C32(0x64278b64), + SPH_C32(0x5d886f5d), SPH_C32(0x194f6419), SPH_C32(0x7342d773), + SPH_C32(0x603b9b60), SPH_C32(0x81aa3281), SPH_C32(0x4ff6274f), + SPH_C32(0xdc225ddc), SPH_C32(0x22ee8822), SPH_C32(0x2ad6a82a), + SPH_C32(0x90dd7690), SPH_C32(0x88951688), SPH_C32(0x46c90346), + SPH_C32(0xeebc95ee), SPH_C32(0xb805d6b8), SPH_C32(0x146c5014), + SPH_C32(0xde2c55de), SPH_C32(0x5e81635e), SPH_C32(0x0b312c0b), + SPH_C32(0xdb3741db), SPH_C32(0xe096ade0), SPH_C32(0x329ec832), + SPH_C32(0x3aa6e83a), SPH_C32(0x0a36280a), SPH_C32(0x49e43f49), + SPH_C32(0x06121806), SPH_C32(0x24fc9024), SPH_C32(0x5c8f6b5c), + SPH_C32(0xc27825c2), SPH_C32(0xd30f61d3), SPH_C32(0xac6986ac), + SPH_C32(0x62359362), SPH_C32(0x91da7291), SPH_C32(0x95c66295), + SPH_C32(0xe48abde4), SPH_C32(0x7974ff79), SPH_C32(0xe783b1e7), + SPH_C32(0xc84e0dc8), SPH_C32(0x3785dc37), SPH_C32(0x6d18af6d), + SPH_C32(0x8d8e028d), SPH_C32(0xd51d79d5), SPH_C32(0x4ef1234e), + SPH_C32(0xa97292a9), SPH_C32(0x6c1fab6c), SPH_C32(0x56b94356), + SPH_C32(0xf4fafdf4), SPH_C32(0xeaa085ea), SPH_C32(0x65208f65), + SPH_C32(0x7a7df37a), SPH_C32(0xae678eae), SPH_C32(0x08382008), + SPH_C32(0xba0bdeba), SPH_C32(0x7873fb78), SPH_C32(0x25fb9425), + SPH_C32(0x2ecab82e), SPH_C32(0x1c54701c), SPH_C32(0xa65faea6), + 
SPH_C32(0xb421e6b4), SPH_C32(0xc66435c6), SPH_C32(0xe8ae8de8), + SPH_C32(0xdd2559dd), SPH_C32(0x7457cb74), SPH_C32(0x1f5d7c1f), + SPH_C32(0x4bea374b), SPH_C32(0xbd1ec2bd), SPH_C32(0x8b9c1a8b), + SPH_C32(0x8a9b1e8a), SPH_C32(0x704bdb70), SPH_C32(0x3ebaf83e), + SPH_C32(0xb526e2b5), SPH_C32(0x66298366), SPH_C32(0x48e33b48), + SPH_C32(0x03090c03), SPH_C32(0xf6f4f5f6), SPH_C32(0x0e2a380e), + SPH_C32(0x613c9f61), SPH_C32(0x358bd435), SPH_C32(0x57be4757), + SPH_C32(0xb902d2b9), SPH_C32(0x86bf2e86), SPH_C32(0xc17129c1), + SPH_C32(0x1d53741d), SPH_C32(0x9ef74e9e), SPH_C32(0xe191a9e1), + SPH_C32(0xf8decdf8), SPH_C32(0x98e55698), SPH_C32(0x11774411), + SPH_C32(0x6904bf69), SPH_C32(0xd93949d9), SPH_C32(0x8e870e8e), + SPH_C32(0x94c16694), SPH_C32(0x9bec5a9b), SPH_C32(0x1e5a781e), + SPH_C32(0x87b82a87), SPH_C32(0xe9a989e9), SPH_C32(0xce5c15ce), + SPH_C32(0x55b04f55), SPH_C32(0x28d8a028), SPH_C32(0xdf2b51df), + SPH_C32(0x8c89068c), SPH_C32(0xa14ab2a1), SPH_C32(0x89921289), + SPH_C32(0x0d23340d), SPH_C32(0xbf10cabf), SPH_C32(0xe684b5e6), + SPH_C32(0x42d51342), SPH_C32(0x6803bb68), SPH_C32(0x41dc1f41), + SPH_C32(0x99e25299), SPH_C32(0x2dc3b42d), SPH_C32(0x0f2d3c0f), + SPH_C32(0xb03df6b0), SPH_C32(0x54b74b54), SPH_C32(0xbb0cdabb), + SPH_C32(0x16625816) +}; + /* TIX2: inject input word q. XORs the old x00 into x10, replaces x00 with q, XORs the new x00 into x08, then XORs x24 into x01. Parameter names give the state index each argument has in the first unrolled round; arguments must be plain lvalues because they are assigned to. */ +#define TIX2(q, x00, x01, x08, x10, x24) do { \ + x10 ^= x00; \ + x00 = (q); \ + x08 ^= x00; \ + x01 ^= x24; \ + } while (0) + /* TIX3: same injection step with x16 receiving the old x00 and two extra XORs (x01 ^= x27, x04 ^= x30). */ +#define TIX3(q, x00, x01, x04, x08, x16, x27, x30) do { \ + x16 ^= x00; \ + x00 = (q); \ + x08 ^= x00; \ + x01 ^= x27; \ + x04 ^= x30; \ + } while (0) + /* TIX4: same injection step with x22 receiving the old x00 and three extra XORs (x01 ^= x24, x04 ^= x27, x07 ^= x30). */ +#define TIX4(q, x00, x01, x04, x07, x08, x22, x24, x27, x30) do { \ + x22 ^= x00; \ + x00 = (q); \ + x08 ^= x00; \ + x01 ^= x24; \ + x04 ^= x27; \ + x07 ^= x30; \ + } while (0) + /* CMIX30: XOR x04..x06 into x00..x02 and also into x15..x17 (variant used with the 30-word state). */ +#define CMIX30(x00, x01, x02, x04, x05, x06, x15, x16, x17) do { \ + x00 ^= x04; \ + x01 ^= x05; \ + x02 ^= x06; \ + x15 ^= x04; \ + x16 ^= x05; \ + x17 ^= x06; \ + } while (0) + /* CMIX36: XOR x04..x06 into x00..x02 and also into x18..x20 (variant used with the 36-word state). */ +#define CMIX36(x00, x01, x02, x04, x05, x06, x18, x19, x20) do { \ + x00 ^= x04; \ + x01 ^= 
x05; \ + x02 ^= x06; \ + x18 ^= x04; \ + x19 ^= x05; \ + x20 ^= x06; \ + } while (0) + /* SMIX: table-driven mixing of four state words, in place. Each byte of x0..x3 is looked up (top byte in mixtab0 down to low byte in mixtab3). Every lookup of word i is XORed into ci; the lookup from table j is additionally XORed into rj unless it came from word j. x0..x3 are then rebuilt byte lane by byte lane from masked XORs of the c sums and shifted r sums. The arguments are expanded many times and assigned to, so they must be side-effect-free lvalues. */ +#define SMIX(x0, x1, x2, x3) do { \ + sph_u32 c0 = 0; \ + sph_u32 c1 = 0; \ + sph_u32 c2 = 0; \ + sph_u32 c3 = 0; \ + sph_u32 r0 = 0; \ + sph_u32 r1 = 0; \ + sph_u32 r2 = 0; \ + sph_u32 r3 = 0; \ + sph_u32 tmp; \ + tmp = mixtab0[x0 >> 24]; \ + c0 ^= tmp; \ + tmp = mixtab1[(x0 >> 16) & 0xFF]; \ + c0 ^= tmp; \ + r1 ^= tmp; \ + tmp = mixtab2[(x0 >> 8) & 0xFF]; \ + c0 ^= tmp; \ + r2 ^= tmp; \ + tmp = mixtab3[x0 & 0xFF]; \ + c0 ^= tmp; \ + r3 ^= tmp; \ + tmp = mixtab0[x1 >> 24]; \ + c1 ^= tmp; \ + r0 ^= tmp; \ + tmp = mixtab1[(x1 >> 16) & 0xFF]; \ + c1 ^= tmp; \ + tmp = mixtab2[(x1 >> 8) & 0xFF]; \ + c1 ^= tmp; \ + r2 ^= tmp; \ + tmp = mixtab3[x1 & 0xFF]; \ + c1 ^= tmp; \ + r3 ^= tmp; \ + tmp = mixtab0[x2 >> 24]; \ + c2 ^= tmp; \ + r0 ^= tmp; \ + tmp = mixtab1[(x2 >> 16) & 0xFF]; \ + c2 ^= tmp; \ + r1 ^= tmp; \ + tmp = mixtab2[(x2 >> 8) & 0xFF]; \ + c2 ^= tmp; \ + tmp = mixtab3[x2 & 0xFF]; \ + c2 ^= tmp; \ + r3 ^= tmp; \ + tmp = mixtab0[x3 >> 24]; \ + c3 ^= tmp; \ + r0 ^= tmp; \ + tmp = mixtab1[(x3 >> 16) & 0xFF]; \ + c3 ^= tmp; \ + r1 ^= tmp; \ + tmp = mixtab2[(x3 >> 8) & 0xFF]; \ + c3 ^= tmp; \ + r2 ^= tmp; \ + tmp = mixtab3[x3 & 0xFF]; \ + c3 ^= tmp; \ + x0 = ((c0 ^ r0) & SPH_C32(0xFF000000)) \ + | ((c1 ^ r1) & SPH_C32(0x00FF0000)) \ + | ((c2 ^ r2) & SPH_C32(0x0000FF00)) \ + | ((c3 ^ r3) & SPH_C32(0x000000FF)); \ + x1 = ((c1 ^ (r0 << 8)) & SPH_C32(0xFF000000)) \ + | ((c2 ^ (r1 << 8)) & SPH_C32(0x00FF0000)) \ + | ((c3 ^ (r2 << 8)) & SPH_C32(0x0000FF00)) \ + | ((c0 ^ (r3 >> 24)) & SPH_C32(0x000000FF)); \ + x2 = ((c2 ^ (r0 << 16)) & SPH_C32(0xFF000000)) \ + | ((c3 ^ (r1 << 16)) & SPH_C32(0x00FF0000)) \ + | ((c0 ^ (r2 >> 16)) & SPH_C32(0x0000FF00)) \ + | ((c1 ^ (r3 >> 16)) & SPH_C32(0x000000FF)); \ + x3 = ((c3 ^ (r0 << 24)) & SPH_C32(0xFF000000)) \ + | ((c0 ^ (r1 >> 8)) & SPH_C32(0x00FF0000)) \ + | ((c1 ^ (r2 >> 8)) & SPH_C32(0x0000FF00)) \ + | ((c2 ^ (r3 >> 8)) & 
SPH_C32(0x000000FF)); \ + /* */ \ + } while (0) + +#if SPH_FUGUE_NOCOPY + /* No-copy mode: the state words are accessed directly in the context, so the declare/read/write macros expand to nothing. */ +#define DECL_STATE_SMALL +#define READ_STATE_SMALL(state) +#define WRITE_STATE_SMALL(state) +#define DECL_STATE_BIG +#define READ_STATE_BIG(state) +#define WRITE_STATE_BIG(state) + /* S00..S35 alias sc->S[0..35]; a variable named sc must be in scope wherever they are used. */ +#define S00 ((sc)->S[ 0]) +#define S01 ((sc)->S[ 1]) +#define S02 ((sc)->S[ 2]) +#define S03 ((sc)->S[ 3]) +#define S04 ((sc)->S[ 4]) +#define S05 ((sc)->S[ 5]) +#define S06 ((sc)->S[ 6]) +#define S07 ((sc)->S[ 7]) +#define S08 ((sc)->S[ 8]) +#define S09 ((sc)->S[ 9]) +#define S10 ((sc)->S[10]) +#define S11 ((sc)->S[11]) +#define S12 ((sc)->S[12]) +#define S13 ((sc)->S[13]) +#define S14 ((sc)->S[14]) +#define S15 ((sc)->S[15]) +#define S16 ((sc)->S[16]) +#define S17 ((sc)->S[17]) +#define S18 ((sc)->S[18]) +#define S19 ((sc)->S[19]) +#define S20 ((sc)->S[20]) +#define S21 ((sc)->S[21]) +#define S22 ((sc)->S[22]) +#define S23 ((sc)->S[23]) +#define S24 ((sc)->S[24]) +#define S25 ((sc)->S[25]) +#define S26 ((sc)->S[26]) +#define S27 ((sc)->S[27]) +#define S28 ((sc)->S[28]) +#define S29 ((sc)->S[29]) +#define S30 ((sc)->S[30]) +#define S31 ((sc)->S[31]) +#define S32 ((sc)->S[32]) +#define S33 ((sc)->S[33]) +#define S34 ((sc)->S[34]) +#define S35 ((sc)->S[35]) + +#else + /* Copy mode: S00..S29 (small) or S00..S35 (big) are local variables that are loaded from and stored back to (state)->S[] by the READ_STATE_* and WRITE_STATE_* macros. */ +#define DECL_STATE_SMALL \ + sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09; \ + sph_u32 S10, S11, S12, S13, S14, S15, S16, S17, S18, S19; \ + sph_u32 S20, S21, S22, S23, S24, S25, S26, S27, S28, S29; + /* DECL_STATE_BIG: the 30 small-state locals plus S30..S35. */ +#define DECL_STATE_BIG \ + DECL_STATE_SMALL \ + sph_u32 S30, S31, S32, S33, S34, S35; + /* READ_STATE_SMALL: load the 30 local state words from (state)->S[0..29]. */ +#define READ_STATE_SMALL(state) do { \ + S00 = (state)->S[ 0]; \ + S01 = (state)->S[ 1]; \ + S02 = (state)->S[ 2]; \ + S03 = (state)->S[ 3]; \ + S04 = (state)->S[ 4]; \ + S05 = (state)->S[ 5]; \ + S06 = (state)->S[ 6]; \ + S07 = (state)->S[ 7]; \ + S08 = (state)->S[ 8]; \ + S09 = (state)->S[ 9]; \ + S10 = (state)->S[10]; \ + S11 = (state)->S[11]; \ + S12 = (state)->S[12]; \ + S13 = (state)->S[13]; \ + S14 = (state)->S[14]; \ + S15 = (state)->S[15]; \ + 
S16 = (state)->S[16]; \ + S17 = (state)->S[17]; \ + S18 = (state)->S[18]; \ + S19 = (state)->S[19]; \ + S20 = (state)->S[20]; \ + S21 = (state)->S[21]; \ + S22 = (state)->S[22]; \ + S23 = (state)->S[23]; \ + S24 = (state)->S[24]; \ + S25 = (state)->S[25]; \ + S26 = (state)->S[26]; \ + S27 = (state)->S[27]; \ + S28 = (state)->S[28]; \ + S29 = (state)->S[29]; \ + } while (0) + /* READ_STATE_BIG: load all 36 local state words (the small set plus S30..S35). */ +#define READ_STATE_BIG(state) do { \ + READ_STATE_SMALL(state); \ + S30 = (state)->S[30]; \ + S31 = (state)->S[31]; \ + S32 = (state)->S[32]; \ + S33 = (state)->S[33]; \ + S34 = (state)->S[34]; \ + S35 = (state)->S[35]; \ + } while (0) + /* WRITE_STATE_SMALL: store the 30 local state words back to (state)->S[0..29]. */ +#define WRITE_STATE_SMALL(state) do { \ + (state)->S[ 0] = S00; \ + (state)->S[ 1] = S01; \ + (state)->S[ 2] = S02; \ + (state)->S[ 3] = S03; \ + (state)->S[ 4] = S04; \ + (state)->S[ 5] = S05; \ + (state)->S[ 6] = S06; \ + (state)->S[ 7] = S07; \ + (state)->S[ 8] = S08; \ + (state)->S[ 9] = S09; \ + (state)->S[10] = S10; \ + (state)->S[11] = S11; \ + (state)->S[12] = S12; \ + (state)->S[13] = S13; \ + (state)->S[14] = S14; \ + (state)->S[15] = S15; \ + (state)->S[16] = S16; \ + (state)->S[17] = S17; \ + (state)->S[18] = S18; \ + (state)->S[19] = S19; \ + (state)->S[20] = S20; \ + (state)->S[21] = S21; \ + (state)->S[22] = S22; \ + (state)->S[23] = S23; \ + (state)->S[24] = S24; \ + (state)->S[25] = S25; \ + (state)->S[26] = S26; \ + (state)->S[27] = S27; \ + (state)->S[28] = S28; \ + (state)->S[29] = S29; \ + } while (0) + /* WRITE_STATE_BIG: store all 36 local state words back to (state)->S[0..35]. */ +#define WRITE_STATE_BIG(state) do { \ + WRITE_STATE_SMALL(state); \ + (state)->S[30] = S30; \ + (state)->S[31] = S31; \ + (state)->S[32] = S32; \ + (state)->S[33] = S33; \ + (state)->S[34] = S34; \ + (state)->S[35] = S35; \ + } while (0) + +#endif + /* fugue_init: reset a context. Zeroes the first z_len state words, copies the iv_len words of iv immediately after them, and clears the buffered partial word, its length, the round shift and the bit counter. */ +static void +fugue_init(sph_fugue_context *sc, size_t z_len, + const sph_u32 *iv, size_t iv_len) +{ + size_t u; + + for (u = 0; u < z_len; u ++) + sc->S[u] = 0; + memcpy(&sc->S[z_len], iv, iv_len * sizeof *iv); + sc->partial = 0; + sc->partial_len = 0; + sc->round_shift = 0; +#if SPH_64 + 
sc->bit_count = 0; +#else + sc->bit_count_high = 0; + sc->bit_count_low = 0; +#endif +} + /* INCR_COUNTER: add len * 8 to the running input bit count; expects sc and len in scope. With SPH_64 a single 64-bit counter is used, otherwise a high/low pair of 32-bit words with manual carry. */ +#if SPH_64 + +#define INCR_COUNTER do { \ + sc->bit_count += (sph_u64)len << 3; \ + } while (0) + +#else + /* 32-bit variant: the low word takes (len << 3) truncated to 32 bits, a wrap-around carries into the high word, and the bits shifted out (len >> 29) are added to the high word. */ +#define INCR_COUNTER do { \ + sph_u32 tmp = SPH_T32((sph_u32)len << 3); \ + sc->bit_count_low = SPH_T32(sc->bit_count_low + tmp); \ + if (sc->bit_count_low < tmp) \ + sc->bit_count_high ++; \ + sc->bit_count_high = SPH_T32(sc->bit_count_high \ + + ((sph_u32)len >> 29)); \ + } while (0) + +#endif + /* CORE_ENTRY: common prologue of the *_core functions. Updates the bit counter, then tops up the buffered big-endian partial word p from data until it holds 4 bytes. If no input bytes remain afterwards (even when p is now a full word), p and its length are saved and the enclosing function returns. */ +#define CORE_ENTRY \ + sph_u32 p; \ + unsigned plen, rshift; \ + INCR_COUNTER; \ + p = sc->partial; \ + plen = sc->partial_len; \ + if (plen < 4) { \ + unsigned count = 4 - plen; \ + if (len < count) \ + count = len; \ + plen += count; \ + while (count -- > 0) { \ + p = (p << 8) | *(const unsigned char *)data; \ + data = (const unsigned char *)data + 1; \ + len --; \ + } \ + if (len == 0) { \ + sc->partial = p; \ + sc->partial_len = plen; \ + return; \ + } \ + } + /* CORE_EXIT: common epilogue of the *_core functions. Packs the remaining len input bytes big-endian into sc->partial, records how many there were, and stores the round position rshift for the next call. */ +#define CORE_EXIT \ + p = 0; \ + sc->partial_len = (unsigned)len; \ + while (len -- > 0) { \ + p = (p << 8) | *(const unsigned char *)data; \ + data = (const unsigned char *)data + 1; \ + } \ + sc->partial = p; \ + sc->round_shift = rshift; + +/* + * Not in a do..while: the 'break' must exit the outer loop. 
+ */ +#define NEXT(rc) \ + if (len <= 4) { \ + rshift = (rc); \ + break; \ + } \ + p = sph_dec32be(data); \ + data = (const unsigned char *)data + 4; \ + len -= 4 + /* fugue2_core: absorb len bytes of data into the 30-word state. Each 32-bit big-endian input word goes through TIX2 followed by two CMIX30/SMIX pairs. The switch jumps into the middle of the for(;;) loop at the case saved in round_shift; the five cases are the same round with all state indices moved down by 6 (mod 30), so the state never has to be physically rotated. NEXT leaves the loop while up to 4 bytes are still unread; CORE_EXIT buffers them. */ +static void +fugue2_core(sph_fugue_context *sc, const void *data, size_t len) +{ + DECL_STATE_SMALL + CORE_ENTRY + READ_STATE_SMALL(sc); + rshift = sc->round_shift; + switch (rshift) { + for (;;) { + sph_u32 q; + + case 0: + q = p; + TIX2(q, S00, S01, S08, S10, S24); + CMIX30(S27, S28, S29, S01, S02, S03, S12, S13, S14); + SMIX(S27, S28, S29, S00); + CMIX30(S24, S25, S26, S28, S29, S00, S09, S10, S11); + SMIX(S24, S25, S26, S27); + NEXT(1); + /* fall through */ + case 1: + q = p; + TIX2(q, S24, S25, S02, S04, S18); + CMIX30(S21, S22, S23, S25, S26, S27, S06, S07, S08); + SMIX(S21, S22, S23, S24); + CMIX30(S18, S19, S20, S22, S23, S24, S03, S04, S05); + SMIX(S18, S19, S20, S21); + NEXT(2); + /* fall through */ + case 2: + q = p; + TIX2(q, S18, S19, S26, S28, S12); + CMIX30(S15, S16, S17, S19, S20, S21, S00, S01, S02); + SMIX(S15, S16, S17, S18); + CMIX30(S12, S13, S14, S16, S17, S18, S27, S28, S29); + SMIX(S12, S13, S14, S15); + NEXT(3); + /* fall through */ + case 3: + q = p; + TIX2(q, S12, S13, S20, S22, S06); + CMIX30(S09, S10, S11, S13, S14, S15, S24, S25, S26); + SMIX(S09, S10, S11, S12); + CMIX30(S06, S07, S08, S10, S11, S12, S21, S22, S23); + SMIX(S06, S07, S08, S09); + NEXT(4); + /* fall through */ + case 4: + q = p; + TIX2(q, S06, S07, S14, S16, S00); + CMIX30(S03, S04, S05, S07, S08, S09, S18, S19, S20); + SMIX(S03, S04, S05, S06); + CMIX30(S00, S01, S02, S04, S05, S06, S15, S16, S17); + SMIX(S00, S01, S02, S03); + NEXT(0); + } + } + CORE_EXIT + WRITE_STATE_SMALL(sc); +} + /* fugue3_core: absorb len bytes of data into the 36-word state. Same structure as the 30-word core, but each input word goes through TIX3 followed by three CMIX36/SMIX pairs, and the four unrolled cases move the state indices down by 9 (mod 36). */ +static void +fugue3_core(sph_fugue_context *sc, const void *data, size_t len) +{ + DECL_STATE_BIG + CORE_ENTRY + READ_STATE_BIG(sc); + rshift = sc->round_shift; + switch (rshift) { + for (;;) { + sph_u32 q; + + case 0: + q = p; + TIX3(q, S00, S01, S04, S08, S16, S27, S30); + CMIX36(S33, S34, S35, S01, S02, S03, S15, S16, 
S17); + SMIX(S33, S34, S35, S00); + CMIX36(S30, S31, S32, S34, S35, S00, S12, S13, S14); + SMIX(S30, S31, S32, S33); + CMIX36(S27, S28, S29, S31, S32, S33, S09, S10, S11); + SMIX(S27, S28, S29, S30); + NEXT(1); + /* fall through */ + case 1: + q = p; + TIX3(q, S27, S28, S31, S35, S07, S18, S21); + CMIX36(S24, S25, S26, S28, S29, S30, S06, S07, S08); + SMIX(S24, S25, S26, S27); + CMIX36(S21, S22, S23, S25, S26, S27, S03, S04, S05); + SMIX(S21, S22, S23, S24); + CMIX36(S18, S19, S20, S22, S23, S24, S00, S01, S02); + SMIX(S18, S19, S20, S21); + NEXT(2); + /* fall through */ + case 2: + q = p; + TIX3(q, S18, S19, S22, S26, S34, S09, S12); + CMIX36(S15, S16, S17, S19, S20, S21, S33, S34, S35); + SMIX(S15, S16, S17, S18); + CMIX36(S12, S13, S14, S16, S17, S18, S30, S31, S32); + SMIX(S12, S13, S14, S15); + CMIX36(S09, S10, S11, S13, S14, S15, S27, S28, S29); + SMIX(S09, S10, S11, S12); + NEXT(3); + /* fall through */ + case 3: + q = p; + TIX3(q, S09, S10, S13, S17, S25, S00, S03); + CMIX36(S06, S07, S08, S10, S11, S12, S24, S25, S26); + SMIX(S06, S07, S08, S09); + CMIX36(S03, S04, S05, S07, S08, S09, S21, S22, S23); + SMIX(S03, S04, S05, S06); + CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20); + SMIX(S00, S01, S02, S03); + NEXT(0); + } + } + CORE_EXIT + WRITE_STATE_BIG(sc); +} + /* fugue4_core: absorb len bytes of data into the 36-word state. Each 32-bit big-endian input word goes through TIX4 followed by four CMIX36/SMIX pairs. The switch jumps into the for(;;) loop at the case saved in round_shift; the three unrolled cases are the same round with the state indices moved down by 12 (mod 36). */ +static void +fugue4_core(sph_fugue_context *sc, const void *data, size_t len) +{ + DECL_STATE_BIG + CORE_ENTRY + READ_STATE_BIG(sc); + rshift = sc->round_shift; + switch (rshift) { + for (;;) { + sph_u32 q; + + case 0: + q = p; + TIX4(q, S00, S01, S04, S07, S08, S22, S24, S27, S30); + CMIX36(S33, S34, S35, S01, S02, S03, S15, S16, S17); + SMIX(S33, S34, S35, S00); + CMIX36(S30, S31, S32, S34, S35, S00, S12, S13, S14); + SMIX(S30, S31, S32, S33); + CMIX36(S27, S28, S29, S31, S32, S33, S09, S10, S11); + SMIX(S27, S28, S29, S30); + CMIX36(S24, S25, S26, S28, S29, S30, S06, S07, S08); + SMIX(S24, S25, S26, S27); + NEXT(1); + /* fall through */ + case 1: + q = p; + TIX4(q, S24, S25, S28, S31, S32, 
S10, S12, S15, S18); + CMIX36(S21, S22, S23, S25, S26, S27, S03, S04, S05); + SMIX(S21, S22, S23, S24); + CMIX36(S18, S19, S20, S22, S23, S24, S00, S01, S02); + SMIX(S18, S19, S20, S21); + CMIX36(S15, S16, S17, S19, S20, S21, S33, S34, S35); + SMIX(S15, S16, S17, S18); + CMIX36(S12, S13, S14, S16, S17, S18, S30, S31, S32); + SMIX(S12, S13, S14, S15); + NEXT(2); + /* fall through */ + case 2: + q = p; + TIX4(q, S12, S13, S16, S19, S20, S34, S00, S03, S06); + CMIX36(S09, S10, S11, S13, S14, S15, S27, S28, S29); + SMIX(S09, S10, S11, S12); + CMIX36(S06, S07, S08, S10, S11, S12, S24, S25, S26); + SMIX(S06, S07, S08, S09); + CMIX36(S03, S04, S05, S07, S08, S09, S21, S22, S23); + SMIX(S03, S04, S05, S06); + CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20); + SMIX(S00, S01, S02, S03); + NEXT(0); + } + } + CORE_EXIT + WRITE_STATE_BIG(sc); +} + /* WRITE_COUNTER: encode the total message bit length (the running bit count plus the n extra bits) big-endian into the bytes starting at buf + 4; expects sc, buf and n in scope. */ +#if SPH_64 + +#define WRITE_COUNTER do { \ + sph_enc64be(buf + 4, sc->bit_count + n); \ + } while (0) + +#else + +#define WRITE_COUNTER do { \ + sph_enc32be(buf + 4, sc->bit_count_high); \ + sph_enc32be(buf + 8, sc->bit_count_low + n); \ + } while (0) + +#endif + /* CLOSE_ENTRY: common prologue of the *_close functions; s is the state size in words, rcm the per-round index shift and core the matching absorb function. Builds the final block in buf: the n extra bits from ub, zero-filled up to a 4-byte boundary, when the input did not end on a word boundary or n != 0, followed by the bit counter. It feeds buf to core, then copies sc->S into the local array S rotated right by round_shift * rcm words. NOTE(review): buf[12..15] are never written; NEXT stops while the last 4 bytes are still unread, so they appear to act only as a trailing sentinel -- confirm. */ +#define CLOSE_ENTRY(s, rcm, core) \ + unsigned char buf[16]; \ + unsigned plen, rms; \ + unsigned char *out; \ + sph_u32 S[s]; \ + plen = sc->partial_len; \ + WRITE_COUNTER; \ + if (plen == 0 && n == 0) { \ + plen = 4; \ + } else if (plen < 4 || n != 0) { \ + unsigned u; \ + \ + if (plen == 4) \ + plen = 0; \ + buf[plen] = ub & ~(0xFFU >> n); \ + for (u = plen + 1; u < 4; u ++) \ + buf[u] = 0; \ + } \ + core(sc, buf + plen, (sizeof buf) - plen); \ + rms = sc->round_shift * (rcm); \ + memcpy(S, sc->S + (s) - rms, rms * sizeof(sph_u32)); \ + memcpy(S + rms, sc->S, ((s) - rms) * sizeof(sph_u32)); + /* ROR(n, s): rotate the local s-word array S right by n words, so the old S[i] ends up at S[(i + n) mod s]. */ +#define ROR(n, s) do { \ + sph_u32 tmp[n]; \ + memcpy(tmp, S + ((s) - (n)), (n) * sizeof(sph_u32)); \ + memmove(S + (n), S, ((s) - (n)) * sizeof(sph_u32)); \ + memcpy(S, tmp, (n) * sizeof(sph_u32)); \ + } while (0) + /* fugue2_close: finalize a hash on the 30-word state. Absorbs the padding and bit length via CLOSE_ENTRY, runs the final rounds, writes the digest to dst as big-endian words (8 words when out_size_w32 == 8, otherwise 7) and re-initializes the context for the same output size. ub and n give the n extra input bits taken from the top of ub. */ +static void +fugue2_close(sph_fugue_context *sc, unsigned 
ub, unsigned n, + void *dst, size_t out_size_w32) +{ + int i; + /* Final rounds: 10 x (ROR 3, CMIX30, SMIX), then 13 x two (column XOR, ROR, SMIX) steps, then one last column XOR before the output words are read. */ + CLOSE_ENTRY(30, 6, fugue2_core) + for (i = 0; i < 10; i ++) { + ROR(3, 30); + CMIX30(S[0], S[1], S[2], S[4], S[5], S[6], S[15], S[16], S[17]); + SMIX(S[0], S[1], S[2], S[3]); + } + for (i = 0; i < 13; i ++) { + S[4] ^= S[0]; + S[15] ^= S[0]; + ROR(15, 30); + SMIX(S[0], S[1], S[2], S[3]); + S[4] ^= S[0]; + S[16] ^= S[0]; + ROR(14, 30); + SMIX(S[0], S[1], S[2], S[3]); + } + S[4] ^= S[0]; + S[15] ^= S[0]; + out = dst; + sph_enc32be(out + 0, S[ 1]); + sph_enc32be(out + 4, S[ 2]); + sph_enc32be(out + 8, S[ 3]); + sph_enc32be(out + 12, S[ 4]); + sph_enc32be(out + 16, S[15]); + sph_enc32be(out + 20, S[16]); + sph_enc32be(out + 24, S[17]); + if (out_size_w32 == 8) { + sph_enc32be(out + 28, S[18]); + sph_fugue256_init(sc); + } else { + sph_fugue224_init(sc); + } +} + /* fugue3_close: finalize a 384-bit hash on the 36-word state. Absorbs the padding and bit length via CLOSE_ENTRY, runs 18 x (ROR 3, CMIX36, SMIX) and 13 x three (column XOR, ROR, SMIX) steps, writes 12 big-endian words (S[1..4], S[12..15], S[24..27]) to dst and re-initializes the context with sph_fugue384_init. */ +static void +fugue3_close(sph_fugue_context *sc, unsigned ub, unsigned n, void *dst) +{ + int i; + + CLOSE_ENTRY(36, 9, fugue3_core) + for (i = 0; i < 18; i ++) { + ROR(3, 36); + CMIX36(S[0], S[1], S[2], S[4], S[5], S[6], S[18], S[19], S[20]); + SMIX(S[0], S[1], S[2], S[3]); + } + for (i = 0; i < 13; i ++) { + S[4] ^= S[0]; + S[12] ^= S[0]; + S[24] ^= S[0]; + ROR(12, 36); + SMIX(S[0], S[1], S[2], S[3]); + S[4] ^= S[0]; + S[13] ^= S[0]; + S[24] ^= S[0]; + ROR(12, 36); + SMIX(S[0], S[1], S[2], S[3]); + S[4] ^= S[0]; + S[13] ^= S[0]; + S[25] ^= S[0]; + ROR(11, 36); + SMIX(S[0], S[1], S[2], S[3]); + } + S[4] ^= S[0]; + S[12] ^= S[0]; + S[24] ^= S[0]; + out = dst; + sph_enc32be(out + 0, S[ 1]); + sph_enc32be(out + 4, S[ 2]); + sph_enc32be(out + 8, S[ 3]); + sph_enc32be(out + 12, S[ 4]); + sph_enc32be(out + 16, S[12]); + sph_enc32be(out + 20, S[13]); + sph_enc32be(out + 24, S[14]); + sph_enc32be(out + 28, S[15]); + sph_enc32be(out + 32, S[24]); + sph_enc32be(out + 36, S[25]); + sph_enc32be(out + 40, S[26]); + sph_enc32be(out + 44, S[27]); + sph_fugue384_init(sc); +} + /* fugue4_close: finalize a 512-bit hash on the 36-word state; writes 16 big-endian words to dst and re-initializes the context with sph_fugue512_init. ub and n give the n extra input bits taken from the top of ub. */ +static void +fugue4_close(sph_fugue_context *sc, unsigned ub, unsigned n, void 
*dst) +{ + int i; + /* Final rounds: 32 x (ROR 3, CMIX36, SMIX), then 13 x four (column XOR, ROR, SMIX) steps, then one last column XOR. The digest is S[1..4], S[9..12], S[18..21], S[27..30]. */ + CLOSE_ENTRY(36, 12, fugue4_core) + for (i = 0; i < 32; i ++) { + ROR(3, 36); + CMIX36(S[0], S[1], S[2], S[4], S[5], S[6], S[18], S[19], S[20]); + SMIX(S[0], S[1], S[2], S[3]); + } + for (i = 0; i < 13; i ++) { + S[4] ^= S[0]; + S[9] ^= S[0]; + S[18] ^= S[0]; + S[27] ^= S[0]; + ROR(9, 36); + SMIX(S[0], S[1], S[2], S[3]); + S[4] ^= S[0]; + S[10] ^= S[0]; + S[18] ^= S[0]; + S[27] ^= S[0]; + ROR(9, 36); + SMIX(S[0], S[1], S[2], S[3]); + S[4] ^= S[0]; + S[10] ^= S[0]; + S[19] ^= S[0]; + S[27] ^= S[0]; + ROR(9, 36); + SMIX(S[0], S[1], S[2], S[3]); + S[4] ^= S[0]; + S[10] ^= S[0]; + S[19] ^= S[0]; + S[28] ^= S[0]; + ROR(8, 36); + SMIX(S[0], S[1], S[2], S[3]); + } + S[4] ^= S[0]; + S[9] ^= S[0]; + S[18] ^= S[0]; + S[27] ^= S[0]; + out = dst; + sph_enc32be(out + 0, S[ 1]); + sph_enc32be(out + 4, S[ 2]); + sph_enc32be(out + 8, S[ 3]); + sph_enc32be(out + 12, S[ 4]); + sph_enc32be(out + 16, S[ 9]); + sph_enc32be(out + 20, S[10]); + sph_enc32be(out + 24, S[11]); + sph_enc32be(out + 28, S[12]); + sph_enc32be(out + 32, S[18]); + sph_enc32be(out + 36, S[19]); + sph_enc32be(out + 40, S[20]); + sph_enc32be(out + 44, S[21]); + sph_enc32be(out + 48, S[27]); + sph_enc32be(out + 52, S[28]); + sph_enc32be(out + 56, S[29]); + sph_enc32be(out + 60, S[30]); + sph_fugue512_init(sc); +} + /* Fugue-224 public entry points. cc is the context, passed as void *. Init sets up 23 zero state words followed by the 7 words of IV224. */ +void +sph_fugue224_init(void *cc) +{ + fugue_init(cc, 23, IV224, 7); +} + /* Absorb len bytes of data (30-word state core). */ +void +sph_fugue224(void *cc, const void *data, size_t len) +{ + fugue2_core(cc, data, len); +} + /* Finish with no extra bits and write the 7-word digest to dst. */ +void +sph_fugue224_close(void *cc, void *dst) +{ + fugue2_close(cc, 0, 0, dst, 7); +} + /* Finish after appending the n extra bits held in ub, then write the 7-word digest to dst. */ +void +sph_fugue224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + fugue2_close(cc, ub, n, dst, 7); +} + /* Fugue-256 public entry points. Init sets up 22 zero state words followed by the 8 words of IV256. */ +void +sph_fugue256_init(void *cc) +{ + fugue_init(cc, 22, IV256, 8); +} + /* Absorb len bytes of data (30-word state core). */ +void +sph_fugue256(void *cc, const void *data, size_t len) +{ + fugue2_core(cc, data, len); +} + /* Finish with no extra bits and write the 8-word digest to dst. */ +void +sph_fugue256_close(void *cc, void *dst) +{ + fugue2_close(cc, 0, 0, dst, 8); +} + /* Finish after appending the n extra bits held in ub, then write the 8-word digest to dst. */ +void +sph_fugue256_addbits_and_close(void 
*cc, unsigned ub, unsigned n, void *dst) +{ + fugue2_close(cc, ub, n, dst, 8); +} + /* Fugue-384 public entry points. Init sets up 24 zero state words followed by the 12 words of IV384; update and close use the TIX3-based core and its close routine. */ +void +sph_fugue384_init(void *cc) +{ + fugue_init(cc, 24, IV384, 12); +} + +void +sph_fugue384(void *cc, const void *data, size_t len) +{ + fugue3_core(cc, data, len); +} + +void +sph_fugue384_close(void *cc, void *dst) +{ + fugue3_close(cc, 0, 0, dst); +} + +void +sph_fugue384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + fugue3_close(cc, ub, n, dst); +} + /* Fugue-512 public entry points. Init sets up 20 zero state words followed by the 16 words of IV512; update and close use the TIX4-based core and its close routine. */ +void +sph_fugue512_init(void *cc) +{ + fugue_init(cc, 20, IV512, 16); +} + +void +sph_fugue512(void *cc, const void *data, size_t len) +{ + fugue4_core(cc, data, len); +} + +void +sph_fugue512_close(void *cc, void *dst) +{ + fugue4_close(cc, 0, 0, dst); +} + +void +sph_fugue512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + fugue4_close(cc, ub, n, dst); +} +#ifdef __cplusplus +} +#endif diff --git a/fuguecoin.cpp b/fuguecoin.cpp new file mode 100644 index 0000000..64c05d1 --- /dev/null +++ b/fuguecoin.cpp @@ -0,0 +1,74 @@ +#include "uint256.h" +#include "sph_fugue.h" + +#include "cpuminer-config.h" +#include "miner.h" + +#include +#include +#include + +extern "C" void my_fugue256_init(void *cc); +extern "C" void my_fugue256(void *cc, const void *data, size_t len); +extern "C" void my_fugue256_close(void *cc, void *dst); +extern "C" void my_fugue256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst); + +// prepared contexts after the first 80 bytes +sph_fugue256_context ctx_fugue_const[8]; + /* SWAP32: reverse the byte order of a 32-bit value; x is expanded four times, so it must be free of side effects. */ +#define SWAP32(x) \ + ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | \ + (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu)) + /* scanhash_fugue256: scan nonces for thread thr_id, starting at pdata[19], in batches of throughPut through fugue256_cpu_hash. Returns 1 with pdata[19] set to the reported nonce, or 0 once pdata[19] reaches max_nonce or a work restart is flagged; *hashes_done receives the number of nonces covered. NOTE(review): thr_id indexes an 8-entry init array without a range check -- presumably callers guarantee thr_id < 8; confirm. Htarg is assigned but not used in this function. */ +extern "C" int scanhash_fugue256(int thr_id, uint32_t *pdata, const uint32_t *ptarget, + uint32_t max_nonce, unsigned long *hashes_done) +{ + uint32_t start_nonce = pdata[19]; + const uint32_t Htarg = ptarget[7]; + const uint32_t throughPut = 4096 * 128; + + // init + static bool init[8] = { false, false, false, false, false, false, 
false, false }; + if(!init[thr_id]) + { + fugue256_cpu_init(thr_id, throughPut); + init[thr_id] = true; + } + + // Endian Drehung ist notwendig + uint32_t endiandata[20]; + for (int kk=0; kk < 20; kk++) + be32enc(&endiandata[kk], pdata[kk]); + + // Context mit dem Endian gedrehten Blockheader vorbereiten (Nonce wird später ersetzt) + fugue256_cpu_setBlock(thr_id, endiandata, (void*)ptarget); + + do { + // GPU + uint32_t foundNounce = 0xFFFFFFFF; + fugue256_cpu_hash(thr_id, throughPut, pdata[19], NULL, &foundNounce); + + if(foundNounce < 0xffffffff) + { + uint32_t hash[8]; + endiandata[19] = SWAP32(foundNounce); + sph_fugue256_context ctx_fugue; + sph_fugue256_init(&ctx_fugue); + sph_fugue256 (&ctx_fugue, endiandata, 80); + sph_fugue256_close(&ctx_fugue, &hash); + + pdata[19] = foundNounce; + *hashes_done = SWAP32(foundNounce) - start_nonce + 1; + return 1; + } + + if (pdata[19] + throughPut < pdata[19]) + pdata[19] = max_nonce; + else pdata[19] += throughPut; + + + } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); + + *hashes_done = pdata[19] - start_nonce + 1; + return 0; +} diff --git a/groestl.c b/groestl.c new file mode 100644 index 0000000..cc685f4 --- /dev/null +++ b/groestl.c @@ -0,0 +1,3123 @@ +/* $Id: groestl.c 260 2011-07-21 01:02:38Z tp $ */ +/* + * Groestl implementation. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include +#include + +#include "sph_groestl.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_GROESTL +#define SPH_SMALL_FOOTPRINT_GROESTL 1 +#endif + +/* + * Apparently, the 32-bit-only version is not faster than the 64-bit + * version unless using the "small footprint" code on a 32-bit machine. 
+ */ +#if !defined SPH_GROESTL_64 +#if SPH_SMALL_FOOTPRINT_GROESTL && !SPH_64_TRUE +#define SPH_GROESTL_64 0 +#else +#define SPH_GROESTL_64 1 +#endif +#endif + +#if !SPH_64 +#undef SPH_GROESTL_64 +#endif + +#ifdef _MSC_VER +#pragma warning (disable: 4146) +#endif + +/* + * The internal representation may use either big-endian or + * little-endian. Using the platform default representation speeds up + * encoding and decoding between bytes and the matrix columns. + */ + +#undef USE_LE +#if SPH_GROESTL_LITTLE_ENDIAN +#define USE_LE 1 +#elif SPH_GROESTL_BIG_ENDIAN +#define USE_LE 0 +#elif SPH_LITTLE_ENDIAN +#define USE_LE 1 +#endif + +#if USE_LE + +#define C32e(x) ((SPH_C32(x) >> 24) \ + | ((SPH_C32(x) >> 8) & SPH_C32(0x0000FF00)) \ + | ((SPH_C32(x) << 8) & SPH_C32(0x00FF0000)) \ + | ((SPH_C32(x) << 24) & SPH_C32(0xFF000000))) +#define dec32e_aligned sph_dec32le_aligned +#define enc32e sph_enc32le +#define B32_0(x) ((x) & 0xFF) +#define B32_1(x) (((x) >> 8) & 0xFF) +#define B32_2(x) (((x) >> 16) & 0xFF) +#define B32_3(x) ((x) >> 24) + +#define R32u(u, d) SPH_T32(((u) << 16) | ((d) >> 16)) +#define R32d(u, d) SPH_T32(((u) >> 16) | ((d) << 16)) + +#define PC32up(j, r) ((sph_u32)((j) + (r))) +#define PC32dn(j, r) 0 +#define QC32up(j, r) SPH_C32(0xFFFFFFFF) +#define QC32dn(j, r) (((sph_u32)(r) << 24) ^ SPH_T32(~((sph_u32)(j) << 24))) + +#if SPH_64 +#define C64e(x) ((SPH_C64(x) >> 56) \ + | ((SPH_C64(x) >> 40) & SPH_C64(0x000000000000FF00)) \ + | ((SPH_C64(x) >> 24) & SPH_C64(0x0000000000FF0000)) \ + | ((SPH_C64(x) >> 8) & SPH_C64(0x00000000FF000000)) \ + | ((SPH_C64(x) << 8) & SPH_C64(0x000000FF00000000)) \ + | ((SPH_C64(x) << 24) & SPH_C64(0x0000FF0000000000)) \ + | ((SPH_C64(x) << 40) & SPH_C64(0x00FF000000000000)) \ + | ((SPH_C64(x) << 56) & SPH_C64(0xFF00000000000000))) +#define dec64e_aligned sph_dec64le_aligned +#define enc64e sph_enc64le +#define B64_0(x) ((x) & 0xFF) +#define B64_1(x) (((x) >> 8) & 0xFF) +#define B64_2(x) (((x) >> 16) & 0xFF) +#define B64_3(x) 
(((x) >> 24) & 0xFF) +#define B64_4(x) (((x) >> 32) & 0xFF) +#define B64_5(x) (((x) >> 40) & 0xFF) +#define B64_6(x) (((x) >> 48) & 0xFF) +#define B64_7(x) ((x) >> 56) +#define R64 SPH_ROTL64 +#define PC64(j, r) ((sph_u64)((j) + (r))) +#define QC64(j, r) (((sph_u64)(r) << 56) ^ SPH_T64(~((sph_u64)(j) << 56))) +#endif + +#else + +#define C32e(x) SPH_C32(x) +#define dec32e_aligned sph_dec32be_aligned +#define enc32e sph_enc32be +#define B32_0(x) ((x) >> 24) +#define B32_1(x) (((x) >> 16) & 0xFF) +#define B32_2(x) (((x) >> 8) & 0xFF) +#define B32_3(x) ((x) & 0xFF) + +#define R32u(u, d) SPH_T32(((u) >> 16) | ((d) << 16)) +#define R32d(u, d) SPH_T32(((u) << 16) | ((d) >> 16)) + +#define PC32up(j, r) ((sph_u32)((j) + (r)) << 24) +#define PC32dn(j, r) 0 +#define QC32up(j, r) SPH_C32(0xFFFFFFFF) +#define QC32dn(j, r) ((sph_u32)(r) ^ SPH_T32(~(sph_u32)(j))) + +#if SPH_64 +#define C64e(x) SPH_C64(x) +#define dec64e_aligned sph_dec64be_aligned +#define enc64e sph_enc64be +#define B64_0(x) ((x) >> 56) +#define B64_1(x) (((x) >> 48) & 0xFF) +#define B64_2(x) (((x) >> 40) & 0xFF) +#define B64_3(x) (((x) >> 32) & 0xFF) +#define B64_4(x) (((x) >> 24) & 0xFF) +#define B64_5(x) (((x) >> 16) & 0xFF) +#define B64_6(x) (((x) >> 8) & 0xFF) +#define B64_7(x) ((x) & 0xFF) +#define R64 SPH_ROTR64 +#define PC64(j, r) ((sph_u64)((j) + (r)) << 56) +#define QC64(j, r) ((sph_u64)(r) ^ SPH_T64(~(sph_u64)(j))) +#endif + +#endif + +#if SPH_GROESTL_64 + +static const sph_u64 T0[] = { + C64e(0xc632f4a5f497a5c6), C64e(0xf86f978497eb84f8), + C64e(0xee5eb099b0c799ee), C64e(0xf67a8c8d8cf78df6), + C64e(0xffe8170d17e50dff), C64e(0xd60adcbddcb7bdd6), + C64e(0xde16c8b1c8a7b1de), C64e(0x916dfc54fc395491), + C64e(0x6090f050f0c05060), C64e(0x0207050305040302), + C64e(0xce2ee0a9e087a9ce), C64e(0x56d1877d87ac7d56), + C64e(0xe7cc2b192bd519e7), C64e(0xb513a662a67162b5), + C64e(0x4d7c31e6319ae64d), C64e(0xec59b59ab5c39aec), + C64e(0x8f40cf45cf05458f), C64e(0x1fa3bc9dbc3e9d1f), + C64e(0x8949c040c0094089), 
C64e(0xfa68928792ef87fa), + C64e(0xefd03f153fc515ef), C64e(0xb29426eb267febb2), + C64e(0x8ece40c94007c98e), C64e(0xfbe61d0b1ded0bfb), + C64e(0x416e2fec2f82ec41), C64e(0xb31aa967a97d67b3), + C64e(0x5f431cfd1cbefd5f), C64e(0x456025ea258aea45), + C64e(0x23f9dabfda46bf23), C64e(0x535102f702a6f753), + C64e(0xe445a196a1d396e4), C64e(0x9b76ed5bed2d5b9b), + C64e(0x75285dc25deac275), C64e(0xe1c5241c24d91ce1), + C64e(0x3dd4e9aee97aae3d), C64e(0x4cf2be6abe986a4c), + C64e(0x6c82ee5aeed85a6c), C64e(0x7ebdc341c3fc417e), + C64e(0xf5f3060206f102f5), C64e(0x8352d14fd11d4f83), + C64e(0x688ce45ce4d05c68), C64e(0x515607f407a2f451), + C64e(0xd18d5c345cb934d1), C64e(0xf9e1180818e908f9), + C64e(0xe24cae93aedf93e2), C64e(0xab3e9573954d73ab), + C64e(0x6297f553f5c45362), C64e(0x2a6b413f41543f2a), + C64e(0x081c140c14100c08), C64e(0x9563f652f6315295), + C64e(0x46e9af65af8c6546), C64e(0x9d7fe25ee2215e9d), + C64e(0x3048782878602830), C64e(0x37cff8a1f86ea137), + C64e(0x0a1b110f11140f0a), C64e(0x2febc4b5c45eb52f), + C64e(0x0e151b091b1c090e), C64e(0x247e5a365a483624), + C64e(0x1badb69bb6369b1b), C64e(0xdf98473d47a53ddf), + C64e(0xcda76a266a8126cd), C64e(0x4ef5bb69bb9c694e), + C64e(0x7f334ccd4cfecd7f), C64e(0xea50ba9fbacf9fea), + C64e(0x123f2d1b2d241b12), C64e(0x1da4b99eb93a9e1d), + C64e(0x58c49c749cb07458), C64e(0x3446722e72682e34), + C64e(0x3641772d776c2d36), C64e(0xdc11cdb2cda3b2dc), + C64e(0xb49d29ee2973eeb4), C64e(0x5b4d16fb16b6fb5b), + C64e(0xa4a501f60153f6a4), C64e(0x76a1d74dd7ec4d76), + C64e(0xb714a361a37561b7), C64e(0x7d3449ce49face7d), + C64e(0x52df8d7b8da47b52), C64e(0xdd9f423e42a13edd), + C64e(0x5ecd937193bc715e), C64e(0x13b1a297a2269713), + C64e(0xa6a204f50457f5a6), C64e(0xb901b868b86968b9), + C64e(0x0000000000000000), C64e(0xc1b5742c74992cc1), + C64e(0x40e0a060a0806040), C64e(0xe3c2211f21dd1fe3), + C64e(0x793a43c843f2c879), C64e(0xb69a2ced2c77edb6), + C64e(0xd40dd9bed9b3bed4), C64e(0x8d47ca46ca01468d), + C64e(0x671770d970ced967), C64e(0x72afdd4bdde44b72), + C64e(0x94ed79de7933de94), 
C64e(0x98ff67d4672bd498), + C64e(0xb09323e8237be8b0), C64e(0x855bde4ade114a85), + C64e(0xbb06bd6bbd6d6bbb), C64e(0xc5bb7e2a7e912ac5), + C64e(0x4f7b34e5349ee54f), C64e(0xedd73a163ac116ed), + C64e(0x86d254c55417c586), C64e(0x9af862d7622fd79a), + C64e(0x6699ff55ffcc5566), C64e(0x11b6a794a7229411), + C64e(0x8ac04acf4a0fcf8a), C64e(0xe9d9301030c910e9), + C64e(0x040e0a060a080604), C64e(0xfe66988198e781fe), + C64e(0xa0ab0bf00b5bf0a0), C64e(0x78b4cc44ccf04478), + C64e(0x25f0d5bad54aba25), C64e(0x4b753ee33e96e34b), + C64e(0xa2ac0ef30e5ff3a2), C64e(0x5d4419fe19bafe5d), + C64e(0x80db5bc05b1bc080), C64e(0x0580858a850a8a05), + C64e(0x3fd3ecadec7ead3f), C64e(0x21fedfbcdf42bc21), + C64e(0x70a8d848d8e04870), C64e(0xf1fd0c040cf904f1), + C64e(0x63197adf7ac6df63), C64e(0x772f58c158eec177), + C64e(0xaf309f759f4575af), C64e(0x42e7a563a5846342), + C64e(0x2070503050403020), C64e(0xe5cb2e1a2ed11ae5), + C64e(0xfdef120e12e10efd), C64e(0xbf08b76db7656dbf), + C64e(0x8155d44cd4194c81), C64e(0x18243c143c301418), + C64e(0x26795f355f4c3526), C64e(0xc3b2712f719d2fc3), + C64e(0xbe8638e13867e1be), C64e(0x35c8fda2fd6aa235), + C64e(0x88c74fcc4f0bcc88), C64e(0x2e654b394b5c392e), + C64e(0x936af957f93d5793), C64e(0x55580df20daaf255), + C64e(0xfc619d829de382fc), C64e(0x7ab3c947c9f4477a), + C64e(0xc827efacef8bacc8), C64e(0xba8832e7326fe7ba), + C64e(0x324f7d2b7d642b32), C64e(0xe642a495a4d795e6), + C64e(0xc03bfba0fb9ba0c0), C64e(0x19aab398b3329819), + C64e(0x9ef668d16827d19e), C64e(0xa322817f815d7fa3), + C64e(0x44eeaa66aa886644), C64e(0x54d6827e82a87e54), + C64e(0x3bdde6abe676ab3b), C64e(0x0b959e839e16830b), + C64e(0x8cc945ca4503ca8c), C64e(0xc7bc7b297b9529c7), + C64e(0x6b056ed36ed6d36b), C64e(0x286c443c44503c28), + C64e(0xa72c8b798b5579a7), C64e(0xbc813de23d63e2bc), + C64e(0x1631271d272c1d16), C64e(0xad379a769a4176ad), + C64e(0xdb964d3b4dad3bdb), C64e(0x649efa56fac85664), + C64e(0x74a6d24ed2e84e74), C64e(0x1436221e22281e14), + C64e(0x92e476db763fdb92), C64e(0x0c121e0a1e180a0c), + C64e(0x48fcb46cb4906c48), 
C64e(0xb88f37e4376be4b8), + C64e(0x9f78e75de7255d9f), C64e(0xbd0fb26eb2616ebd), + C64e(0x43692aef2a86ef43), C64e(0xc435f1a6f193a6c4), + C64e(0x39dae3a8e372a839), C64e(0x31c6f7a4f762a431), + C64e(0xd38a593759bd37d3), C64e(0xf274868b86ff8bf2), + C64e(0xd583563256b132d5), C64e(0x8b4ec543c50d438b), + C64e(0x6e85eb59ebdc596e), C64e(0xda18c2b7c2afb7da), + C64e(0x018e8f8c8f028c01), C64e(0xb11dac64ac7964b1), + C64e(0x9cf16dd26d23d29c), C64e(0x49723be03b92e049), + C64e(0xd81fc7b4c7abb4d8), C64e(0xacb915fa1543faac), + C64e(0xf3fa090709fd07f3), C64e(0xcfa06f256f8525cf), + C64e(0xca20eaafea8fafca), C64e(0xf47d898e89f38ef4), + C64e(0x476720e9208ee947), C64e(0x1038281828201810), + C64e(0x6f0b64d564ded56f), C64e(0xf073838883fb88f0), + C64e(0x4afbb16fb1946f4a), C64e(0x5cca967296b8725c), + C64e(0x38546c246c702438), C64e(0x575f08f108aef157), + C64e(0x732152c752e6c773), C64e(0x9764f351f3355197), + C64e(0xcbae6523658d23cb), C64e(0xa125847c84597ca1), + C64e(0xe857bf9cbfcb9ce8), C64e(0x3e5d6321637c213e), + C64e(0x96ea7cdd7c37dd96), C64e(0x611e7fdc7fc2dc61), + C64e(0x0d9c9186911a860d), C64e(0x0f9b9485941e850f), + C64e(0xe04bab90abdb90e0), C64e(0x7cbac642c6f8427c), + C64e(0x712657c457e2c471), C64e(0xcc29e5aae583aacc), + C64e(0x90e373d8733bd890), C64e(0x06090f050f0c0506), + C64e(0xf7f4030103f501f7), C64e(0x1c2a36123638121c), + C64e(0xc23cfea3fe9fa3c2), C64e(0x6a8be15fe1d45f6a), + C64e(0xaebe10f91047f9ae), C64e(0x69026bd06bd2d069), + C64e(0x17bfa891a82e9117), C64e(0x9971e858e8295899), + C64e(0x3a5369276974273a), C64e(0x27f7d0b9d04eb927), + C64e(0xd991483848a938d9), C64e(0xebde351335cd13eb), + C64e(0x2be5ceb3ce56b32b), C64e(0x2277553355443322), + C64e(0xd204d6bbd6bfbbd2), C64e(0xa9399070904970a9), + C64e(0x07878089800e8907), C64e(0x33c1f2a7f266a733), + C64e(0x2decc1b6c15ab62d), C64e(0x3c5a66226678223c), + C64e(0x15b8ad92ad2a9215), C64e(0xc9a96020608920c9), + C64e(0x875cdb49db154987), C64e(0xaab01aff1a4fffaa), + C64e(0x50d8887888a07850), C64e(0xa52b8e7a8e517aa5), + C64e(0x03898a8f8a068f03), 
C64e(0x594a13f813b2f859), + C64e(0x09929b809b128009), C64e(0x1a2339173934171a), + C64e(0x651075da75cada65), C64e(0xd784533153b531d7), + C64e(0x84d551c65113c684), C64e(0xd003d3b8d3bbb8d0), + C64e(0x82dc5ec35e1fc382), C64e(0x29e2cbb0cb52b029), + C64e(0x5ac3997799b4775a), C64e(0x1e2d3311333c111e), + C64e(0x7b3d46cb46f6cb7b), C64e(0xa8b71ffc1f4bfca8), + C64e(0x6d0c61d661dad66d), C64e(0x2c624e3a4e583a2c) +}; + +#if !SPH_SMALL_FOOTPRINT_GROESTL + +static const sph_u64 T1[] = { + C64e(0xc6c632f4a5f497a5), C64e(0xf8f86f978497eb84), + C64e(0xeeee5eb099b0c799), C64e(0xf6f67a8c8d8cf78d), + C64e(0xffffe8170d17e50d), C64e(0xd6d60adcbddcb7bd), + C64e(0xdede16c8b1c8a7b1), C64e(0x91916dfc54fc3954), + C64e(0x606090f050f0c050), C64e(0x0202070503050403), + C64e(0xcece2ee0a9e087a9), C64e(0x5656d1877d87ac7d), + C64e(0xe7e7cc2b192bd519), C64e(0xb5b513a662a67162), + C64e(0x4d4d7c31e6319ae6), C64e(0xecec59b59ab5c39a), + C64e(0x8f8f40cf45cf0545), C64e(0x1f1fa3bc9dbc3e9d), + C64e(0x898949c040c00940), C64e(0xfafa68928792ef87), + C64e(0xefefd03f153fc515), C64e(0xb2b29426eb267feb), + C64e(0x8e8ece40c94007c9), C64e(0xfbfbe61d0b1ded0b), + C64e(0x41416e2fec2f82ec), C64e(0xb3b31aa967a97d67), + C64e(0x5f5f431cfd1cbefd), C64e(0x45456025ea258aea), + C64e(0x2323f9dabfda46bf), C64e(0x53535102f702a6f7), + C64e(0xe4e445a196a1d396), C64e(0x9b9b76ed5bed2d5b), + C64e(0x7575285dc25deac2), C64e(0xe1e1c5241c24d91c), + C64e(0x3d3dd4e9aee97aae), C64e(0x4c4cf2be6abe986a), + C64e(0x6c6c82ee5aeed85a), C64e(0x7e7ebdc341c3fc41), + C64e(0xf5f5f3060206f102), C64e(0x838352d14fd11d4f), + C64e(0x68688ce45ce4d05c), C64e(0x51515607f407a2f4), + C64e(0xd1d18d5c345cb934), C64e(0xf9f9e1180818e908), + C64e(0xe2e24cae93aedf93), C64e(0xabab3e9573954d73), + C64e(0x626297f553f5c453), C64e(0x2a2a6b413f41543f), + C64e(0x08081c140c14100c), C64e(0x959563f652f63152), + C64e(0x4646e9af65af8c65), C64e(0x9d9d7fe25ee2215e), + C64e(0x3030487828786028), C64e(0x3737cff8a1f86ea1), + C64e(0x0a0a1b110f11140f), C64e(0x2f2febc4b5c45eb5), + 
C64e(0x0e0e151b091b1c09), C64e(0x24247e5a365a4836), + C64e(0x1b1badb69bb6369b), C64e(0xdfdf98473d47a53d), + C64e(0xcdcda76a266a8126), C64e(0x4e4ef5bb69bb9c69), + C64e(0x7f7f334ccd4cfecd), C64e(0xeaea50ba9fbacf9f), + C64e(0x12123f2d1b2d241b), C64e(0x1d1da4b99eb93a9e), + C64e(0x5858c49c749cb074), C64e(0x343446722e72682e), + C64e(0x363641772d776c2d), C64e(0xdcdc11cdb2cda3b2), + C64e(0xb4b49d29ee2973ee), C64e(0x5b5b4d16fb16b6fb), + C64e(0xa4a4a501f60153f6), C64e(0x7676a1d74dd7ec4d), + C64e(0xb7b714a361a37561), C64e(0x7d7d3449ce49face), + C64e(0x5252df8d7b8da47b), C64e(0xdddd9f423e42a13e), + C64e(0x5e5ecd937193bc71), C64e(0x1313b1a297a22697), + C64e(0xa6a6a204f50457f5), C64e(0xb9b901b868b86968), + C64e(0x0000000000000000), C64e(0xc1c1b5742c74992c), + C64e(0x4040e0a060a08060), C64e(0xe3e3c2211f21dd1f), + C64e(0x79793a43c843f2c8), C64e(0xb6b69a2ced2c77ed), + C64e(0xd4d40dd9bed9b3be), C64e(0x8d8d47ca46ca0146), + C64e(0x67671770d970ced9), C64e(0x7272afdd4bdde44b), + C64e(0x9494ed79de7933de), C64e(0x9898ff67d4672bd4), + C64e(0xb0b09323e8237be8), C64e(0x85855bde4ade114a), + C64e(0xbbbb06bd6bbd6d6b), C64e(0xc5c5bb7e2a7e912a), + C64e(0x4f4f7b34e5349ee5), C64e(0xededd73a163ac116), + C64e(0x8686d254c55417c5), C64e(0x9a9af862d7622fd7), + C64e(0x666699ff55ffcc55), C64e(0x1111b6a794a72294), + C64e(0x8a8ac04acf4a0fcf), C64e(0xe9e9d9301030c910), + C64e(0x04040e0a060a0806), C64e(0xfefe66988198e781), + C64e(0xa0a0ab0bf00b5bf0), C64e(0x7878b4cc44ccf044), + C64e(0x2525f0d5bad54aba), C64e(0x4b4b753ee33e96e3), + C64e(0xa2a2ac0ef30e5ff3), C64e(0x5d5d4419fe19bafe), + C64e(0x8080db5bc05b1bc0), C64e(0x050580858a850a8a), + C64e(0x3f3fd3ecadec7ead), C64e(0x2121fedfbcdf42bc), + C64e(0x7070a8d848d8e048), C64e(0xf1f1fd0c040cf904), + C64e(0x6363197adf7ac6df), C64e(0x77772f58c158eec1), + C64e(0xafaf309f759f4575), C64e(0x4242e7a563a58463), + C64e(0x2020705030504030), C64e(0xe5e5cb2e1a2ed11a), + C64e(0xfdfdef120e12e10e), C64e(0xbfbf08b76db7656d), + C64e(0x818155d44cd4194c), C64e(0x1818243c143c3014), + 
C64e(0x2626795f355f4c35), C64e(0xc3c3b2712f719d2f), + C64e(0xbebe8638e13867e1), C64e(0x3535c8fda2fd6aa2), + C64e(0x8888c74fcc4f0bcc), C64e(0x2e2e654b394b5c39), + C64e(0x93936af957f93d57), C64e(0x5555580df20daaf2), + C64e(0xfcfc619d829de382), C64e(0x7a7ab3c947c9f447), + C64e(0xc8c827efacef8bac), C64e(0xbaba8832e7326fe7), + C64e(0x32324f7d2b7d642b), C64e(0xe6e642a495a4d795), + C64e(0xc0c03bfba0fb9ba0), C64e(0x1919aab398b33298), + C64e(0x9e9ef668d16827d1), C64e(0xa3a322817f815d7f), + C64e(0x4444eeaa66aa8866), C64e(0x5454d6827e82a87e), + C64e(0x3b3bdde6abe676ab), C64e(0x0b0b959e839e1683), + C64e(0x8c8cc945ca4503ca), C64e(0xc7c7bc7b297b9529), + C64e(0x6b6b056ed36ed6d3), C64e(0x28286c443c44503c), + C64e(0xa7a72c8b798b5579), C64e(0xbcbc813de23d63e2), + C64e(0x161631271d272c1d), C64e(0xadad379a769a4176), + C64e(0xdbdb964d3b4dad3b), C64e(0x64649efa56fac856), + C64e(0x7474a6d24ed2e84e), C64e(0x141436221e22281e), + C64e(0x9292e476db763fdb), C64e(0x0c0c121e0a1e180a), + C64e(0x4848fcb46cb4906c), C64e(0xb8b88f37e4376be4), + C64e(0x9f9f78e75de7255d), C64e(0xbdbd0fb26eb2616e), + C64e(0x4343692aef2a86ef), C64e(0xc4c435f1a6f193a6), + C64e(0x3939dae3a8e372a8), C64e(0x3131c6f7a4f762a4), + C64e(0xd3d38a593759bd37), C64e(0xf2f274868b86ff8b), + C64e(0xd5d583563256b132), C64e(0x8b8b4ec543c50d43), + C64e(0x6e6e85eb59ebdc59), C64e(0xdada18c2b7c2afb7), + C64e(0x01018e8f8c8f028c), C64e(0xb1b11dac64ac7964), + C64e(0x9c9cf16dd26d23d2), C64e(0x4949723be03b92e0), + C64e(0xd8d81fc7b4c7abb4), C64e(0xacacb915fa1543fa), + C64e(0xf3f3fa090709fd07), C64e(0xcfcfa06f256f8525), + C64e(0xcaca20eaafea8faf), C64e(0xf4f47d898e89f38e), + C64e(0x47476720e9208ee9), C64e(0x1010382818282018), + C64e(0x6f6f0b64d564ded5), C64e(0xf0f073838883fb88), + C64e(0x4a4afbb16fb1946f), C64e(0x5c5cca967296b872), + C64e(0x3838546c246c7024), C64e(0x57575f08f108aef1), + C64e(0x73732152c752e6c7), C64e(0x979764f351f33551), + C64e(0xcbcbae6523658d23), C64e(0xa1a125847c84597c), + C64e(0xe8e857bf9cbfcb9c), C64e(0x3e3e5d6321637c21), + 
C64e(0x9696ea7cdd7c37dd), C64e(0x61611e7fdc7fc2dc), + C64e(0x0d0d9c9186911a86), C64e(0x0f0f9b9485941e85), + C64e(0xe0e04bab90abdb90), C64e(0x7c7cbac642c6f842), + C64e(0x71712657c457e2c4), C64e(0xcccc29e5aae583aa), + C64e(0x9090e373d8733bd8), C64e(0x0606090f050f0c05), + C64e(0xf7f7f4030103f501), C64e(0x1c1c2a3612363812), + C64e(0xc2c23cfea3fe9fa3), C64e(0x6a6a8be15fe1d45f), + C64e(0xaeaebe10f91047f9), C64e(0x6969026bd06bd2d0), + C64e(0x1717bfa891a82e91), C64e(0x999971e858e82958), + C64e(0x3a3a536927697427), C64e(0x2727f7d0b9d04eb9), + C64e(0xd9d991483848a938), C64e(0xebebde351335cd13), + C64e(0x2b2be5ceb3ce56b3), C64e(0x2222775533554433), + C64e(0xd2d204d6bbd6bfbb), C64e(0xa9a9399070904970), + C64e(0x0707878089800e89), C64e(0x3333c1f2a7f266a7), + C64e(0x2d2decc1b6c15ab6), C64e(0x3c3c5a6622667822), + C64e(0x1515b8ad92ad2a92), C64e(0xc9c9a96020608920), + C64e(0x87875cdb49db1549), C64e(0xaaaab01aff1a4fff), + C64e(0x5050d8887888a078), C64e(0xa5a52b8e7a8e517a), + C64e(0x0303898a8f8a068f), C64e(0x59594a13f813b2f8), + C64e(0x0909929b809b1280), C64e(0x1a1a233917393417), + C64e(0x65651075da75cada), C64e(0xd7d784533153b531), + C64e(0x8484d551c65113c6), C64e(0xd0d003d3b8d3bbb8), + C64e(0x8282dc5ec35e1fc3), C64e(0x2929e2cbb0cb52b0), + C64e(0x5a5ac3997799b477), C64e(0x1e1e2d3311333c11), + C64e(0x7b7b3d46cb46f6cb), C64e(0xa8a8b71ffc1f4bfc), + C64e(0x6d6d0c61d661dad6), C64e(0x2c2c624e3a4e583a) +}; + +static const sph_u64 T2[] = { + C64e(0xa5c6c632f4a5f497), C64e(0x84f8f86f978497eb), + C64e(0x99eeee5eb099b0c7), C64e(0x8df6f67a8c8d8cf7), + C64e(0x0dffffe8170d17e5), C64e(0xbdd6d60adcbddcb7), + C64e(0xb1dede16c8b1c8a7), C64e(0x5491916dfc54fc39), + C64e(0x50606090f050f0c0), C64e(0x0302020705030504), + C64e(0xa9cece2ee0a9e087), C64e(0x7d5656d1877d87ac), + C64e(0x19e7e7cc2b192bd5), C64e(0x62b5b513a662a671), + C64e(0xe64d4d7c31e6319a), C64e(0x9aecec59b59ab5c3), + C64e(0x458f8f40cf45cf05), C64e(0x9d1f1fa3bc9dbc3e), + C64e(0x40898949c040c009), C64e(0x87fafa68928792ef), + 
C64e(0x15efefd03f153fc5), C64e(0xebb2b29426eb267f), + C64e(0xc98e8ece40c94007), C64e(0x0bfbfbe61d0b1ded), + C64e(0xec41416e2fec2f82), C64e(0x67b3b31aa967a97d), + C64e(0xfd5f5f431cfd1cbe), C64e(0xea45456025ea258a), + C64e(0xbf2323f9dabfda46), C64e(0xf753535102f702a6), + C64e(0x96e4e445a196a1d3), C64e(0x5b9b9b76ed5bed2d), + C64e(0xc27575285dc25dea), C64e(0x1ce1e1c5241c24d9), + C64e(0xae3d3dd4e9aee97a), C64e(0x6a4c4cf2be6abe98), + C64e(0x5a6c6c82ee5aeed8), C64e(0x417e7ebdc341c3fc), + C64e(0x02f5f5f3060206f1), C64e(0x4f838352d14fd11d), + C64e(0x5c68688ce45ce4d0), C64e(0xf451515607f407a2), + C64e(0x34d1d18d5c345cb9), C64e(0x08f9f9e1180818e9), + C64e(0x93e2e24cae93aedf), C64e(0x73abab3e9573954d), + C64e(0x53626297f553f5c4), C64e(0x3f2a2a6b413f4154), + C64e(0x0c08081c140c1410), C64e(0x52959563f652f631), + C64e(0x654646e9af65af8c), C64e(0x5e9d9d7fe25ee221), + C64e(0x2830304878287860), C64e(0xa13737cff8a1f86e), + C64e(0x0f0a0a1b110f1114), C64e(0xb52f2febc4b5c45e), + C64e(0x090e0e151b091b1c), C64e(0x3624247e5a365a48), + C64e(0x9b1b1badb69bb636), C64e(0x3ddfdf98473d47a5), + C64e(0x26cdcda76a266a81), C64e(0x694e4ef5bb69bb9c), + C64e(0xcd7f7f334ccd4cfe), C64e(0x9feaea50ba9fbacf), + C64e(0x1b12123f2d1b2d24), C64e(0x9e1d1da4b99eb93a), + C64e(0x745858c49c749cb0), C64e(0x2e343446722e7268), + C64e(0x2d363641772d776c), C64e(0xb2dcdc11cdb2cda3), + C64e(0xeeb4b49d29ee2973), C64e(0xfb5b5b4d16fb16b6), + C64e(0xf6a4a4a501f60153), C64e(0x4d7676a1d74dd7ec), + C64e(0x61b7b714a361a375), C64e(0xce7d7d3449ce49fa), + C64e(0x7b5252df8d7b8da4), C64e(0x3edddd9f423e42a1), + C64e(0x715e5ecd937193bc), C64e(0x971313b1a297a226), + C64e(0xf5a6a6a204f50457), C64e(0x68b9b901b868b869), + C64e(0x0000000000000000), C64e(0x2cc1c1b5742c7499), + C64e(0x604040e0a060a080), C64e(0x1fe3e3c2211f21dd), + C64e(0xc879793a43c843f2), C64e(0xedb6b69a2ced2c77), + C64e(0xbed4d40dd9bed9b3), C64e(0x468d8d47ca46ca01), + C64e(0xd967671770d970ce), C64e(0x4b7272afdd4bdde4), + C64e(0xde9494ed79de7933), C64e(0xd49898ff67d4672b), + 
C64e(0xe8b0b09323e8237b), C64e(0x4a85855bde4ade11), + C64e(0x6bbbbb06bd6bbd6d), C64e(0x2ac5c5bb7e2a7e91), + C64e(0xe54f4f7b34e5349e), C64e(0x16ededd73a163ac1), + C64e(0xc58686d254c55417), C64e(0xd79a9af862d7622f), + C64e(0x55666699ff55ffcc), C64e(0x941111b6a794a722), + C64e(0xcf8a8ac04acf4a0f), C64e(0x10e9e9d9301030c9), + C64e(0x0604040e0a060a08), C64e(0x81fefe66988198e7), + C64e(0xf0a0a0ab0bf00b5b), C64e(0x447878b4cc44ccf0), + C64e(0xba2525f0d5bad54a), C64e(0xe34b4b753ee33e96), + C64e(0xf3a2a2ac0ef30e5f), C64e(0xfe5d5d4419fe19ba), + C64e(0xc08080db5bc05b1b), C64e(0x8a050580858a850a), + C64e(0xad3f3fd3ecadec7e), C64e(0xbc2121fedfbcdf42), + C64e(0x487070a8d848d8e0), C64e(0x04f1f1fd0c040cf9), + C64e(0xdf6363197adf7ac6), C64e(0xc177772f58c158ee), + C64e(0x75afaf309f759f45), C64e(0x634242e7a563a584), + C64e(0x3020207050305040), C64e(0x1ae5e5cb2e1a2ed1), + C64e(0x0efdfdef120e12e1), C64e(0x6dbfbf08b76db765), + C64e(0x4c818155d44cd419), C64e(0x141818243c143c30), + C64e(0x352626795f355f4c), C64e(0x2fc3c3b2712f719d), + C64e(0xe1bebe8638e13867), C64e(0xa23535c8fda2fd6a), + C64e(0xcc8888c74fcc4f0b), C64e(0x392e2e654b394b5c), + C64e(0x5793936af957f93d), C64e(0xf25555580df20daa), + C64e(0x82fcfc619d829de3), C64e(0x477a7ab3c947c9f4), + C64e(0xacc8c827efacef8b), C64e(0xe7baba8832e7326f), + C64e(0x2b32324f7d2b7d64), C64e(0x95e6e642a495a4d7), + C64e(0xa0c0c03bfba0fb9b), C64e(0x981919aab398b332), + C64e(0xd19e9ef668d16827), C64e(0x7fa3a322817f815d), + C64e(0x664444eeaa66aa88), C64e(0x7e5454d6827e82a8), + C64e(0xab3b3bdde6abe676), C64e(0x830b0b959e839e16), + C64e(0xca8c8cc945ca4503), C64e(0x29c7c7bc7b297b95), + C64e(0xd36b6b056ed36ed6), C64e(0x3c28286c443c4450), + C64e(0x79a7a72c8b798b55), C64e(0xe2bcbc813de23d63), + C64e(0x1d161631271d272c), C64e(0x76adad379a769a41), + C64e(0x3bdbdb964d3b4dad), C64e(0x5664649efa56fac8), + C64e(0x4e7474a6d24ed2e8), C64e(0x1e141436221e2228), + C64e(0xdb9292e476db763f), C64e(0x0a0c0c121e0a1e18), + C64e(0x6c4848fcb46cb490), C64e(0xe4b8b88f37e4376b), + 
C64e(0x5d9f9f78e75de725), C64e(0x6ebdbd0fb26eb261), + C64e(0xef4343692aef2a86), C64e(0xa6c4c435f1a6f193), + C64e(0xa83939dae3a8e372), C64e(0xa43131c6f7a4f762), + C64e(0x37d3d38a593759bd), C64e(0x8bf2f274868b86ff), + C64e(0x32d5d583563256b1), C64e(0x438b8b4ec543c50d), + C64e(0x596e6e85eb59ebdc), C64e(0xb7dada18c2b7c2af), + C64e(0x8c01018e8f8c8f02), C64e(0x64b1b11dac64ac79), + C64e(0xd29c9cf16dd26d23), C64e(0xe04949723be03b92), + C64e(0xb4d8d81fc7b4c7ab), C64e(0xfaacacb915fa1543), + C64e(0x07f3f3fa090709fd), C64e(0x25cfcfa06f256f85), + C64e(0xafcaca20eaafea8f), C64e(0x8ef4f47d898e89f3), + C64e(0xe947476720e9208e), C64e(0x1810103828182820), + C64e(0xd56f6f0b64d564de), C64e(0x88f0f073838883fb), + C64e(0x6f4a4afbb16fb194), C64e(0x725c5cca967296b8), + C64e(0x243838546c246c70), C64e(0xf157575f08f108ae), + C64e(0xc773732152c752e6), C64e(0x51979764f351f335), + C64e(0x23cbcbae6523658d), C64e(0x7ca1a125847c8459), + C64e(0x9ce8e857bf9cbfcb), C64e(0x213e3e5d6321637c), + C64e(0xdd9696ea7cdd7c37), C64e(0xdc61611e7fdc7fc2), + C64e(0x860d0d9c9186911a), C64e(0x850f0f9b9485941e), + C64e(0x90e0e04bab90abdb), C64e(0x427c7cbac642c6f8), + C64e(0xc471712657c457e2), C64e(0xaacccc29e5aae583), + C64e(0xd89090e373d8733b), C64e(0x050606090f050f0c), + C64e(0x01f7f7f4030103f5), C64e(0x121c1c2a36123638), + C64e(0xa3c2c23cfea3fe9f), C64e(0x5f6a6a8be15fe1d4), + C64e(0xf9aeaebe10f91047), C64e(0xd06969026bd06bd2), + C64e(0x911717bfa891a82e), C64e(0x58999971e858e829), + C64e(0x273a3a5369276974), C64e(0xb92727f7d0b9d04e), + C64e(0x38d9d991483848a9), C64e(0x13ebebde351335cd), + C64e(0xb32b2be5ceb3ce56), C64e(0x3322227755335544), + C64e(0xbbd2d204d6bbd6bf), C64e(0x70a9a93990709049), + C64e(0x890707878089800e), C64e(0xa73333c1f2a7f266), + C64e(0xb62d2decc1b6c15a), C64e(0x223c3c5a66226678), + C64e(0x921515b8ad92ad2a), C64e(0x20c9c9a960206089), + C64e(0x4987875cdb49db15), C64e(0xffaaaab01aff1a4f), + C64e(0x785050d8887888a0), C64e(0x7aa5a52b8e7a8e51), + C64e(0x8f0303898a8f8a06), C64e(0xf859594a13f813b2), + 
C64e(0x800909929b809b12), C64e(0x171a1a2339173934), + C64e(0xda65651075da75ca), C64e(0x31d7d784533153b5), + C64e(0xc68484d551c65113), C64e(0xb8d0d003d3b8d3bb), + C64e(0xc38282dc5ec35e1f), C64e(0xb02929e2cbb0cb52), + C64e(0x775a5ac3997799b4), C64e(0x111e1e2d3311333c), + C64e(0xcb7b7b3d46cb46f6), C64e(0xfca8a8b71ffc1f4b), + C64e(0xd66d6d0c61d661da), C64e(0x3a2c2c624e3a4e58) +}; + +static const sph_u64 T3[] = { + C64e(0x97a5c6c632f4a5f4), C64e(0xeb84f8f86f978497), + C64e(0xc799eeee5eb099b0), C64e(0xf78df6f67a8c8d8c), + C64e(0xe50dffffe8170d17), C64e(0xb7bdd6d60adcbddc), + C64e(0xa7b1dede16c8b1c8), C64e(0x395491916dfc54fc), + C64e(0xc050606090f050f0), C64e(0x0403020207050305), + C64e(0x87a9cece2ee0a9e0), C64e(0xac7d5656d1877d87), + C64e(0xd519e7e7cc2b192b), C64e(0x7162b5b513a662a6), + C64e(0x9ae64d4d7c31e631), C64e(0xc39aecec59b59ab5), + C64e(0x05458f8f40cf45cf), C64e(0x3e9d1f1fa3bc9dbc), + C64e(0x0940898949c040c0), C64e(0xef87fafa68928792), + C64e(0xc515efefd03f153f), C64e(0x7febb2b29426eb26), + C64e(0x07c98e8ece40c940), C64e(0xed0bfbfbe61d0b1d), + C64e(0x82ec41416e2fec2f), C64e(0x7d67b3b31aa967a9), + C64e(0xbefd5f5f431cfd1c), C64e(0x8aea45456025ea25), + C64e(0x46bf2323f9dabfda), C64e(0xa6f753535102f702), + C64e(0xd396e4e445a196a1), C64e(0x2d5b9b9b76ed5bed), + C64e(0xeac27575285dc25d), C64e(0xd91ce1e1c5241c24), + C64e(0x7aae3d3dd4e9aee9), C64e(0x986a4c4cf2be6abe), + C64e(0xd85a6c6c82ee5aee), C64e(0xfc417e7ebdc341c3), + C64e(0xf102f5f5f3060206), C64e(0x1d4f838352d14fd1), + C64e(0xd05c68688ce45ce4), C64e(0xa2f451515607f407), + C64e(0xb934d1d18d5c345c), C64e(0xe908f9f9e1180818), + C64e(0xdf93e2e24cae93ae), C64e(0x4d73abab3e957395), + C64e(0xc453626297f553f5), C64e(0x543f2a2a6b413f41), + C64e(0x100c08081c140c14), C64e(0x3152959563f652f6), + C64e(0x8c654646e9af65af), C64e(0x215e9d9d7fe25ee2), + C64e(0x6028303048782878), C64e(0x6ea13737cff8a1f8), + C64e(0x140f0a0a1b110f11), C64e(0x5eb52f2febc4b5c4), + C64e(0x1c090e0e151b091b), C64e(0x483624247e5a365a), + 
C64e(0x369b1b1badb69bb6), C64e(0xa53ddfdf98473d47), + C64e(0x8126cdcda76a266a), C64e(0x9c694e4ef5bb69bb), + C64e(0xfecd7f7f334ccd4c), C64e(0xcf9feaea50ba9fba), + C64e(0x241b12123f2d1b2d), C64e(0x3a9e1d1da4b99eb9), + C64e(0xb0745858c49c749c), C64e(0x682e343446722e72), + C64e(0x6c2d363641772d77), C64e(0xa3b2dcdc11cdb2cd), + C64e(0x73eeb4b49d29ee29), C64e(0xb6fb5b5b4d16fb16), + C64e(0x53f6a4a4a501f601), C64e(0xec4d7676a1d74dd7), + C64e(0x7561b7b714a361a3), C64e(0xface7d7d3449ce49), + C64e(0xa47b5252df8d7b8d), C64e(0xa13edddd9f423e42), + C64e(0xbc715e5ecd937193), C64e(0x26971313b1a297a2), + C64e(0x57f5a6a6a204f504), C64e(0x6968b9b901b868b8), + C64e(0x0000000000000000), C64e(0x992cc1c1b5742c74), + C64e(0x80604040e0a060a0), C64e(0xdd1fe3e3c2211f21), + C64e(0xf2c879793a43c843), C64e(0x77edb6b69a2ced2c), + C64e(0xb3bed4d40dd9bed9), C64e(0x01468d8d47ca46ca), + C64e(0xced967671770d970), C64e(0xe44b7272afdd4bdd), + C64e(0x33de9494ed79de79), C64e(0x2bd49898ff67d467), + C64e(0x7be8b0b09323e823), C64e(0x114a85855bde4ade), + C64e(0x6d6bbbbb06bd6bbd), C64e(0x912ac5c5bb7e2a7e), + C64e(0x9ee54f4f7b34e534), C64e(0xc116ededd73a163a), + C64e(0x17c58686d254c554), C64e(0x2fd79a9af862d762), + C64e(0xcc55666699ff55ff), C64e(0x22941111b6a794a7), + C64e(0x0fcf8a8ac04acf4a), C64e(0xc910e9e9d9301030), + C64e(0x080604040e0a060a), C64e(0xe781fefe66988198), + C64e(0x5bf0a0a0ab0bf00b), C64e(0xf0447878b4cc44cc), + C64e(0x4aba2525f0d5bad5), C64e(0x96e34b4b753ee33e), + C64e(0x5ff3a2a2ac0ef30e), C64e(0xbafe5d5d4419fe19), + C64e(0x1bc08080db5bc05b), C64e(0x0a8a050580858a85), + C64e(0x7ead3f3fd3ecadec), C64e(0x42bc2121fedfbcdf), + C64e(0xe0487070a8d848d8), C64e(0xf904f1f1fd0c040c), + C64e(0xc6df6363197adf7a), C64e(0xeec177772f58c158), + C64e(0x4575afaf309f759f), C64e(0x84634242e7a563a5), + C64e(0x4030202070503050), C64e(0xd11ae5e5cb2e1a2e), + C64e(0xe10efdfdef120e12), C64e(0x656dbfbf08b76db7), + C64e(0x194c818155d44cd4), C64e(0x30141818243c143c), + C64e(0x4c352626795f355f), C64e(0x9d2fc3c3b2712f71), + 
C64e(0x67e1bebe8638e138), C64e(0x6aa23535c8fda2fd), + C64e(0x0bcc8888c74fcc4f), C64e(0x5c392e2e654b394b), + C64e(0x3d5793936af957f9), C64e(0xaaf25555580df20d), + C64e(0xe382fcfc619d829d), C64e(0xf4477a7ab3c947c9), + C64e(0x8bacc8c827efacef), C64e(0x6fe7baba8832e732), + C64e(0x642b32324f7d2b7d), C64e(0xd795e6e642a495a4), + C64e(0x9ba0c0c03bfba0fb), C64e(0x32981919aab398b3), + C64e(0x27d19e9ef668d168), C64e(0x5d7fa3a322817f81), + C64e(0x88664444eeaa66aa), C64e(0xa87e5454d6827e82), + C64e(0x76ab3b3bdde6abe6), C64e(0x16830b0b959e839e), + C64e(0x03ca8c8cc945ca45), C64e(0x9529c7c7bc7b297b), + C64e(0xd6d36b6b056ed36e), C64e(0x503c28286c443c44), + C64e(0x5579a7a72c8b798b), C64e(0x63e2bcbc813de23d), + C64e(0x2c1d161631271d27), C64e(0x4176adad379a769a), + C64e(0xad3bdbdb964d3b4d), C64e(0xc85664649efa56fa), + C64e(0xe84e7474a6d24ed2), C64e(0x281e141436221e22), + C64e(0x3fdb9292e476db76), C64e(0x180a0c0c121e0a1e), + C64e(0x906c4848fcb46cb4), C64e(0x6be4b8b88f37e437), + C64e(0x255d9f9f78e75de7), C64e(0x616ebdbd0fb26eb2), + C64e(0x86ef4343692aef2a), C64e(0x93a6c4c435f1a6f1), + C64e(0x72a83939dae3a8e3), C64e(0x62a43131c6f7a4f7), + C64e(0xbd37d3d38a593759), C64e(0xff8bf2f274868b86), + C64e(0xb132d5d583563256), C64e(0x0d438b8b4ec543c5), + C64e(0xdc596e6e85eb59eb), C64e(0xafb7dada18c2b7c2), + C64e(0x028c01018e8f8c8f), C64e(0x7964b1b11dac64ac), + C64e(0x23d29c9cf16dd26d), C64e(0x92e04949723be03b), + C64e(0xabb4d8d81fc7b4c7), C64e(0x43faacacb915fa15), + C64e(0xfd07f3f3fa090709), C64e(0x8525cfcfa06f256f), + C64e(0x8fafcaca20eaafea), C64e(0xf38ef4f47d898e89), + C64e(0x8ee947476720e920), C64e(0x2018101038281828), + C64e(0xded56f6f0b64d564), C64e(0xfb88f0f073838883), + C64e(0x946f4a4afbb16fb1), C64e(0xb8725c5cca967296), + C64e(0x70243838546c246c), C64e(0xaef157575f08f108), + C64e(0xe6c773732152c752), C64e(0x3551979764f351f3), + C64e(0x8d23cbcbae652365), C64e(0x597ca1a125847c84), + C64e(0xcb9ce8e857bf9cbf), C64e(0x7c213e3e5d632163), + C64e(0x37dd9696ea7cdd7c), C64e(0xc2dc61611e7fdc7f), + 
C64e(0x1a860d0d9c918691), C64e(0x1e850f0f9b948594), + C64e(0xdb90e0e04bab90ab), C64e(0xf8427c7cbac642c6), + C64e(0xe2c471712657c457), C64e(0x83aacccc29e5aae5), + C64e(0x3bd89090e373d873), C64e(0x0c050606090f050f), + C64e(0xf501f7f7f4030103), C64e(0x38121c1c2a361236), + C64e(0x9fa3c2c23cfea3fe), C64e(0xd45f6a6a8be15fe1), + C64e(0x47f9aeaebe10f910), C64e(0xd2d06969026bd06b), + C64e(0x2e911717bfa891a8), C64e(0x2958999971e858e8), + C64e(0x74273a3a53692769), C64e(0x4eb92727f7d0b9d0), + C64e(0xa938d9d991483848), C64e(0xcd13ebebde351335), + C64e(0x56b32b2be5ceb3ce), C64e(0x4433222277553355), + C64e(0xbfbbd2d204d6bbd6), C64e(0x4970a9a939907090), + C64e(0x0e89070787808980), C64e(0x66a73333c1f2a7f2), + C64e(0x5ab62d2decc1b6c1), C64e(0x78223c3c5a662266), + C64e(0x2a921515b8ad92ad), C64e(0x8920c9c9a9602060), + C64e(0x154987875cdb49db), C64e(0x4fffaaaab01aff1a), + C64e(0xa0785050d8887888), C64e(0x517aa5a52b8e7a8e), + C64e(0x068f0303898a8f8a), C64e(0xb2f859594a13f813), + C64e(0x12800909929b809b), C64e(0x34171a1a23391739), + C64e(0xcada65651075da75), C64e(0xb531d7d784533153), + C64e(0x13c68484d551c651), C64e(0xbbb8d0d003d3b8d3), + C64e(0x1fc38282dc5ec35e), C64e(0x52b02929e2cbb0cb), + C64e(0xb4775a5ac3997799), C64e(0x3c111e1e2d331133), + C64e(0xf6cb7b7b3d46cb46), C64e(0x4bfca8a8b71ffc1f), + C64e(0xdad66d6d0c61d661), C64e(0x583a2c2c624e3a4e) +}; + +#endif + /* T4: lookup table read as T4[B64_4(x)] by RSTT and RBTT. It is declared after the #endif just above, so it is not part of that conditional section. The SPH_SMALL_FOOTPRINT_GROESTL variants of RSTT/RBTT also apply R64(.., 8), R64(.., 16) and R64(.., 24) to T4 lookups where the full-table variants read T5, T6 and T7. */ +static const sph_u64 T4[] = { + C64e(0xf497a5c6c632f4a5), C64e(0x97eb84f8f86f9784), + C64e(0xb0c799eeee5eb099), C64e(0x8cf78df6f67a8c8d), + C64e(0x17e50dffffe8170d), C64e(0xdcb7bdd6d60adcbd), + C64e(0xc8a7b1dede16c8b1), C64e(0xfc395491916dfc54), + C64e(0xf0c050606090f050), C64e(0x0504030202070503), + C64e(0xe087a9cece2ee0a9), C64e(0x87ac7d5656d1877d), + C64e(0x2bd519e7e7cc2b19), C64e(0xa67162b5b513a662), + C64e(0x319ae64d4d7c31e6), C64e(0xb5c39aecec59b59a), + C64e(0xcf05458f8f40cf45), C64e(0xbc3e9d1f1fa3bc9d), + C64e(0xc00940898949c040), C64e(0x92ef87fafa689287), + C64e(0x3fc515efefd03f15), C64e(0x267febb2b29426eb), + 
C64e(0x4007c98e8ece40c9), C64e(0x1ded0bfbfbe61d0b), + C64e(0x2f82ec41416e2fec), C64e(0xa97d67b3b31aa967), + C64e(0x1cbefd5f5f431cfd), C64e(0x258aea45456025ea), + C64e(0xda46bf2323f9dabf), C64e(0x02a6f753535102f7), + C64e(0xa1d396e4e445a196), C64e(0xed2d5b9b9b76ed5b), + C64e(0x5deac27575285dc2), C64e(0x24d91ce1e1c5241c), + C64e(0xe97aae3d3dd4e9ae), C64e(0xbe986a4c4cf2be6a), + C64e(0xeed85a6c6c82ee5a), C64e(0xc3fc417e7ebdc341), + C64e(0x06f102f5f5f30602), C64e(0xd11d4f838352d14f), + C64e(0xe4d05c68688ce45c), C64e(0x07a2f451515607f4), + C64e(0x5cb934d1d18d5c34), C64e(0x18e908f9f9e11808), + C64e(0xaedf93e2e24cae93), C64e(0x954d73abab3e9573), + C64e(0xf5c453626297f553), C64e(0x41543f2a2a6b413f), + C64e(0x14100c08081c140c), C64e(0xf63152959563f652), + C64e(0xaf8c654646e9af65), C64e(0xe2215e9d9d7fe25e), + C64e(0x7860283030487828), C64e(0xf86ea13737cff8a1), + C64e(0x11140f0a0a1b110f), C64e(0xc45eb52f2febc4b5), + C64e(0x1b1c090e0e151b09), C64e(0x5a483624247e5a36), + C64e(0xb6369b1b1badb69b), C64e(0x47a53ddfdf98473d), + C64e(0x6a8126cdcda76a26), C64e(0xbb9c694e4ef5bb69), + C64e(0x4cfecd7f7f334ccd), C64e(0xbacf9feaea50ba9f), + C64e(0x2d241b12123f2d1b), C64e(0xb93a9e1d1da4b99e), + C64e(0x9cb0745858c49c74), C64e(0x72682e343446722e), + C64e(0x776c2d363641772d), C64e(0xcda3b2dcdc11cdb2), + C64e(0x2973eeb4b49d29ee), C64e(0x16b6fb5b5b4d16fb), + C64e(0x0153f6a4a4a501f6), C64e(0xd7ec4d7676a1d74d), + C64e(0xa37561b7b714a361), C64e(0x49face7d7d3449ce), + C64e(0x8da47b5252df8d7b), C64e(0x42a13edddd9f423e), + C64e(0x93bc715e5ecd9371), C64e(0xa226971313b1a297), + C64e(0x0457f5a6a6a204f5), C64e(0xb86968b9b901b868), + C64e(0x0000000000000000), C64e(0x74992cc1c1b5742c), + C64e(0xa080604040e0a060), C64e(0x21dd1fe3e3c2211f), + C64e(0x43f2c879793a43c8), C64e(0x2c77edb6b69a2ced), + C64e(0xd9b3bed4d40dd9be), C64e(0xca01468d8d47ca46), + C64e(0x70ced967671770d9), C64e(0xdde44b7272afdd4b), + C64e(0x7933de9494ed79de), C64e(0x672bd49898ff67d4), + C64e(0x237be8b0b09323e8), C64e(0xde114a85855bde4a), + 
C64e(0xbd6d6bbbbb06bd6b), C64e(0x7e912ac5c5bb7e2a), + C64e(0x349ee54f4f7b34e5), C64e(0x3ac116ededd73a16), + C64e(0x5417c58686d254c5), C64e(0x622fd79a9af862d7), + C64e(0xffcc55666699ff55), C64e(0xa722941111b6a794), + C64e(0x4a0fcf8a8ac04acf), C64e(0x30c910e9e9d93010), + C64e(0x0a080604040e0a06), C64e(0x98e781fefe669881), + C64e(0x0b5bf0a0a0ab0bf0), C64e(0xccf0447878b4cc44), + C64e(0xd54aba2525f0d5ba), C64e(0x3e96e34b4b753ee3), + C64e(0x0e5ff3a2a2ac0ef3), C64e(0x19bafe5d5d4419fe), + C64e(0x5b1bc08080db5bc0), C64e(0x850a8a050580858a), + C64e(0xec7ead3f3fd3ecad), C64e(0xdf42bc2121fedfbc), + C64e(0xd8e0487070a8d848), C64e(0x0cf904f1f1fd0c04), + C64e(0x7ac6df6363197adf), C64e(0x58eec177772f58c1), + C64e(0x9f4575afaf309f75), C64e(0xa584634242e7a563), + C64e(0x5040302020705030), C64e(0x2ed11ae5e5cb2e1a), + C64e(0x12e10efdfdef120e), C64e(0xb7656dbfbf08b76d), + C64e(0xd4194c818155d44c), C64e(0x3c30141818243c14), + C64e(0x5f4c352626795f35), C64e(0x719d2fc3c3b2712f), + C64e(0x3867e1bebe8638e1), C64e(0xfd6aa23535c8fda2), + C64e(0x4f0bcc8888c74fcc), C64e(0x4b5c392e2e654b39), + C64e(0xf93d5793936af957), C64e(0x0daaf25555580df2), + C64e(0x9de382fcfc619d82), C64e(0xc9f4477a7ab3c947), + C64e(0xef8bacc8c827efac), C64e(0x326fe7baba8832e7), + C64e(0x7d642b32324f7d2b), C64e(0xa4d795e6e642a495), + C64e(0xfb9ba0c0c03bfba0), C64e(0xb332981919aab398), + C64e(0x6827d19e9ef668d1), C64e(0x815d7fa3a322817f), + C64e(0xaa88664444eeaa66), C64e(0x82a87e5454d6827e), + C64e(0xe676ab3b3bdde6ab), C64e(0x9e16830b0b959e83), + C64e(0x4503ca8c8cc945ca), C64e(0x7b9529c7c7bc7b29), + C64e(0x6ed6d36b6b056ed3), C64e(0x44503c28286c443c), + C64e(0x8b5579a7a72c8b79), C64e(0x3d63e2bcbc813de2), + C64e(0x272c1d161631271d), C64e(0x9a4176adad379a76), + C64e(0x4dad3bdbdb964d3b), C64e(0xfac85664649efa56), + C64e(0xd2e84e7474a6d24e), C64e(0x22281e141436221e), + C64e(0x763fdb9292e476db), C64e(0x1e180a0c0c121e0a), + C64e(0xb4906c4848fcb46c), C64e(0x376be4b8b88f37e4), + C64e(0xe7255d9f9f78e75d), C64e(0xb2616ebdbd0fb26e), + 
C64e(0x2a86ef4343692aef), C64e(0xf193a6c4c435f1a6), + C64e(0xe372a83939dae3a8), C64e(0xf762a43131c6f7a4), + C64e(0x59bd37d3d38a5937), C64e(0x86ff8bf2f274868b), + C64e(0x56b132d5d5835632), C64e(0xc50d438b8b4ec543), + C64e(0xebdc596e6e85eb59), C64e(0xc2afb7dada18c2b7), + C64e(0x8f028c01018e8f8c), C64e(0xac7964b1b11dac64), + C64e(0x6d23d29c9cf16dd2), C64e(0x3b92e04949723be0), + C64e(0xc7abb4d8d81fc7b4), C64e(0x1543faacacb915fa), + C64e(0x09fd07f3f3fa0907), C64e(0x6f8525cfcfa06f25), + C64e(0xea8fafcaca20eaaf), C64e(0x89f38ef4f47d898e), + C64e(0x208ee947476720e9), C64e(0x2820181010382818), + C64e(0x64ded56f6f0b64d5), C64e(0x83fb88f0f0738388), + C64e(0xb1946f4a4afbb16f), C64e(0x96b8725c5cca9672), + C64e(0x6c70243838546c24), C64e(0x08aef157575f08f1), + C64e(0x52e6c773732152c7), C64e(0xf33551979764f351), + C64e(0x658d23cbcbae6523), C64e(0x84597ca1a125847c), + C64e(0xbfcb9ce8e857bf9c), C64e(0x637c213e3e5d6321), + C64e(0x7c37dd9696ea7cdd), C64e(0x7fc2dc61611e7fdc), + C64e(0x911a860d0d9c9186), C64e(0x941e850f0f9b9485), + C64e(0xabdb90e0e04bab90), C64e(0xc6f8427c7cbac642), + C64e(0x57e2c471712657c4), C64e(0xe583aacccc29e5aa), + C64e(0x733bd89090e373d8), C64e(0x0f0c050606090f05), + C64e(0x03f501f7f7f40301), C64e(0x3638121c1c2a3612), + C64e(0xfe9fa3c2c23cfea3), C64e(0xe1d45f6a6a8be15f), + C64e(0x1047f9aeaebe10f9), C64e(0x6bd2d06969026bd0), + C64e(0xa82e911717bfa891), C64e(0xe82958999971e858), + C64e(0x6974273a3a536927), C64e(0xd04eb92727f7d0b9), + C64e(0x48a938d9d9914838), C64e(0x35cd13ebebde3513), + C64e(0xce56b32b2be5ceb3), C64e(0x5544332222775533), + C64e(0xd6bfbbd2d204d6bb), C64e(0x904970a9a9399070), + C64e(0x800e890707878089), C64e(0xf266a73333c1f2a7), + C64e(0xc15ab62d2decc1b6), C64e(0x6678223c3c5a6622), + C64e(0xad2a921515b8ad92), C64e(0x608920c9c9a96020), + C64e(0xdb154987875cdb49), C64e(0x1a4fffaaaab01aff), + C64e(0x88a0785050d88878), C64e(0x8e517aa5a52b8e7a), + C64e(0x8a068f0303898a8f), C64e(0x13b2f859594a13f8), + C64e(0x9b12800909929b80), C64e(0x3934171a1a233917), + 
C64e(0x75cada65651075da), C64e(0x53b531d7d7845331), + C64e(0x5113c68484d551c6), C64e(0xd3bbb8d0d003d3b8), + C64e(0x5e1fc38282dc5ec3), C64e(0xcb52b02929e2cbb0), + C64e(0x99b4775a5ac39977), C64e(0x333c111e1e2d3311), + C64e(0x46f6cb7b7b3d46cb), C64e(0x1f4bfca8a8b71ffc), + C64e(0x61dad66d6d0c61d6), C64e(0x4e583a2c2c624e3a) +}; + +#if !SPH_SMALL_FOOTPRINT_GROESTL + /* T5: compiled only when SPH_SMALL_FOOTPRINT_GROESTL evaluates to 0. Read as T5[B64_5(x)] by the full-table RSTT/RBTT. As written in C64e() notation, the first and last entries are the T4 entries at the same index rotated right by one byte (T4[0] f497a5c6c632f4a5 becomes a5f497a5c6c632f4). NOTE(review): spot-checked only, not every entry. */ +static const sph_u64 T5[] = { + C64e(0xa5f497a5c6c632f4), C64e(0x8497eb84f8f86f97), + C64e(0x99b0c799eeee5eb0), C64e(0x8d8cf78df6f67a8c), + C64e(0x0d17e50dffffe817), C64e(0xbddcb7bdd6d60adc), + C64e(0xb1c8a7b1dede16c8), C64e(0x54fc395491916dfc), + C64e(0x50f0c050606090f0), C64e(0x0305040302020705), + C64e(0xa9e087a9cece2ee0), C64e(0x7d87ac7d5656d187), + C64e(0x192bd519e7e7cc2b), C64e(0x62a67162b5b513a6), + C64e(0xe6319ae64d4d7c31), C64e(0x9ab5c39aecec59b5), + C64e(0x45cf05458f8f40cf), C64e(0x9dbc3e9d1f1fa3bc), + C64e(0x40c00940898949c0), C64e(0x8792ef87fafa6892), + C64e(0x153fc515efefd03f), C64e(0xeb267febb2b29426), + C64e(0xc94007c98e8ece40), C64e(0x0b1ded0bfbfbe61d), + C64e(0xec2f82ec41416e2f), C64e(0x67a97d67b3b31aa9), + C64e(0xfd1cbefd5f5f431c), C64e(0xea258aea45456025), + C64e(0xbfda46bf2323f9da), C64e(0xf702a6f753535102), + C64e(0x96a1d396e4e445a1), C64e(0x5bed2d5b9b9b76ed), + C64e(0xc25deac27575285d), C64e(0x1c24d91ce1e1c524), + C64e(0xaee97aae3d3dd4e9), C64e(0x6abe986a4c4cf2be), + C64e(0x5aeed85a6c6c82ee), C64e(0x41c3fc417e7ebdc3), + C64e(0x0206f102f5f5f306), C64e(0x4fd11d4f838352d1), + C64e(0x5ce4d05c68688ce4), C64e(0xf407a2f451515607), + C64e(0x345cb934d1d18d5c), C64e(0x0818e908f9f9e118), + C64e(0x93aedf93e2e24cae), C64e(0x73954d73abab3e95), + C64e(0x53f5c453626297f5), C64e(0x3f41543f2a2a6b41), + C64e(0x0c14100c08081c14), C64e(0x52f63152959563f6), + C64e(0x65af8c654646e9af), C64e(0x5ee2215e9d9d7fe2), + C64e(0x2878602830304878), C64e(0xa1f86ea13737cff8), + C64e(0x0f11140f0a0a1b11), C64e(0xb5c45eb52f2febc4), + C64e(0x091b1c090e0e151b), C64e(0x365a483624247e5a), + C64e(0x9bb6369b1b1badb6), 
C64e(0x3d47a53ddfdf9847), + C64e(0x266a8126cdcda76a), C64e(0x69bb9c694e4ef5bb), + C64e(0xcd4cfecd7f7f334c), C64e(0x9fbacf9feaea50ba), + C64e(0x1b2d241b12123f2d), C64e(0x9eb93a9e1d1da4b9), + C64e(0x749cb0745858c49c), C64e(0x2e72682e34344672), + C64e(0x2d776c2d36364177), C64e(0xb2cda3b2dcdc11cd), + C64e(0xee2973eeb4b49d29), C64e(0xfb16b6fb5b5b4d16), + C64e(0xf60153f6a4a4a501), C64e(0x4dd7ec4d7676a1d7), + C64e(0x61a37561b7b714a3), C64e(0xce49face7d7d3449), + C64e(0x7b8da47b5252df8d), C64e(0x3e42a13edddd9f42), + C64e(0x7193bc715e5ecd93), C64e(0x97a226971313b1a2), + C64e(0xf50457f5a6a6a204), C64e(0x68b86968b9b901b8), + C64e(0x0000000000000000), C64e(0x2c74992cc1c1b574), + C64e(0x60a080604040e0a0), C64e(0x1f21dd1fe3e3c221), + C64e(0xc843f2c879793a43), C64e(0xed2c77edb6b69a2c), + C64e(0xbed9b3bed4d40dd9), C64e(0x46ca01468d8d47ca), + C64e(0xd970ced967671770), C64e(0x4bdde44b7272afdd), + C64e(0xde7933de9494ed79), C64e(0xd4672bd49898ff67), + C64e(0xe8237be8b0b09323), C64e(0x4ade114a85855bde), + C64e(0x6bbd6d6bbbbb06bd), C64e(0x2a7e912ac5c5bb7e), + C64e(0xe5349ee54f4f7b34), C64e(0x163ac116ededd73a), + C64e(0xc55417c58686d254), C64e(0xd7622fd79a9af862), + C64e(0x55ffcc55666699ff), C64e(0x94a722941111b6a7), + C64e(0xcf4a0fcf8a8ac04a), C64e(0x1030c910e9e9d930), + C64e(0x060a080604040e0a), C64e(0x8198e781fefe6698), + C64e(0xf00b5bf0a0a0ab0b), C64e(0x44ccf0447878b4cc), + C64e(0xbad54aba2525f0d5), C64e(0xe33e96e34b4b753e), + C64e(0xf30e5ff3a2a2ac0e), C64e(0xfe19bafe5d5d4419), + C64e(0xc05b1bc08080db5b), C64e(0x8a850a8a05058085), + C64e(0xadec7ead3f3fd3ec), C64e(0xbcdf42bc2121fedf), + C64e(0x48d8e0487070a8d8), C64e(0x040cf904f1f1fd0c), + C64e(0xdf7ac6df6363197a), C64e(0xc158eec177772f58), + C64e(0x759f4575afaf309f), C64e(0x63a584634242e7a5), + C64e(0x3050403020207050), C64e(0x1a2ed11ae5e5cb2e), + C64e(0x0e12e10efdfdef12), C64e(0x6db7656dbfbf08b7), + C64e(0x4cd4194c818155d4), C64e(0x143c30141818243c), + C64e(0x355f4c352626795f), C64e(0x2f719d2fc3c3b271), + C64e(0xe13867e1bebe8638), 
C64e(0xa2fd6aa23535c8fd), + C64e(0xcc4f0bcc8888c74f), C64e(0x394b5c392e2e654b), + C64e(0x57f93d5793936af9), C64e(0xf20daaf25555580d), + C64e(0x829de382fcfc619d), C64e(0x47c9f4477a7ab3c9), + C64e(0xacef8bacc8c827ef), C64e(0xe7326fe7baba8832), + C64e(0x2b7d642b32324f7d), C64e(0x95a4d795e6e642a4), + C64e(0xa0fb9ba0c0c03bfb), C64e(0x98b332981919aab3), + C64e(0xd16827d19e9ef668), C64e(0x7f815d7fa3a32281), + C64e(0x66aa88664444eeaa), C64e(0x7e82a87e5454d682), + C64e(0xabe676ab3b3bdde6), C64e(0x839e16830b0b959e), + C64e(0xca4503ca8c8cc945), C64e(0x297b9529c7c7bc7b), + C64e(0xd36ed6d36b6b056e), C64e(0x3c44503c28286c44), + C64e(0x798b5579a7a72c8b), C64e(0xe23d63e2bcbc813d), + C64e(0x1d272c1d16163127), C64e(0x769a4176adad379a), + C64e(0x3b4dad3bdbdb964d), C64e(0x56fac85664649efa), + C64e(0x4ed2e84e7474a6d2), C64e(0x1e22281e14143622), + C64e(0xdb763fdb9292e476), C64e(0x0a1e180a0c0c121e), + C64e(0x6cb4906c4848fcb4), C64e(0xe4376be4b8b88f37), + C64e(0x5de7255d9f9f78e7), C64e(0x6eb2616ebdbd0fb2), + C64e(0xef2a86ef4343692a), C64e(0xa6f193a6c4c435f1), + C64e(0xa8e372a83939dae3), C64e(0xa4f762a43131c6f7), + C64e(0x3759bd37d3d38a59), C64e(0x8b86ff8bf2f27486), + C64e(0x3256b132d5d58356), C64e(0x43c50d438b8b4ec5), + C64e(0x59ebdc596e6e85eb), C64e(0xb7c2afb7dada18c2), + C64e(0x8c8f028c01018e8f), C64e(0x64ac7964b1b11dac), + C64e(0xd26d23d29c9cf16d), C64e(0xe03b92e04949723b), + C64e(0xb4c7abb4d8d81fc7), C64e(0xfa1543faacacb915), + C64e(0x0709fd07f3f3fa09), C64e(0x256f8525cfcfa06f), + C64e(0xafea8fafcaca20ea), C64e(0x8e89f38ef4f47d89), + C64e(0xe9208ee947476720), C64e(0x1828201810103828), + C64e(0xd564ded56f6f0b64), C64e(0x8883fb88f0f07383), + C64e(0x6fb1946f4a4afbb1), C64e(0x7296b8725c5cca96), + C64e(0x246c70243838546c), C64e(0xf108aef157575f08), + C64e(0xc752e6c773732152), C64e(0x51f33551979764f3), + C64e(0x23658d23cbcbae65), C64e(0x7c84597ca1a12584), + C64e(0x9cbfcb9ce8e857bf), C64e(0x21637c213e3e5d63), + C64e(0xdd7c37dd9696ea7c), C64e(0xdc7fc2dc61611e7f), + C64e(0x86911a860d0d9c91), 
C64e(0x85941e850f0f9b94), + C64e(0x90abdb90e0e04bab), C64e(0x42c6f8427c7cbac6), + C64e(0xc457e2c471712657), C64e(0xaae583aacccc29e5), + C64e(0xd8733bd89090e373), C64e(0x050f0c050606090f), + C64e(0x0103f501f7f7f403), C64e(0x123638121c1c2a36), + C64e(0xa3fe9fa3c2c23cfe), C64e(0x5fe1d45f6a6a8be1), + C64e(0xf91047f9aeaebe10), C64e(0xd06bd2d06969026b), + C64e(0x91a82e911717bfa8), C64e(0x58e82958999971e8), + C64e(0x276974273a3a5369), C64e(0xb9d04eb92727f7d0), + C64e(0x3848a938d9d99148), C64e(0x1335cd13ebebde35), + C64e(0xb3ce56b32b2be5ce), C64e(0x3355443322227755), + C64e(0xbbd6bfbbd2d204d6), C64e(0x70904970a9a93990), + C64e(0x89800e8907078780), C64e(0xa7f266a73333c1f2), + C64e(0xb6c15ab62d2decc1), C64e(0x226678223c3c5a66), + C64e(0x92ad2a921515b8ad), C64e(0x20608920c9c9a960), + C64e(0x49db154987875cdb), C64e(0xff1a4fffaaaab01a), + C64e(0x7888a0785050d888), C64e(0x7a8e517aa5a52b8e), + C64e(0x8f8a068f0303898a), C64e(0xf813b2f859594a13), + C64e(0x809b12800909929b), C64e(0x173934171a1a2339), + C64e(0xda75cada65651075), C64e(0x3153b531d7d78453), + C64e(0xc65113c68484d551), C64e(0xb8d3bbb8d0d003d3), + C64e(0xc35e1fc38282dc5e), C64e(0xb0cb52b02929e2cb), + C64e(0x7799b4775a5ac399), C64e(0x11333c111e1e2d33), + C64e(0xcb46f6cb7b7b3d46), C64e(0xfc1f4bfca8a8b71f), + C64e(0xd661dad66d6d0c61), C64e(0x3a4e583a2c2c624e) +}; + /* T6: declared in the same #if !SPH_SMALL_FOOTPRINT_GROESTL section as T5 (no #endif in between). Read as T6[B64_6(x)] by the full-table RSTT/RBTT. As written, its first entry is T5[0] (a5f497a5c6c632f4) rotated right by one byte. NOTE(review): spot-checked only, not every entry. */ +static const sph_u64 T6[] = { + C64e(0xf4a5f497a5c6c632), C64e(0x978497eb84f8f86f), + C64e(0xb099b0c799eeee5e), C64e(0x8c8d8cf78df6f67a), + C64e(0x170d17e50dffffe8), C64e(0xdcbddcb7bdd6d60a), + C64e(0xc8b1c8a7b1dede16), C64e(0xfc54fc395491916d), + C64e(0xf050f0c050606090), C64e(0x0503050403020207), + C64e(0xe0a9e087a9cece2e), C64e(0x877d87ac7d5656d1), + C64e(0x2b192bd519e7e7cc), C64e(0xa662a67162b5b513), + C64e(0x31e6319ae64d4d7c), C64e(0xb59ab5c39aecec59), + C64e(0xcf45cf05458f8f40), C64e(0xbc9dbc3e9d1f1fa3), + C64e(0xc040c00940898949), C64e(0x928792ef87fafa68), + C64e(0x3f153fc515efefd0), C64e(0x26eb267febb2b294), + C64e(0x40c94007c98e8ece), 
C64e(0x1d0b1ded0bfbfbe6), + C64e(0x2fec2f82ec41416e), C64e(0xa967a97d67b3b31a), + C64e(0x1cfd1cbefd5f5f43), C64e(0x25ea258aea454560), + C64e(0xdabfda46bf2323f9), C64e(0x02f702a6f7535351), + C64e(0xa196a1d396e4e445), C64e(0xed5bed2d5b9b9b76), + C64e(0x5dc25deac2757528), C64e(0x241c24d91ce1e1c5), + C64e(0xe9aee97aae3d3dd4), C64e(0xbe6abe986a4c4cf2), + C64e(0xee5aeed85a6c6c82), C64e(0xc341c3fc417e7ebd), + C64e(0x060206f102f5f5f3), C64e(0xd14fd11d4f838352), + C64e(0xe45ce4d05c68688c), C64e(0x07f407a2f4515156), + C64e(0x5c345cb934d1d18d), C64e(0x180818e908f9f9e1), + C64e(0xae93aedf93e2e24c), C64e(0x9573954d73abab3e), + C64e(0xf553f5c453626297), C64e(0x413f41543f2a2a6b), + C64e(0x140c14100c08081c), C64e(0xf652f63152959563), + C64e(0xaf65af8c654646e9), C64e(0xe25ee2215e9d9d7f), + C64e(0x7828786028303048), C64e(0xf8a1f86ea13737cf), + C64e(0x110f11140f0a0a1b), C64e(0xc4b5c45eb52f2feb), + C64e(0x1b091b1c090e0e15), C64e(0x5a365a483624247e), + C64e(0xb69bb6369b1b1bad), C64e(0x473d47a53ddfdf98), + C64e(0x6a266a8126cdcda7), C64e(0xbb69bb9c694e4ef5), + C64e(0x4ccd4cfecd7f7f33), C64e(0xba9fbacf9feaea50), + C64e(0x2d1b2d241b12123f), C64e(0xb99eb93a9e1d1da4), + C64e(0x9c749cb0745858c4), C64e(0x722e72682e343446), + C64e(0x772d776c2d363641), C64e(0xcdb2cda3b2dcdc11), + C64e(0x29ee2973eeb4b49d), C64e(0x16fb16b6fb5b5b4d), + C64e(0x01f60153f6a4a4a5), C64e(0xd74dd7ec4d7676a1), + C64e(0xa361a37561b7b714), C64e(0x49ce49face7d7d34), + C64e(0x8d7b8da47b5252df), C64e(0x423e42a13edddd9f), + C64e(0x937193bc715e5ecd), C64e(0xa297a226971313b1), + C64e(0x04f50457f5a6a6a2), C64e(0xb868b86968b9b901), + C64e(0x0000000000000000), C64e(0x742c74992cc1c1b5), + C64e(0xa060a080604040e0), C64e(0x211f21dd1fe3e3c2), + C64e(0x43c843f2c879793a), C64e(0x2ced2c77edb6b69a), + C64e(0xd9bed9b3bed4d40d), C64e(0xca46ca01468d8d47), + C64e(0x70d970ced9676717), C64e(0xdd4bdde44b7272af), + C64e(0x79de7933de9494ed), C64e(0x67d4672bd49898ff), + C64e(0x23e8237be8b0b093), C64e(0xde4ade114a85855b), + C64e(0xbd6bbd6d6bbbbb06), 
C64e(0x7e2a7e912ac5c5bb), + C64e(0x34e5349ee54f4f7b), C64e(0x3a163ac116ededd7), + C64e(0x54c55417c58686d2), C64e(0x62d7622fd79a9af8), + C64e(0xff55ffcc55666699), C64e(0xa794a722941111b6), + C64e(0x4acf4a0fcf8a8ac0), C64e(0x301030c910e9e9d9), + C64e(0x0a060a080604040e), C64e(0x988198e781fefe66), + C64e(0x0bf00b5bf0a0a0ab), C64e(0xcc44ccf0447878b4), + C64e(0xd5bad54aba2525f0), C64e(0x3ee33e96e34b4b75), + C64e(0x0ef30e5ff3a2a2ac), C64e(0x19fe19bafe5d5d44), + C64e(0x5bc05b1bc08080db), C64e(0x858a850a8a050580), + C64e(0xecadec7ead3f3fd3), C64e(0xdfbcdf42bc2121fe), + C64e(0xd848d8e0487070a8), C64e(0x0c040cf904f1f1fd), + C64e(0x7adf7ac6df636319), C64e(0x58c158eec177772f), + C64e(0x9f759f4575afaf30), C64e(0xa563a584634242e7), + C64e(0x5030504030202070), C64e(0x2e1a2ed11ae5e5cb), + C64e(0x120e12e10efdfdef), C64e(0xb76db7656dbfbf08), + C64e(0xd44cd4194c818155), C64e(0x3c143c3014181824), + C64e(0x5f355f4c35262679), C64e(0x712f719d2fc3c3b2), + C64e(0x38e13867e1bebe86), C64e(0xfda2fd6aa23535c8), + C64e(0x4fcc4f0bcc8888c7), C64e(0x4b394b5c392e2e65), + C64e(0xf957f93d5793936a), C64e(0x0df20daaf2555558), + C64e(0x9d829de382fcfc61), C64e(0xc947c9f4477a7ab3), + C64e(0xefacef8bacc8c827), C64e(0x32e7326fe7baba88), + C64e(0x7d2b7d642b32324f), C64e(0xa495a4d795e6e642), + C64e(0xfba0fb9ba0c0c03b), C64e(0xb398b332981919aa), + C64e(0x68d16827d19e9ef6), C64e(0x817f815d7fa3a322), + C64e(0xaa66aa88664444ee), C64e(0x827e82a87e5454d6), + C64e(0xe6abe676ab3b3bdd), C64e(0x9e839e16830b0b95), + C64e(0x45ca4503ca8c8cc9), C64e(0x7b297b9529c7c7bc), + C64e(0x6ed36ed6d36b6b05), C64e(0x443c44503c28286c), + C64e(0x8b798b5579a7a72c), C64e(0x3de23d63e2bcbc81), + C64e(0x271d272c1d161631), C64e(0x9a769a4176adad37), + C64e(0x4d3b4dad3bdbdb96), C64e(0xfa56fac85664649e), + C64e(0xd24ed2e84e7474a6), C64e(0x221e22281e141436), + C64e(0x76db763fdb9292e4), C64e(0x1e0a1e180a0c0c12), + C64e(0xb46cb4906c4848fc), C64e(0x37e4376be4b8b88f), + C64e(0xe75de7255d9f9f78), C64e(0xb26eb2616ebdbd0f), + C64e(0x2aef2a86ef434369), 
C64e(0xf1a6f193a6c4c435), + C64e(0xe3a8e372a83939da), C64e(0xf7a4f762a43131c6), + C64e(0x593759bd37d3d38a), C64e(0x868b86ff8bf2f274), + C64e(0x563256b132d5d583), C64e(0xc543c50d438b8b4e), + C64e(0xeb59ebdc596e6e85), C64e(0xc2b7c2afb7dada18), + C64e(0x8f8c8f028c01018e), C64e(0xac64ac7964b1b11d), + C64e(0x6dd26d23d29c9cf1), C64e(0x3be03b92e0494972), + C64e(0xc7b4c7abb4d8d81f), C64e(0x15fa1543faacacb9), + C64e(0x090709fd07f3f3fa), C64e(0x6f256f8525cfcfa0), + C64e(0xeaafea8fafcaca20), C64e(0x898e89f38ef4f47d), + C64e(0x20e9208ee9474767), C64e(0x2818282018101038), + C64e(0x64d564ded56f6f0b), C64e(0x838883fb88f0f073), + C64e(0xb16fb1946f4a4afb), C64e(0x967296b8725c5cca), + C64e(0x6c246c7024383854), C64e(0x08f108aef157575f), + C64e(0x52c752e6c7737321), C64e(0xf351f33551979764), + C64e(0x6523658d23cbcbae), C64e(0x847c84597ca1a125), + C64e(0xbf9cbfcb9ce8e857), C64e(0x6321637c213e3e5d), + C64e(0x7cdd7c37dd9696ea), C64e(0x7fdc7fc2dc61611e), + C64e(0x9186911a860d0d9c), C64e(0x9485941e850f0f9b), + C64e(0xab90abdb90e0e04b), C64e(0xc642c6f8427c7cba), + C64e(0x57c457e2c4717126), C64e(0xe5aae583aacccc29), + C64e(0x73d8733bd89090e3), C64e(0x0f050f0c05060609), + C64e(0x030103f501f7f7f4), C64e(0x36123638121c1c2a), + C64e(0xfea3fe9fa3c2c23c), C64e(0xe15fe1d45f6a6a8b), + C64e(0x10f91047f9aeaebe), C64e(0x6bd06bd2d0696902), + C64e(0xa891a82e911717bf), C64e(0xe858e82958999971), + C64e(0x69276974273a3a53), C64e(0xd0b9d04eb92727f7), + C64e(0x483848a938d9d991), C64e(0x351335cd13ebebde), + C64e(0xceb3ce56b32b2be5), C64e(0x5533554433222277), + C64e(0xd6bbd6bfbbd2d204), C64e(0x9070904970a9a939), + C64e(0x8089800e89070787), C64e(0xf2a7f266a73333c1), + C64e(0xc1b6c15ab62d2dec), C64e(0x66226678223c3c5a), + C64e(0xad92ad2a921515b8), C64e(0x6020608920c9c9a9), + C64e(0xdb49db154987875c), C64e(0x1aff1a4fffaaaab0), + C64e(0x887888a0785050d8), C64e(0x8e7a8e517aa5a52b), + C64e(0x8a8f8a068f030389), C64e(0x13f813b2f859594a), + C64e(0x9b809b1280090992), C64e(0x39173934171a1a23), + C64e(0x75da75cada656510), 
C64e(0x533153b531d7d784), + C64e(0x51c65113c68484d5), C64e(0xd3b8d3bbb8d0d003), + C64e(0x5ec35e1fc38282dc), C64e(0xcbb0cb52b02929e2), + C64e(0x997799b4775a5ac3), C64e(0x3311333c111e1e2d), + C64e(0x46cb46f6cb7b7b3d), C64e(0x1ffc1f4bfca8a8b7), + C64e(0x61d661dad66d6d0c), C64e(0x4e3a4e583a2c2c62) +}; + /* T7: last table of the #if !SPH_SMALL_FOOTPRINT_GROESTL section (the closing #endif follows its initializer). Read as T7[B64_7(x)] by the full-table RSTT/RBTT. As written, its first entry is T6[0] (f4a5f497a5c6c632) rotated right by one byte. NOTE(review): spot-checked only, not every entry. */ +static const sph_u64 T7[] = { + C64e(0x32f4a5f497a5c6c6), C64e(0x6f978497eb84f8f8), + C64e(0x5eb099b0c799eeee), C64e(0x7a8c8d8cf78df6f6), + C64e(0xe8170d17e50dffff), C64e(0x0adcbddcb7bdd6d6), + C64e(0x16c8b1c8a7b1dede), C64e(0x6dfc54fc39549191), + C64e(0x90f050f0c0506060), C64e(0x0705030504030202), + C64e(0x2ee0a9e087a9cece), C64e(0xd1877d87ac7d5656), + C64e(0xcc2b192bd519e7e7), C64e(0x13a662a67162b5b5), + C64e(0x7c31e6319ae64d4d), C64e(0x59b59ab5c39aecec), + C64e(0x40cf45cf05458f8f), C64e(0xa3bc9dbc3e9d1f1f), + C64e(0x49c040c009408989), C64e(0x68928792ef87fafa), + C64e(0xd03f153fc515efef), C64e(0x9426eb267febb2b2), + C64e(0xce40c94007c98e8e), C64e(0xe61d0b1ded0bfbfb), + C64e(0x6e2fec2f82ec4141), C64e(0x1aa967a97d67b3b3), + C64e(0x431cfd1cbefd5f5f), C64e(0x6025ea258aea4545), + C64e(0xf9dabfda46bf2323), C64e(0x5102f702a6f75353), + C64e(0x45a196a1d396e4e4), C64e(0x76ed5bed2d5b9b9b), + C64e(0x285dc25deac27575), C64e(0xc5241c24d91ce1e1), + C64e(0xd4e9aee97aae3d3d), C64e(0xf2be6abe986a4c4c), + C64e(0x82ee5aeed85a6c6c), C64e(0xbdc341c3fc417e7e), + C64e(0xf3060206f102f5f5), C64e(0x52d14fd11d4f8383), + C64e(0x8ce45ce4d05c6868), C64e(0x5607f407a2f45151), + C64e(0x8d5c345cb934d1d1), C64e(0xe1180818e908f9f9), + C64e(0x4cae93aedf93e2e2), C64e(0x3e9573954d73abab), + C64e(0x97f553f5c4536262), C64e(0x6b413f41543f2a2a), + C64e(0x1c140c14100c0808), C64e(0x63f652f631529595), + C64e(0xe9af65af8c654646), C64e(0x7fe25ee2215e9d9d), + C64e(0x4878287860283030), C64e(0xcff8a1f86ea13737), + C64e(0x1b110f11140f0a0a), C64e(0xebc4b5c45eb52f2f), + C64e(0x151b091b1c090e0e), C64e(0x7e5a365a48362424), + C64e(0xadb69bb6369b1b1b), C64e(0x98473d47a53ddfdf), + C64e(0xa76a266a8126cdcd), 
C64e(0xf5bb69bb9c694e4e), + C64e(0x334ccd4cfecd7f7f), C64e(0x50ba9fbacf9feaea), + C64e(0x3f2d1b2d241b1212), C64e(0xa4b99eb93a9e1d1d), + C64e(0xc49c749cb0745858), C64e(0x46722e72682e3434), + C64e(0x41772d776c2d3636), C64e(0x11cdb2cda3b2dcdc), + C64e(0x9d29ee2973eeb4b4), C64e(0x4d16fb16b6fb5b5b), + C64e(0xa501f60153f6a4a4), C64e(0xa1d74dd7ec4d7676), + C64e(0x14a361a37561b7b7), C64e(0x3449ce49face7d7d), + C64e(0xdf8d7b8da47b5252), C64e(0x9f423e42a13edddd), + C64e(0xcd937193bc715e5e), C64e(0xb1a297a226971313), + C64e(0xa204f50457f5a6a6), C64e(0x01b868b86968b9b9), + C64e(0x0000000000000000), C64e(0xb5742c74992cc1c1), + C64e(0xe0a060a080604040), C64e(0xc2211f21dd1fe3e3), + C64e(0x3a43c843f2c87979), C64e(0x9a2ced2c77edb6b6), + C64e(0x0dd9bed9b3bed4d4), C64e(0x47ca46ca01468d8d), + C64e(0x1770d970ced96767), C64e(0xafdd4bdde44b7272), + C64e(0xed79de7933de9494), C64e(0xff67d4672bd49898), + C64e(0x9323e8237be8b0b0), C64e(0x5bde4ade114a8585), + C64e(0x06bd6bbd6d6bbbbb), C64e(0xbb7e2a7e912ac5c5), + C64e(0x7b34e5349ee54f4f), C64e(0xd73a163ac116eded), + C64e(0xd254c55417c58686), C64e(0xf862d7622fd79a9a), + C64e(0x99ff55ffcc556666), C64e(0xb6a794a722941111), + C64e(0xc04acf4a0fcf8a8a), C64e(0xd9301030c910e9e9), + C64e(0x0e0a060a08060404), C64e(0x66988198e781fefe), + C64e(0xab0bf00b5bf0a0a0), C64e(0xb4cc44ccf0447878), + C64e(0xf0d5bad54aba2525), C64e(0x753ee33e96e34b4b), + C64e(0xac0ef30e5ff3a2a2), C64e(0x4419fe19bafe5d5d), + C64e(0xdb5bc05b1bc08080), C64e(0x80858a850a8a0505), + C64e(0xd3ecadec7ead3f3f), C64e(0xfedfbcdf42bc2121), + C64e(0xa8d848d8e0487070), C64e(0xfd0c040cf904f1f1), + C64e(0x197adf7ac6df6363), C64e(0x2f58c158eec17777), + C64e(0x309f759f4575afaf), C64e(0xe7a563a584634242), + C64e(0x7050305040302020), C64e(0xcb2e1a2ed11ae5e5), + C64e(0xef120e12e10efdfd), C64e(0x08b76db7656dbfbf), + C64e(0x55d44cd4194c8181), C64e(0x243c143c30141818), + C64e(0x795f355f4c352626), C64e(0xb2712f719d2fc3c3), + C64e(0x8638e13867e1bebe), C64e(0xc8fda2fd6aa23535), + C64e(0xc74fcc4f0bcc8888), 
C64e(0x654b394b5c392e2e), + C64e(0x6af957f93d579393), C64e(0x580df20daaf25555), + C64e(0x619d829de382fcfc), C64e(0xb3c947c9f4477a7a), + C64e(0x27efacef8bacc8c8), C64e(0x8832e7326fe7baba), + C64e(0x4f7d2b7d642b3232), C64e(0x42a495a4d795e6e6), + C64e(0x3bfba0fb9ba0c0c0), C64e(0xaab398b332981919), + C64e(0xf668d16827d19e9e), C64e(0x22817f815d7fa3a3), + C64e(0xeeaa66aa88664444), C64e(0xd6827e82a87e5454), + C64e(0xdde6abe676ab3b3b), C64e(0x959e839e16830b0b), + C64e(0xc945ca4503ca8c8c), C64e(0xbc7b297b9529c7c7), + C64e(0x056ed36ed6d36b6b), C64e(0x6c443c44503c2828), + C64e(0x2c8b798b5579a7a7), C64e(0x813de23d63e2bcbc), + C64e(0x31271d272c1d1616), C64e(0x379a769a4176adad), + C64e(0x964d3b4dad3bdbdb), C64e(0x9efa56fac8566464), + C64e(0xa6d24ed2e84e7474), C64e(0x36221e22281e1414), + C64e(0xe476db763fdb9292), C64e(0x121e0a1e180a0c0c), + C64e(0xfcb46cb4906c4848), C64e(0x8f37e4376be4b8b8), + C64e(0x78e75de7255d9f9f), C64e(0x0fb26eb2616ebdbd), + C64e(0x692aef2a86ef4343), C64e(0x35f1a6f193a6c4c4), + C64e(0xdae3a8e372a83939), C64e(0xc6f7a4f762a43131), + C64e(0x8a593759bd37d3d3), C64e(0x74868b86ff8bf2f2), + C64e(0x83563256b132d5d5), C64e(0x4ec543c50d438b8b), + C64e(0x85eb59ebdc596e6e), C64e(0x18c2b7c2afb7dada), + C64e(0x8e8f8c8f028c0101), C64e(0x1dac64ac7964b1b1), + C64e(0xf16dd26d23d29c9c), C64e(0x723be03b92e04949), + C64e(0x1fc7b4c7abb4d8d8), C64e(0xb915fa1543faacac), + C64e(0xfa090709fd07f3f3), C64e(0xa06f256f8525cfcf), + C64e(0x20eaafea8fafcaca), C64e(0x7d898e89f38ef4f4), + C64e(0x6720e9208ee94747), C64e(0x3828182820181010), + C64e(0x0b64d564ded56f6f), C64e(0x73838883fb88f0f0), + C64e(0xfbb16fb1946f4a4a), C64e(0xca967296b8725c5c), + C64e(0x546c246c70243838), C64e(0x5f08f108aef15757), + C64e(0x2152c752e6c77373), C64e(0x64f351f335519797), + C64e(0xae6523658d23cbcb), C64e(0x25847c84597ca1a1), + C64e(0x57bf9cbfcb9ce8e8), C64e(0x5d6321637c213e3e), + C64e(0xea7cdd7c37dd9696), C64e(0x1e7fdc7fc2dc6161), + C64e(0x9c9186911a860d0d), C64e(0x9b9485941e850f0f), + C64e(0x4bab90abdb90e0e0), 
C64e(0xbac642c6f8427c7c), + C64e(0x2657c457e2c47171), C64e(0x29e5aae583aacccc), + C64e(0xe373d8733bd89090), C64e(0x090f050f0c050606), + C64e(0xf4030103f501f7f7), C64e(0x2a36123638121c1c), + C64e(0x3cfea3fe9fa3c2c2), C64e(0x8be15fe1d45f6a6a), + C64e(0xbe10f91047f9aeae), C64e(0x026bd06bd2d06969), + C64e(0xbfa891a82e911717), C64e(0x71e858e829589999), + C64e(0x5369276974273a3a), C64e(0xf7d0b9d04eb92727), + C64e(0x91483848a938d9d9), C64e(0xde351335cd13ebeb), + C64e(0xe5ceb3ce56b32b2b), C64e(0x7755335544332222), + C64e(0x04d6bbd6bfbbd2d2), C64e(0x399070904970a9a9), + C64e(0x878089800e890707), C64e(0xc1f2a7f266a73333), + C64e(0xecc1b6c15ab62d2d), C64e(0x5a66226678223c3c), + C64e(0xb8ad92ad2a921515), C64e(0xa96020608920c9c9), + C64e(0x5cdb49db15498787), C64e(0xb01aff1a4fffaaaa), + C64e(0xd8887888a0785050), C64e(0x2b8e7a8e517aa5a5), + C64e(0x898a8f8a068f0303), C64e(0x4a13f813b2f85959), + C64e(0x929b809b12800909), C64e(0x2339173934171a1a), + C64e(0x1075da75cada6565), C64e(0x84533153b531d7d7), + C64e(0xd551c65113c68484), C64e(0x03d3b8d3bbb8d0d0), + C64e(0xdc5ec35e1fc38282), C64e(0xe2cbb0cb52b02929), + C64e(0xc3997799b4775a5a), C64e(0x2d3311333c111e1e), + C64e(0x3d46cb46f6cb7b7b), C64e(0xb71ffc1f4bfca8a8), + C64e(0x0c61d661dad66d6d), C64e(0x624e3a4e583a2c2c) +}; + +#endif + /* DECL_STATE_SMALL: declares the local working state H as eight sph_u64 words. */ +#define DECL_STATE_SMALL \ + sph_u64 H[8]; + /* READ_STATE_SMALL(sc): copies sizeof H bytes from (sc)->state.wide into the local H; H must already be declared at the expansion site. */ +#define READ_STATE_SMALL(sc) do { \ + memcpy(H, (sc)->state.wide, sizeof H); \ + } while (0) + /* WRITE_STATE_SMALL(sc): copies the local H back into (sc)->state.wide (sizeof H bytes). */ +#define WRITE_STATE_SMALL(sc) do { \ + memcpy((sc)->state.wide, H, sizeof H); \ + } while (0) + +#if SPH_SMALL_FOOTPRINT_GROESTL + /* RSTT, small-footprint form: t[d] is the XOR of eight table lookups, one per selected word a[b0]..a[b7], using selectors B64_0..B64_7 in that order. Only T0 and T4 are read; R64(.., 8), R64(.., 16) and R64(.., 24) are applied where the other form reads T1..T3 and T5..T7. Expects an array t in scope at the expansion site. */ +#define RSTT(d, a, b0, b1, b2, b3, b4, b5, b6, b7) do { \ + t[d] = T0[B64_0(a[b0])] \ + ^ R64(T0[B64_1(a[b1])], 8) \ + ^ R64(T0[B64_2(a[b2])], 16) \ + ^ R64(T0[B64_3(a[b3])], 24) \ + ^ T4[B64_4(a[b4])] \ + ^ R64(T4[B64_5(a[b5])], 8) \ + ^ R64(T4[B64_6(a[b6])], 16) \ + ^ R64(T4[B64_7(a[b7])], 24); \ + } while (0) + +#else + /* RSTT, full-table form: same XOR of eight lookups, but selector B64_k indexes its own table Tk for k = 0..7. */ +#define RSTT(d, a, b0, b1, b2, b3, b4, b5, b6, b7) do { \ + t[d] = T0[B64_0(a[b0])] \ + ^ T1[B64_1(a[b1])] \ + ^ 
T2[B64_2(a[b2])] \ + ^ T3[B64_3(a[b3])] \ + ^ T4[B64_4(a[b4])] \ + ^ T5[B64_5(a[b5])] \ + ^ T6[B64_6(a[b6])] \ + ^ T7[B64_7(a[b7])]; \ + } while (0) + +#endif + /* ROUND_SMALL_P(a, r): one round of P on the 8-word state a. XORs PC64(0x00..0x70, r) into a[0..7], then fills t[d] with RSTT reading a[d], a[d+1], ..., a[d+7] (indices mod 8), then copies t back into a. */ +#define ROUND_SMALL_P(a, r) do { \ + sph_u64 t[8]; \ + a[0] ^= PC64(0x00, r); \ + a[1] ^= PC64(0x10, r); \ + a[2] ^= PC64(0x20, r); \ + a[3] ^= PC64(0x30, r); \ + a[4] ^= PC64(0x40, r); \ + a[5] ^= PC64(0x50, r); \ + a[6] ^= PC64(0x60, r); \ + a[7] ^= PC64(0x70, r); \ + RSTT(0, a, 0, 1, 2, 3, 4, 5, 6, 7); \ + RSTT(1, a, 1, 2, 3, 4, 5, 6, 7, 0); \ + RSTT(2, a, 2, 3, 4, 5, 6, 7, 0, 1); \ + RSTT(3, a, 3, 4, 5, 6, 7, 0, 1, 2); \ + RSTT(4, a, 4, 5, 6, 7, 0, 1, 2, 3); \ + RSTT(5, a, 5, 6, 7, 0, 1, 2, 3, 4); \ + RSTT(6, a, 6, 7, 0, 1, 2, 3, 4, 5); \ + RSTT(7, a, 7, 0, 1, 2, 3, 4, 5, 6); \ + a[0] = t[0]; \ + a[1] = t[1]; \ + a[2] = t[2]; \ + a[3] = t[3]; \ + a[4] = t[4]; \ + a[5] = t[5]; \ + a[6] = t[6]; \ + a[7] = t[7]; \ + } while (0) + /* ROUND_SMALL_Q(a, r): one round of Q on the 8-word state a. Same shape as ROUND_SMALL_P but with QC64 constants, and t[d] reads a[d+1], a[d+3], a[d+5], a[d+7], a[d], a[d+2], a[d+4], a[d+6] (indices mod 8). */ +#define ROUND_SMALL_Q(a, r) do { \ + sph_u64 t[8]; \ + a[0] ^= QC64(0x00, r); \ + a[1] ^= QC64(0x10, r); \ + a[2] ^= QC64(0x20, r); \ + a[3] ^= QC64(0x30, r); \ + a[4] ^= QC64(0x40, r); \ + a[5] ^= QC64(0x50, r); \ + a[6] ^= QC64(0x60, r); \ + a[7] ^= QC64(0x70, r); \ + RSTT(0, a, 1, 3, 5, 7, 0, 2, 4, 6); \ + RSTT(1, a, 2, 4, 6, 0, 1, 3, 5, 7); \ + RSTT(2, a, 3, 5, 7, 1, 2, 4, 6, 0); \ + RSTT(3, a, 4, 6, 0, 2, 3, 5, 7, 1); \ + RSTT(4, a, 5, 7, 1, 3, 4, 6, 0, 2); \ + RSTT(5, a, 6, 0, 2, 4, 5, 7, 1, 3); \ + RSTT(6, a, 7, 1, 3, 5, 6, 0, 2, 4); \ + RSTT(7, a, 0, 2, 4, 6, 7, 1, 3, 5); \ + a[0] = t[0]; \ + a[1] = t[1]; \ + a[2] = t[2]; \ + a[3] = t[3]; \ + a[4] = t[4]; \ + a[5] = t[5]; \ + a[6] = t[6]; \ + a[7] = t[7]; \ + } while (0) + +#if SPH_SMALL_FOOTPRINT_GROESTL + /* PERM_SMALL_P(a), small-footprint form: applies ROUND_SMALL_P for r = 0..9 in a plain loop. */ +#define PERM_SMALL_P(a) do { \ + int r; \ + for (r = 0; r < 10; r ++) \ + ROUND_SMALL_P(a, r); \ + } while (0) + /* PERM_SMALL_Q(a), small-footprint form: applies ROUND_SMALL_Q for r = 0..9 in a plain loop. */ +#define PERM_SMALL_Q(a) do { \ + int r; \ + for (r = 0; r < 10; r ++) \ + ROUND_SMALL_Q(a, r); \ + } while (0) + +#else + +/* + * Apparently, unrolling more than that confuses GCC, resulting in + * lower 
performance, even though L1 cache would be no problem. The two macros below run the same ten rounds (r = 0..9), two per loop iteration. + */ +#define PERM_SMALL_P(a) do { \ + int r; \ + for (r = 0; r < 10; r += 2) { \ + ROUND_SMALL_P(a, r + 0); \ + ROUND_SMALL_P(a, r + 1); \ + } \ + } while (0) + /* PERM_SMALL_Q(a), unrolled form: ROUND_SMALL_Q for r = 0..9, two rounds per loop iteration. */ +#define PERM_SMALL_Q(a) do { \ + int r; \ + for (r = 0; r < 10; r += 2) { \ + ROUND_SMALL_Q(a, r + 0); \ + ROUND_SMALL_Q(a, r + 1); \ + } \ + } while (0) + +#endif + /* COMPRESS_SMALL: loads m[u] = dec64e_aligned(buf + (u << 3)) for u = 0..7 and sets g[u] = m[u] ^ H[u]; runs PERM_SMALL_P on g and PERM_SMALL_Q on m; then folds both into the state with H[u] ^= g[u] ^ m[u]. Uses buf and H from the expansion site. */ +#define COMPRESS_SMALL do { \ + sph_u64 g[8], m[8]; \ + size_t u; \ + for (u = 0; u < 8; u ++) { \ + m[u] = dec64e_aligned(buf + (u << 3)); \ + g[u] = m[u] ^ H[u]; \ + } \ + PERM_SMALL_P(g); \ + PERM_SMALL_Q(m); \ + for (u = 0; u < 8; u ++) \ + H[u] ^= g[u] ^ m[u]; \ + } while (0) + /* FINAL_SMALL: copies H into x, runs PERM_SMALL_P on x, then XORs x back into H word by word. Uses H from the expansion site. */ +#define FINAL_SMALL do { \ + sph_u64 x[8]; \ + size_t u; \ + memcpy(x, H, sizeof x); \ + PERM_SMALL_P(x); \ + for (u = 0; u < 8; u ++) \ + H[u] ^= x[u]; \ + } while (0) + /* DECL_STATE_BIG: declares the local working state H as sixteen sph_u64 words. */ +#define DECL_STATE_BIG \ + sph_u64 H[16]; + /* READ_STATE_BIG(sc): copies sizeof H bytes from (sc)->state.wide into the local H. */ +#define READ_STATE_BIG(sc) do { \ + memcpy(H, (sc)->state.wide, sizeof H); \ + } while (0) + /* WRITE_STATE_BIG(sc): copies the local H back into (sc)->state.wide (sizeof H bytes). */ +#define WRITE_STATE_BIG(sc) do { \ + memcpy((sc)->state.wide, H, sizeof H); \ + } while (0) + +#if SPH_SMALL_FOOTPRINT_GROESTL + /* RBTT, small-footprint form: same expression as the small-footprint RSTT (T0 and T4 lookups combined with R64 by 8, 16 and 24), writing t[d]. */ +#define RBTT(d, a, b0, b1, b2, b3, b4, b5, b6, b7) do { \ + t[d] = T0[B64_0(a[b0])] \ + ^ R64(T0[B64_1(a[b1])], 8) \ + ^ R64(T0[B64_2(a[b2])], 16) \ + ^ R64(T0[B64_3(a[b3])], 24) \ + ^ T4[B64_4(a[b4])] \ + ^ R64(T4[B64_5(a[b5])], 8) \ + ^ R64(T4[B64_6(a[b6])], 16) \ + ^ R64(T4[B64_7(a[b7])], 24); \ + } while (0) + +#else + /* RBTT, full-table form: same expression as the full-table RSTT, one table T0..T7 per selector B64_0..B64_7. */ +#define RBTT(d, a, b0, b1, b2, b3, b4, b5, b6, b7) do { \ + t[d] = T0[B64_0(a[b0])] \ + ^ T1[B64_1(a[b1])] \ + ^ T2[B64_2(a[b2])] \ + ^ T3[B64_3(a[b3])] \ + ^ T4[B64_4(a[b4])] \ + ^ T5[B64_5(a[b5])] \ + ^ T6[B64_6(a[b6])] \ + ^ T7[B64_7(a[b7])]; \ + } while (0) + +#endif + +#if SPH_SMALL_FOOTPRINT_GROESTL + /* ROUND_BIG_P(a, r), small-footprint form: one round of P on the 16-word state a. XORs PC64(0x00..0xF0, r) into a[0x0..0xF]; a loop then fills t four words per pass with RBTT, where t[d] reads a[d], a[d+1], ..., a[d+6] and a[d+11] (offset indices masked with 0xF); finally t is copied over a with memcpy. */ +#define ROUND_BIG_P(a, r) do { \ + sph_u64 t[16]; \ + size_t u; \ + a[0x0] ^= PC64(0x00, r); \ + a[0x1] ^= PC64(0x10, r); \ + a[0x2] ^= PC64(0x20, r); \ + a[0x3] ^= PC64(0x30, r); \ + a[0x4] ^= PC64(0x40, r); \ + a[0x5] ^= PC64(0x50, r); \ + a[0x6] ^= 
PC64(0x60, r); \ + a[0x7] ^= PC64(0x70, r); \ + a[0x8] ^= PC64(0x80, r); \ + a[0x9] ^= PC64(0x90, r); \ + a[0xA] ^= PC64(0xA0, r); \ + a[0xB] ^= PC64(0xB0, r); \ + a[0xC] ^= PC64(0xC0, r); \ + a[0xD] ^= PC64(0xD0, r); \ + a[0xE] ^= PC64(0xE0, r); \ + a[0xF] ^= PC64(0xF0, r); \ + for (u = 0; u < 16; u += 4) { \ + RBTT(u + 0, a, u + 0, (u + 1) & 0xF, \ + (u + 2) & 0xF, (u + 3) & 0xF, (u + 4) & 0xF, \ + (u + 5) & 0xF, (u + 6) & 0xF, (u + 11) & 0xF); \ + RBTT(u + 1, a, u + 1, (u + 2) & 0xF, \ + (u + 3) & 0xF, (u + 4) & 0xF, (u + 5) & 0xF, \ + (u + 6) & 0xF, (u + 7) & 0xF, (u + 12) & 0xF); \ + RBTT(u + 2, a, u + 2, (u + 3) & 0xF, \ + (u + 4) & 0xF, (u + 5) & 0xF, (u + 6) & 0xF, \ + (u + 7) & 0xF, (u + 8) & 0xF, (u + 13) & 0xF); \ + RBTT(u + 3, a, u + 3, (u + 4) & 0xF, \ + (u + 5) & 0xF, (u + 6) & 0xF, (u + 7) & 0xF, \ + (u + 8) & 0xF, (u + 9) & 0xF, (u + 14) & 0xF); \ + } \ + memcpy(a, t, sizeof t); \ + } while (0) + +#define ROUND_BIG_Q(a, r) do { \ + sph_u64 t[16]; \ + size_t u; \ + a[0x0] ^= QC64(0x00, r); \ + a[0x1] ^= QC64(0x10, r); \ + a[0x2] ^= QC64(0x20, r); \ + a[0x3] ^= QC64(0x30, r); \ + a[0x4] ^= QC64(0x40, r); \ + a[0x5] ^= QC64(0x50, r); \ + a[0x6] ^= QC64(0x60, r); \ + a[0x7] ^= QC64(0x70, r); \ + a[0x8] ^= QC64(0x80, r); \ + a[0x9] ^= QC64(0x90, r); \ + a[0xA] ^= QC64(0xA0, r); \ + a[0xB] ^= QC64(0xB0, r); \ + a[0xC] ^= QC64(0xC0, r); \ + a[0xD] ^= QC64(0xD0, r); \ + a[0xE] ^= QC64(0xE0, r); \ + a[0xF] ^= QC64(0xF0, r); \ + for (u = 0; u < 16; u += 4) { \ + RBTT(u + 0, a, (u + 1) & 0xF, (u + 3) & 0xF, \ + (u + 5) & 0xF, (u + 11) & 0xF, (u + 0) & 0xF, \ + (u + 2) & 0xF, (u + 4) & 0xF, (u + 6) & 0xF); \ + RBTT(u + 1, a, (u + 2) & 0xF, (u + 4) & 0xF, \ + (u + 6) & 0xF, (u + 12) & 0xF, (u + 1) & 0xF, \ + (u + 3) & 0xF, (u + 5) & 0xF, (u + 7) & 0xF); \ + RBTT(u + 2, a, (u + 3) & 0xF, (u + 5) & 0xF, \ + (u + 7) & 0xF, (u + 13) & 0xF, (u + 2) & 0xF, \ + (u + 4) & 0xF, (u + 6) & 0xF, (u + 8) & 0xF); \ + RBTT(u + 3, a, (u + 4) & 0xF, (u + 6) & 0xF, \ + (u + 8) & 
0xF, (u + 14) & 0xF, (u + 3) & 0xF, \ + (u + 5) & 0xF, (u + 7) & 0xF, (u + 9) & 0xF); \ + } \ + memcpy(a, t, sizeof t); \ + } while (0) + +#else + +#define ROUND_BIG_P(a, r) do { \ + sph_u64 t[16]; \ + a[0x0] ^= PC64(0x00, r); \ + a[0x1] ^= PC64(0x10, r); \ + a[0x2] ^= PC64(0x20, r); \ + a[0x3] ^= PC64(0x30, r); \ + a[0x4] ^= PC64(0x40, r); \ + a[0x5] ^= PC64(0x50, r); \ + a[0x6] ^= PC64(0x60, r); \ + a[0x7] ^= PC64(0x70, r); \ + a[0x8] ^= PC64(0x80, r); \ + a[0x9] ^= PC64(0x90, r); \ + a[0xA] ^= PC64(0xA0, r); \ + a[0xB] ^= PC64(0xB0, r); \ + a[0xC] ^= PC64(0xC0, r); \ + a[0xD] ^= PC64(0xD0, r); \ + a[0xE] ^= PC64(0xE0, r); \ + a[0xF] ^= PC64(0xF0, r); \ + RBTT(0x0, a, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0xB); \ + RBTT(0x1, a, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xC); \ + RBTT(0x2, a, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0xD); \ + RBTT(0x3, a, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xE); \ + RBTT(0x4, a, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xF); \ + RBTT(0x5, a, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0x0); \ + RBTT(0x6, a, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0x1); \ + RBTT(0x7, a, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0x2); \ + RBTT(0x8, a, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0x3); \ + RBTT(0x9, a, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x4); \ + RBTT(0xA, a, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x0, 0x5); \ + RBTT(0xB, a, 0xB, 0xC, 0xD, 0xE, 0xF, 0x0, 0x1, 0x6); \ + RBTT(0xC, a, 0xC, 0xD, 0xE, 0xF, 0x0, 0x1, 0x2, 0x7); \ + RBTT(0xD, a, 0xD, 0xE, 0xF, 0x0, 0x1, 0x2, 0x3, 0x8); \ + RBTT(0xE, a, 0xE, 0xF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x9); \ + RBTT(0xF, a, 0xF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xA); \ + a[0x0] = t[0x0]; \ + a[0x1] = t[0x1]; \ + a[0x2] = t[0x2]; \ + a[0x3] = t[0x3]; \ + a[0x4] = t[0x4]; \ + a[0x5] = t[0x5]; \ + a[0x6] = t[0x6]; \ + a[0x7] = t[0x7]; \ + a[0x8] = t[0x8]; \ + a[0x9] = t[0x9]; \ + a[0xA] = t[0xA]; \ + a[0xB] = t[0xB]; \ + a[0xC] = t[0xC]; \ + a[0xD] = t[0xD]; \ + a[0xE] = t[0xE]; \ + a[0xF] = t[0xF]; \ + } while (0) + +#define ROUND_BIG_Q(a, r) do { \ + sph_u64 t[16]; \ + 
a[0x0] ^= QC64(0x00, r); \ + a[0x1] ^= QC64(0x10, r); \ + a[0x2] ^= QC64(0x20, r); \ + a[0x3] ^= QC64(0x30, r); \ + a[0x4] ^= QC64(0x40, r); \ + a[0x5] ^= QC64(0x50, r); \ + a[0x6] ^= QC64(0x60, r); \ + a[0x7] ^= QC64(0x70, r); \ + a[0x8] ^= QC64(0x80, r); \ + a[0x9] ^= QC64(0x90, r); \ + a[0xA] ^= QC64(0xA0, r); \ + a[0xB] ^= QC64(0xB0, r); \ + a[0xC] ^= QC64(0xC0, r); \ + a[0xD] ^= QC64(0xD0, r); \ + a[0xE] ^= QC64(0xE0, r); \ + a[0xF] ^= QC64(0xF0, r); \ + RBTT(0x0, a, 0x1, 0x3, 0x5, 0xB, 0x0, 0x2, 0x4, 0x6); \ + RBTT(0x1, a, 0x2, 0x4, 0x6, 0xC, 0x1, 0x3, 0x5, 0x7); \ + RBTT(0x2, a, 0x3, 0x5, 0x7, 0xD, 0x2, 0x4, 0x6, 0x8); \ + RBTT(0x3, a, 0x4, 0x6, 0x8, 0xE, 0x3, 0x5, 0x7, 0x9); \ + RBTT(0x4, a, 0x5, 0x7, 0x9, 0xF, 0x4, 0x6, 0x8, 0xA); \ + RBTT(0x5, a, 0x6, 0x8, 0xA, 0x0, 0x5, 0x7, 0x9, 0xB); \ + RBTT(0x6, a, 0x7, 0x9, 0xB, 0x1, 0x6, 0x8, 0xA, 0xC); \ + RBTT(0x7, a, 0x8, 0xA, 0xC, 0x2, 0x7, 0x9, 0xB, 0xD); \ + RBTT(0x8, a, 0x9, 0xB, 0xD, 0x3, 0x8, 0xA, 0xC, 0xE); \ + RBTT(0x9, a, 0xA, 0xC, 0xE, 0x4, 0x9, 0xB, 0xD, 0xF); \ + RBTT(0xA, a, 0xB, 0xD, 0xF, 0x5, 0xA, 0xC, 0xE, 0x0); \ + RBTT(0xB, a, 0xC, 0xE, 0x0, 0x6, 0xB, 0xD, 0xF, 0x1); \ + RBTT(0xC, a, 0xD, 0xF, 0x1, 0x7, 0xC, 0xE, 0x0, 0x2); \ + RBTT(0xD, a, 0xE, 0x0, 0x2, 0x8, 0xD, 0xF, 0x1, 0x3); \ + RBTT(0xE, a, 0xF, 0x1, 0x3, 0x9, 0xE, 0x0, 0x2, 0x4); \ + RBTT(0xF, a, 0x0, 0x2, 0x4, 0xA, 0xF, 0x1, 0x3, 0x5); \ + a[0x0] = t[0x0]; \ + a[0x1] = t[0x1]; \ + a[0x2] = t[0x2]; \ + a[0x3] = t[0x3]; \ + a[0x4] = t[0x4]; \ + a[0x5] = t[0x5]; \ + a[0x6] = t[0x6]; \ + a[0x7] = t[0x7]; \ + a[0x8] = t[0x8]; \ + a[0x9] = t[0x9]; \ + a[0xA] = t[0xA]; \ + a[0xB] = t[0xB]; \ + a[0xC] = t[0xC]; \ + a[0xD] = t[0xD]; \ + a[0xE] = t[0xE]; \ + a[0xF] = t[0xF]; \ + } while (0) + +#endif + +#define PERM_BIG_P(a) do { \ + int r; \ + for (r = 0; r < 14; r += 2) { \ + ROUND_BIG_P(a, r + 0); \ + ROUND_BIG_P(a, r + 1); \ + } \ + } while (0) + +#define PERM_BIG_Q(a) do { \ + int r; \ + for (r = 0; r < 14; r += 2) { \ + ROUND_BIG_Q(a, r + 
0); \ + ROUND_BIG_Q(a, r + 1); \ + } \ + } while (0) + +/* obsolete +#if SPH_SMALL_FOOTPRINT_GROESTL + +#define COMPRESS_BIG do { \ + sph_u64 g[16], m[16], *ya; \ + const sph_u64 *yc; \ + size_t u; \ + int i; \ + for (u = 0; u < 16; u ++) { \ + m[u] = dec64e_aligned(buf + (u << 3)); \ + g[u] = m[u] ^ H[u]; \ + } \ + ya = g; \ + yc = CP; \ + for (i = 0; i < 2; i ++) { \ + PERM_BIG(ya, yc); \ + ya = m; \ + yc = CQ; \ + } \ + for (u = 0; u < 16; u ++) { \ + H[u] ^= g[u] ^ m[u]; \ + } \ + } while (0) + +#else +*/ + +#define COMPRESS_BIG do { \ + sph_u64 g[16], m[16]; \ + size_t u; \ + for (u = 0; u < 16; u ++) { \ + m[u] = dec64e_aligned(buf + (u << 3)); \ + g[u] = m[u] ^ H[u]; \ + } \ + PERM_BIG_P(g); \ + PERM_BIG_Q(m); \ + for (u = 0; u < 16; u ++) { \ + H[u] ^= g[u] ^ m[u]; \ + } \ + } while (0) + +/* obsolete +#endif +*/ + +#define FINAL_BIG do { \ + sph_u64 x[16]; \ + size_t u; \ + memcpy(x, H, sizeof x); \ + PERM_BIG_P(x); \ + for (u = 0; u < 16; u ++) \ + H[u] ^= x[u]; \ + } while (0) + +#else + +static const sph_u32 T0up[] = { + C32e(0xc632f4a5), C32e(0xf86f9784), C32e(0xee5eb099), C32e(0xf67a8c8d), + C32e(0xffe8170d), C32e(0xd60adcbd), C32e(0xde16c8b1), C32e(0x916dfc54), + C32e(0x6090f050), C32e(0x02070503), C32e(0xce2ee0a9), C32e(0x56d1877d), + C32e(0xe7cc2b19), C32e(0xb513a662), C32e(0x4d7c31e6), C32e(0xec59b59a), + C32e(0x8f40cf45), C32e(0x1fa3bc9d), C32e(0x8949c040), C32e(0xfa689287), + C32e(0xefd03f15), C32e(0xb29426eb), C32e(0x8ece40c9), C32e(0xfbe61d0b), + C32e(0x416e2fec), C32e(0xb31aa967), C32e(0x5f431cfd), C32e(0x456025ea), + C32e(0x23f9dabf), C32e(0x535102f7), C32e(0xe445a196), C32e(0x9b76ed5b), + C32e(0x75285dc2), C32e(0xe1c5241c), C32e(0x3dd4e9ae), C32e(0x4cf2be6a), + C32e(0x6c82ee5a), C32e(0x7ebdc341), C32e(0xf5f30602), C32e(0x8352d14f), + C32e(0x688ce45c), C32e(0x515607f4), C32e(0xd18d5c34), C32e(0xf9e11808), + C32e(0xe24cae93), C32e(0xab3e9573), C32e(0x6297f553), C32e(0x2a6b413f), + C32e(0x081c140c), C32e(0x9563f652), C32e(0x46e9af65), 
C32e(0x9d7fe25e), + C32e(0x30487828), C32e(0x37cff8a1), C32e(0x0a1b110f), C32e(0x2febc4b5), + C32e(0x0e151b09), C32e(0x247e5a36), C32e(0x1badb69b), C32e(0xdf98473d), + C32e(0xcda76a26), C32e(0x4ef5bb69), C32e(0x7f334ccd), C32e(0xea50ba9f), + C32e(0x123f2d1b), C32e(0x1da4b99e), C32e(0x58c49c74), C32e(0x3446722e), + C32e(0x3641772d), C32e(0xdc11cdb2), C32e(0xb49d29ee), C32e(0x5b4d16fb), + C32e(0xa4a501f6), C32e(0x76a1d74d), C32e(0xb714a361), C32e(0x7d3449ce), + C32e(0x52df8d7b), C32e(0xdd9f423e), C32e(0x5ecd9371), C32e(0x13b1a297), + C32e(0xa6a204f5), C32e(0xb901b868), C32e(0x00000000), C32e(0xc1b5742c), + C32e(0x40e0a060), C32e(0xe3c2211f), C32e(0x793a43c8), C32e(0xb69a2ced), + C32e(0xd40dd9be), C32e(0x8d47ca46), C32e(0x671770d9), C32e(0x72afdd4b), + C32e(0x94ed79de), C32e(0x98ff67d4), C32e(0xb09323e8), C32e(0x855bde4a), + C32e(0xbb06bd6b), C32e(0xc5bb7e2a), C32e(0x4f7b34e5), C32e(0xedd73a16), + C32e(0x86d254c5), C32e(0x9af862d7), C32e(0x6699ff55), C32e(0x11b6a794), + C32e(0x8ac04acf), C32e(0xe9d93010), C32e(0x040e0a06), C32e(0xfe669881), + C32e(0xa0ab0bf0), C32e(0x78b4cc44), C32e(0x25f0d5ba), C32e(0x4b753ee3), + C32e(0xa2ac0ef3), C32e(0x5d4419fe), C32e(0x80db5bc0), C32e(0x0580858a), + C32e(0x3fd3ecad), C32e(0x21fedfbc), C32e(0x70a8d848), C32e(0xf1fd0c04), + C32e(0x63197adf), C32e(0x772f58c1), C32e(0xaf309f75), C32e(0x42e7a563), + C32e(0x20705030), C32e(0xe5cb2e1a), C32e(0xfdef120e), C32e(0xbf08b76d), + C32e(0x8155d44c), C32e(0x18243c14), C32e(0x26795f35), C32e(0xc3b2712f), + C32e(0xbe8638e1), C32e(0x35c8fda2), C32e(0x88c74fcc), C32e(0x2e654b39), + C32e(0x936af957), C32e(0x55580df2), C32e(0xfc619d82), C32e(0x7ab3c947), + C32e(0xc827efac), C32e(0xba8832e7), C32e(0x324f7d2b), C32e(0xe642a495), + C32e(0xc03bfba0), C32e(0x19aab398), C32e(0x9ef668d1), C32e(0xa322817f), + C32e(0x44eeaa66), C32e(0x54d6827e), C32e(0x3bdde6ab), C32e(0x0b959e83), + C32e(0x8cc945ca), C32e(0xc7bc7b29), C32e(0x6b056ed3), C32e(0x286c443c), + C32e(0xa72c8b79), C32e(0xbc813de2), C32e(0x1631271d), 
C32e(0xad379a76), + C32e(0xdb964d3b), C32e(0x649efa56), C32e(0x74a6d24e), C32e(0x1436221e), + C32e(0x92e476db), C32e(0x0c121e0a), C32e(0x48fcb46c), C32e(0xb88f37e4), + C32e(0x9f78e75d), C32e(0xbd0fb26e), C32e(0x43692aef), C32e(0xc435f1a6), + C32e(0x39dae3a8), C32e(0x31c6f7a4), C32e(0xd38a5937), C32e(0xf274868b), + C32e(0xd5835632), C32e(0x8b4ec543), C32e(0x6e85eb59), C32e(0xda18c2b7), + C32e(0x018e8f8c), C32e(0xb11dac64), C32e(0x9cf16dd2), C32e(0x49723be0), + C32e(0xd81fc7b4), C32e(0xacb915fa), C32e(0xf3fa0907), C32e(0xcfa06f25), + C32e(0xca20eaaf), C32e(0xf47d898e), C32e(0x476720e9), C32e(0x10382818), + C32e(0x6f0b64d5), C32e(0xf0738388), C32e(0x4afbb16f), C32e(0x5cca9672), + C32e(0x38546c24), C32e(0x575f08f1), C32e(0x732152c7), C32e(0x9764f351), + C32e(0xcbae6523), C32e(0xa125847c), C32e(0xe857bf9c), C32e(0x3e5d6321), + C32e(0x96ea7cdd), C32e(0x611e7fdc), C32e(0x0d9c9186), C32e(0x0f9b9485), + C32e(0xe04bab90), C32e(0x7cbac642), C32e(0x712657c4), C32e(0xcc29e5aa), + C32e(0x90e373d8), C32e(0x06090f05), C32e(0xf7f40301), C32e(0x1c2a3612), + C32e(0xc23cfea3), C32e(0x6a8be15f), C32e(0xaebe10f9), C32e(0x69026bd0), + C32e(0x17bfa891), C32e(0x9971e858), C32e(0x3a536927), C32e(0x27f7d0b9), + C32e(0xd9914838), C32e(0xebde3513), C32e(0x2be5ceb3), C32e(0x22775533), + C32e(0xd204d6bb), C32e(0xa9399070), C32e(0x07878089), C32e(0x33c1f2a7), + C32e(0x2decc1b6), C32e(0x3c5a6622), C32e(0x15b8ad92), C32e(0xc9a96020), + C32e(0x875cdb49), C32e(0xaab01aff), C32e(0x50d88878), C32e(0xa52b8e7a), + C32e(0x03898a8f), C32e(0x594a13f8), C32e(0x09929b80), C32e(0x1a233917), + C32e(0x651075da), C32e(0xd7845331), C32e(0x84d551c6), C32e(0xd003d3b8), + C32e(0x82dc5ec3), C32e(0x29e2cbb0), C32e(0x5ac39977), C32e(0x1e2d3311), + C32e(0x7b3d46cb), C32e(0xa8b71ffc), C32e(0x6d0c61d6), C32e(0x2c624e3a) +}; + +static const sph_u32 T0dn[] = { + C32e(0xf497a5c6), C32e(0x97eb84f8), C32e(0xb0c799ee), C32e(0x8cf78df6), + C32e(0x17e50dff), C32e(0xdcb7bdd6), C32e(0xc8a7b1de), C32e(0xfc395491), + C32e(0xf0c05060), 
C32e(0x05040302), C32e(0xe087a9ce), C32e(0x87ac7d56), + C32e(0x2bd519e7), C32e(0xa67162b5), C32e(0x319ae64d), C32e(0xb5c39aec), + C32e(0xcf05458f), C32e(0xbc3e9d1f), C32e(0xc0094089), C32e(0x92ef87fa), + C32e(0x3fc515ef), C32e(0x267febb2), C32e(0x4007c98e), C32e(0x1ded0bfb), + C32e(0x2f82ec41), C32e(0xa97d67b3), C32e(0x1cbefd5f), C32e(0x258aea45), + C32e(0xda46bf23), C32e(0x02a6f753), C32e(0xa1d396e4), C32e(0xed2d5b9b), + C32e(0x5deac275), C32e(0x24d91ce1), C32e(0xe97aae3d), C32e(0xbe986a4c), + C32e(0xeed85a6c), C32e(0xc3fc417e), C32e(0x06f102f5), C32e(0xd11d4f83), + C32e(0xe4d05c68), C32e(0x07a2f451), C32e(0x5cb934d1), C32e(0x18e908f9), + C32e(0xaedf93e2), C32e(0x954d73ab), C32e(0xf5c45362), C32e(0x41543f2a), + C32e(0x14100c08), C32e(0xf6315295), C32e(0xaf8c6546), C32e(0xe2215e9d), + C32e(0x78602830), C32e(0xf86ea137), C32e(0x11140f0a), C32e(0xc45eb52f), + C32e(0x1b1c090e), C32e(0x5a483624), C32e(0xb6369b1b), C32e(0x47a53ddf), + C32e(0x6a8126cd), C32e(0xbb9c694e), C32e(0x4cfecd7f), C32e(0xbacf9fea), + C32e(0x2d241b12), C32e(0xb93a9e1d), C32e(0x9cb07458), C32e(0x72682e34), + C32e(0x776c2d36), C32e(0xcda3b2dc), C32e(0x2973eeb4), C32e(0x16b6fb5b), + C32e(0x0153f6a4), C32e(0xd7ec4d76), C32e(0xa37561b7), C32e(0x49face7d), + C32e(0x8da47b52), C32e(0x42a13edd), C32e(0x93bc715e), C32e(0xa2269713), + C32e(0x0457f5a6), C32e(0xb86968b9), C32e(0x00000000), C32e(0x74992cc1), + C32e(0xa0806040), C32e(0x21dd1fe3), C32e(0x43f2c879), C32e(0x2c77edb6), + C32e(0xd9b3bed4), C32e(0xca01468d), C32e(0x70ced967), C32e(0xdde44b72), + C32e(0x7933de94), C32e(0x672bd498), C32e(0x237be8b0), C32e(0xde114a85), + C32e(0xbd6d6bbb), C32e(0x7e912ac5), C32e(0x349ee54f), C32e(0x3ac116ed), + C32e(0x5417c586), C32e(0x622fd79a), C32e(0xffcc5566), C32e(0xa7229411), + C32e(0x4a0fcf8a), C32e(0x30c910e9), C32e(0x0a080604), C32e(0x98e781fe), + C32e(0x0b5bf0a0), C32e(0xccf04478), C32e(0xd54aba25), C32e(0x3e96e34b), + C32e(0x0e5ff3a2), C32e(0x19bafe5d), C32e(0x5b1bc080), C32e(0x850a8a05), + C32e(0xec7ead3f), 
C32e(0xdf42bc21), C32e(0xd8e04870), C32e(0x0cf904f1), + C32e(0x7ac6df63), C32e(0x58eec177), C32e(0x9f4575af), C32e(0xa5846342), + C32e(0x50403020), C32e(0x2ed11ae5), C32e(0x12e10efd), C32e(0xb7656dbf), + C32e(0xd4194c81), C32e(0x3c301418), C32e(0x5f4c3526), C32e(0x719d2fc3), + C32e(0x3867e1be), C32e(0xfd6aa235), C32e(0x4f0bcc88), C32e(0x4b5c392e), + C32e(0xf93d5793), C32e(0x0daaf255), C32e(0x9de382fc), C32e(0xc9f4477a), + C32e(0xef8bacc8), C32e(0x326fe7ba), C32e(0x7d642b32), C32e(0xa4d795e6), + C32e(0xfb9ba0c0), C32e(0xb3329819), C32e(0x6827d19e), C32e(0x815d7fa3), + C32e(0xaa886644), C32e(0x82a87e54), C32e(0xe676ab3b), C32e(0x9e16830b), + C32e(0x4503ca8c), C32e(0x7b9529c7), C32e(0x6ed6d36b), C32e(0x44503c28), + C32e(0x8b5579a7), C32e(0x3d63e2bc), C32e(0x272c1d16), C32e(0x9a4176ad), + C32e(0x4dad3bdb), C32e(0xfac85664), C32e(0xd2e84e74), C32e(0x22281e14), + C32e(0x763fdb92), C32e(0x1e180a0c), C32e(0xb4906c48), C32e(0x376be4b8), + C32e(0xe7255d9f), C32e(0xb2616ebd), C32e(0x2a86ef43), C32e(0xf193a6c4), + C32e(0xe372a839), C32e(0xf762a431), C32e(0x59bd37d3), C32e(0x86ff8bf2), + C32e(0x56b132d5), C32e(0xc50d438b), C32e(0xebdc596e), C32e(0xc2afb7da), + C32e(0x8f028c01), C32e(0xac7964b1), C32e(0x6d23d29c), C32e(0x3b92e049), + C32e(0xc7abb4d8), C32e(0x1543faac), C32e(0x09fd07f3), C32e(0x6f8525cf), + C32e(0xea8fafca), C32e(0x89f38ef4), C32e(0x208ee947), C32e(0x28201810), + C32e(0x64ded56f), C32e(0x83fb88f0), C32e(0xb1946f4a), C32e(0x96b8725c), + C32e(0x6c702438), C32e(0x08aef157), C32e(0x52e6c773), C32e(0xf3355197), + C32e(0x658d23cb), C32e(0x84597ca1), C32e(0xbfcb9ce8), C32e(0x637c213e), + C32e(0x7c37dd96), C32e(0x7fc2dc61), C32e(0x911a860d), C32e(0x941e850f), + C32e(0xabdb90e0), C32e(0xc6f8427c), C32e(0x57e2c471), C32e(0xe583aacc), + C32e(0x733bd890), C32e(0x0f0c0506), C32e(0x03f501f7), C32e(0x3638121c), + C32e(0xfe9fa3c2), C32e(0xe1d45f6a), C32e(0x1047f9ae), C32e(0x6bd2d069), + C32e(0xa82e9117), C32e(0xe8295899), C32e(0x6974273a), C32e(0xd04eb927), + C32e(0x48a938d9), 
C32e(0x35cd13eb), C32e(0xce56b32b), C32e(0x55443322), + C32e(0xd6bfbbd2), C32e(0x904970a9), C32e(0x800e8907), C32e(0xf266a733), + C32e(0xc15ab62d), C32e(0x6678223c), C32e(0xad2a9215), C32e(0x608920c9), + C32e(0xdb154987), C32e(0x1a4fffaa), C32e(0x88a07850), C32e(0x8e517aa5), + C32e(0x8a068f03), C32e(0x13b2f859), C32e(0x9b128009), C32e(0x3934171a), + C32e(0x75cada65), C32e(0x53b531d7), C32e(0x5113c684), C32e(0xd3bbb8d0), + C32e(0x5e1fc382), C32e(0xcb52b029), C32e(0x99b4775a), C32e(0x333c111e), + C32e(0x46f6cb7b), C32e(0x1f4bfca8), C32e(0x61dad66d), C32e(0x4e583a2c) +}; + +static const sph_u32 T1up[] = { + C32e(0xc6c632f4), C32e(0xf8f86f97), C32e(0xeeee5eb0), C32e(0xf6f67a8c), + C32e(0xffffe817), C32e(0xd6d60adc), C32e(0xdede16c8), C32e(0x91916dfc), + C32e(0x606090f0), C32e(0x02020705), C32e(0xcece2ee0), C32e(0x5656d187), + C32e(0xe7e7cc2b), C32e(0xb5b513a6), C32e(0x4d4d7c31), C32e(0xecec59b5), + C32e(0x8f8f40cf), C32e(0x1f1fa3bc), C32e(0x898949c0), C32e(0xfafa6892), + C32e(0xefefd03f), C32e(0xb2b29426), C32e(0x8e8ece40), C32e(0xfbfbe61d), + C32e(0x41416e2f), C32e(0xb3b31aa9), C32e(0x5f5f431c), C32e(0x45456025), + C32e(0x2323f9da), C32e(0x53535102), C32e(0xe4e445a1), C32e(0x9b9b76ed), + C32e(0x7575285d), C32e(0xe1e1c524), C32e(0x3d3dd4e9), C32e(0x4c4cf2be), + C32e(0x6c6c82ee), C32e(0x7e7ebdc3), C32e(0xf5f5f306), C32e(0x838352d1), + C32e(0x68688ce4), C32e(0x51515607), C32e(0xd1d18d5c), C32e(0xf9f9e118), + C32e(0xe2e24cae), C32e(0xabab3e95), C32e(0x626297f5), C32e(0x2a2a6b41), + C32e(0x08081c14), C32e(0x959563f6), C32e(0x4646e9af), C32e(0x9d9d7fe2), + C32e(0x30304878), C32e(0x3737cff8), C32e(0x0a0a1b11), C32e(0x2f2febc4), + C32e(0x0e0e151b), C32e(0x24247e5a), C32e(0x1b1badb6), C32e(0xdfdf9847), + C32e(0xcdcda76a), C32e(0x4e4ef5bb), C32e(0x7f7f334c), C32e(0xeaea50ba), + C32e(0x12123f2d), C32e(0x1d1da4b9), C32e(0x5858c49c), C32e(0x34344672), + C32e(0x36364177), C32e(0xdcdc11cd), C32e(0xb4b49d29), C32e(0x5b5b4d16), + C32e(0xa4a4a501), C32e(0x7676a1d7), C32e(0xb7b714a3), 
C32e(0x7d7d3449), + C32e(0x5252df8d), C32e(0xdddd9f42), C32e(0x5e5ecd93), C32e(0x1313b1a2), + C32e(0xa6a6a204), C32e(0xb9b901b8), C32e(0x00000000), C32e(0xc1c1b574), + C32e(0x4040e0a0), C32e(0xe3e3c221), C32e(0x79793a43), C32e(0xb6b69a2c), + C32e(0xd4d40dd9), C32e(0x8d8d47ca), C32e(0x67671770), C32e(0x7272afdd), + C32e(0x9494ed79), C32e(0x9898ff67), C32e(0xb0b09323), C32e(0x85855bde), + C32e(0xbbbb06bd), C32e(0xc5c5bb7e), C32e(0x4f4f7b34), C32e(0xededd73a), + C32e(0x8686d254), C32e(0x9a9af862), C32e(0x666699ff), C32e(0x1111b6a7), + C32e(0x8a8ac04a), C32e(0xe9e9d930), C32e(0x04040e0a), C32e(0xfefe6698), + C32e(0xa0a0ab0b), C32e(0x7878b4cc), C32e(0x2525f0d5), C32e(0x4b4b753e), + C32e(0xa2a2ac0e), C32e(0x5d5d4419), C32e(0x8080db5b), C32e(0x05058085), + C32e(0x3f3fd3ec), C32e(0x2121fedf), C32e(0x7070a8d8), C32e(0xf1f1fd0c), + C32e(0x6363197a), C32e(0x77772f58), C32e(0xafaf309f), C32e(0x4242e7a5), + C32e(0x20207050), C32e(0xe5e5cb2e), C32e(0xfdfdef12), C32e(0xbfbf08b7), + C32e(0x818155d4), C32e(0x1818243c), C32e(0x2626795f), C32e(0xc3c3b271), + C32e(0xbebe8638), C32e(0x3535c8fd), C32e(0x8888c74f), C32e(0x2e2e654b), + C32e(0x93936af9), C32e(0x5555580d), C32e(0xfcfc619d), C32e(0x7a7ab3c9), + C32e(0xc8c827ef), C32e(0xbaba8832), C32e(0x32324f7d), C32e(0xe6e642a4), + C32e(0xc0c03bfb), C32e(0x1919aab3), C32e(0x9e9ef668), C32e(0xa3a32281), + C32e(0x4444eeaa), C32e(0x5454d682), C32e(0x3b3bdde6), C32e(0x0b0b959e), + C32e(0x8c8cc945), C32e(0xc7c7bc7b), C32e(0x6b6b056e), C32e(0x28286c44), + C32e(0xa7a72c8b), C32e(0xbcbc813d), C32e(0x16163127), C32e(0xadad379a), + C32e(0xdbdb964d), C32e(0x64649efa), C32e(0x7474a6d2), C32e(0x14143622), + C32e(0x9292e476), C32e(0x0c0c121e), C32e(0x4848fcb4), C32e(0xb8b88f37), + C32e(0x9f9f78e7), C32e(0xbdbd0fb2), C32e(0x4343692a), C32e(0xc4c435f1), + C32e(0x3939dae3), C32e(0x3131c6f7), C32e(0xd3d38a59), C32e(0xf2f27486), + C32e(0xd5d58356), C32e(0x8b8b4ec5), C32e(0x6e6e85eb), C32e(0xdada18c2), + C32e(0x01018e8f), C32e(0xb1b11dac), C32e(0x9c9cf16d), 
C32e(0x4949723b), + C32e(0xd8d81fc7), C32e(0xacacb915), C32e(0xf3f3fa09), C32e(0xcfcfa06f), + C32e(0xcaca20ea), C32e(0xf4f47d89), C32e(0x47476720), C32e(0x10103828), + C32e(0x6f6f0b64), C32e(0xf0f07383), C32e(0x4a4afbb1), C32e(0x5c5cca96), + C32e(0x3838546c), C32e(0x57575f08), C32e(0x73732152), C32e(0x979764f3), + C32e(0xcbcbae65), C32e(0xa1a12584), C32e(0xe8e857bf), C32e(0x3e3e5d63), + C32e(0x9696ea7c), C32e(0x61611e7f), C32e(0x0d0d9c91), C32e(0x0f0f9b94), + C32e(0xe0e04bab), C32e(0x7c7cbac6), C32e(0x71712657), C32e(0xcccc29e5), + C32e(0x9090e373), C32e(0x0606090f), C32e(0xf7f7f403), C32e(0x1c1c2a36), + C32e(0xc2c23cfe), C32e(0x6a6a8be1), C32e(0xaeaebe10), C32e(0x6969026b), + C32e(0x1717bfa8), C32e(0x999971e8), C32e(0x3a3a5369), C32e(0x2727f7d0), + C32e(0xd9d99148), C32e(0xebebde35), C32e(0x2b2be5ce), C32e(0x22227755), + C32e(0xd2d204d6), C32e(0xa9a93990), C32e(0x07078780), C32e(0x3333c1f2), + C32e(0x2d2decc1), C32e(0x3c3c5a66), C32e(0x1515b8ad), C32e(0xc9c9a960), + C32e(0x87875cdb), C32e(0xaaaab01a), C32e(0x5050d888), C32e(0xa5a52b8e), + C32e(0x0303898a), C32e(0x59594a13), C32e(0x0909929b), C32e(0x1a1a2339), + C32e(0x65651075), C32e(0xd7d78453), C32e(0x8484d551), C32e(0xd0d003d3), + C32e(0x8282dc5e), C32e(0x2929e2cb), C32e(0x5a5ac399), C32e(0x1e1e2d33), + C32e(0x7b7b3d46), C32e(0xa8a8b71f), C32e(0x6d6d0c61), C32e(0x2c2c624e) +}; + +static const sph_u32 T1dn[] = { + C32e(0xa5f497a5), C32e(0x8497eb84), C32e(0x99b0c799), C32e(0x8d8cf78d), + C32e(0x0d17e50d), C32e(0xbddcb7bd), C32e(0xb1c8a7b1), C32e(0x54fc3954), + C32e(0x50f0c050), C32e(0x03050403), C32e(0xa9e087a9), C32e(0x7d87ac7d), + C32e(0x192bd519), C32e(0x62a67162), C32e(0xe6319ae6), C32e(0x9ab5c39a), + C32e(0x45cf0545), C32e(0x9dbc3e9d), C32e(0x40c00940), C32e(0x8792ef87), + C32e(0x153fc515), C32e(0xeb267feb), C32e(0xc94007c9), C32e(0x0b1ded0b), + C32e(0xec2f82ec), C32e(0x67a97d67), C32e(0xfd1cbefd), C32e(0xea258aea), + C32e(0xbfda46bf), C32e(0xf702a6f7), C32e(0x96a1d396), C32e(0x5bed2d5b), + C32e(0xc25deac2), 
C32e(0x1c24d91c), C32e(0xaee97aae), C32e(0x6abe986a), + C32e(0x5aeed85a), C32e(0x41c3fc41), C32e(0x0206f102), C32e(0x4fd11d4f), + C32e(0x5ce4d05c), C32e(0xf407a2f4), C32e(0x345cb934), C32e(0x0818e908), + C32e(0x93aedf93), C32e(0x73954d73), C32e(0x53f5c453), C32e(0x3f41543f), + C32e(0x0c14100c), C32e(0x52f63152), C32e(0x65af8c65), C32e(0x5ee2215e), + C32e(0x28786028), C32e(0xa1f86ea1), C32e(0x0f11140f), C32e(0xb5c45eb5), + C32e(0x091b1c09), C32e(0x365a4836), C32e(0x9bb6369b), C32e(0x3d47a53d), + C32e(0x266a8126), C32e(0x69bb9c69), C32e(0xcd4cfecd), C32e(0x9fbacf9f), + C32e(0x1b2d241b), C32e(0x9eb93a9e), C32e(0x749cb074), C32e(0x2e72682e), + C32e(0x2d776c2d), C32e(0xb2cda3b2), C32e(0xee2973ee), C32e(0xfb16b6fb), + C32e(0xf60153f6), C32e(0x4dd7ec4d), C32e(0x61a37561), C32e(0xce49face), + C32e(0x7b8da47b), C32e(0x3e42a13e), C32e(0x7193bc71), C32e(0x97a22697), + C32e(0xf50457f5), C32e(0x68b86968), C32e(0x00000000), C32e(0x2c74992c), + C32e(0x60a08060), C32e(0x1f21dd1f), C32e(0xc843f2c8), C32e(0xed2c77ed), + C32e(0xbed9b3be), C32e(0x46ca0146), C32e(0xd970ced9), C32e(0x4bdde44b), + C32e(0xde7933de), C32e(0xd4672bd4), C32e(0xe8237be8), C32e(0x4ade114a), + C32e(0x6bbd6d6b), C32e(0x2a7e912a), C32e(0xe5349ee5), C32e(0x163ac116), + C32e(0xc55417c5), C32e(0xd7622fd7), C32e(0x55ffcc55), C32e(0x94a72294), + C32e(0xcf4a0fcf), C32e(0x1030c910), C32e(0x060a0806), C32e(0x8198e781), + C32e(0xf00b5bf0), C32e(0x44ccf044), C32e(0xbad54aba), C32e(0xe33e96e3), + C32e(0xf30e5ff3), C32e(0xfe19bafe), C32e(0xc05b1bc0), C32e(0x8a850a8a), + C32e(0xadec7ead), C32e(0xbcdf42bc), C32e(0x48d8e048), C32e(0x040cf904), + C32e(0xdf7ac6df), C32e(0xc158eec1), C32e(0x759f4575), C32e(0x63a58463), + C32e(0x30504030), C32e(0x1a2ed11a), C32e(0x0e12e10e), C32e(0x6db7656d), + C32e(0x4cd4194c), C32e(0x143c3014), C32e(0x355f4c35), C32e(0x2f719d2f), + C32e(0xe13867e1), C32e(0xa2fd6aa2), C32e(0xcc4f0bcc), C32e(0x394b5c39), + C32e(0x57f93d57), C32e(0xf20daaf2), C32e(0x829de382), C32e(0x47c9f447), + C32e(0xacef8bac), 
C32e(0xe7326fe7), C32e(0x2b7d642b), C32e(0x95a4d795), + C32e(0xa0fb9ba0), C32e(0x98b33298), C32e(0xd16827d1), C32e(0x7f815d7f), + C32e(0x66aa8866), C32e(0x7e82a87e), C32e(0xabe676ab), C32e(0x839e1683), + C32e(0xca4503ca), C32e(0x297b9529), C32e(0xd36ed6d3), C32e(0x3c44503c), + C32e(0x798b5579), C32e(0xe23d63e2), C32e(0x1d272c1d), C32e(0x769a4176), + C32e(0x3b4dad3b), C32e(0x56fac856), C32e(0x4ed2e84e), C32e(0x1e22281e), + C32e(0xdb763fdb), C32e(0x0a1e180a), C32e(0x6cb4906c), C32e(0xe4376be4), + C32e(0x5de7255d), C32e(0x6eb2616e), C32e(0xef2a86ef), C32e(0xa6f193a6), + C32e(0xa8e372a8), C32e(0xa4f762a4), C32e(0x3759bd37), C32e(0x8b86ff8b), + C32e(0x3256b132), C32e(0x43c50d43), C32e(0x59ebdc59), C32e(0xb7c2afb7), + C32e(0x8c8f028c), C32e(0x64ac7964), C32e(0xd26d23d2), C32e(0xe03b92e0), + C32e(0xb4c7abb4), C32e(0xfa1543fa), C32e(0x0709fd07), C32e(0x256f8525), + C32e(0xafea8faf), C32e(0x8e89f38e), C32e(0xe9208ee9), C32e(0x18282018), + C32e(0xd564ded5), C32e(0x8883fb88), C32e(0x6fb1946f), C32e(0x7296b872), + C32e(0x246c7024), C32e(0xf108aef1), C32e(0xc752e6c7), C32e(0x51f33551), + C32e(0x23658d23), C32e(0x7c84597c), C32e(0x9cbfcb9c), C32e(0x21637c21), + C32e(0xdd7c37dd), C32e(0xdc7fc2dc), C32e(0x86911a86), C32e(0x85941e85), + C32e(0x90abdb90), C32e(0x42c6f842), C32e(0xc457e2c4), C32e(0xaae583aa), + C32e(0xd8733bd8), C32e(0x050f0c05), C32e(0x0103f501), C32e(0x12363812), + C32e(0xa3fe9fa3), C32e(0x5fe1d45f), C32e(0xf91047f9), C32e(0xd06bd2d0), + C32e(0x91a82e91), C32e(0x58e82958), C32e(0x27697427), C32e(0xb9d04eb9), + C32e(0x3848a938), C32e(0x1335cd13), C32e(0xb3ce56b3), C32e(0x33554433), + C32e(0xbbd6bfbb), C32e(0x70904970), C32e(0x89800e89), C32e(0xa7f266a7), + C32e(0xb6c15ab6), C32e(0x22667822), C32e(0x92ad2a92), C32e(0x20608920), + C32e(0x49db1549), C32e(0xff1a4fff), C32e(0x7888a078), C32e(0x7a8e517a), + C32e(0x8f8a068f), C32e(0xf813b2f8), C32e(0x809b1280), C32e(0x17393417), + C32e(0xda75cada), C32e(0x3153b531), C32e(0xc65113c6), C32e(0xb8d3bbb8), + C32e(0xc35e1fc3), 
C32e(0xb0cb52b0), C32e(0x7799b477), C32e(0x11333c11), + C32e(0xcb46f6cb), C32e(0xfc1f4bfc), C32e(0xd661dad6), C32e(0x3a4e583a) +}; + +static const sph_u32 T2up[] = { + C32e(0xa5c6c632), C32e(0x84f8f86f), C32e(0x99eeee5e), C32e(0x8df6f67a), + C32e(0x0dffffe8), C32e(0xbdd6d60a), C32e(0xb1dede16), C32e(0x5491916d), + C32e(0x50606090), C32e(0x03020207), C32e(0xa9cece2e), C32e(0x7d5656d1), + C32e(0x19e7e7cc), C32e(0x62b5b513), C32e(0xe64d4d7c), C32e(0x9aecec59), + C32e(0x458f8f40), C32e(0x9d1f1fa3), C32e(0x40898949), C32e(0x87fafa68), + C32e(0x15efefd0), C32e(0xebb2b294), C32e(0xc98e8ece), C32e(0x0bfbfbe6), + C32e(0xec41416e), C32e(0x67b3b31a), C32e(0xfd5f5f43), C32e(0xea454560), + C32e(0xbf2323f9), C32e(0xf7535351), C32e(0x96e4e445), C32e(0x5b9b9b76), + C32e(0xc2757528), C32e(0x1ce1e1c5), C32e(0xae3d3dd4), C32e(0x6a4c4cf2), + C32e(0x5a6c6c82), C32e(0x417e7ebd), C32e(0x02f5f5f3), C32e(0x4f838352), + C32e(0x5c68688c), C32e(0xf4515156), C32e(0x34d1d18d), C32e(0x08f9f9e1), + C32e(0x93e2e24c), C32e(0x73abab3e), C32e(0x53626297), C32e(0x3f2a2a6b), + C32e(0x0c08081c), C32e(0x52959563), C32e(0x654646e9), C32e(0x5e9d9d7f), + C32e(0x28303048), C32e(0xa13737cf), C32e(0x0f0a0a1b), C32e(0xb52f2feb), + C32e(0x090e0e15), C32e(0x3624247e), C32e(0x9b1b1bad), C32e(0x3ddfdf98), + C32e(0x26cdcda7), C32e(0x694e4ef5), C32e(0xcd7f7f33), C32e(0x9feaea50), + C32e(0x1b12123f), C32e(0x9e1d1da4), C32e(0x745858c4), C32e(0x2e343446), + C32e(0x2d363641), C32e(0xb2dcdc11), C32e(0xeeb4b49d), C32e(0xfb5b5b4d), + C32e(0xf6a4a4a5), C32e(0x4d7676a1), C32e(0x61b7b714), C32e(0xce7d7d34), + C32e(0x7b5252df), C32e(0x3edddd9f), C32e(0x715e5ecd), C32e(0x971313b1), + C32e(0xf5a6a6a2), C32e(0x68b9b901), C32e(0x00000000), C32e(0x2cc1c1b5), + C32e(0x604040e0), C32e(0x1fe3e3c2), C32e(0xc879793a), C32e(0xedb6b69a), + C32e(0xbed4d40d), C32e(0x468d8d47), C32e(0xd9676717), C32e(0x4b7272af), + C32e(0xde9494ed), C32e(0xd49898ff), C32e(0xe8b0b093), C32e(0x4a85855b), + C32e(0x6bbbbb06), C32e(0x2ac5c5bb), C32e(0xe54f4f7b), 
C32e(0x16ededd7), + C32e(0xc58686d2), C32e(0xd79a9af8), C32e(0x55666699), C32e(0x941111b6), + C32e(0xcf8a8ac0), C32e(0x10e9e9d9), C32e(0x0604040e), C32e(0x81fefe66), + C32e(0xf0a0a0ab), C32e(0x447878b4), C32e(0xba2525f0), C32e(0xe34b4b75), + C32e(0xf3a2a2ac), C32e(0xfe5d5d44), C32e(0xc08080db), C32e(0x8a050580), + C32e(0xad3f3fd3), C32e(0xbc2121fe), C32e(0x487070a8), C32e(0x04f1f1fd), + C32e(0xdf636319), C32e(0xc177772f), C32e(0x75afaf30), C32e(0x634242e7), + C32e(0x30202070), C32e(0x1ae5e5cb), C32e(0x0efdfdef), C32e(0x6dbfbf08), + C32e(0x4c818155), C32e(0x14181824), C32e(0x35262679), C32e(0x2fc3c3b2), + C32e(0xe1bebe86), C32e(0xa23535c8), C32e(0xcc8888c7), C32e(0x392e2e65), + C32e(0x5793936a), C32e(0xf2555558), C32e(0x82fcfc61), C32e(0x477a7ab3), + C32e(0xacc8c827), C32e(0xe7baba88), C32e(0x2b32324f), C32e(0x95e6e642), + C32e(0xa0c0c03b), C32e(0x981919aa), C32e(0xd19e9ef6), C32e(0x7fa3a322), + C32e(0x664444ee), C32e(0x7e5454d6), C32e(0xab3b3bdd), C32e(0x830b0b95), + C32e(0xca8c8cc9), C32e(0x29c7c7bc), C32e(0xd36b6b05), C32e(0x3c28286c), + C32e(0x79a7a72c), C32e(0xe2bcbc81), C32e(0x1d161631), C32e(0x76adad37), + C32e(0x3bdbdb96), C32e(0x5664649e), C32e(0x4e7474a6), C32e(0x1e141436), + C32e(0xdb9292e4), C32e(0x0a0c0c12), C32e(0x6c4848fc), C32e(0xe4b8b88f), + C32e(0x5d9f9f78), C32e(0x6ebdbd0f), C32e(0xef434369), C32e(0xa6c4c435), + C32e(0xa83939da), C32e(0xa43131c6), C32e(0x37d3d38a), C32e(0x8bf2f274), + C32e(0x32d5d583), C32e(0x438b8b4e), C32e(0x596e6e85), C32e(0xb7dada18), + C32e(0x8c01018e), C32e(0x64b1b11d), C32e(0xd29c9cf1), C32e(0xe0494972), + C32e(0xb4d8d81f), C32e(0xfaacacb9), C32e(0x07f3f3fa), C32e(0x25cfcfa0), + C32e(0xafcaca20), C32e(0x8ef4f47d), C32e(0xe9474767), C32e(0x18101038), + C32e(0xd56f6f0b), C32e(0x88f0f073), C32e(0x6f4a4afb), C32e(0x725c5cca), + C32e(0x24383854), C32e(0xf157575f), C32e(0xc7737321), C32e(0x51979764), + C32e(0x23cbcbae), C32e(0x7ca1a125), C32e(0x9ce8e857), C32e(0x213e3e5d), + C32e(0xdd9696ea), C32e(0xdc61611e), C32e(0x860d0d9c), 
C32e(0x850f0f9b), + C32e(0x90e0e04b), C32e(0x427c7cba), C32e(0xc4717126), C32e(0xaacccc29), + C32e(0xd89090e3), C32e(0x05060609), C32e(0x01f7f7f4), C32e(0x121c1c2a), + C32e(0xa3c2c23c), C32e(0x5f6a6a8b), C32e(0xf9aeaebe), C32e(0xd0696902), + C32e(0x911717bf), C32e(0x58999971), C32e(0x273a3a53), C32e(0xb92727f7), + C32e(0x38d9d991), C32e(0x13ebebde), C32e(0xb32b2be5), C32e(0x33222277), + C32e(0xbbd2d204), C32e(0x70a9a939), C32e(0x89070787), C32e(0xa73333c1), + C32e(0xb62d2dec), C32e(0x223c3c5a), C32e(0x921515b8), C32e(0x20c9c9a9), + C32e(0x4987875c), C32e(0xffaaaab0), C32e(0x785050d8), C32e(0x7aa5a52b), + C32e(0x8f030389), C32e(0xf859594a), C32e(0x80090992), C32e(0x171a1a23), + C32e(0xda656510), C32e(0x31d7d784), C32e(0xc68484d5), C32e(0xb8d0d003), + C32e(0xc38282dc), C32e(0xb02929e2), C32e(0x775a5ac3), C32e(0x111e1e2d), + C32e(0xcb7b7b3d), C32e(0xfca8a8b7), C32e(0xd66d6d0c), C32e(0x3a2c2c62) +}; + +static const sph_u32 T2dn[] = { + C32e(0xf4a5f497), C32e(0x978497eb), C32e(0xb099b0c7), C32e(0x8c8d8cf7), + C32e(0x170d17e5), C32e(0xdcbddcb7), C32e(0xc8b1c8a7), C32e(0xfc54fc39), + C32e(0xf050f0c0), C32e(0x05030504), C32e(0xe0a9e087), C32e(0x877d87ac), + C32e(0x2b192bd5), C32e(0xa662a671), C32e(0x31e6319a), C32e(0xb59ab5c3), + C32e(0xcf45cf05), C32e(0xbc9dbc3e), C32e(0xc040c009), C32e(0x928792ef), + C32e(0x3f153fc5), C32e(0x26eb267f), C32e(0x40c94007), C32e(0x1d0b1ded), + C32e(0x2fec2f82), C32e(0xa967a97d), C32e(0x1cfd1cbe), C32e(0x25ea258a), + C32e(0xdabfda46), C32e(0x02f702a6), C32e(0xa196a1d3), C32e(0xed5bed2d), + C32e(0x5dc25dea), C32e(0x241c24d9), C32e(0xe9aee97a), C32e(0xbe6abe98), + C32e(0xee5aeed8), C32e(0xc341c3fc), C32e(0x060206f1), C32e(0xd14fd11d), + C32e(0xe45ce4d0), C32e(0x07f407a2), C32e(0x5c345cb9), C32e(0x180818e9), + C32e(0xae93aedf), C32e(0x9573954d), C32e(0xf553f5c4), C32e(0x413f4154), + C32e(0x140c1410), C32e(0xf652f631), C32e(0xaf65af8c), C32e(0xe25ee221), + C32e(0x78287860), C32e(0xf8a1f86e), C32e(0x110f1114), C32e(0xc4b5c45e), + C32e(0x1b091b1c), 
C32e(0x5a365a48), C32e(0xb69bb636), C32e(0x473d47a5), + C32e(0x6a266a81), C32e(0xbb69bb9c), C32e(0x4ccd4cfe), C32e(0xba9fbacf), + C32e(0x2d1b2d24), C32e(0xb99eb93a), C32e(0x9c749cb0), C32e(0x722e7268), + C32e(0x772d776c), C32e(0xcdb2cda3), C32e(0x29ee2973), C32e(0x16fb16b6), + C32e(0x01f60153), C32e(0xd74dd7ec), C32e(0xa361a375), C32e(0x49ce49fa), + C32e(0x8d7b8da4), C32e(0x423e42a1), C32e(0x937193bc), C32e(0xa297a226), + C32e(0x04f50457), C32e(0xb868b869), C32e(0x00000000), C32e(0x742c7499), + C32e(0xa060a080), C32e(0x211f21dd), C32e(0x43c843f2), C32e(0x2ced2c77), + C32e(0xd9bed9b3), C32e(0xca46ca01), C32e(0x70d970ce), C32e(0xdd4bdde4), + C32e(0x79de7933), C32e(0x67d4672b), C32e(0x23e8237b), C32e(0xde4ade11), + C32e(0xbd6bbd6d), C32e(0x7e2a7e91), C32e(0x34e5349e), C32e(0x3a163ac1), + C32e(0x54c55417), C32e(0x62d7622f), C32e(0xff55ffcc), C32e(0xa794a722), + C32e(0x4acf4a0f), C32e(0x301030c9), C32e(0x0a060a08), C32e(0x988198e7), + C32e(0x0bf00b5b), C32e(0xcc44ccf0), C32e(0xd5bad54a), C32e(0x3ee33e96), + C32e(0x0ef30e5f), C32e(0x19fe19ba), C32e(0x5bc05b1b), C32e(0x858a850a), + C32e(0xecadec7e), C32e(0xdfbcdf42), C32e(0xd848d8e0), C32e(0x0c040cf9), + C32e(0x7adf7ac6), C32e(0x58c158ee), C32e(0x9f759f45), C32e(0xa563a584), + C32e(0x50305040), C32e(0x2e1a2ed1), C32e(0x120e12e1), C32e(0xb76db765), + C32e(0xd44cd419), C32e(0x3c143c30), C32e(0x5f355f4c), C32e(0x712f719d), + C32e(0x38e13867), C32e(0xfda2fd6a), C32e(0x4fcc4f0b), C32e(0x4b394b5c), + C32e(0xf957f93d), C32e(0x0df20daa), C32e(0x9d829de3), C32e(0xc947c9f4), + C32e(0xefacef8b), C32e(0x32e7326f), C32e(0x7d2b7d64), C32e(0xa495a4d7), + C32e(0xfba0fb9b), C32e(0xb398b332), C32e(0x68d16827), C32e(0x817f815d), + C32e(0xaa66aa88), C32e(0x827e82a8), C32e(0xe6abe676), C32e(0x9e839e16), + C32e(0x45ca4503), C32e(0x7b297b95), C32e(0x6ed36ed6), C32e(0x443c4450), + C32e(0x8b798b55), C32e(0x3de23d63), C32e(0x271d272c), C32e(0x9a769a41), + C32e(0x4d3b4dad), C32e(0xfa56fac8), C32e(0xd24ed2e8), C32e(0x221e2228), + C32e(0x76db763f), 
C32e(0x1e0a1e18), C32e(0xb46cb490), C32e(0x37e4376b), + C32e(0xe75de725), C32e(0xb26eb261), C32e(0x2aef2a86), C32e(0xf1a6f193), + C32e(0xe3a8e372), C32e(0xf7a4f762), C32e(0x593759bd), C32e(0x868b86ff), + C32e(0x563256b1), C32e(0xc543c50d), C32e(0xeb59ebdc), C32e(0xc2b7c2af), + C32e(0x8f8c8f02), C32e(0xac64ac79), C32e(0x6dd26d23), C32e(0x3be03b92), + C32e(0xc7b4c7ab), C32e(0x15fa1543), C32e(0x090709fd), C32e(0x6f256f85), + C32e(0xeaafea8f), C32e(0x898e89f3), C32e(0x20e9208e), C32e(0x28182820), + C32e(0x64d564de), C32e(0x838883fb), C32e(0xb16fb194), C32e(0x967296b8), + C32e(0x6c246c70), C32e(0x08f108ae), C32e(0x52c752e6), C32e(0xf351f335), + C32e(0x6523658d), C32e(0x847c8459), C32e(0xbf9cbfcb), C32e(0x6321637c), + C32e(0x7cdd7c37), C32e(0x7fdc7fc2), C32e(0x9186911a), C32e(0x9485941e), + C32e(0xab90abdb), C32e(0xc642c6f8), C32e(0x57c457e2), C32e(0xe5aae583), + C32e(0x73d8733b), C32e(0x0f050f0c), C32e(0x030103f5), C32e(0x36123638), + C32e(0xfea3fe9f), C32e(0xe15fe1d4), C32e(0x10f91047), C32e(0x6bd06bd2), + C32e(0xa891a82e), C32e(0xe858e829), C32e(0x69276974), C32e(0xd0b9d04e), + C32e(0x483848a9), C32e(0x351335cd), C32e(0xceb3ce56), C32e(0x55335544), + C32e(0xd6bbd6bf), C32e(0x90709049), C32e(0x8089800e), C32e(0xf2a7f266), + C32e(0xc1b6c15a), C32e(0x66226678), C32e(0xad92ad2a), C32e(0x60206089), + C32e(0xdb49db15), C32e(0x1aff1a4f), C32e(0x887888a0), C32e(0x8e7a8e51), + C32e(0x8a8f8a06), C32e(0x13f813b2), C32e(0x9b809b12), C32e(0x39173934), + C32e(0x75da75ca), C32e(0x533153b5), C32e(0x51c65113), C32e(0xd3b8d3bb), + C32e(0x5ec35e1f), C32e(0xcbb0cb52), C32e(0x997799b4), C32e(0x3311333c), + C32e(0x46cb46f6), C32e(0x1ffc1f4b), C32e(0x61d661da), C32e(0x4e3a4e58) +}; + +static const sph_u32 T3up[] = { + C32e(0x97a5c6c6), C32e(0xeb84f8f8), C32e(0xc799eeee), C32e(0xf78df6f6), + C32e(0xe50dffff), C32e(0xb7bdd6d6), C32e(0xa7b1dede), C32e(0x39549191), + C32e(0xc0506060), C32e(0x04030202), C32e(0x87a9cece), C32e(0xac7d5656), + C32e(0xd519e7e7), C32e(0x7162b5b5), C32e(0x9ae64d4d), 
C32e(0xc39aecec), + C32e(0x05458f8f), C32e(0x3e9d1f1f), C32e(0x09408989), C32e(0xef87fafa), + C32e(0xc515efef), C32e(0x7febb2b2), C32e(0x07c98e8e), C32e(0xed0bfbfb), + C32e(0x82ec4141), C32e(0x7d67b3b3), C32e(0xbefd5f5f), C32e(0x8aea4545), + C32e(0x46bf2323), C32e(0xa6f75353), C32e(0xd396e4e4), C32e(0x2d5b9b9b), + C32e(0xeac27575), C32e(0xd91ce1e1), C32e(0x7aae3d3d), C32e(0x986a4c4c), + C32e(0xd85a6c6c), C32e(0xfc417e7e), C32e(0xf102f5f5), C32e(0x1d4f8383), + C32e(0xd05c6868), C32e(0xa2f45151), C32e(0xb934d1d1), C32e(0xe908f9f9), + C32e(0xdf93e2e2), C32e(0x4d73abab), C32e(0xc4536262), C32e(0x543f2a2a), + C32e(0x100c0808), C32e(0x31529595), C32e(0x8c654646), C32e(0x215e9d9d), + C32e(0x60283030), C32e(0x6ea13737), C32e(0x140f0a0a), C32e(0x5eb52f2f), + C32e(0x1c090e0e), C32e(0x48362424), C32e(0x369b1b1b), C32e(0xa53ddfdf), + C32e(0x8126cdcd), C32e(0x9c694e4e), C32e(0xfecd7f7f), C32e(0xcf9feaea), + C32e(0x241b1212), C32e(0x3a9e1d1d), C32e(0xb0745858), C32e(0x682e3434), + C32e(0x6c2d3636), C32e(0xa3b2dcdc), C32e(0x73eeb4b4), C32e(0xb6fb5b5b), + C32e(0x53f6a4a4), C32e(0xec4d7676), C32e(0x7561b7b7), C32e(0xface7d7d), + C32e(0xa47b5252), C32e(0xa13edddd), C32e(0xbc715e5e), C32e(0x26971313), + C32e(0x57f5a6a6), C32e(0x6968b9b9), C32e(0x00000000), C32e(0x992cc1c1), + C32e(0x80604040), C32e(0xdd1fe3e3), C32e(0xf2c87979), C32e(0x77edb6b6), + C32e(0xb3bed4d4), C32e(0x01468d8d), C32e(0xced96767), C32e(0xe44b7272), + C32e(0x33de9494), C32e(0x2bd49898), C32e(0x7be8b0b0), C32e(0x114a8585), + C32e(0x6d6bbbbb), C32e(0x912ac5c5), C32e(0x9ee54f4f), C32e(0xc116eded), + C32e(0x17c58686), C32e(0x2fd79a9a), C32e(0xcc556666), C32e(0x22941111), + C32e(0x0fcf8a8a), C32e(0xc910e9e9), C32e(0x08060404), C32e(0xe781fefe), + C32e(0x5bf0a0a0), C32e(0xf0447878), C32e(0x4aba2525), C32e(0x96e34b4b), + C32e(0x5ff3a2a2), C32e(0xbafe5d5d), C32e(0x1bc08080), C32e(0x0a8a0505), + C32e(0x7ead3f3f), C32e(0x42bc2121), C32e(0xe0487070), C32e(0xf904f1f1), + C32e(0xc6df6363), C32e(0xeec17777), C32e(0x4575afaf), 
C32e(0x84634242), + C32e(0x40302020), C32e(0xd11ae5e5), C32e(0xe10efdfd), C32e(0x656dbfbf), + C32e(0x194c8181), C32e(0x30141818), C32e(0x4c352626), C32e(0x9d2fc3c3), + C32e(0x67e1bebe), C32e(0x6aa23535), C32e(0x0bcc8888), C32e(0x5c392e2e), + C32e(0x3d579393), C32e(0xaaf25555), C32e(0xe382fcfc), C32e(0xf4477a7a), + C32e(0x8bacc8c8), C32e(0x6fe7baba), C32e(0x642b3232), C32e(0xd795e6e6), + C32e(0x9ba0c0c0), C32e(0x32981919), C32e(0x27d19e9e), C32e(0x5d7fa3a3), + C32e(0x88664444), C32e(0xa87e5454), C32e(0x76ab3b3b), C32e(0x16830b0b), + C32e(0x03ca8c8c), C32e(0x9529c7c7), C32e(0xd6d36b6b), C32e(0x503c2828), + C32e(0x5579a7a7), C32e(0x63e2bcbc), C32e(0x2c1d1616), C32e(0x4176adad), + C32e(0xad3bdbdb), C32e(0xc8566464), C32e(0xe84e7474), C32e(0x281e1414), + C32e(0x3fdb9292), C32e(0x180a0c0c), C32e(0x906c4848), C32e(0x6be4b8b8), + C32e(0x255d9f9f), C32e(0x616ebdbd), C32e(0x86ef4343), C32e(0x93a6c4c4), + C32e(0x72a83939), C32e(0x62a43131), C32e(0xbd37d3d3), C32e(0xff8bf2f2), + C32e(0xb132d5d5), C32e(0x0d438b8b), C32e(0xdc596e6e), C32e(0xafb7dada), + C32e(0x028c0101), C32e(0x7964b1b1), C32e(0x23d29c9c), C32e(0x92e04949), + C32e(0xabb4d8d8), C32e(0x43faacac), C32e(0xfd07f3f3), C32e(0x8525cfcf), + C32e(0x8fafcaca), C32e(0xf38ef4f4), C32e(0x8ee94747), C32e(0x20181010), + C32e(0xded56f6f), C32e(0xfb88f0f0), C32e(0x946f4a4a), C32e(0xb8725c5c), + C32e(0x70243838), C32e(0xaef15757), C32e(0xe6c77373), C32e(0x35519797), + C32e(0x8d23cbcb), C32e(0x597ca1a1), C32e(0xcb9ce8e8), C32e(0x7c213e3e), + C32e(0x37dd9696), C32e(0xc2dc6161), C32e(0x1a860d0d), C32e(0x1e850f0f), + C32e(0xdb90e0e0), C32e(0xf8427c7c), C32e(0xe2c47171), C32e(0x83aacccc), + C32e(0x3bd89090), C32e(0x0c050606), C32e(0xf501f7f7), C32e(0x38121c1c), + C32e(0x9fa3c2c2), C32e(0xd45f6a6a), C32e(0x47f9aeae), C32e(0xd2d06969), + C32e(0x2e911717), C32e(0x29589999), C32e(0x74273a3a), C32e(0x4eb92727), + C32e(0xa938d9d9), C32e(0xcd13ebeb), C32e(0x56b32b2b), C32e(0x44332222), + C32e(0xbfbbd2d2), C32e(0x4970a9a9), C32e(0x0e890707), 
C32e(0x66a73333), + C32e(0x5ab62d2d), C32e(0x78223c3c), C32e(0x2a921515), C32e(0x8920c9c9), + C32e(0x15498787), C32e(0x4fffaaaa), C32e(0xa0785050), C32e(0x517aa5a5), + C32e(0x068f0303), C32e(0xb2f85959), C32e(0x12800909), C32e(0x34171a1a), + C32e(0xcada6565), C32e(0xb531d7d7), C32e(0x13c68484), C32e(0xbbb8d0d0), + C32e(0x1fc38282), C32e(0x52b02929), C32e(0xb4775a5a), C32e(0x3c111e1e), + C32e(0xf6cb7b7b), C32e(0x4bfca8a8), C32e(0xdad66d6d), C32e(0x583a2c2c) +}; + +static const sph_u32 T3dn[] = { + C32e(0x32f4a5f4), C32e(0x6f978497), C32e(0x5eb099b0), C32e(0x7a8c8d8c), + C32e(0xe8170d17), C32e(0x0adcbddc), C32e(0x16c8b1c8), C32e(0x6dfc54fc), + C32e(0x90f050f0), C32e(0x07050305), C32e(0x2ee0a9e0), C32e(0xd1877d87), + C32e(0xcc2b192b), C32e(0x13a662a6), C32e(0x7c31e631), C32e(0x59b59ab5), + C32e(0x40cf45cf), C32e(0xa3bc9dbc), C32e(0x49c040c0), C32e(0x68928792), + C32e(0xd03f153f), C32e(0x9426eb26), C32e(0xce40c940), C32e(0xe61d0b1d), + C32e(0x6e2fec2f), C32e(0x1aa967a9), C32e(0x431cfd1c), C32e(0x6025ea25), + C32e(0xf9dabfda), C32e(0x5102f702), C32e(0x45a196a1), C32e(0x76ed5bed), + C32e(0x285dc25d), C32e(0xc5241c24), C32e(0xd4e9aee9), C32e(0xf2be6abe), + C32e(0x82ee5aee), C32e(0xbdc341c3), C32e(0xf3060206), C32e(0x52d14fd1), + C32e(0x8ce45ce4), C32e(0x5607f407), C32e(0x8d5c345c), C32e(0xe1180818), + C32e(0x4cae93ae), C32e(0x3e957395), C32e(0x97f553f5), C32e(0x6b413f41), + C32e(0x1c140c14), C32e(0x63f652f6), C32e(0xe9af65af), C32e(0x7fe25ee2), + C32e(0x48782878), C32e(0xcff8a1f8), C32e(0x1b110f11), C32e(0xebc4b5c4), + C32e(0x151b091b), C32e(0x7e5a365a), C32e(0xadb69bb6), C32e(0x98473d47), + C32e(0xa76a266a), C32e(0xf5bb69bb), C32e(0x334ccd4c), C32e(0x50ba9fba), + C32e(0x3f2d1b2d), C32e(0xa4b99eb9), C32e(0xc49c749c), C32e(0x46722e72), + C32e(0x41772d77), C32e(0x11cdb2cd), C32e(0x9d29ee29), C32e(0x4d16fb16), + C32e(0xa501f601), C32e(0xa1d74dd7), C32e(0x14a361a3), C32e(0x3449ce49), + C32e(0xdf8d7b8d), C32e(0x9f423e42), C32e(0xcd937193), C32e(0xb1a297a2), + C32e(0xa204f504), 
C32e(0x01b868b8), C32e(0x00000000), C32e(0xb5742c74), + C32e(0xe0a060a0), C32e(0xc2211f21), C32e(0x3a43c843), C32e(0x9a2ced2c), + C32e(0x0dd9bed9), C32e(0x47ca46ca), C32e(0x1770d970), C32e(0xafdd4bdd), + C32e(0xed79de79), C32e(0xff67d467), C32e(0x9323e823), C32e(0x5bde4ade), + C32e(0x06bd6bbd), C32e(0xbb7e2a7e), C32e(0x7b34e534), C32e(0xd73a163a), + C32e(0xd254c554), C32e(0xf862d762), C32e(0x99ff55ff), C32e(0xb6a794a7), + C32e(0xc04acf4a), C32e(0xd9301030), C32e(0x0e0a060a), C32e(0x66988198), + C32e(0xab0bf00b), C32e(0xb4cc44cc), C32e(0xf0d5bad5), C32e(0x753ee33e), + C32e(0xac0ef30e), C32e(0x4419fe19), C32e(0xdb5bc05b), C32e(0x80858a85), + C32e(0xd3ecadec), C32e(0xfedfbcdf), C32e(0xa8d848d8), C32e(0xfd0c040c), + C32e(0x197adf7a), C32e(0x2f58c158), C32e(0x309f759f), C32e(0xe7a563a5), + C32e(0x70503050), C32e(0xcb2e1a2e), C32e(0xef120e12), C32e(0x08b76db7), + C32e(0x55d44cd4), C32e(0x243c143c), C32e(0x795f355f), C32e(0xb2712f71), + C32e(0x8638e138), C32e(0xc8fda2fd), C32e(0xc74fcc4f), C32e(0x654b394b), + C32e(0x6af957f9), C32e(0x580df20d), C32e(0x619d829d), C32e(0xb3c947c9), + C32e(0x27efacef), C32e(0x8832e732), C32e(0x4f7d2b7d), C32e(0x42a495a4), + C32e(0x3bfba0fb), C32e(0xaab398b3), C32e(0xf668d168), C32e(0x22817f81), + C32e(0xeeaa66aa), C32e(0xd6827e82), C32e(0xdde6abe6), C32e(0x959e839e), + C32e(0xc945ca45), C32e(0xbc7b297b), C32e(0x056ed36e), C32e(0x6c443c44), + C32e(0x2c8b798b), C32e(0x813de23d), C32e(0x31271d27), C32e(0x379a769a), + C32e(0x964d3b4d), C32e(0x9efa56fa), C32e(0xa6d24ed2), C32e(0x36221e22), + C32e(0xe476db76), C32e(0x121e0a1e), C32e(0xfcb46cb4), C32e(0x8f37e437), + C32e(0x78e75de7), C32e(0x0fb26eb2), C32e(0x692aef2a), C32e(0x35f1a6f1), + C32e(0xdae3a8e3), C32e(0xc6f7a4f7), C32e(0x8a593759), C32e(0x74868b86), + C32e(0x83563256), C32e(0x4ec543c5), C32e(0x85eb59eb), C32e(0x18c2b7c2), + C32e(0x8e8f8c8f), C32e(0x1dac64ac), C32e(0xf16dd26d), C32e(0x723be03b), + C32e(0x1fc7b4c7), C32e(0xb915fa15), C32e(0xfa090709), C32e(0xa06f256f), + C32e(0x20eaafea), 
C32e(0x7d898e89), C32e(0x6720e920), C32e(0x38281828), + C32e(0x0b64d564), C32e(0x73838883), C32e(0xfbb16fb1), C32e(0xca967296), + C32e(0x546c246c), C32e(0x5f08f108), C32e(0x2152c752), C32e(0x64f351f3), + C32e(0xae652365), C32e(0x25847c84), C32e(0x57bf9cbf), C32e(0x5d632163), + C32e(0xea7cdd7c), C32e(0x1e7fdc7f), C32e(0x9c918691), C32e(0x9b948594), + C32e(0x4bab90ab), C32e(0xbac642c6), C32e(0x2657c457), C32e(0x29e5aae5), + C32e(0xe373d873), C32e(0x090f050f), C32e(0xf4030103), C32e(0x2a361236), + C32e(0x3cfea3fe), C32e(0x8be15fe1), C32e(0xbe10f910), C32e(0x026bd06b), + C32e(0xbfa891a8), C32e(0x71e858e8), C32e(0x53692769), C32e(0xf7d0b9d0), + C32e(0x91483848), C32e(0xde351335), C32e(0xe5ceb3ce), C32e(0x77553355), + C32e(0x04d6bbd6), C32e(0x39907090), C32e(0x87808980), C32e(0xc1f2a7f2), + C32e(0xecc1b6c1), C32e(0x5a662266), C32e(0xb8ad92ad), C32e(0xa9602060), + C32e(0x5cdb49db), C32e(0xb01aff1a), C32e(0xd8887888), C32e(0x2b8e7a8e), + C32e(0x898a8f8a), C32e(0x4a13f813), C32e(0x929b809b), C32e(0x23391739), + C32e(0x1075da75), C32e(0x84533153), C32e(0xd551c651), C32e(0x03d3b8d3), + C32e(0xdc5ec35e), C32e(0xe2cbb0cb), C32e(0xc3997799), C32e(0x2d331133), + C32e(0x3d46cb46), C32e(0xb71ffc1f), C32e(0x0c61d661), C32e(0x624e3a4e) +}; + +#define DECL_STATE_SMALL \ + sph_u32 H[16]; + +#define READ_STATE_SMALL(sc) do { \ + memcpy(H, (sc)->state.narrow, sizeof H); \ + } while (0) + +#define WRITE_STATE_SMALL(sc) do { \ + memcpy((sc)->state.narrow, H, sizeof H); \ + } while (0) + +#define XCAT(x, y) XCAT_(x, y) +#define XCAT_(x, y) x ## y + +#define RSTT(d0, d1, a, b0, b1, b2, b3, b4, b5, b6, b7) do { \ + t[d0] = T0up[B32_0(a[b0])] \ + ^ T1up[B32_1(a[b1])] \ + ^ T2up[B32_2(a[b2])] \ + ^ T3up[B32_3(a[b3])] \ + ^ T0dn[B32_0(a[b4])] \ + ^ T1dn[B32_1(a[b5])] \ + ^ T2dn[B32_2(a[b6])] \ + ^ T3dn[B32_3(a[b7])]; \ + t[d1] = T0dn[B32_0(a[b0])] \ + ^ T1dn[B32_1(a[b1])] \ + ^ T2dn[B32_2(a[b2])] \ + ^ T3dn[B32_3(a[b3])] \ + ^ T0up[B32_0(a[b4])] \ + ^ T1up[B32_1(a[b5])] \ + ^ T2up[B32_2(a[b6])] 
\ + ^ T3up[B32_3(a[b7])]; \ + } while (0) + +#define ROUND_SMALL_P(a, r) do { \ + sph_u32 t[16]; \ + a[0x0] ^= PC32up(0x00, r); \ + a[0x1] ^= PC32dn(0x00, r); \ + a[0x2] ^= PC32up(0x10, r); \ + a[0x3] ^= PC32dn(0x10, r); \ + a[0x4] ^= PC32up(0x20, r); \ + a[0x5] ^= PC32dn(0x20, r); \ + a[0x6] ^= PC32up(0x30, r); \ + a[0x7] ^= PC32dn(0x30, r); \ + a[0x8] ^= PC32up(0x40, r); \ + a[0x9] ^= PC32dn(0x40, r); \ + a[0xA] ^= PC32up(0x50, r); \ + a[0xB] ^= PC32dn(0x50, r); \ + a[0xC] ^= PC32up(0x60, r); \ + a[0xD] ^= PC32dn(0x60, r); \ + a[0xE] ^= PC32up(0x70, r); \ + a[0xF] ^= PC32dn(0x70, r); \ + RSTT(0x0, 0x1, a, 0x0, 0x2, 0x4, 0x6, 0x9, 0xB, 0xD, 0xF); \ + RSTT(0x2, 0x3, a, 0x2, 0x4, 0x6, 0x8, 0xB, 0xD, 0xF, 0x1); \ + RSTT(0x4, 0x5, a, 0x4, 0x6, 0x8, 0xA, 0xD, 0xF, 0x1, 0x3); \ + RSTT(0x6, 0x7, a, 0x6, 0x8, 0xA, 0xC, 0xF, 0x1, 0x3, 0x5); \ + RSTT(0x8, 0x9, a, 0x8, 0xA, 0xC, 0xE, 0x1, 0x3, 0x5, 0x7); \ + RSTT(0xA, 0xB, a, 0xA, 0xC, 0xE, 0x0, 0x3, 0x5, 0x7, 0x9); \ + RSTT(0xC, 0xD, a, 0xC, 0xE, 0x0, 0x2, 0x5, 0x7, 0x9, 0xB); \ + RSTT(0xE, 0xF, a, 0xE, 0x0, 0x2, 0x4, 0x7, 0x9, 0xB, 0xD); \ + memcpy(a, t, sizeof t); \ + } while (0) + +#define ROUND_SMALL_Q(a, r) do { \ + sph_u32 t[16]; \ + a[0x0] ^= QC32up(0x00, r); \ + a[0x1] ^= QC32dn(0x00, r); \ + a[0x2] ^= QC32up(0x10, r); \ + a[0x3] ^= QC32dn(0x10, r); \ + a[0x4] ^= QC32up(0x20, r); \ + a[0x5] ^= QC32dn(0x20, r); \ + a[0x6] ^= QC32up(0x30, r); \ + a[0x7] ^= QC32dn(0x30, r); \ + a[0x8] ^= QC32up(0x40, r); \ + a[0x9] ^= QC32dn(0x40, r); \ + a[0xA] ^= QC32up(0x50, r); \ + a[0xB] ^= QC32dn(0x50, r); \ + a[0xC] ^= QC32up(0x60, r); \ + a[0xD] ^= QC32dn(0x60, r); \ + a[0xE] ^= QC32up(0x70, r); \ + a[0xF] ^= QC32dn(0x70, r); \ + RSTT(0x0, 0x1, a, 0x2, 0x6, 0xA, 0xE, 0x1, 0x5, 0x9, 0xD); \ + RSTT(0x2, 0x3, a, 0x4, 0x8, 0xC, 0x0, 0x3, 0x7, 0xB, 0xF); \ + RSTT(0x4, 0x5, a, 0x6, 0xA, 0xE, 0x2, 0x5, 0x9, 0xD, 0x1); \ + RSTT(0x6, 0x7, a, 0x8, 0xC, 0x0, 0x4, 0x7, 0xB, 0xF, 0x3); \ + RSTT(0x8, 0x9, a, 0xA, 0xE, 0x2, 0x6, 0x9, 0xD, 
0x1, 0x5); \ + RSTT(0xA, 0xB, a, 0xC, 0x0, 0x4, 0x8, 0xB, 0xF, 0x3, 0x7); \ + RSTT(0xC, 0xD, a, 0xE, 0x2, 0x6, 0xA, 0xD, 0x1, 0x5, 0x9); \ + RSTT(0xE, 0xF, a, 0x0, 0x4, 0x8, 0xC, 0xF, 0x3, 0x7, 0xB); \ + memcpy(a, t, sizeof t); \ + } while (0) + +#if SPH_SMALL_FOOTPRINT_GROESTL + +#define PERM_SMALL_P(a) do { \ + int r; \ + for (r = 0; r < 10; r ++) \ + ROUND_SMALL_P(a, r); \ + } while (0) + +#define PERM_SMALL_Q(a) do { \ + int r; \ + for (r = 0; r < 10; r ++) \ + ROUND_SMALL_Q(a, r); \ + } while (0) + +#else + +#define PERM_SMALL_P(a) do { \ + int r; \ + for (r = 0; r < 10; r += 2) { \ + ROUND_SMALL_P(a, r + 0); \ + ROUND_SMALL_P(a, r + 1); \ + } \ + } while (0) + +#define PERM_SMALL_Q(a) do { \ + int r; \ + for (r = 0; r < 10; r += 2) { \ + ROUND_SMALL_Q(a, r + 0); \ + ROUND_SMALL_Q(a, r + 1); \ + } \ + } while (0) + +#endif + +#define COMPRESS_SMALL do { \ + sph_u32 g[16], m[16]; \ + size_t u; \ + for (u = 0; u < 16; u ++) { \ + m[u] = dec32e_aligned(buf + (u << 2)); \ + g[u] = m[u] ^ H[u]; \ + } \ + PERM_SMALL_P(g); \ + PERM_SMALL_Q(m); \ + for (u = 0; u < 16; u ++) \ + H[u] ^= g[u] ^ m[u]; \ + } while (0) + +#define FINAL_SMALL do { \ + sph_u32 x[16]; \ + size_t u; \ + memcpy(x, H, sizeof x); \ + PERM_SMALL_P(x); \ + for (u = 0; u < 16; u ++) \ + H[u] ^= x[u]; \ + } while (0) + +#define DECL_STATE_BIG \ + sph_u32 H[32]; + +#define READ_STATE_BIG(sc) do { \ + memcpy(H, (sc)->state.narrow, sizeof H); \ + } while (0) + +#define WRITE_STATE_BIG(sc) do { \ + memcpy((sc)->state.narrow, H, sizeof H); \ + } while (0) + +#if SPH_SMALL_FOOTPRINT_GROESTL + +#define RBTT(d0, d1, a, b0, b1, b2, b3, b4, b5, b6, b7) do { \ + sph_u32 fu2 = T0up[B32_2(a[b2])]; \ + sph_u32 fd2 = T0dn[B32_2(a[b2])]; \ + sph_u32 fu3 = T1up[B32_3(a[b3])]; \ + sph_u32 fd3 = T1dn[B32_3(a[b3])]; \ + sph_u32 fu6 = T0up[B32_2(a[b6])]; \ + sph_u32 fd6 = T0dn[B32_2(a[b6])]; \ + sph_u32 fu7 = T1up[B32_3(a[b7])]; \ + sph_u32 fd7 = T1dn[B32_3(a[b7])]; \ + t[d0] = T0up[B32_0(a[b0])] \ + ^ T1up[B32_1(a[b1])] 
\ + ^ R32u(fu2, fd2) \ + ^ R32u(fu3, fd3) \ + ^ T0dn[B32_0(a[b4])] \ + ^ T1dn[B32_1(a[b5])] \ + ^ R32d(fu6, fd6) \ + ^ R32d(fu7, fd7); \ + t[d1] = T0dn[B32_0(a[b0])] \ + ^ T1dn[B32_1(a[b1])] \ + ^ R32d(fu2, fd2) \ + ^ R32d(fu3, fd3) \ + ^ T0up[B32_0(a[b4])] \ + ^ T1up[B32_1(a[b5])] \ + ^ R32u(fu6, fd6) \ + ^ R32u(fu7, fd7); \ + } while (0) + +#else + +#define RBTT(d0, d1, a, b0, b1, b2, b3, b4, b5, b6, b7) do { \ + t[d0] = T0up[B32_0(a[b0])] \ + ^ T1up[B32_1(a[b1])] \ + ^ T2up[B32_2(a[b2])] \ + ^ T3up[B32_3(a[b3])] \ + ^ T0dn[B32_0(a[b4])] \ + ^ T1dn[B32_1(a[b5])] \ + ^ T2dn[B32_2(a[b6])] \ + ^ T3dn[B32_3(a[b7])]; \ + t[d1] = T0dn[B32_0(a[b0])] \ + ^ T1dn[B32_1(a[b1])] \ + ^ T2dn[B32_2(a[b2])] \ + ^ T3dn[B32_3(a[b3])] \ + ^ T0up[B32_0(a[b4])] \ + ^ T1up[B32_1(a[b5])] \ + ^ T2up[B32_2(a[b6])] \ + ^ T3up[B32_3(a[b7])]; \ + } while (0) + +#endif + +#if SPH_SMALL_FOOTPRINT_GROESTL + +#define ROUND_BIG_P(a, r) do { \ + sph_u32 t[32]; \ + size_t u; \ + a[0x00] ^= PC32up(0x00, r); \ + a[0x01] ^= PC32dn(0x00, r); \ + a[0x02] ^= PC32up(0x10, r); \ + a[0x03] ^= PC32dn(0x10, r); \ + a[0x04] ^= PC32up(0x20, r); \ + a[0x05] ^= PC32dn(0x20, r); \ + a[0x06] ^= PC32up(0x30, r); \ + a[0x07] ^= PC32dn(0x30, r); \ + a[0x08] ^= PC32up(0x40, r); \ + a[0x09] ^= PC32dn(0x40, r); \ + a[0x0A] ^= PC32up(0x50, r); \ + a[0x0B] ^= PC32dn(0x50, r); \ + a[0x0C] ^= PC32up(0x60, r); \ + a[0x0D] ^= PC32dn(0x60, r); \ + a[0x0E] ^= PC32up(0x70, r); \ + a[0x0F] ^= PC32dn(0x70, r); \ + a[0x10] ^= PC32up(0x80, r); \ + a[0x11] ^= PC32dn(0x80, r); \ + a[0x12] ^= PC32up(0x90, r); \ + a[0x13] ^= PC32dn(0x90, r); \ + a[0x14] ^= PC32up(0xA0, r); \ + a[0x15] ^= PC32dn(0xA0, r); \ + a[0x16] ^= PC32up(0xB0, r); \ + a[0x17] ^= PC32dn(0xB0, r); \ + a[0x18] ^= PC32up(0xC0, r); \ + a[0x19] ^= PC32dn(0xC0, r); \ + a[0x1A] ^= PC32up(0xD0, r); \ + a[0x1B] ^= PC32dn(0xD0, r); \ + a[0x1C] ^= PC32up(0xE0, r); \ + a[0x1D] ^= PC32dn(0xE0, r); \ + a[0x1E] ^= PC32up(0xF0, r); \ + a[0x1F] ^= PC32dn(0xF0, r); \ + for (u = 0; u 
< 32; u += 8) { \ + RBTT(u + 0x00, (u + 0x01) & 0x1F, a, \ + u + 0x00, (u + 0x02) & 0x1F, \ + (u + 0x04) & 0x1F, (u + 0x06) & 0x1F, \ + (u + 0x09) & 0x1F, (u + 0x0B) & 0x1F, \ + (u + 0x0D) & 0x1F, (u + 0x17) & 0x1F); \ + RBTT(u + 0x02, (u + 0x03) & 0x1F, a, \ + u + 0x02, (u + 0x04) & 0x1F, \ + (u + 0x06) & 0x1F, (u + 0x08) & 0x1F, \ + (u + 0x0B) & 0x1F, (u + 0x0D) & 0x1F, \ + (u + 0x0F) & 0x1F, (u + 0x19) & 0x1F); \ + RBTT(u + 0x04, (u + 0x05) & 0x1F, a, \ + u + 0x04, (u + 0x06) & 0x1F, \ + (u + 0x08) & 0x1F, (u + 0x0A) & 0x1F, \ + (u + 0x0D) & 0x1F, (u + 0x0F) & 0x1F, \ + (u + 0x11) & 0x1F, (u + 0x1B) & 0x1F); \ + RBTT(u + 0x06, (u + 0x07) & 0x1F, a, \ + u + 0x06, (u + 0x08) & 0x1F, \ + (u + 0x0A) & 0x1F, (u + 0x0C) & 0x1F, \ + (u + 0x0F) & 0x1F, (u + 0x11) & 0x1F, \ + (u + 0x13) & 0x1F, (u + 0x1D) & 0x1F); \ + } \ + memcpy(a, t, sizeof t); \ + } while (0) + +#define ROUND_BIG_Q(a, r) do { \ + sph_u32 t[32]; \ + size_t u; \ + a[0x00] ^= QC32up(0x00, r); \ + a[0x01] ^= QC32dn(0x00, r); \ + a[0x02] ^= QC32up(0x10, r); \ + a[0x03] ^= QC32dn(0x10, r); \ + a[0x04] ^= QC32up(0x20, r); \ + a[0x05] ^= QC32dn(0x20, r); \ + a[0x06] ^= QC32up(0x30, r); \ + a[0x07] ^= QC32dn(0x30, r); \ + a[0x08] ^= QC32up(0x40, r); \ + a[0x09] ^= QC32dn(0x40, r); \ + a[0x0A] ^= QC32up(0x50, r); \ + a[0x0B] ^= QC32dn(0x50, r); \ + a[0x0C] ^= QC32up(0x60, r); \ + a[0x0D] ^= QC32dn(0x60, r); \ + a[0x0E] ^= QC32up(0x70, r); \ + a[0x0F] ^= QC32dn(0x70, r); \ + a[0x10] ^= QC32up(0x80, r); \ + a[0x11] ^= QC32dn(0x80, r); \ + a[0x12] ^= QC32up(0x90, r); \ + a[0x13] ^= QC32dn(0x90, r); \ + a[0x14] ^= QC32up(0xA0, r); \ + a[0x15] ^= QC32dn(0xA0, r); \ + a[0x16] ^= QC32up(0xB0, r); \ + a[0x17] ^= QC32dn(0xB0, r); \ + a[0x18] ^= QC32up(0xC0, r); \ + a[0x19] ^= QC32dn(0xC0, r); \ + a[0x1A] ^= QC32up(0xD0, r); \ + a[0x1B] ^= QC32dn(0xD0, r); \ + a[0x1C] ^= QC32up(0xE0, r); \ + a[0x1D] ^= QC32dn(0xE0, r); \ + a[0x1E] ^= QC32up(0xF0, r); \ + a[0x1F] ^= QC32dn(0xF0, r); \ + for (u = 0; u < 32; u += 8) { \ + 
RBTT(u + 0x00, (u + 0x01) & 0x1F, a, \ + (u + 0x02) & 0x1F, (u + 0x06) & 0x1F, \ + (u + 0x0A) & 0x1F, (u + 0x16) & 0x1F, \ + (u + 0x01) & 0x1F, (u + 0x05) & 0x1F, \ + (u + 0x09) & 0x1F, (u + 0x0D) & 0x1F); \ + RBTT(u + 0x02, (u + 0x03) & 0x1F, a, \ + (u + 0x04) & 0x1F, (u + 0x08) & 0x1F, \ + (u + 0x0C) & 0x1F, (u + 0x18) & 0x1F, \ + (u + 0x03) & 0x1F, (u + 0x07) & 0x1F, \ + (u + 0x0B) & 0x1F, (u + 0x0F) & 0x1F); \ + RBTT(u + 0x04, (u + 0x05) & 0x1F, a, \ + (u + 0x06) & 0x1F, (u + 0x0A) & 0x1F, \ + (u + 0x0E) & 0x1F, (u + 0x1A) & 0x1F, \ + (u + 0x05) & 0x1F, (u + 0x09) & 0x1F, \ + (u + 0x0D) & 0x1F, (u + 0x11) & 0x1F); \ + RBTT(u + 0x06, (u + 0x07) & 0x1F, a, \ + (u + 0x08) & 0x1F, (u + 0x0C) & 0x1F, \ + (u + 0x10) & 0x1F, (u + 0x1C) & 0x1F, \ + (u + 0x07) & 0x1F, (u + 0x0B) & 0x1F, \ + (u + 0x0F) & 0x1F, (u + 0x13) & 0x1F); \ + } \ + memcpy(a, t, sizeof t); \ + } while (0) + +#else + +#define ROUND_BIG_P(a, r) do { \ + sph_u32 t[32]; \ + a[0x00] ^= PC32up(0x00, r); \ + a[0x01] ^= PC32dn(0x00, r); \ + a[0x02] ^= PC32up(0x10, r); \ + a[0x03] ^= PC32dn(0x10, r); \ + a[0x04] ^= PC32up(0x20, r); \ + a[0x05] ^= PC32dn(0x20, r); \ + a[0x06] ^= PC32up(0x30, r); \ + a[0x07] ^= PC32dn(0x30, r); \ + a[0x08] ^= PC32up(0x40, r); \ + a[0x09] ^= PC32dn(0x40, r); \ + a[0x0A] ^= PC32up(0x50, r); \ + a[0x0B] ^= PC32dn(0x50, r); \ + a[0x0C] ^= PC32up(0x60, r); \ + a[0x0D] ^= PC32dn(0x60, r); \ + a[0x0E] ^= PC32up(0x70, r); \ + a[0x0F] ^= PC32dn(0x70, r); \ + a[0x10] ^= PC32up(0x80, r); \ + a[0x11] ^= PC32dn(0x80, r); \ + a[0x12] ^= PC32up(0x90, r); \ + a[0x13] ^= PC32dn(0x90, r); \ + a[0x14] ^= PC32up(0xA0, r); \ + a[0x15] ^= PC32dn(0xA0, r); \ + a[0x16] ^= PC32up(0xB0, r); \ + a[0x17] ^= PC32dn(0xB0, r); \ + a[0x18] ^= PC32up(0xC0, r); \ + a[0x19] ^= PC32dn(0xC0, r); \ + a[0x1A] ^= PC32up(0xD0, r); \ + a[0x1B] ^= PC32dn(0xD0, r); \ + a[0x1C] ^= PC32up(0xE0, r); \ + a[0x1D] ^= PC32dn(0xE0, r); \ + a[0x1E] ^= PC32up(0xF0, r); \ + a[0x1F] ^= PC32dn(0xF0, r); \ + RBTT(0x00, 0x01, a, \ + 
0x00, 0x02, 0x04, 0x06, 0x09, 0x0B, 0x0D, 0x17); \ + RBTT(0x02, 0x03, a, \ + 0x02, 0x04, 0x06, 0x08, 0x0B, 0x0D, 0x0F, 0x19); \ + RBTT(0x04, 0x05, a, \ + 0x04, 0x06, 0x08, 0x0A, 0x0D, 0x0F, 0x11, 0x1B); \ + RBTT(0x06, 0x07, a, \ + 0x06, 0x08, 0x0A, 0x0C, 0x0F, 0x11, 0x13, 0x1D); \ + RBTT(0x08, 0x09, a, \ + 0x08, 0x0A, 0x0C, 0x0E, 0x11, 0x13, 0x15, 0x1F); \ + RBTT(0x0A, 0x0B, a, \ + 0x0A, 0x0C, 0x0E, 0x10, 0x13, 0x15, 0x17, 0x01); \ + RBTT(0x0C, 0x0D, a, \ + 0x0C, 0x0E, 0x10, 0x12, 0x15, 0x17, 0x19, 0x03); \ + RBTT(0x0E, 0x0F, a, \ + 0x0E, 0x10, 0x12, 0x14, 0x17, 0x19, 0x1B, 0x05); \ + RBTT(0x10, 0x11, a, \ + 0x10, 0x12, 0x14, 0x16, 0x19, 0x1B, 0x1D, 0x07); \ + RBTT(0x12, 0x13, a, \ + 0x12, 0x14, 0x16, 0x18, 0x1B, 0x1D, 0x1F, 0x09); \ + RBTT(0x14, 0x15, a, \ + 0x14, 0x16, 0x18, 0x1A, 0x1D, 0x1F, 0x01, 0x0B); \ + RBTT(0x16, 0x17, a, \ + 0x16, 0x18, 0x1A, 0x1C, 0x1F, 0x01, 0x03, 0x0D); \ + RBTT(0x18, 0x19, a, \ + 0x18, 0x1A, 0x1C, 0x1E, 0x01, 0x03, 0x05, 0x0F); \ + RBTT(0x1A, 0x1B, a, \ + 0x1A, 0x1C, 0x1E, 0x00, 0x03, 0x05, 0x07, 0x11); \ + RBTT(0x1C, 0x1D, a, \ + 0x1C, 0x1E, 0x00, 0x02, 0x05, 0x07, 0x09, 0x13); \ + RBTT(0x1E, 0x1F, a, \ + 0x1E, 0x00, 0x02, 0x04, 0x07, 0x09, 0x0B, 0x15); \ + memcpy(a, t, sizeof t); \ + } while (0) + +#define ROUND_BIG_Q(a, r) do { \ + sph_u32 t[32]; \ + a[0x00] ^= QC32up(0x00, r); \ + a[0x01] ^= QC32dn(0x00, r); \ + a[0x02] ^= QC32up(0x10, r); \ + a[0x03] ^= QC32dn(0x10, r); \ + a[0x04] ^= QC32up(0x20, r); \ + a[0x05] ^= QC32dn(0x20, r); \ + a[0x06] ^= QC32up(0x30, r); \ + a[0x07] ^= QC32dn(0x30, r); \ + a[0x08] ^= QC32up(0x40, r); \ + a[0x09] ^= QC32dn(0x40, r); \ + a[0x0A] ^= QC32up(0x50, r); \ + a[0x0B] ^= QC32dn(0x50, r); \ + a[0x0C] ^= QC32up(0x60, r); \ + a[0x0D] ^= QC32dn(0x60, r); \ + a[0x0E] ^= QC32up(0x70, r); \ + a[0x0F] ^= QC32dn(0x70, r); \ + a[0x10] ^= QC32up(0x80, r); \ + a[0x11] ^= QC32dn(0x80, r); \ + a[0x12] ^= QC32up(0x90, r); \ + a[0x13] ^= QC32dn(0x90, r); \ + a[0x14] ^= QC32up(0xA0, r); \ + a[0x15] ^= 
QC32dn(0xA0, r); \ + a[0x16] ^= QC32up(0xB0, r); \ + a[0x17] ^= QC32dn(0xB0, r); \ + a[0x18] ^= QC32up(0xC0, r); \ + a[0x19] ^= QC32dn(0xC0, r); \ + a[0x1A] ^= QC32up(0xD0, r); \ + a[0x1B] ^= QC32dn(0xD0, r); \ + a[0x1C] ^= QC32up(0xE0, r); \ + a[0x1D] ^= QC32dn(0xE0, r); \ + a[0x1E] ^= QC32up(0xF0, r); \ + a[0x1F] ^= QC32dn(0xF0, r); \ + RBTT(0x00, 0x01, a, \ + 0x02, 0x06, 0x0A, 0x16, 0x01, 0x05, 0x09, 0x0D); \ + RBTT(0x02, 0x03, a, \ + 0x04, 0x08, 0x0C, 0x18, 0x03, 0x07, 0x0B, 0x0F); \ + RBTT(0x04, 0x05, a, \ + 0x06, 0x0A, 0x0E, 0x1A, 0x05, 0x09, 0x0D, 0x11); \ + RBTT(0x06, 0x07, a, \ + 0x08, 0x0C, 0x10, 0x1C, 0x07, 0x0B, 0x0F, 0x13); \ + RBTT(0x08, 0x09, a, \ + 0x0A, 0x0E, 0x12, 0x1E, 0x09, 0x0D, 0x11, 0x15); \ + RBTT(0x0A, 0x0B, a, \ + 0x0C, 0x10, 0x14, 0x00, 0x0B, 0x0F, 0x13, 0x17); \ + RBTT(0x0C, 0x0D, a, \ + 0x0E, 0x12, 0x16, 0x02, 0x0D, 0x11, 0x15, 0x19); \ + RBTT(0x0E, 0x0F, a, \ + 0x10, 0x14, 0x18, 0x04, 0x0F, 0x13, 0x17, 0x1B); \ + RBTT(0x10, 0x11, a, \ + 0x12, 0x16, 0x1A, 0x06, 0x11, 0x15, 0x19, 0x1D); \ + RBTT(0x12, 0x13, a, \ + 0x14, 0x18, 0x1C, 0x08, 0x13, 0x17, 0x1B, 0x1F); \ + RBTT(0x14, 0x15, a, \ + 0x16, 0x1A, 0x1E, 0x0A, 0x15, 0x19, 0x1D, 0x01); \ + RBTT(0x16, 0x17, a, \ + 0x18, 0x1C, 0x00, 0x0C, 0x17, 0x1B, 0x1F, 0x03); \ + RBTT(0x18, 0x19, a, \ + 0x1A, 0x1E, 0x02, 0x0E, 0x19, 0x1D, 0x01, 0x05); \ + RBTT(0x1A, 0x1B, a, \ + 0x1C, 0x00, 0x04, 0x10, 0x1B, 0x1F, 0x03, 0x07); \ + RBTT(0x1C, 0x1D, a, \ + 0x1E, 0x02, 0x06, 0x12, 0x1D, 0x01, 0x05, 0x09); \ + RBTT(0x1E, 0x1F, a, \ + 0x00, 0x04, 0x08, 0x14, 0x1F, 0x03, 0x07, 0x0B); \ + memcpy(a, t, sizeof t); \ + } while (0) + +#endif + +#if SPH_SMALL_FOOTPRINT_GROESTL + +#define PERM_BIG_P(a) do { \ + int r; \ + for (r = 0; r < 14; r ++) \ + ROUND_BIG_P(a, r); \ + } while (0) + +#define PERM_BIG_Q(a) do { \ + int r; \ + for (r = 0; r < 14; r ++) \ + ROUND_BIG_Q(a, r); \ + } while (0) + +#else + +#define PERM_BIG_P(a) do { \ + int r; \ + for (r = 0; r < 14; r += 2) { \ + ROUND_BIG_P(a, r + 0); \ + 
ROUND_BIG_P(a, r + 1); \ + } \ + } while (0) + +#define PERM_BIG_Q(a) do { \ + int r; \ + for (r = 0; r < 14; r += 2) { \ + ROUND_BIG_Q(a, r + 0); \ + ROUND_BIG_Q(a, r + 1); \ + } \ + } while (0) + +#endif + +#define COMPRESS_BIG do { \ + sph_u32 g[32], m[32]; \ + size_t u; \ + for (u = 0; u < 32; u ++) { \ + m[u] = dec32e_aligned(buf + (u << 2)); \ + g[u] = m[u] ^ H[u]; \ + } \ + PERM_BIG_P(g); \ + PERM_BIG_Q(m); \ + for (u = 0; u < 32; u ++) \ + H[u] ^= g[u] ^ m[u]; \ + } while (0) + +#define FINAL_BIG do { \ + sph_u32 x[32]; \ + size_t u; \ + memcpy(x, H, sizeof x); \ + PERM_BIG_P(x); \ + for (u = 0; u < 32; u ++) \ + H[u] ^= x[u]; \ + } while (0) + +#endif + +static void +groestl_small_init(sph_groestl_small_context *sc, unsigned out_size) +{ + size_t u; + + sc->ptr = 0; +#if SPH_GROESTL_64 + for (u = 0; u < 7; u ++) + sc->state.wide[u] = 0; +#if USE_LE + sc->state.wide[7] = ((sph_u64)(out_size & 0xFF) << 56) + | ((sph_u64)(out_size & 0xFF00) << 40); +#else + sc->state.wide[7] = (sph_u64)out_size; +#endif +#else + for (u = 0; u < 15; u ++) + sc->state.narrow[u] = 0; +#if USE_LE + sc->state.narrow[15] = ((sph_u32)(out_size & 0xFF) << 24) + | ((sph_u32)(out_size & 0xFF00) << 8); +#else + sc->state.narrow[15] = (sph_u32)out_size; +#endif +#endif +#if SPH_64 + sc->count = 0; +#else + sc->count_high = 0; + sc->count_low = 0; +#endif +} + +static void +groestl_small_core(sph_groestl_small_context *sc, const void *data, size_t len) +{ + unsigned char *buf; + size_t ptr; + DECL_STATE_SMALL + + buf = sc->buf; + ptr = sc->ptr; + if (len < (sizeof sc->buf) - ptr) { + memcpy(buf + ptr, data, len); + ptr += len; + sc->ptr = ptr; + return; + } + + READ_STATE_SMALL(sc); + while (len > 0) { + size_t clen; + + clen = (sizeof sc->buf) - ptr; + if (clen > len) + clen = len; + memcpy(buf + ptr, data, clen); + ptr += clen; + data = (const unsigned char *)data + clen; + len -= clen; + if (ptr == sizeof sc->buf) { + COMPRESS_SMALL; +#if SPH_64 + sc->count ++; +#else + if 
((sc->count_low = SPH_T32(sc->count_low + 1)) == 0) + sc->count_high = SPH_T32(sc->count_high + 1); +#endif + ptr = 0; + } + } + WRITE_STATE_SMALL(sc); + sc->ptr = ptr; +} +/* Finish a hash on the small context: append the padding bit after the n extra bits of ub, zero fill and the 64-bit big-endian block count, run the final transform, copy out_len bytes to dst and re-initialize sc. */ +static void +groestl_small_close(sph_groestl_small_context *sc, + unsigned ub, unsigned n, void *dst, size_t out_len) +{ + unsigned char *buf; + unsigned char pad[72]; /* largest pad_len below is 128 - 56 = 72 */ + size_t u, ptr, pad_len; +#if SPH_64 + sph_u64 count; +#else + sph_u32 count_high, count_low; +#endif + unsigned z; + DECL_STATE_SMALL + + buf = sc->buf; + ptr = sc->ptr; + z = 0x80 >> n; + pad[0] = ((ub & -z) | z) & 0xFF; /* keep the top n bits of ub and set the padding bit right below them */ + if (ptr < 56) { + pad_len = 64 - ptr; /* padding bit and 8-byte count fit in 64 - ptr bytes */ +#if SPH_64 + count = SPH_T64(sc->count + 1); +#else + count_low = SPH_T32(sc->count_low + 1); + count_high = SPH_T32(sc->count_high); + if (count_low == 0) + count_high = SPH_T32(count_high + 1); +#endif + } else { + pad_len = 128 - ptr; /* not enough room: padding spills into one more block */ +#if SPH_64 + count = SPH_T64(sc->count + 2); +#else + count_low = SPH_T32(sc->count_low + 2); + count_high = SPH_T32(sc->count_high); + if (count_low <= 1) + count_high = SPH_T32(count_high + 1); +#endif + } + memset(pad + 1, 0, pad_len - 9); +#if SPH_64 + sph_enc64be(pad + pad_len - 8, count); +#else + sph_enc32be(pad + pad_len - 8, count_high); /* upper half of the 64-bit block count */ + sph_enc32be(pad + pad_len - 4, count_low); /* lower half; each half is exactly 4 bytes, so the write ends at pad + pad_len */ +#endif + groestl_small_core(sc, pad, pad_len); + READ_STATE_SMALL(sc); + FINAL_SMALL; +#if SPH_GROESTL_64 + for (u = 0; u < 4; u ++) + enc64e(pad + (u << 3), H[u + 4]); +#else + for (u = 0; u < 8; u ++) + enc32e(pad + (u << 2), H[u + 8]); +#endif + memcpy(dst, pad + 32 - out_len, out_len); /* output is the last out_len of the 32 bytes just encoded */ + groestl_small_init(sc, (unsigned)out_len << 3); /* reset the context, output size given in bits */ +} + +static void +groestl_big_init(sph_groestl_big_context *sc, unsigned out_size) +{ + size_t u; + + sc->ptr = 0; +#if SPH_GROESTL_64 + for (u = 0; u < 15; u ++) + sc->state.wide[u] = 0; +#if USE_LE + sc->state.wide[15] = ((sph_u64)(out_size & 0xFF) << 56) + | ((sph_u64)(out_size & 0xFF00) << 40); +#else + sc->state.wide[15] = (sph_u64)out_size; +#endif +#else + for (u = 0; u < 31; u ++) + sc->state.narrow[u] = 
0; +#if USE_LE + sc->state.narrow[31] = ((sph_u32)(out_size & 0xFF) << 24) + | ((sph_u32)(out_size & 0xFF00) << 8); +#else + sc->state.narrow[31] = (sph_u32)out_size; +#endif +#endif +#if SPH_64 + sc->count = 0; +#else + sc->count_high = 0; + sc->count_low = 0; +#endif +} + +static void +groestl_big_core(sph_groestl_big_context *sc, const void *data, size_t len) +{ + unsigned char *buf; + size_t ptr; + DECL_STATE_BIG + + buf = sc->buf; + ptr = sc->ptr; + if (len < (sizeof sc->buf) - ptr) { + memcpy(buf + ptr, data, len); + ptr += len; + sc->ptr = ptr; + return; + } + + READ_STATE_BIG(sc); + while (len > 0) { + size_t clen; + + clen = (sizeof sc->buf) - ptr; + if (clen > len) + clen = len; + memcpy(buf + ptr, data, clen); + ptr += clen; + data = (const unsigned char *)data + clen; + len -= clen; + if (ptr == sizeof sc->buf) { + COMPRESS_BIG; +#if SPH_64 + sc->count ++; +#else + if ((sc->count_low = SPH_T32(sc->count_low + 1)) == 0) + sc->count_high = SPH_T32(sc->count_high + 1); +#endif + ptr = 0; + } + } + WRITE_STATE_BIG(sc); + sc->ptr = ptr; +} +/* Finish a hash on the big context: append the padding bit after the n extra bits of ub, zero fill and the 64-bit big-endian block count, run the final transform, copy out_len bytes to dst and re-initialize sc. */ +static void +groestl_big_close(sph_groestl_big_context *sc, + unsigned ub, unsigned n, void *dst, size_t out_len) +{ + unsigned char *buf; + unsigned char pad[136]; /* largest pad_len below is 256 - 120 = 136 */ + size_t ptr, pad_len, u; +#if SPH_64 + sph_u64 count; +#else + sph_u32 count_high, count_low; +#endif + unsigned z; + DECL_STATE_BIG + + buf = sc->buf; + ptr = sc->ptr; + z = 0x80 >> n; + pad[0] = ((ub & -z) | z) & 0xFF; /* keep the top n bits of ub and set the padding bit right below them */ + if (ptr < 120) { + pad_len = 128 - ptr; /* padding bit and 8-byte count fit in 128 - ptr bytes */ +#if SPH_64 + count = SPH_T64(sc->count + 1); +#else + count_low = SPH_T32(sc->count_low + 1); + count_high = SPH_T32(sc->count_high); + if (count_low == 0) + count_high = SPH_T32(count_high + 1); +#endif + } else { + pad_len = 256 - ptr; /* not enough room: padding spills into one more block */ +#if SPH_64 + count = SPH_T64(sc->count + 2); +#else + count_low = SPH_T32(sc->count_low + 2); + count_high = SPH_T32(sc->count_high); + if (count_low <= 1) + count_high = SPH_T32(count_high + 1); +#endif + } + memset(pad + 1, 0, pad_len - 9); +#if SPH_64 + 
sph_enc64be(pad + pad_len - 8, count); +#else + sph_enc32be(pad + pad_len - 8, count_high); /* upper half of the 64-bit block count */ + sph_enc32be(pad + pad_len - 4, count_low); /* lower half; each half is exactly 4 bytes, so the write ends at pad + pad_len */ +#endif + groestl_big_core(sc, pad, pad_len); + READ_STATE_BIG(sc); + FINAL_BIG; +#if SPH_GROESTL_64 + for (u = 0; u < 8; u ++) + enc64e(pad + (u << 3), H[u + 8]); +#else + for (u = 0; u < 16; u ++) + enc32e(pad + (u << 2), H[u + 16]); +#endif + memcpy(dst, pad + 64 - out_len, out_len); /* output is the last out_len of the 64 bytes just encoded */ + groestl_big_init(sc, (unsigned)out_len << 3); /* reset the context, output size given in bits */ +} + +/* see sph_groestl.h */ +void +sph_groestl224_init(void *cc) +{ + groestl_small_init(cc, 224); +} + +/* see sph_groestl.h */ +void +sph_groestl224(void *cc, const void *data, size_t len) +{ + groestl_small_core(cc, data, len); +} + +/* see sph_groestl.h */ +void +sph_groestl224_close(void *cc, void *dst) +{ + groestl_small_close(cc, 0, 0, dst, 28); +} + +/* see sph_groestl.h */ +void +sph_groestl224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + groestl_small_close(cc, ub, n, dst, 28); +} + +/* see sph_groestl.h */ +void +sph_groestl256_init(void *cc) +{ + groestl_small_init(cc, 256); +} + +/* see sph_groestl.h */ +void +sph_groestl256(void *cc, const void *data, size_t len) +{ + groestl_small_core(cc, data, len); +} + +/* see sph_groestl.h */ +void +sph_groestl256_close(void *cc, void *dst) +{ + groestl_small_close(cc, 0, 0, dst, 32); +} + +/* see sph_groestl.h */ +void +sph_groestl256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + groestl_small_close(cc, ub, n, dst, 32); +} + +/* see sph_groestl.h */ +void +sph_groestl384_init(void *cc) +{ + groestl_big_init(cc, 384); +} + +/* see sph_groestl.h */ +void +sph_groestl384(void *cc, const void *data, size_t len) +{ + groestl_big_core(cc, data, len); +} + +/* see sph_groestl.h */ +void +sph_groestl384_close(void *cc, void *dst) +{ + groestl_big_close(cc, 0, 0, dst, 48); +} + +/* see sph_groestl.h */ +void +sph_groestl384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + 
groestl_big_close(cc, ub, n, dst, 48); +} + +/* see sph_groestl.h */ +void +sph_groestl512_init(void *cc) +{ + groestl_big_init(cc, 512); +} + +/* see sph_groestl.h */ +void +sph_groestl512(void *cc, const void *data, size_t len) +{ + groestl_big_core(cc, data, len); +} + +/* see sph_groestl.h */ +void +sph_groestl512_close(void *cc, void *dst) +{ + groestl_big_close(cc, 0, 0, dst, 64); +} + +/* see sph_groestl.h */ +void +sph_groestl512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + groestl_big_close(cc, ub, n, dst, 64); +} + +#ifdef __cplusplus +} +#endif diff --git a/heavy.cu b/heavy.cu new file mode 100644 index 0000000..6ce1a66 --- /dev/null +++ b/heavy.cu @@ -0,0 +1,416 @@ +#include +#include +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" +#include + +#ifndef _WIN32 +#include +#endif + +// include thrust +#include +#include +#include +#include + +#include "miner.h" + +#include "hefty1.h" +#include "sph_keccak.h" +#include "sph_blake.h" +#include "sph_groestl.h" + +#include "cuda_hefty1.h" +#include "cuda_sha256.h" +#include "cuda_keccak512.h" +#include "cuda_groestl512.h" +#include "cuda_blake512.h" +#include "cuda_combine.h" + +extern uint32_t *d_hash2output[8]; +extern uint32_t *d_hash3output[8]; +extern uint32_t *d_hash4output[8]; +extern uint32_t *d_hash5output[8]; + +#define HEAVYCOIN_BLKHDR_SZ 84 + +// nonce-array für die threads +uint32_t *d_nonceVector[8]; + +/* Combines top 64-bits from each hash into a single hash */ +static void combine_hashes(uint32_t *out, const uint32_t *hash1, const uint32_t *hash2, const uint32_t *hash3, const uint32_t *hash4) +{ + const uint32_t *hash[4] = { hash1, hash2, hash3, hash4 }; + int bits; + unsigned int i; + uint32_t mask; + unsigned int k; + + /* Transpose first 64 bits of each hash into out */ + memset(out, 0, 32); + bits = 0; + for (i = 7; i >= 6; i--) { + for (mask = 0x80000000; mask; mask >>= 1) { + for (k = 0; k < 4; k++) { + out[(255 - bits)/32] <<= 1; + if 
((hash[k][i] & mask) != 0) + out[(255 - bits)/32] |= 1; + bits++; + } + } + } +} + +#ifdef _MSC_VER +#include <intrin.h> +static uint32_t __inline bitsset( uint32_t x ) +{ + DWORD r = 0; + _BitScanReverse(&r, x); + return r; +} +#else +static uint32_t bitsset( uint32_t x ) +{ + return 31-__builtin_clz(x); +} +#endif + +// Find the highest set bit in a multi-word integer (returns 0 when all words are zero, otherwise the 1-based bit position). +static int findhighbit(const uint32_t *ptarget, int words) +{ + int i; + int highbit = 0; + for (i=words-1; i >= 0; --i) + { + if (ptarget[i] != 0) { + highbit = i*32 + bitsset(ptarget[i])+1; + break; + } + } + return highbit; +} + +// Generate a multi-word integer that represents the number +// (1 << highbit) - 1, i.e. the low `highbit` bits set. +static void genmask(uint32_t *ptarget, int words, int highbit) +{ + int i; + for (i=words-1; i >= 0; --i) + { + if ((i+1)*32 <= highbit) + ptarget[i] = 0xffffffff; + else if (i*32 > highbit) + ptarget[i] = 0x00000000; + else + ptarget[i] = (1U << (highbit-i*32)) - 1; + } +} + +struct check_nonce_for_remove +{ + check_nonce_for_remove(uint64_t target, uint32_t *hashes, uint32_t hashlen, uint32_t startNonce) : + m_target(target), + m_hashes(hashes), + m_hashlen(hashlen), + m_startNonce(startNonce) { } + + __device__ + bool operator()(const uint32_t x) + { + // Position in the hash buffer + uint32_t hashIndex = x - m_startNonce; + // Read the hash value (as uint64_t). + // It is located in words 6 and 7 of the hash (each of these hashes has 512 bits) + uint64_t hashValue = *((uint64_t*)(&m_hashes[m_hashlen*hashIndex + 6])); + // Check against the target: only bits that are set in the target may be set. + return (hashValue & m_target) != hashValue; + } + + uint64_t m_target; + uint32_t *m_hashes; + uint32_t m_hashlen; + uint32_t m_startNonce; +}; + +// Determine the number of CUDA devices in the system +extern "C" int cuda_num_devices() +{ + int version; + cudaError_t err = cudaDriverGetVersion(&version); + if (err != cudaSuccess) + { + applog(LOG_ERR, "Unable to query CUDA driver version! 
Is an nVidia driver installed?"); + exit(1); + } + + int maj = version / 1000, min = version % 100; // same as in deviceQuery sample + if (maj < 5 || (maj == 5 && min < 5)) + { + applog(LOG_ERR, "Driver does not support CUDA %d.%d API! Update your nVidia driver!", 5, 5); + exit(1); + } + + int GPU_N; + err = cudaGetDeviceCount(&GPU_N); + if (err != cudaSuccess) + { + applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?"); + exit(1); + } + return GPU_N; +} + +// Zeitsynchronisations-Routine von cudaminer mit CPU sleep +typedef struct { double value[8]; } tsumarray; +cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id) +{ + cudaError_t result = cudaSuccess; + if (situation >= 0) + { + static std::map tsum; + + double a = 0.95, b = 0.05; + if (tsum.find(situation) == tsum.end()) { a = 0.5; b = 0.5; } // faster initial convergence + + double tsync = 0.0; + double tsleep = 0.95 * tsum[situation].value[thr_id]; + if (cudaStreamQuery(stream) == cudaErrorNotReady) + { + usleep((useconds_t)(1e6*tsleep)); + struct timeval tv_start, tv_end; + gettimeofday(&tv_start, NULL); + result = cudaStreamSynchronize(stream); + gettimeofday(&tv_end, NULL); + tsync = 1e-6 * (tv_end.tv_usec-tv_start.tv_usec) + (tv_end.tv_sec-tv_start.tv_sec); + } + if (tsync >= 0) tsum[situation].value[thr_id] = a * tsum[situation].value[thr_id] + b * (tsleep+tsync); + } + else + result = cudaStreamSynchronize(stream); + return result; +} + +int scanhash_heavy_cpp(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done, uint32_t maxvote); + +extern "C" +int scanhash_heavy(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done, uint32_t maxvote) +{ + return scanhash_heavy_cpp(thr_id, pdata, + ptarget, max_nonce, hashes_done, maxvote); +} + +int scanhash_heavy_cpp(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned 
long *hashes_done, uint32_t maxvote) +{ + // CUDA will process thousands of threads. + const int throughput = 4096 * 128; + + int rc = 0; + uint32_t *hash = NULL; + cudaMallocHost(&hash, throughput*8*sizeof(uint32_t)); + uint32_t *cpu_nonceVector = NULL; + cudaMallocHost(&cpu_nonceVector, throughput*sizeof(uint32_t)); + + int nrmCalls[6]; + memset(nrmCalls, 0, sizeof(int) * 6); + + uint32_t start_nonce = pdata[19]; + uint16_t *ext = (uint16_t *)&pdata[20]; + + // für jeden Hash ein individuelles Target erstellen basierend + // auf dem höchsten Bit, das in ptarget gesetzt ist. + int highbit = findhighbit(ptarget, 8); + uint32_t target2[2], target3[2], target4[2], target5[2]; + genmask(target2, 2, highbit/4+(((highbit%4)>3)?1:0) ); // SHA256 + genmask(target3, 2, highbit/4+(((highbit%4)>2)?1:0) ); // keccak512 + genmask(target4, 2, highbit/4+(((highbit%4)>1)?1:0) ); // groestl512 + genmask(target5, 2, highbit/4+(((highbit%4)>0)?1:0) ); // blake512 + + static bool init[8] = {0,0,0,0,0,0,0,0}; + if (!init[thr_id]) + { + hefty_cpu_init(thr_id, throughput); + sha256_cpu_init(thr_id, throughput); + keccak512_cpu_init(thr_id, throughput); + groestl512_cpu_init(thr_id, throughput); + blake512_cpu_init(thr_id, throughput); + combine_cpu_init(thr_id, throughput); + init[thr_id] = true; + cudaMalloc(&d_nonceVector[thr_id], sizeof(uint32_t) * throughput); + } + + + if (opt_vote > maxvote) { + printf("Warning: Your block reward vote (%hu) exceeds " + "the maxvote reported by the pool (%hu).\n", + opt_vote, maxvote); + } + + if (opt_trust_pool && opt_vote > maxvote) { + printf("Warning: Capping block reward vote to maxvote reported by pool.\n"); + ext[0] = maxvote; + } + else + ext[0] = opt_vote; + + // Setze die Blockdaten + hefty_cpu_setBlock(thr_id, throughput, pdata); + sha256_cpu_setBlock(pdata); + keccak512_cpu_setBlock(pdata); + groestl512_cpu_setBlock(pdata); + blake512_cpu_setBlock(pdata); + + do { + int i; + + ////// Compaction init + thrust::device_ptr 
devNoncePtr(d_nonceVector[thr_id]); + thrust::device_ptr devNoncePtrEnd((d_nonceVector[thr_id]) + throughput); + uint32_t actualNumberOfValuesInNonceVectorGPU = throughput; + + hefty_cpu_hash(thr_id, throughput, pdata[19]); + //cudaThreadSynchronize(); + sha256_cpu_hash(thr_id, throughput, pdata[19]); + //cudaThreadSynchronize(); + + // Hier ist die längste CPU Wartephase. Deshalb ein strategisches MyStreamSynchronize() hier. + MyStreamSynchronize(NULL, 0, thr_id); + + ////// Compaction + devNoncePtrEnd = thrust::remove_if(devNoncePtr, devNoncePtrEnd, check_nonce_for_remove(*((uint64_t*)target2), d_hash2output[thr_id], 8, pdata[19])); + actualNumberOfValuesInNonceVectorGPU = (uint32_t)(devNoncePtrEnd - devNoncePtr); + if(actualNumberOfValuesInNonceVectorGPU == 0) + goto emptyNonceVector; + + keccak512_cpu_hash(thr_id, actualNumberOfValuesInNonceVectorGPU, pdata[19]); + //cudaThreadSynchronize(); + + ////// Compaction + devNoncePtrEnd = thrust::remove_if(devNoncePtr, devNoncePtrEnd, check_nonce_for_remove(*((uint64_t*)target3), d_hash3output[thr_id], 16, pdata[19])); + actualNumberOfValuesInNonceVectorGPU = (uint32_t)(devNoncePtrEnd - devNoncePtr); + if(actualNumberOfValuesInNonceVectorGPU == 0) + goto emptyNonceVector; + + blake512_cpu_hash(thr_id, actualNumberOfValuesInNonceVectorGPU, pdata[19]); + //cudaThreadSynchronize(); + + ////// Compaction + devNoncePtrEnd = thrust::remove_if(devNoncePtr, devNoncePtrEnd, check_nonce_for_remove(*((uint64_t*)target5), d_hash5output[thr_id], 16, pdata[19])); + actualNumberOfValuesInNonceVectorGPU = (uint32_t)(devNoncePtrEnd - devNoncePtr); + if(actualNumberOfValuesInNonceVectorGPU == 0) + goto emptyNonceVector; + + groestl512_cpu_hash(thr_id, actualNumberOfValuesInNonceVectorGPU, pdata[19]); + //cudaThreadSynchronize(); + + ////// Compaction + devNoncePtrEnd = thrust::remove_if(devNoncePtr, devNoncePtrEnd, check_nonce_for_remove(*((uint64_t*)target4), d_hash4output[thr_id], 16, pdata[19])); + 
actualNumberOfValuesInNonceVectorGPU = (uint32_t)(devNoncePtrEnd - devNoncePtr); + if(actualNumberOfValuesInNonceVectorGPU == 0) + goto emptyNonceVector; + + // combine + combine_cpu_hash(thr_id, actualNumberOfValuesInNonceVectorGPU, pdata[19], hash); + + // Ergebnisse kopieren + if(actualNumberOfValuesInNonceVectorGPU > 0) + { + cudaMemcpy(cpu_nonceVector, d_nonceVector[thr_id], sizeof(uint32_t) * actualNumberOfValuesInNonceVectorGPU, cudaMemcpyDeviceToHost); + + for (i=0; i + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * The views and conclusions contained in the software and documentation are those + * of the authors and should not be interpreted as representing official policies, + * either expressed or implied, of the FreeBSD Project. + */ + +#include <assert.h> +#include <string.h> + +#include "hefty1.h" + +#ifdef WIN32 +#define inline __inline +#endif + +#define Min(A, B) ((A) <= (B) ? (A) : (B)) /* smaller of A and B; the chosen argument is evaluated twice */ + +#define RoundFunc(ctx, A, B, C, D, E, F, G, H, W, K) \ + { \ + /* To thwart parallelism, Br modifies itself each time it's \ + * called. This also means that calling it in different \ + * orders yields different results. In C the order of \ + * evaluation of function arguments and + operands are \ + * unspecified (and depends on the compiler), so we must make \ + * the order of Br calls explicit. \ + */ \ + uint32_t brG = Br(ctx, G); \ + uint32_t tmp1 = Ch(E, Br(ctx, F), brG) + H + W + K; \ + uint32_t tmp2 = tmp1 + Sigma1(Br(ctx, E)); \ + uint32_t brC = Br(ctx, C); \ + uint32_t brB = Br(ctx, B); \ + uint32_t tmp3 = Ma(Br(ctx, A), brB, brC); \ + uint32_t tmp4 = tmp3 + Sigma0(Br(ctx, A)); \ + H = G; \ + G = F; \ + F = E; \ + E = D + Br(ctx, tmp2); \ + D = C; \ + C = B; \ + B = A; \ + A = tmp2 + tmp4; \ + } \ + +/* Nothing up my sleeve constants */ +const static uint32_t K[64] = { + 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, + 0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, + 0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL, + 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL, + 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, + 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, + 0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, + 0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL, + 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL, + 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, + 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, + 0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, + 0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 
0x34b0bcb5UL, + 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL, + 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, + 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL +}; + +/* Initial hash values */ +const static uint32_t H[HEFTY1_STATE_WORDS] = { + 0x6a09e667UL, + 0xbb67ae85UL, + 0x3c6ef372UL, + 0xa54ff53aUL, + 0x510e527fUL, + 0x9b05688cUL, + 0x1f83d9abUL, + 0x5be0cd19UL +}; + +static inline uint32_t Rr(uint32_t X, uint8_t n) /* rotate X right by n bits; both shift counts are reduced mod 32 so n == 0 (which Mangle can pass) never shifts by the full word width */ +{ + return (X >> (n & 31)) | (X << ((32 - (n & 31)) & 31)); +} + +static inline uint32_t Ch(uint32_t E, uint32_t F, uint32_t G) +{ + return (E & F) ^ (~E & G); +} + +static inline uint32_t Sigma1(uint32_t E) +{ + return Rr(E, 6) ^ Rr(E, 11) ^ Rr(E, 25); +} + +static inline uint32_t sigma1(uint32_t X) +{ + return Rr(X, 17) ^ Rr(X, 19) ^ (X >> 10); +} + +static inline uint32_t Ma(uint32_t A, uint32_t B, uint32_t C) +{ + return (A & B) ^ (A & C) ^ (B & C); +} + +static inline uint32_t Sigma0(uint32_t A) +{ + return Rr(A, 2) ^ Rr(A, 13) ^ Rr(A, 22); +} + +static inline uint32_t sigma0(uint32_t X) +{ + return Rr(X, 7) ^ Rr(X, 18) ^ (X >> 3); +} + +static inline uint32_t Reverse32(uint32_t n) +{ + #if BYTE_ORDER == LITTLE_ENDIAN + return n << 24 | (n & 0x0000ff00) << 8 | (n & 0x00ff0000) >> 8 | n >> 24; + #else + return n; + #endif +} + +static inline uint64_t Reverse64(uint64_t n) +{ + #if BYTE_ORDER == LITTLE_ENDIAN + uint32_t a = n >> 32; + uint32_t b = (n << 32) >> 32; + + return (uint64_t)Reverse32(b) << 32 | Reverse32(a); + #else + return n; + #endif +} + +/* Smoosh byte into nibble */ +static inline uint8_t Smoosh4(uint8_t X) +{ + return (X >> 4) ^ (X & 0xf); +} + +/* Smoosh 32-bit word into 2-bits */ +static inline uint8_t Smoosh2(uint32_t X) +{ + uint16_t w = (X >> 16) ^ (X & 0xffff); + uint8_t n = Smoosh4((w >> 8) ^ (w & 0xff)); + return (n >> 2) ^ (n & 0x3); +} +#include +static void Mangle(uint32_t *S) +{ + uint8_t r0 = Smoosh4(S[0] >> 24); + uint8_t r1 = Smoosh4(S[0] >> 16); + uint8_t r2 = Smoosh4(S[0] >> 8); + uint8_t r3 = 
Smoosh4(S[0] & 0xff); + + /* Diffuse */ + S[1] ^= Rr(S[0], r0); + switch (Smoosh2(S[1])) { + case 0: S[2] ^= Rr(S[0], 1 + r0); break; + case 1: S[2] += Rr(~S[0], 1 + r1); break; + case 2: S[2] &= Rr(~S[0], 1 + r2); break; + case 3: S[2] ^= Rr(S[0], 1 + r3); break; + } + switch (Smoosh2(S[1] ^ S[2])) { + case 0: S[3] ^= Rr(S[0], 2 + r0); break; + case 1: S[3] += Rr(~S[0], 2 + r1); break; + case 2: S[3] &= Rr(~S[0], 2 + r2); break; + case 3: S[3] ^= Rr(S[0], 2 + r3); break; + } + + /* Compress */ + S[0] ^= (S[1] ^ S[2]) + S[3]; +} + +static void Absorb(uint32_t *S, uint32_t X) +{ + uint32_t *R = S; + R[0] ^= X; + Mangle(S); +} + +static uint32_t Squeeze(uint32_t *S) +{ + uint32_t Y = S[0]; + Mangle(S); + return Y; +} + +/* Branch, compress and serialize function */ +static inline uint32_t Br(HEFTY1_CTX *ctx, uint32_t X) +{ + uint32_t R = Squeeze(ctx->sponge); + + uint8_t r0 = R >> 8; + uint8_t r1 = R & 0xff; + + uint32_t Y = (uint32_t)1 << (r0 % 32); /* unsigned 1: selecting bit 31 must not overflow a signed int */ + + switch (r1 % 4) + { + case 0: + /* Do nothing */ + break; + case 1: + return X & ~Y; + case 2: + return X | Y; + case 3: + return X ^ Y; + } + + return X; +} + +static void HashBlock(HEFTY1_CTX *ctx) +{ + uint32_t A, B, C, D, E, F, G, H; + uint32_t W[HEFTY1_BLOCK_BYTES]; + int t; + + assert(ctx); + + A = ctx->h[0]; + B = ctx->h[1]; + C = ctx->h[2]; + D = ctx->h[3]; + E = ctx->h[4]; + F = ctx->h[5]; + G = ctx->h[6]; + H = ctx->h[7]; + + t = 0; + for (; t < 16; t++) { + W[t] = Reverse32(((uint32_t *)&ctx->block[0])[t]); /* To host byte order */ + Absorb(ctx->sponge, W[t] ^ K[t]); + } + + for (t = 0; t < 16; t++) { + Absorb(ctx->sponge, D ^ H); + RoundFunc(ctx, A, B, C, D, E, F, G, H, W[t], K[t]); + } + for (t = 16; t < 64; t++) { + Absorb(ctx->sponge, H + D); + W[t] = sigma1(W[t - 2]) + W[t - 7] + sigma0(W[t - 15]) + W[t - 16]; + RoundFunc(ctx, A, B, C, D, E, F, G, H, W[t], K[t]); + } + + ctx->h[0] += A; + ctx->h[1] += B; + ctx->h[2] += C; + ctx->h[3] += D; + ctx->h[4] += E; + ctx->h[5] += F; + ctx->h[6] += G; + ctx->h[7] += H; 
+ + A = 0; + B = 0; + C = 0; + D = 0; + E = 0; + F = 0; + G = 0; + H = 0; + + memset(W, 0, sizeof(W)); +} + +/* Public interface */ + +void HEFTY1_Init(HEFTY1_CTX *ctx) +{ + assert(ctx); + + memcpy(ctx->h, H, sizeof(ctx->h)); + memset(ctx->block, 0, sizeof(ctx->block)); + ctx->written = 0; + memset(ctx->sponge, 0, sizeof(ctx->sponge)); +} + +void HEFTY1_Update(HEFTY1_CTX *ctx, const void *buf, size_t len) +{ + uint64_t read; + assert(ctx); + + read = 0; + while (len) { + uint64_t end = ctx->written % HEFTY1_BLOCK_BYTES; + uint64_t count = Min(len, HEFTY1_BLOCK_BYTES - end); + memcpy(&ctx->block[end], &((unsigned char *)buf)[read], (size_t)count); + len -= (size_t)count; + read += count; + ctx->written += count; + if (!(ctx->written % HEFTY1_BLOCK_BYTES)) + HashBlock(ctx); + } +} + +void HEFTY1_Final(unsigned char *digest, HEFTY1_CTX *ctx) +{ + uint64_t used; + uint64_t *len; + int i; + assert(digest); + assert(ctx); + + /* Pad message (FIPS 180 Section 5.1.1) */ + used = ctx->written % HEFTY1_BLOCK_BYTES; + ctx->block[used++] = 0x80; /* Append 1 to end of message */ + if (used > HEFTY1_BLOCK_BYTES - 8) { + /* We have already written into the last 64bits, so + * we must continue into the next block. 
*/ + memset(&ctx->block[used], 0, HEFTY1_BLOCK_BYTES - (size_t)used); + HashBlock(ctx); + used = 0; /* Create a new block (below) */ + } + + /* All remaining bits to zero */ + memset(&ctx->block[used], 0, HEFTY1_BLOCK_BYTES - 8 - (size_t)used); + + /* The last 64bits encode the length (in network byte order) */ + len = (uint64_t *)&ctx->block[HEFTY1_BLOCK_BYTES - 8]; + *len = Reverse64(ctx->written*8); + + HashBlock(ctx); + + /* Convert back to network byte order */ + i = 0; + for (; i < HEFTY1_STATE_WORDS; i++) + ctx->h[i] = Reverse32(ctx->h[i]); + + memcpy(digest, ctx->h, sizeof(ctx->h)); + memset(ctx, 0, sizeof(HEFTY1_CTX)); +} + +unsigned char* HEFTY1(const unsigned char *buf, size_t len, unsigned char *digest) +{ + HEFTY1_CTX ctx; + static unsigned char m[HEFTY1_DIGEST_BYTES]; + + if (!digest) + digest = m; + + HEFTY1_Init(&ctx); + HEFTY1_Update(&ctx, buf, len); + HEFTY1_Final(digest, &ctx); + + return digest; +} diff --git a/hefty1.h b/hefty1.h new file mode 100644 index 0000000..29939e8 --- /dev/null +++ b/hefty1.h @@ -0,0 +1,66 @@ +/* + * HEFTY1 CPU-only cryptographic hash function + * + * Copyright (c) 2014, dbcc14 + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * The views and conclusions contained in the software and documentation are those + * of the authors and should not be interpreted as representing official policies, + * either expressed or implied, of the FreeBSD Project. + */ + +#ifndef __HEFTY1_H__ +#define __HEFTY1_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef WIN32 +#include +#endif + +#include + +#define HEFTY1_DIGEST_BYTES 32 +#define HEFTY1_BLOCK_BYTES 64 +#define HEFTY1_STATE_WORDS 8 +#define HEFTY1_SPONGE_WORDS 4 + +typedef struct HEFTY1_CTX { + uint32_t h[HEFTY1_STATE_WORDS]; + uint8_t block[HEFTY1_BLOCK_BYTES]; + uint64_t written; + uint32_t sponge[HEFTY1_SPONGE_WORDS]; +} HEFTY1_CTX; + +void HEFTY1_Init(HEFTY1_CTX *cxt); +void HEFTY1_Update(HEFTY1_CTX *cxt, const void *data, size_t len); +void HEFTY1_Final(unsigned char *digest, HEFTY1_CTX *cxt); +unsigned char* HEFTY1(const unsigned char *data, size_t len, unsigned char *digest); + +#ifdef __cplusplus +} +#endif + +#endif /* __HEFTY1_H__ */ diff --git a/install-sh b/install-sh new file mode 100644 index 0000000..9c04de2 --- /dev/null +++ b/install-sh @@ -0,0 +1,527 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2011-01-19.21; # UTC + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. 
+# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# `make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. + +nl=' +' +IFS=" "" $nl" + +# set DOITPROG to echo to test this script + +# Don't use :- since 4.3BSD and earlier shells don't like it. +doit=${DOITPROG-} +if test -z "$doit"; then + doit_exec=exec +else + doit_exec=$doit +fi + +# Put in absolute file names if you don't have them in your path; +# or use environment vars. 
+ +chgrpprog=${CHGRPPROG-chgrp} +chmodprog=${CHMODPROG-chmod} +chownprog=${CHOWNPROG-chown} +cmpprog=${CMPPROG-cmp} +cpprog=${CPPROG-cp} +mkdirprog=${MKDIRPROG-mkdir} +mvprog=${MVPROG-mv} +rmprog=${RMPROG-rm} +stripprog=${STRIPPROG-strip} + +posix_glob='?' +initialize_posix_glob=' + test "$posix_glob" != "?" || { + if (set -f) 2>/dev/null; then + posix_glob= + else + posix_glob=: + fi + } +' + +posix_mkdir= + +# Desired mode of installed file. +mode=0755 + +chgrpcmd= +chmodcmd=$chmodprog +chowncmd= +mvcmd=$mvprog +rmcmd="$rmprog -f" +stripcmd= + +src= +dst= +dir_arg= +dst_arg= + +copy_on_change=false +no_target_directory= + +usage="\ +Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: + --help display this help and exit. + --version display version info and exit. + + -c (ignored) + -C install only if different (preserve the last data modification time) + -d create directories instead of installing files. + -g GROUP $chgrpprog installed files to GROUP. + -m MODE $chmodprog installed files to MODE. + -o USER $chownprog installed files to USER. + -s $stripprog installed files. + -t DIRECTORY install into DIRECTORY. + -T report an error if DSTFILE is a directory. 
+ +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG + RMPROG STRIPPROG +" + +while test $# -ne 0; do + case $1 in + -c) ;; + + -C) copy_on_change=true;; + + -d) dir_arg=true;; + + -g) chgrpcmd="$chgrpprog $2" + shift;; + + --help) echo "$usage"; exit $?;; + + -m) mode=$2 + case $mode in + *' '* | *' '* | *' +'* | *'*'* | *'?'* | *'['*) + echo "$0: invalid mode: $mode" >&2 + exit 1;; + esac + shift;; + + -o) chowncmd="$chownprog $2" + shift;; + + -s) stripcmd=$stripprog;; + + -t) dst_arg=$2 + # Protect names problematic for `test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + shift;; + + -T) no_target_directory=true;; + + --version) echo "$0 $scriptversion"; exit $?;; + + --) shift + break;; + + -*) echo "$0: invalid option: $1" >&2 + exit 1;; + + *) break;; + esac + shift +done + +if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then + # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + # Otherwise, the last argument is the destination. Remove it from $@. + for arg + do + if test -n "$dst_arg"; then + # $@ is not empty: it contains at least $arg. + set fnord "$@" "$dst_arg" + shift # fnord + fi + shift # arg + dst_arg=$arg + # Protect names problematic for `test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + done +fi + +if test $# -eq 0; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call `install-sh -d' without argument. + # This can happen when creating conditional directories. + exit 0 +fi + +if test -z "$dir_arg"; then + do_exit='(exit $ret); exit $ret' + trap "ret=129; $do_exit" 1 + trap "ret=130; $do_exit" 2 + trap "ret=141; $do_exit" 13 + trap "ret=143; $do_exit" 15 + + # Set umask so as not to create temps with too-generous modes. 
+ # However, 'strip' requires both read and write access to temps. + case $mode in + # Optimize common cases. + *644) cp_umask=133;; + *755) cp_umask=22;; + + *[0-7]) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw='% 200' + fi + cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; + *) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw=,u+rw + fi + cp_umask=$mode$u_plus_rw;; + esac +fi + +for src +do + # Protect names problematic for `test' and other utilities. + case $src in + -* | [=\(\)!]) src=./$src;; + esac + + if test -n "$dir_arg"; then + dst=$src + dstdir=$dst + test -d "$dstdir" + dstdir_status=$? + else + + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." >&2 + exit 1 + fi + + if test -z "$dst_arg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + dst=$dst_arg + + # If destination is a directory, append the input filename; won't work + # if double slashes aren't ignored. + if test -d "$dst"; then + if test -n "$no_target_directory"; then + echo "$0: $dst_arg: Is a directory" >&2 + exit 1 + fi + dstdir=$dst + dst=$dstdir/`basename "$src"` + dstdir_status=0 + else + # Prefer dirname, but fall back on a substitute if dirname fails. + dstdir=` + (dirname "$dst") 2>/dev/null || + expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$dst" : 'X\(//\)[^/]' \| \ + X"$dst" : 'X\(//\)$' \| \ + X"$dst" : 'X\(/\)' \| . 2>/dev/null || + echo X"$dst" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q' + ` + + test -d "$dstdir" + dstdir_status=$? 
+ fi + fi + + obsolete_mkdir_used=false + + if test $dstdir_status != 0; then + case $posix_mkdir in + '') + # Create intermediate dirs using mode 755 as modified by the umask. + # This is like FreeBSD 'install' as of 1997-10-28. + umask=`umask` + case $stripcmd.$umask in + # Optimize common cases. + *[2367][2367]) mkdir_umask=$umask;; + .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; + + *[0-7]) + mkdir_umask=`expr $umask + 22 \ + - $umask % 100 % 40 + $umask % 20 \ + - $umask % 10 % 4 + $umask % 2 + `;; + *) mkdir_umask=$umask,go-w;; + esac + + # With -d, create the new directory with the user-specified mode. + # Otherwise, rely on $mkdir_umask. + if test -n "$dir_arg"; then + mkdir_mode=-m$mode + else + mkdir_mode= + fi + + posix_mkdir=false + case $umask in + *[123567][0-7][0-7]) + # POSIX mkdir -p sets u+wx bits regardless of umask, which + # is incompatible with FreeBSD 'install' when (umask & 300) != 0. + ;; + *) + tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ + trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0 + + if (umask $mkdir_umask && + exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1 + then + if test -z "$dir_arg" || { + # Check for POSIX incompatibilities with -m. + # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or + # other-writeable bit of parent directory when it shouldn't. + # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. + ls_ld_tmpdir=`ls -ld "$tmpdir"` + case $ls_ld_tmpdir in + d????-?r-*) different_mode=700;; + d????-?--*) different_mode=755;; + *) false;; + esac && + $mkdirprog -m$different_mode -p -- "$tmpdir" && { + ls_ld_tmpdir_1=`ls -ld "$tmpdir"` + test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" + } + } + then posix_mkdir=: + fi + rmdir "$tmpdir/d" "$tmpdir" + else + # Remove any dirs left behind by ancient mkdir implementations. 
+ rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null + fi + trap '' 0;; + esac;; + esac + + if + $posix_mkdir && ( + umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" + ) + then : + else + + # The umask is ridiculous, or mkdir does not conform to POSIX, + # or it failed possibly due to a race condition. Create the + # directory the slow way, step by step, checking for races as we go. + + case $dstdir in + /*) prefix='/';; + [-=\(\)!]*) prefix='./';; + *) prefix='';; + esac + + eval "$initialize_posix_glob" + + oIFS=$IFS + IFS=/ + $posix_glob set -f + set fnord $dstdir + shift + $posix_glob set +f + IFS=$oIFS + + prefixes= + + for d + do + test X"$d" = X && continue + + prefix=$prefix$d + if test -d "$prefix"; then + prefixes= + else + if $posix_mkdir; then + (umask=$mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break + # Don't fail if two instances are running concurrently. + test -d "$prefix" || exit 1 + else + case $prefix in + *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; + *) qprefix=$prefix;; + esac + prefixes="$prefixes '$qprefix'" + fi + fi + prefix=$prefix/ + done + + if test -n "$prefixes"; then + # Don't fail if two instances are running concurrently. + (umask $mkdir_umask && + eval "\$doit_exec \$mkdirprog $prefixes") || + test -d "$dstdir" || exit 1 + obsolete_mkdir_used=true + fi + fi + fi + + if test -n "$dir_arg"; then + { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && + { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || + test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 + else + + # Make a couple of temp file names in the proper directory. + dsttmp=$dstdir/_inst.$$_ + rmtmp=$dstdir/_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 + + # Copy the file name to the temp name. 
+ (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. + # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && + { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && + { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && + + # If -C, don't bother to copy if it wouldn't change the file. + if $copy_on_change && + old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && + new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && + + eval "$initialize_posix_glob" && + $posix_glob set -f && + set X $old && old=:$2:$4:$5:$6 && + set X $new && new=:$2:$4:$5:$6 && + $posix_glob set +f && + + test "$old" = "$new" && + $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 + then + rm -f "$dsttmp" + else + # Rename the file to the real destination. + $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || + + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + { + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + test ! -f "$dst" || + $doit $rmcmd -f "$dst" 2>/dev/null || + { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && + { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } + } || + { echo "$0: cannot unlink or rename $dst" >&2 + (exit 1); exit 1 + } + } && + + # Now rename the file to the real destination. 
+ $doit $mvcmd "$dsttmp" "$dst" + } + fi || exit 1 + + trap '' 0 + fi +done + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC" +# time-stamp-end: "; # UTC" +# End: diff --git a/keccak.c b/keccak.c new file mode 100644 index 0000000..8c90f3a --- /dev/null +++ b/keccak.c @@ -0,0 +1,1824 @@ +/* $Id: keccak.c 259 2011-07-19 22:11:27Z tp $ */ +/* + * Keccak implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include <stddef.h> +#include <string.h> + +#include "sph_keccak.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +/* + * Parameters: + * + * SPH_KECCAK_64 use a 64-bit type + * SPH_KECCAK_UNROLL number of loops to unroll (0/undef for full unroll) + * SPH_KECCAK_INTERLEAVE use bit-interleaving (32-bit type only) + * SPH_KECCAK_NOCOPY do not copy the state into local variables + * + * If there is no usable 64-bit type, the code automatically switches + * back to the 32-bit implementation. + * + * Some tests on an Intel Core2 Q6600 (both 64-bit and 32-bit, 32 kB L1 + * code cache), a PowerPC (G3, 32 kB L1 code cache), an ARM920T core + * (16 kB L1 code cache), and a small MIPS-compatible CPU (Broadcom BCM3302, + * 8 kB L1 code cache), seem to show that the following are optimal: + * + * -- x86, 64-bit: use the 64-bit implementation, unroll 8 rounds, + * do not copy the state; unrolling 2, 6 or all rounds also provides + * near-optimal performance. + * -- x86, 32-bit: use the 32-bit implementation, unroll 6 rounds, + * interleave, do not copy the state. Unrolling 1, 2, 4 or 8 rounds + * also provides near-optimal performance. + * -- PowerPC: use the 64-bit implementation, unroll 8 rounds, + * copy the state. Unrolling 4 or 6 rounds is near-optimal. + * -- ARM: use the 64-bit implementation, unroll 2 or 4 rounds, + * copy the state. + * -- MIPS: use the 64-bit implementation, unroll 2 rounds, copy + * the state. Unrolling only 1 round is also near-optimal. 
+ * + * Also, interleaving does not always yield actual improvements when + * using a 32-bit implementation; in particular when the architecture + * does not offer a native rotation opcode (interleaving replaces one + * 64-bit rotation with two 32-bit rotations, which is a gain only if + * there is a native 32-bit rotation opcode and not a native 64-bit + * rotation opcode; also, interleaving implies a small overhead when + * processing input words). + * + * To sum up: + * -- when possible, use the 64-bit code + * -- exception: on 32-bit x86, use 32-bit code + * -- when using 32-bit code, use interleaving + * -- copy the state, except on x86 + * -- unroll 8 rounds on "big" machine, 2 rounds on "small" machines + */ + +#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_KECCAK +#define SPH_SMALL_FOOTPRINT_KECCAK 1 +#endif + +/* + * By default, we select the 64-bit implementation if a 64-bit type + * is available, unless a 32-bit x86 is detected. + */ +#if !defined SPH_KECCAK_64 && SPH_64 \ + && !(defined __i386__ || SPH_I386_GCC || SPH_I386_MSVC) +#define SPH_KECCAK_64 1 +#endif + +/* + * If using a 32-bit implementation, we prefer to interleave. + */ +#if !SPH_KECCAK_64 && !defined SPH_KECCAK_INTERLEAVE +#define SPH_KECCAK_INTERLEAVE 1 +#endif + +/* + * Unroll 8 rounds on big systems, 2 rounds on small systems. + */ +#ifndef SPH_KECCAK_UNROLL +#if SPH_SMALL_FOOTPRINT_KECCAK +#define SPH_KECCAK_UNROLL 2 +#else +#define SPH_KECCAK_UNROLL 8 +#endif +#endif + +/* + * We do not want to copy the state to local variables on x86 (32-bit + * and 64-bit alike). 
+ */ +#ifndef SPH_KECCAK_NOCOPY +#if defined __i386__ || defined __x86_64 || SPH_I386_MSVC || SPH_I386_GCC +#define SPH_KECCAK_NOCOPY 1 +#else +#define SPH_KECCAK_NOCOPY 0 +#endif +#endif + +#ifdef _MSC_VER +#pragma warning (disable: 4146) +#endif + +#if SPH_KECCAK_64 + +static const sph_u64 RC[] = { + SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082), + SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000), + SPH_C64(0x000000000000808B), SPH_C64(0x0000000080000001), + SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008009), + SPH_C64(0x000000000000008A), SPH_C64(0x0000000000000088), + SPH_C64(0x0000000080008009), SPH_C64(0x000000008000000A), + SPH_C64(0x000000008000808B), SPH_C64(0x800000000000008B), + SPH_C64(0x8000000000008089), SPH_C64(0x8000000000008003), + SPH_C64(0x8000000000008002), SPH_C64(0x8000000000000080), + SPH_C64(0x000000000000800A), SPH_C64(0x800000008000000A), + SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080), + SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008) +}; + +#if SPH_KECCAK_NOCOPY + +#define a00 (kc->u.wide[ 0]) +#define a10 (kc->u.wide[ 1]) +#define a20 (kc->u.wide[ 2]) +#define a30 (kc->u.wide[ 3]) +#define a40 (kc->u.wide[ 4]) +#define a01 (kc->u.wide[ 5]) +#define a11 (kc->u.wide[ 6]) +#define a21 (kc->u.wide[ 7]) +#define a31 (kc->u.wide[ 8]) +#define a41 (kc->u.wide[ 9]) +#define a02 (kc->u.wide[10]) +#define a12 (kc->u.wide[11]) +#define a22 (kc->u.wide[12]) +#define a32 (kc->u.wide[13]) +#define a42 (kc->u.wide[14]) +#define a03 (kc->u.wide[15]) +#define a13 (kc->u.wide[16]) +#define a23 (kc->u.wide[17]) +#define a33 (kc->u.wide[18]) +#define a43 (kc->u.wide[19]) +#define a04 (kc->u.wide[20]) +#define a14 (kc->u.wide[21]) +#define a24 (kc->u.wide[22]) +#define a34 (kc->u.wide[23]) +#define a44 (kc->u.wide[24]) + +#define DECL_STATE +#define READ_STATE(sc) +#define WRITE_STATE(sc) + +#define INPUT_BUF(size) do { \ + size_t j; \ + for (j = 0; j < (size); j += 8) { \ + kc->u.wide[j >> 3] ^= 
sph_dec64le_aligned(buf + j); \ + } \ + } while (0) + +#define INPUT_BUF144 INPUT_BUF(144) +#define INPUT_BUF136 INPUT_BUF(136) +#define INPUT_BUF104 INPUT_BUF(104) +#define INPUT_BUF72 INPUT_BUF(72) + +#else + +#define DECL_STATE \ + sph_u64 a00, a01, a02, a03, a04; \ + sph_u64 a10, a11, a12, a13, a14; \ + sph_u64 a20, a21, a22, a23, a24; \ + sph_u64 a30, a31, a32, a33, a34; \ + sph_u64 a40, a41, a42, a43, a44; + +#define READ_STATE(state) do { \ + a00 = (state)->u.wide[ 0]; \ + a10 = (state)->u.wide[ 1]; \ + a20 = (state)->u.wide[ 2]; \ + a30 = (state)->u.wide[ 3]; \ + a40 = (state)->u.wide[ 4]; \ + a01 = (state)->u.wide[ 5]; \ + a11 = (state)->u.wide[ 6]; \ + a21 = (state)->u.wide[ 7]; \ + a31 = (state)->u.wide[ 8]; \ + a41 = (state)->u.wide[ 9]; \ + a02 = (state)->u.wide[10]; \ + a12 = (state)->u.wide[11]; \ + a22 = (state)->u.wide[12]; \ + a32 = (state)->u.wide[13]; \ + a42 = (state)->u.wide[14]; \ + a03 = (state)->u.wide[15]; \ + a13 = (state)->u.wide[16]; \ + a23 = (state)->u.wide[17]; \ + a33 = (state)->u.wide[18]; \ + a43 = (state)->u.wide[19]; \ + a04 = (state)->u.wide[20]; \ + a14 = (state)->u.wide[21]; \ + a24 = (state)->u.wide[22]; \ + a34 = (state)->u.wide[23]; \ + a44 = (state)->u.wide[24]; \ + } while (0) + +#define WRITE_STATE(state) do { \ + (state)->u.wide[ 0] = a00; \ + (state)->u.wide[ 1] = a10; \ + (state)->u.wide[ 2] = a20; \ + (state)->u.wide[ 3] = a30; \ + (state)->u.wide[ 4] = a40; \ + (state)->u.wide[ 5] = a01; \ + (state)->u.wide[ 6] = a11; \ + (state)->u.wide[ 7] = a21; \ + (state)->u.wide[ 8] = a31; \ + (state)->u.wide[ 9] = a41; \ + (state)->u.wide[10] = a02; \ + (state)->u.wide[11] = a12; \ + (state)->u.wide[12] = a22; \ + (state)->u.wide[13] = a32; \ + (state)->u.wide[14] = a42; \ + (state)->u.wide[15] = a03; \ + (state)->u.wide[16] = a13; \ + (state)->u.wide[17] = a23; \ + (state)->u.wide[18] = a33; \ + (state)->u.wide[19] = a43; \ + (state)->u.wide[20] = a04; \ + (state)->u.wide[21] = a14; \ + (state)->u.wide[22] = a24; \ + 
(state)->u.wide[23] = a34; \ + (state)->u.wide[24] = a44; \ + } while (0) + +#define INPUT_BUF144 do { \ + a00 ^= sph_dec64le_aligned(buf + 0); \ + a10 ^= sph_dec64le_aligned(buf + 8); \ + a20 ^= sph_dec64le_aligned(buf + 16); \ + a30 ^= sph_dec64le_aligned(buf + 24); \ + a40 ^= sph_dec64le_aligned(buf + 32); \ + a01 ^= sph_dec64le_aligned(buf + 40); \ + a11 ^= sph_dec64le_aligned(buf + 48); \ + a21 ^= sph_dec64le_aligned(buf + 56); \ + a31 ^= sph_dec64le_aligned(buf + 64); \ + a41 ^= sph_dec64le_aligned(buf + 72); \ + a02 ^= sph_dec64le_aligned(buf + 80); \ + a12 ^= sph_dec64le_aligned(buf + 88); \ + a22 ^= sph_dec64le_aligned(buf + 96); \ + a32 ^= sph_dec64le_aligned(buf + 104); \ + a42 ^= sph_dec64le_aligned(buf + 112); \ + a03 ^= sph_dec64le_aligned(buf + 120); \ + a13 ^= sph_dec64le_aligned(buf + 128); \ + a23 ^= sph_dec64le_aligned(buf + 136); \ + } while (0) + +#define INPUT_BUF136 do { \ + a00 ^= sph_dec64le_aligned(buf + 0); \ + a10 ^= sph_dec64le_aligned(buf + 8); \ + a20 ^= sph_dec64le_aligned(buf + 16); \ + a30 ^= sph_dec64le_aligned(buf + 24); \ + a40 ^= sph_dec64le_aligned(buf + 32); \ + a01 ^= sph_dec64le_aligned(buf + 40); \ + a11 ^= sph_dec64le_aligned(buf + 48); \ + a21 ^= sph_dec64le_aligned(buf + 56); \ + a31 ^= sph_dec64le_aligned(buf + 64); \ + a41 ^= sph_dec64le_aligned(buf + 72); \ + a02 ^= sph_dec64le_aligned(buf + 80); \ + a12 ^= sph_dec64le_aligned(buf + 88); \ + a22 ^= sph_dec64le_aligned(buf + 96); \ + a32 ^= sph_dec64le_aligned(buf + 104); \ + a42 ^= sph_dec64le_aligned(buf + 112); \ + a03 ^= sph_dec64le_aligned(buf + 120); \ + a13 ^= sph_dec64le_aligned(buf + 128); \ + } while (0) + +#define INPUT_BUF104 do { \ + a00 ^= sph_dec64le_aligned(buf + 0); \ + a10 ^= sph_dec64le_aligned(buf + 8); \ + a20 ^= sph_dec64le_aligned(buf + 16); \ + a30 ^= sph_dec64le_aligned(buf + 24); \ + a40 ^= sph_dec64le_aligned(buf + 32); \ + a01 ^= sph_dec64le_aligned(buf + 40); \ + a11 ^= sph_dec64le_aligned(buf + 48); \ + a21 ^= sph_dec64le_aligned(buf + 
56); \ + a31 ^= sph_dec64le_aligned(buf + 64); \ + a41 ^= sph_dec64le_aligned(buf + 72); \ + a02 ^= sph_dec64le_aligned(buf + 80); \ + a12 ^= sph_dec64le_aligned(buf + 88); \ + a22 ^= sph_dec64le_aligned(buf + 96); \ + } while (0) + +#define INPUT_BUF72 do { \ + a00 ^= sph_dec64le_aligned(buf + 0); \ + a10 ^= sph_dec64le_aligned(buf + 8); \ + a20 ^= sph_dec64le_aligned(buf + 16); \ + a30 ^= sph_dec64le_aligned(buf + 24); \ + a40 ^= sph_dec64le_aligned(buf + 32); \ + a01 ^= sph_dec64le_aligned(buf + 40); \ + a11 ^= sph_dec64le_aligned(buf + 48); \ + a21 ^= sph_dec64le_aligned(buf + 56); \ + a31 ^= sph_dec64le_aligned(buf + 64); \ + } while (0) + +#define INPUT_BUF(lim) do { \ + a00 ^= sph_dec64le_aligned(buf + 0); \ + a10 ^= sph_dec64le_aligned(buf + 8); \ + a20 ^= sph_dec64le_aligned(buf + 16); \ + a30 ^= sph_dec64le_aligned(buf + 24); \ + a40 ^= sph_dec64le_aligned(buf + 32); \ + a01 ^= sph_dec64le_aligned(buf + 40); \ + a11 ^= sph_dec64le_aligned(buf + 48); \ + a21 ^= sph_dec64le_aligned(buf + 56); \ + a31 ^= sph_dec64le_aligned(buf + 64); \ + if ((lim) == 72) \ + break; \ + a41 ^= sph_dec64le_aligned(buf + 72); \ + a02 ^= sph_dec64le_aligned(buf + 80); \ + a12 ^= sph_dec64le_aligned(buf + 88); \ + a22 ^= sph_dec64le_aligned(buf + 96); \ + if ((lim) == 104) \ + break; \ + a32 ^= sph_dec64le_aligned(buf + 104); \ + a42 ^= sph_dec64le_aligned(buf + 112); \ + a03 ^= sph_dec64le_aligned(buf + 120); \ + a13 ^= sph_dec64le_aligned(buf + 128); \ + if ((lim) == 136) \ + break; \ + a23 ^= sph_dec64le_aligned(buf + 136); \ + } while (0) + +#endif + +#define DECL64(x) sph_u64 x +#define MOV64(d, s) (d = s) +#define XOR64(d, a, b) (d = a ^ b) +#define AND64(d, a, b) (d = a & b) +#define OR64(d, a, b) (d = a | b) +#define NOT64(d, s) (d = SPH_T64(~s)) +#define ROL64(d, v, n) (d = SPH_ROTL64(v, n)) +#define XOR64_IOTA XOR64 + +#else + +static const struct { + sph_u32 high, low; +} RC[] = { +#if SPH_KECCAK_INTERLEAVE + { SPH_C32(0x00000000), SPH_C32(0x00000001) }, + { 
SPH_C32(0x00000089), SPH_C32(0x00000000) }, + { SPH_C32(0x8000008B), SPH_C32(0x00000000) }, + { SPH_C32(0x80008080), SPH_C32(0x00000000) }, + { SPH_C32(0x0000008B), SPH_C32(0x00000001) }, + { SPH_C32(0x00008000), SPH_C32(0x00000001) }, + { SPH_C32(0x80008088), SPH_C32(0x00000001) }, + { SPH_C32(0x80000082), SPH_C32(0x00000001) }, + { SPH_C32(0x0000000B), SPH_C32(0x00000000) }, + { SPH_C32(0x0000000A), SPH_C32(0x00000000) }, + { SPH_C32(0x00008082), SPH_C32(0x00000001) }, + { SPH_C32(0x00008003), SPH_C32(0x00000000) }, + { SPH_C32(0x0000808B), SPH_C32(0x00000001) }, + { SPH_C32(0x8000000B), SPH_C32(0x00000001) }, + { SPH_C32(0x8000008A), SPH_C32(0x00000001) }, + { SPH_C32(0x80000081), SPH_C32(0x00000001) }, + { SPH_C32(0x80000081), SPH_C32(0x00000000) }, + { SPH_C32(0x80000008), SPH_C32(0x00000000) }, + { SPH_C32(0x00000083), SPH_C32(0x00000000) }, + { SPH_C32(0x80008003), SPH_C32(0x00000000) }, + { SPH_C32(0x80008088), SPH_C32(0x00000001) }, + { SPH_C32(0x80000088), SPH_C32(0x00000000) }, + { SPH_C32(0x00008000), SPH_C32(0x00000001) }, + { SPH_C32(0x80008082), SPH_C32(0x00000000) } +#else + { SPH_C32(0x00000000), SPH_C32(0x00000001) }, + { SPH_C32(0x00000000), SPH_C32(0x00008082) }, + { SPH_C32(0x80000000), SPH_C32(0x0000808A) }, + { SPH_C32(0x80000000), SPH_C32(0x80008000) }, + { SPH_C32(0x00000000), SPH_C32(0x0000808B) }, + { SPH_C32(0x00000000), SPH_C32(0x80000001) }, + { SPH_C32(0x80000000), SPH_C32(0x80008081) }, + { SPH_C32(0x80000000), SPH_C32(0x00008009) }, + { SPH_C32(0x00000000), SPH_C32(0x0000008A) }, + { SPH_C32(0x00000000), SPH_C32(0x00000088) }, + { SPH_C32(0x00000000), SPH_C32(0x80008009) }, + { SPH_C32(0x00000000), SPH_C32(0x8000000A) }, + { SPH_C32(0x00000000), SPH_C32(0x8000808B) }, + { SPH_C32(0x80000000), SPH_C32(0x0000008B) }, + { SPH_C32(0x80000000), SPH_C32(0x00008089) }, + { SPH_C32(0x80000000), SPH_C32(0x00008003) }, + { SPH_C32(0x80000000), SPH_C32(0x00008002) }, + { SPH_C32(0x80000000), SPH_C32(0x00000080) }, + { SPH_C32(0x00000000), 
SPH_C32(0x0000800A) }, + { SPH_C32(0x80000000), SPH_C32(0x8000000A) }, + { SPH_C32(0x80000000), SPH_C32(0x80008081) }, + { SPH_C32(0x80000000), SPH_C32(0x00008080) }, + { SPH_C32(0x00000000), SPH_C32(0x80000001) }, + { SPH_C32(0x80000000), SPH_C32(0x80008008) } +#endif +}; + +#if SPH_KECCAK_INTERLEAVE + +#define INTERLEAVE(xl, xh) do { \ + sph_u32 l, h, t; \ + l = (xl); h = (xh); \ + t = (l ^ (l >> 1)) & SPH_C32(0x22222222); l ^= t ^ (t << 1); \ + t = (h ^ (h >> 1)) & SPH_C32(0x22222222); h ^= t ^ (t << 1); \ + t = (l ^ (l >> 2)) & SPH_C32(0x0C0C0C0C); l ^= t ^ (t << 2); \ + t = (h ^ (h >> 2)) & SPH_C32(0x0C0C0C0C); h ^= t ^ (t << 2); \ + t = (l ^ (l >> 4)) & SPH_C32(0x00F000F0); l ^= t ^ (t << 4); \ + t = (h ^ (h >> 4)) & SPH_C32(0x00F000F0); h ^= t ^ (t << 4); \ + t = (l ^ (l >> 8)) & SPH_C32(0x0000FF00); l ^= t ^ (t << 8); \ + t = (h ^ (h >> 8)) & SPH_C32(0x0000FF00); h ^= t ^ (t << 8); \ + t = (l ^ SPH_T32(h << 16)) & SPH_C32(0xFFFF0000); \ + l ^= t; h ^= t >> 16; \ + (xl) = l; (xh) = h; \ + } while (0) + +#define UNINTERLEAVE(xl, xh) do { \ + sph_u32 l, h, t; \ + l = (xl); h = (xh); \ + t = (l ^ SPH_T32(h << 16)) & SPH_C32(0xFFFF0000); \ + l ^= t; h ^= t >> 16; \ + t = (l ^ (l >> 8)) & SPH_C32(0x0000FF00); l ^= t ^ (t << 8); \ + t = (h ^ (h >> 8)) & SPH_C32(0x0000FF00); h ^= t ^ (t << 8); \ + t = (l ^ (l >> 4)) & SPH_C32(0x00F000F0); l ^= t ^ (t << 4); \ + t = (h ^ (h >> 4)) & SPH_C32(0x00F000F0); h ^= t ^ (t << 4); \ + t = (l ^ (l >> 2)) & SPH_C32(0x0C0C0C0C); l ^= t ^ (t << 2); \ + t = (h ^ (h >> 2)) & SPH_C32(0x0C0C0C0C); h ^= t ^ (t << 2); \ + t = (l ^ (l >> 1)) & SPH_C32(0x22222222); l ^= t ^ (t << 1); \ + t = (h ^ (h >> 1)) & SPH_C32(0x22222222); h ^= t ^ (t << 1); \ + (xl) = l; (xh) = h; \ + } while (0) + +#else + +#define INTERLEAVE(l, h) +#define UNINTERLEAVE(l, h) + +#endif + +#if SPH_KECCAK_NOCOPY + +#define a00l (kc->u.narrow[2 * 0 + 0]) +#define a00h (kc->u.narrow[2 * 0 + 1]) +#define a10l (kc->u.narrow[2 * 1 + 0]) +#define a10h (kc->u.narrow[2 * 
1 + 1]) +#define a20l (kc->u.narrow[2 * 2 + 0]) +#define a20h (kc->u.narrow[2 * 2 + 1]) +#define a30l (kc->u.narrow[2 * 3 + 0]) +#define a30h (kc->u.narrow[2 * 3 + 1]) +#define a40l (kc->u.narrow[2 * 4 + 0]) +#define a40h (kc->u.narrow[2 * 4 + 1]) +#define a01l (kc->u.narrow[2 * 5 + 0]) +#define a01h (kc->u.narrow[2 * 5 + 1]) +#define a11l (kc->u.narrow[2 * 6 + 0]) +#define a11h (kc->u.narrow[2 * 6 + 1]) +#define a21l (kc->u.narrow[2 * 7 + 0]) +#define a21h (kc->u.narrow[2 * 7 + 1]) +#define a31l (kc->u.narrow[2 * 8 + 0]) +#define a31h (kc->u.narrow[2 * 8 + 1]) +#define a41l (kc->u.narrow[2 * 9 + 0]) +#define a41h (kc->u.narrow[2 * 9 + 1]) +#define a02l (kc->u.narrow[2 * 10 + 0]) +#define a02h (kc->u.narrow[2 * 10 + 1]) +#define a12l (kc->u.narrow[2 * 11 + 0]) +#define a12h (kc->u.narrow[2 * 11 + 1]) +#define a22l (kc->u.narrow[2 * 12 + 0]) +#define a22h (kc->u.narrow[2 * 12 + 1]) +#define a32l (kc->u.narrow[2 * 13 + 0]) +#define a32h (kc->u.narrow[2 * 13 + 1]) +#define a42l (kc->u.narrow[2 * 14 + 0]) +#define a42h (kc->u.narrow[2 * 14 + 1]) +#define a03l (kc->u.narrow[2 * 15 + 0]) +#define a03h (kc->u.narrow[2 * 15 + 1]) +#define a13l (kc->u.narrow[2 * 16 + 0]) +#define a13h (kc->u.narrow[2 * 16 + 1]) +#define a23l (kc->u.narrow[2 * 17 + 0]) +#define a23h (kc->u.narrow[2 * 17 + 1]) +#define a33l (kc->u.narrow[2 * 18 + 0]) +#define a33h (kc->u.narrow[2 * 18 + 1]) +#define a43l (kc->u.narrow[2 * 19 + 0]) +#define a43h (kc->u.narrow[2 * 19 + 1]) +#define a04l (kc->u.narrow[2 * 20 + 0]) +#define a04h (kc->u.narrow[2 * 20 + 1]) +#define a14l (kc->u.narrow[2 * 21 + 0]) +#define a14h (kc->u.narrow[2 * 21 + 1]) +#define a24l (kc->u.narrow[2 * 22 + 0]) +#define a24h (kc->u.narrow[2 * 22 + 1]) +#define a34l (kc->u.narrow[2 * 23 + 0]) +#define a34h (kc->u.narrow[2 * 23 + 1]) +#define a44l (kc->u.narrow[2 * 24 + 0]) +#define a44h (kc->u.narrow[2 * 24 + 1]) + +#define DECL_STATE +#define READ_STATE(state) +#define WRITE_STATE(state) + +#define INPUT_BUF(size) do { \ + size_t 
j; \ + for (j = 0; j < (size); j += 8) { \ + sph_u32 tl, th; \ + tl = sph_dec32le_aligned(buf + j + 0); \ + th = sph_dec32le_aligned(buf + j + 4); \ + INTERLEAVE(tl, th); \ + kc->u.narrow[(j >> 2) + 0] ^= tl; \ + kc->u.narrow[(j >> 2) + 1] ^= th; \ + } \ + } while (0) + +#define INPUT_BUF144 INPUT_BUF(144) +#define INPUT_BUF136 INPUT_BUF(136) +#define INPUT_BUF104 INPUT_BUF(104) +#define INPUT_BUF72 INPUT_BUF(72) + +#else + +#define DECL_STATE \ + sph_u32 a00l, a00h, a01l, a01h, a02l, a02h, a03l, a03h, a04l, a04h; \ + sph_u32 a10l, a10h, a11l, a11h, a12l, a12h, a13l, a13h, a14l, a14h; \ + sph_u32 a20l, a20h, a21l, a21h, a22l, a22h, a23l, a23h, a24l, a24h; \ + sph_u32 a30l, a30h, a31l, a31h, a32l, a32h, a33l, a33h, a34l, a34h; \ + sph_u32 a40l, a40h, a41l, a41h, a42l, a42h, a43l, a43h, a44l, a44h; + +#define READ_STATE(state) do { \ + a00l = (state)->u.narrow[2 * 0 + 0]; \ + a00h = (state)->u.narrow[2 * 0 + 1]; \ + a10l = (state)->u.narrow[2 * 1 + 0]; \ + a10h = (state)->u.narrow[2 * 1 + 1]; \ + a20l = (state)->u.narrow[2 * 2 + 0]; \ + a20h = (state)->u.narrow[2 * 2 + 1]; \ + a30l = (state)->u.narrow[2 * 3 + 0]; \ + a30h = (state)->u.narrow[2 * 3 + 1]; \ + a40l = (state)->u.narrow[2 * 4 + 0]; \ + a40h = (state)->u.narrow[2 * 4 + 1]; \ + a01l = (state)->u.narrow[2 * 5 + 0]; \ + a01h = (state)->u.narrow[2 * 5 + 1]; \ + a11l = (state)->u.narrow[2 * 6 + 0]; \ + a11h = (state)->u.narrow[2 * 6 + 1]; \ + a21l = (state)->u.narrow[2 * 7 + 0]; \ + a21h = (state)->u.narrow[2 * 7 + 1]; \ + a31l = (state)->u.narrow[2 * 8 + 0]; \ + a31h = (state)->u.narrow[2 * 8 + 1]; \ + a41l = (state)->u.narrow[2 * 9 + 0]; \ + a41h = (state)->u.narrow[2 * 9 + 1]; \ + a02l = (state)->u.narrow[2 * 10 + 0]; \ + a02h = (state)->u.narrow[2 * 10 + 1]; \ + a12l = (state)->u.narrow[2 * 11 + 0]; \ + a12h = (state)->u.narrow[2 * 11 + 1]; \ + a22l = (state)->u.narrow[2 * 12 + 0]; \ + a22h = (state)->u.narrow[2 * 12 + 1]; \ + a32l = (state)->u.narrow[2 * 13 + 0]; \ + a32h = (state)->u.narrow[2 * 13 + 1]; \ 
+ a42l = (state)->u.narrow[2 * 14 + 0]; \ + a42h = (state)->u.narrow[2 * 14 + 1]; \ + a03l = (state)->u.narrow[2 * 15 + 0]; \ + a03h = (state)->u.narrow[2 * 15 + 1]; \ + a13l = (state)->u.narrow[2 * 16 + 0]; \ + a13h = (state)->u.narrow[2 * 16 + 1]; \ + a23l = (state)->u.narrow[2 * 17 + 0]; \ + a23h = (state)->u.narrow[2 * 17 + 1]; \ + a33l = (state)->u.narrow[2 * 18 + 0]; \ + a33h = (state)->u.narrow[2 * 18 + 1]; \ + a43l = (state)->u.narrow[2 * 19 + 0]; \ + a43h = (state)->u.narrow[2 * 19 + 1]; \ + a04l = (state)->u.narrow[2 * 20 + 0]; \ + a04h = (state)->u.narrow[2 * 20 + 1]; \ + a14l = (state)->u.narrow[2 * 21 + 0]; \ + a14h = (state)->u.narrow[2 * 21 + 1]; \ + a24l = (state)->u.narrow[2 * 22 + 0]; \ + a24h = (state)->u.narrow[2 * 22 + 1]; \ + a34l = (state)->u.narrow[2 * 23 + 0]; \ + a34h = (state)->u.narrow[2 * 23 + 1]; \ + a44l = (state)->u.narrow[2 * 24 + 0]; \ + a44h = (state)->u.narrow[2 * 24 + 1]; \ + } while (0) + +#define WRITE_STATE(state) do { \ + (state)->u.narrow[2 * 0 + 0] = a00l; \ + (state)->u.narrow[2 * 0 + 1] = a00h; \ + (state)->u.narrow[2 * 1 + 0] = a10l; \ + (state)->u.narrow[2 * 1 + 1] = a10h; \ + (state)->u.narrow[2 * 2 + 0] = a20l; \ + (state)->u.narrow[2 * 2 + 1] = a20h; \ + (state)->u.narrow[2 * 3 + 0] = a30l; \ + (state)->u.narrow[2 * 3 + 1] = a30h; \ + (state)->u.narrow[2 * 4 + 0] = a40l; \ + (state)->u.narrow[2 * 4 + 1] = a40h; \ + (state)->u.narrow[2 * 5 + 0] = a01l; \ + (state)->u.narrow[2 * 5 + 1] = a01h; \ + (state)->u.narrow[2 * 6 + 0] = a11l; \ + (state)->u.narrow[2 * 6 + 1] = a11h; \ + (state)->u.narrow[2 * 7 + 0] = a21l; \ + (state)->u.narrow[2 * 7 + 1] = a21h; \ + (state)->u.narrow[2 * 8 + 0] = a31l; \ + (state)->u.narrow[2 * 8 + 1] = a31h; \ + (state)->u.narrow[2 * 9 + 0] = a41l; \ + (state)->u.narrow[2 * 9 + 1] = a41h; \ + (state)->u.narrow[2 * 10 + 0] = a02l; \ + (state)->u.narrow[2 * 10 + 1] = a02h; \ + (state)->u.narrow[2 * 11 + 0] = a12l; \ + (state)->u.narrow[2 * 11 + 1] = a12h; \ + (state)->u.narrow[2 * 12 + 0] = 
a22l; \ + (state)->u.narrow[2 * 12 + 1] = a22h; \ + (state)->u.narrow[2 * 13 + 0] = a32l; \ + (state)->u.narrow[2 * 13 + 1] = a32h; \ + (state)->u.narrow[2 * 14 + 0] = a42l; \ + (state)->u.narrow[2 * 14 + 1] = a42h; \ + (state)->u.narrow[2 * 15 + 0] = a03l; \ + (state)->u.narrow[2 * 15 + 1] = a03h; \ + (state)->u.narrow[2 * 16 + 0] = a13l; \ + (state)->u.narrow[2 * 16 + 1] = a13h; \ + (state)->u.narrow[2 * 17 + 0] = a23l; \ + (state)->u.narrow[2 * 17 + 1] = a23h; \ + (state)->u.narrow[2 * 18 + 0] = a33l; \ + (state)->u.narrow[2 * 18 + 1] = a33h; \ + (state)->u.narrow[2 * 19 + 0] = a43l; \ + (state)->u.narrow[2 * 19 + 1] = a43h; \ + (state)->u.narrow[2 * 20 + 0] = a04l; \ + (state)->u.narrow[2 * 20 + 1] = a04h; \ + (state)->u.narrow[2 * 21 + 0] = a14l; \ + (state)->u.narrow[2 * 21 + 1] = a14h; \ + (state)->u.narrow[2 * 22 + 0] = a24l; \ + (state)->u.narrow[2 * 22 + 1] = a24h; \ + (state)->u.narrow[2 * 23 + 0] = a34l; \ + (state)->u.narrow[2 * 23 + 1] = a34h; \ + (state)->u.narrow[2 * 24 + 0] = a44l; \ + (state)->u.narrow[2 * 24 + 1] = a44h; \ + } while (0) + +#define READ64(d, off) do { \ + sph_u32 tl, th; \ + tl = sph_dec32le_aligned(buf + (off)); \ + th = sph_dec32le_aligned(buf + (off) + 4); \ + INTERLEAVE(tl, th); \ + d ## l ^= tl; \ + d ## h ^= th; \ + } while (0) + +#define INPUT_BUF144 do { \ + READ64(a00, 0); \ + READ64(a10, 8); \ + READ64(a20, 16); \ + READ64(a30, 24); \ + READ64(a40, 32); \ + READ64(a01, 40); \ + READ64(a11, 48); \ + READ64(a21, 56); \ + READ64(a31, 64); \ + READ64(a41, 72); \ + READ64(a02, 80); \ + READ64(a12, 88); \ + READ64(a22, 96); \ + READ64(a32, 104); \ + READ64(a42, 112); \ + READ64(a03, 120); \ + READ64(a13, 128); \ + READ64(a23, 136); \ + } while (0) + +#define INPUT_BUF136 do { \ + READ64(a00, 0); \ + READ64(a10, 8); \ + READ64(a20, 16); \ + READ64(a30, 24); \ + READ64(a40, 32); \ + READ64(a01, 40); \ + READ64(a11, 48); \ + READ64(a21, 56); \ + READ64(a31, 64); \ + READ64(a41, 72); \ + READ64(a02, 80); \ + READ64(a12, 88); \ + 
READ64(a22, 96); \ + READ64(a32, 104); \ + READ64(a42, 112); \ + READ64(a03, 120); \ + READ64(a13, 128); \ + } while (0) + +#define INPUT_BUF104 do { \ + READ64(a00, 0); \ + READ64(a10, 8); \ + READ64(a20, 16); \ + READ64(a30, 24); \ + READ64(a40, 32); \ + READ64(a01, 40); \ + READ64(a11, 48); \ + READ64(a21, 56); \ + READ64(a31, 64); \ + READ64(a41, 72); \ + READ64(a02, 80); \ + READ64(a12, 88); \ + READ64(a22, 96); \ + } while (0) + +#define INPUT_BUF72 do { \ + READ64(a00, 0); \ + READ64(a10, 8); \ + READ64(a20, 16); \ + READ64(a30, 24); \ + READ64(a40, 32); \ + READ64(a01, 40); \ + READ64(a11, 48); \ + READ64(a21, 56); \ + READ64(a31, 64); \ + } while (0) + +#define INPUT_BUF(lim) do { \ + READ64(a00, 0); \ + READ64(a10, 8); \ + READ64(a20, 16); \ + READ64(a30, 24); \ + READ64(a40, 32); \ + READ64(a01, 40); \ + READ64(a11, 48); \ + READ64(a21, 56); \ + READ64(a31, 64); \ + if ((lim) == 72) \ + break; \ + READ64(a41, 72); \ + READ64(a02, 80); \ + READ64(a12, 88); \ + READ64(a22, 96); \ + if ((lim) == 104) \ + break; \ + READ64(a32, 104); \ + READ64(a42, 112); \ + READ64(a03, 120); \ + READ64(a13, 128); \ + if ((lim) == 136) \ + break; \ + READ64(a23, 136); \ + } while (0) + +#endif + +#define DECL64(x) sph_u32 x ## l, x ## h +#define MOV64(d, s) (d ## l = s ## l, d ## h = s ## h) +#define XOR64(d, a, b) (d ## l = a ## l ^ b ## l, d ## h = a ## h ^ b ## h) +#define AND64(d, a, b) (d ## l = a ## l & b ## l, d ## h = a ## h & b ## h) +#define OR64(d, a, b) (d ## l = a ## l | b ## l, d ## h = a ## h | b ## h) +#define NOT64(d, s) (d ## l = SPH_T32(~s ## l), d ## h = SPH_T32(~s ## h)) +#define ROL64(d, v, n) ROL64_ ## n(d, v) + +#if SPH_KECCAK_INTERLEAVE + +#define ROL64_odd1(d, v) do { \ + sph_u32 tmp; \ + tmp = v ## l; \ + d ## l = SPH_T32(v ## h << 1) | (v ## h >> 31); \ + d ## h = tmp; \ + } while (0) + +#define ROL64_odd63(d, v) do { \ + sph_u32 tmp; \ + tmp = SPH_T32(v ## l << 31) | (v ## l >> 1); \ + d ## l = v ## h; \ + d ## h = tmp; \ + } while (0) + 
+#define ROL64_odd(d, v, n) do { \ + sph_u32 tmp; \ + tmp = SPH_T32(v ## l << (n - 1)) | (v ## l >> (33 - n)); \ + d ## l = SPH_T32(v ## h << n) | (v ## h >> (32 - n)); \ + d ## h = tmp; \ + } while (0) + +#define ROL64_even(d, v, n) do { \ + d ## l = SPH_T32(v ## l << n) | (v ## l >> (32 - n)); \ + d ## h = SPH_T32(v ## h << n) | (v ## h >> (32 - n)); \ + } while (0) + +#define ROL64_0(d, v) +#define ROL64_1(d, v) ROL64_odd1(d, v) +#define ROL64_2(d, v) ROL64_even(d, v, 1) +#define ROL64_3(d, v) ROL64_odd( d, v, 2) +#define ROL64_4(d, v) ROL64_even(d, v, 2) +#define ROL64_5(d, v) ROL64_odd( d, v, 3) +#define ROL64_6(d, v) ROL64_even(d, v, 3) +#define ROL64_7(d, v) ROL64_odd( d, v, 4) +#define ROL64_8(d, v) ROL64_even(d, v, 4) +#define ROL64_9(d, v) ROL64_odd( d, v, 5) +#define ROL64_10(d, v) ROL64_even(d, v, 5) +#define ROL64_11(d, v) ROL64_odd( d, v, 6) +#define ROL64_12(d, v) ROL64_even(d, v, 6) +#define ROL64_13(d, v) ROL64_odd( d, v, 7) +#define ROL64_14(d, v) ROL64_even(d, v, 7) +#define ROL64_15(d, v) ROL64_odd( d, v, 8) +#define ROL64_16(d, v) ROL64_even(d, v, 8) +#define ROL64_17(d, v) ROL64_odd( d, v, 9) +#define ROL64_18(d, v) ROL64_even(d, v, 9) +#define ROL64_19(d, v) ROL64_odd( d, v, 10) +#define ROL64_20(d, v) ROL64_even(d, v, 10) +#define ROL64_21(d, v) ROL64_odd( d, v, 11) +#define ROL64_22(d, v) ROL64_even(d, v, 11) +#define ROL64_23(d, v) ROL64_odd( d, v, 12) +#define ROL64_24(d, v) ROL64_even(d, v, 12) +#define ROL64_25(d, v) ROL64_odd( d, v, 13) +#define ROL64_26(d, v) ROL64_even(d, v, 13) +#define ROL64_27(d, v) ROL64_odd( d, v, 14) +#define ROL64_28(d, v) ROL64_even(d, v, 14) +#define ROL64_29(d, v) ROL64_odd( d, v, 15) +#define ROL64_30(d, v) ROL64_even(d, v, 15) +#define ROL64_31(d, v) ROL64_odd( d, v, 16) +#define ROL64_32(d, v) ROL64_even(d, v, 16) +#define ROL64_33(d, v) ROL64_odd( d, v, 17) +#define ROL64_34(d, v) ROL64_even(d, v, 17) +#define ROL64_35(d, v) ROL64_odd( d, v, 18) +#define ROL64_36(d, v) ROL64_even(d, v, 18) +#define 
ROL64_37(d, v) ROL64_odd( d, v, 19) +#define ROL64_38(d, v) ROL64_even(d, v, 19) +#define ROL64_39(d, v) ROL64_odd( d, v, 20) +#define ROL64_40(d, v) ROL64_even(d, v, 20) +#define ROL64_41(d, v) ROL64_odd( d, v, 21) +#define ROL64_42(d, v) ROL64_even(d, v, 21) +#define ROL64_43(d, v) ROL64_odd( d, v, 22) +#define ROL64_44(d, v) ROL64_even(d, v, 22) +#define ROL64_45(d, v) ROL64_odd( d, v, 23) +#define ROL64_46(d, v) ROL64_even(d, v, 23) +#define ROL64_47(d, v) ROL64_odd( d, v, 24) +#define ROL64_48(d, v) ROL64_even(d, v, 24) +#define ROL64_49(d, v) ROL64_odd( d, v, 25) +#define ROL64_50(d, v) ROL64_even(d, v, 25) +#define ROL64_51(d, v) ROL64_odd( d, v, 26) +#define ROL64_52(d, v) ROL64_even(d, v, 26) +#define ROL64_53(d, v) ROL64_odd( d, v, 27) +#define ROL64_54(d, v) ROL64_even(d, v, 27) +#define ROL64_55(d, v) ROL64_odd( d, v, 28) +#define ROL64_56(d, v) ROL64_even(d, v, 28) +#define ROL64_57(d, v) ROL64_odd( d, v, 29) +#define ROL64_58(d, v) ROL64_even(d, v, 29) +#define ROL64_59(d, v) ROL64_odd( d, v, 30) +#define ROL64_60(d, v) ROL64_even(d, v, 30) +#define ROL64_61(d, v) ROL64_odd( d, v, 31) +#define ROL64_62(d, v) ROL64_even(d, v, 31) +#define ROL64_63(d, v) ROL64_odd63(d, v) + +#else + +#define ROL64_small(d, v, n) do { \ + sph_u32 tmp; \ + tmp = SPH_T32(v ## l << n) | (v ## h >> (32 - n)); \ + d ## h = SPH_T32(v ## h << n) | (v ## l >> (32 - n)); \ + d ## l = tmp; \ + } while (0) + +#define ROL64_0(d, v) 0 +#define ROL64_1(d, v) ROL64_small(d, v, 1) +#define ROL64_2(d, v) ROL64_small(d, v, 2) +#define ROL64_3(d, v) ROL64_small(d, v, 3) +#define ROL64_4(d, v) ROL64_small(d, v, 4) +#define ROL64_5(d, v) ROL64_small(d, v, 5) +#define ROL64_6(d, v) ROL64_small(d, v, 6) +#define ROL64_7(d, v) ROL64_small(d, v, 7) +#define ROL64_8(d, v) ROL64_small(d, v, 8) +#define ROL64_9(d, v) ROL64_small(d, v, 9) +#define ROL64_10(d, v) ROL64_small(d, v, 10) +#define ROL64_11(d, v) ROL64_small(d, v, 11) +#define ROL64_12(d, v) ROL64_small(d, v, 12) +#define ROL64_13(d, v) 
ROL64_small(d, v, 13) +#define ROL64_14(d, v) ROL64_small(d, v, 14) +#define ROL64_15(d, v) ROL64_small(d, v, 15) +#define ROL64_16(d, v) ROL64_small(d, v, 16) +#define ROL64_17(d, v) ROL64_small(d, v, 17) +#define ROL64_18(d, v) ROL64_small(d, v, 18) +#define ROL64_19(d, v) ROL64_small(d, v, 19) +#define ROL64_20(d, v) ROL64_small(d, v, 20) +#define ROL64_21(d, v) ROL64_small(d, v, 21) +#define ROL64_22(d, v) ROL64_small(d, v, 22) +#define ROL64_23(d, v) ROL64_small(d, v, 23) +#define ROL64_24(d, v) ROL64_small(d, v, 24) +#define ROL64_25(d, v) ROL64_small(d, v, 25) +#define ROL64_26(d, v) ROL64_small(d, v, 26) +#define ROL64_27(d, v) ROL64_small(d, v, 27) +#define ROL64_28(d, v) ROL64_small(d, v, 28) +#define ROL64_29(d, v) ROL64_small(d, v, 29) +#define ROL64_30(d, v) ROL64_small(d, v, 30) +#define ROL64_31(d, v) ROL64_small(d, v, 31) +/* A rotation by exactly 32 just swaps the two 32-bit words. */ +#define ROL64_32(d, v) do { \ + sph_u32 tmp; \ + tmp = v ## l; \ + d ## l = v ## h; \ + d ## h = tmp; \ + } while (0) +/* ROL64_big(d, v, n): rotation by 32 + n. ROL64_small rotates by n into the temporaries trl / trh, which are then stored swapped (trl into d##h, trh into d##l). */ +#define ROL64_big(d, v, n) do { \ + sph_u32 trl, trh; \ + ROL64_small(tr, v, n); \ + d ## h = trl; \ + d ## l = trh; \ + } while (0) + +#define ROL64_33(d, v) ROL64_big(d, v, 1) +#define ROL64_34(d, v) ROL64_big(d, v, 2) +#define ROL64_35(d, v) ROL64_big(d, v, 3) +#define ROL64_36(d, v) ROL64_big(d, v, 4) +#define ROL64_37(d, v) ROL64_big(d, v, 5) +#define ROL64_38(d, v) ROL64_big(d, v, 6) +#define ROL64_39(d, v) ROL64_big(d, v, 7) +#define ROL64_40(d, v) ROL64_big(d, v, 8) +#define ROL64_41(d, v) ROL64_big(d, v, 9) +#define ROL64_42(d, v) ROL64_big(d, v, 10) +#define ROL64_43(d, v) ROL64_big(d, v, 11) +#define ROL64_44(d, v) ROL64_big(d, v, 12) +#define ROL64_45(d, v) ROL64_big(d, v, 13) +#define ROL64_46(d, v) ROL64_big(d, v, 14) +#define ROL64_47(d, v) ROL64_big(d, v, 15) +#define ROL64_48(d, v) ROL64_big(d, v, 16) +#define ROL64_49(d, v) ROL64_big(d, v, 17) +#define ROL64_50(d, v) ROL64_big(d, v, 18) +#define ROL64_51(d, v) ROL64_big(d, v, 19) +#define ROL64_52(d, v) ROL64_big(d, v, 20) +#define
ROL64_53(d, v) ROL64_big(d, v, 21) +#define ROL64_54(d, v) ROL64_big(d, v, 22) +#define ROL64_55(d, v) ROL64_big(d, v, 23) +#define ROL64_56(d, v) ROL64_big(d, v, 24) +#define ROL64_57(d, v) ROL64_big(d, v, 25) +#define ROL64_58(d, v) ROL64_big(d, v, 26) +#define ROL64_59(d, v) ROL64_big(d, v, 27) +#define ROL64_60(d, v) ROL64_big(d, v, 28) +#define ROL64_61(d, v) ROL64_big(d, v, 29) +#define ROL64_62(d, v) ROL64_big(d, v, 30) +#define ROL64_63(d, v) ROL64_big(d, v, 31) + +#endif +/* XOR64_IOTA: d = s ^ k for a lane split into l / h words; the constant k supplies its two words as k.low and k.high. */ +#define XOR64_IOTA(d, s, k) \ + (d ## l = s ## l ^ k.low, d ## h = s ## h ^ k.high) + +#endif +/* TH_ELT: t = (c0 ^ c1 ^ c2 ^ c3 ^ c4) ^ ROL64(d0 ^ d1 ^ d2 ^ d3 ^ d4, 1). */ +#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) do { \ + DECL64(tt0); \ + DECL64(tt1); \ + DECL64(tt2); \ + DECL64(tt3); \ + XOR64(tt0, d0, d1); \ + XOR64(tt1, d2, d3); \ + XOR64(tt0, tt0, d4); \ + XOR64(tt0, tt0, tt1); \ + ROL64(tt0, tt0, 1); \ + XOR64(tt2, c0, c1); \ + XOR64(tt3, c2, c3); \ + XOR64(tt0, tt0, c4); \ + XOR64(tt2, tt2, tt3); \ + XOR64(t, tt0, tt2); \ + } while (0) +/* THETA: for each x in 0..4, TH_ELT builds t<x> from the lane groups x - 1 and x + 1 (indices taken modulo 5); t<x> is then XORed into all five lanes b<x>0..b<x>4. */ +#define THETA(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \ + b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \ + b40, b41, b42, b43, b44) \ + do { \ + DECL64(t0); \ + DECL64(t1); \ + DECL64(t2); \ + DECL64(t3); \ + DECL64(t4); \ + TH_ELT(t0, b40, b41, b42, b43, b44, b10, b11, b12, b13, b14); \ + TH_ELT(t1, b00, b01, b02, b03, b04, b20, b21, b22, b23, b24); \ + TH_ELT(t2, b10, b11, b12, b13, b14, b30, b31, b32, b33, b34); \ + TH_ELT(t3, b20, b21, b22, b23, b24, b40, b41, b42, b43, b44); \ + TH_ELT(t4, b30, b31, b32, b33, b34, b00, b01, b02, b03, b04); \ + XOR64(b00, b00, t0); \ + XOR64(b01, b01, t0); \ + XOR64(b02, b02, t0); \ + XOR64(b03, b03, t0); \ + XOR64(b04, b04, t0); \ + XOR64(b10, b10, t1); \ + XOR64(b11, b11, t1); \ + XOR64(b12, b12, t1); \ + XOR64(b13, b13, t1); \ + XOR64(b14, b14, t1); \ + XOR64(b20, b20, t2); \ + XOR64(b21, b21, t2); \ + XOR64(b22, b22, t2); \ + XOR64(b23, b23, t2); \ + XOR64(b24, b24, t2); \ + XOR64(b30, b30, t3); \ + XOR64(b31, b31, t3); \ + XOR64(b32, b32, t3); \
+ XOR64(b33, b33, t3); \ + XOR64(b34, b34, t3); \ + XOR64(b40, b40, t4); \ + XOR64(b41, b41, t4); \ + XOR64(b42, b42, t4); \ + XOR64(b43, b43, t4); \ + XOR64(b44, b44, t4); \ + } while (0) +/* RHO: rotates every lane left, in place, by a fixed per-lane bit count; b00 has count 0, so its rotation is left commented out. */ +#define RHO(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \ + b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \ + b40, b41, b42, b43, b44) \ + do { \ + /* ROL64(b00, b00, 0); */ \ + ROL64(b01, b01, 36); \ + ROL64(b02, b02, 3); \ + ROL64(b03, b03, 41); \ + ROL64(b04, b04, 18); \ + ROL64(b10, b10, 1); \ + ROL64(b11, b11, 44); \ + ROL64(b12, b12, 10); \ + ROL64(b13, b13, 45); \ + ROL64(b14, b14, 2); \ + ROL64(b20, b20, 62); \ + ROL64(b21, b21, 6); \ + ROL64(b22, b22, 43); \ + ROL64(b23, b23, 15); \ + ROL64(b24, b24, 61); \ + ROL64(b30, b30, 28); \ + ROL64(b31, b31, 55); \ + ROL64(b32, b32, 25); \ + ROL64(b33, b33, 21); \ + ROL64(b34, b34, 56); \ + ROL64(b40, b40, 27); \ + ROL64(b41, b41, 20); \ + ROL64(b42, b42, 39); \ + ROL64(b43, b43, 8); \ + ROL64(b44, b44, 14); \ + } while (0) + +/* + * The KHI macro integrates the "lane complement" optimization. On input, + * some words are complemented: + * a00 a01 a02 a04 a13 a20 a21 a22 a30 a33 a34 a43 + * On output, the following words are complemented: + * a04 a10 a20 a22 a23 a31 + * + * The (implicit) permutation and the theta expansion will bring back + * the input mask for the next round.
+ */ +/* KHI_XO: d = a ^ (b | c), using the scratch lane kt. */ +#define KHI_XO(d, a, b, c) do { \ + DECL64(kt); \ + OR64(kt, b, c); \ + XOR64(d, a, kt); \ + } while (0) +/* KHI_XA: d = a ^ (b & c), using the scratch lane kt. */ +#define KHI_XA(d, a, b, c) do { \ + DECL64(kt); \ + AND64(kt, b, c); \ + XOR64(d, a, kt); \ + } while (0) +/* KHI: handles the five lanes b0y..b4y of one y at a time. The five results are collected in c0..c4 and copied back only after all of them are computed; bnn holds the complement of one input lane per group and replaces that lane in some of the terms. */ +#define KHI(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \ + b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \ + b40, b41, b42, b43, b44) \ + do { \ + DECL64(c0); \ + DECL64(c1); \ + DECL64(c2); \ + DECL64(c3); \ + DECL64(c4); \ + DECL64(bnn); \ + NOT64(bnn, b20); \ + KHI_XO(c0, b00, b10, b20); \ + KHI_XO(c1, b10, bnn, b30); \ + KHI_XA(c2, b20, b30, b40); \ + KHI_XO(c3, b30, b40, b00); \ + KHI_XA(c4, b40, b00, b10); \ + MOV64(b00, c0); \ + MOV64(b10, c1); \ + MOV64(b20, c2); \ + MOV64(b30, c3); \ + MOV64(b40, c4); \ + NOT64(bnn, b41); \ + KHI_XO(c0, b01, b11, b21); \ + KHI_XA(c1, b11, b21, b31); \ + KHI_XO(c2, b21, b31, bnn); \ + KHI_XO(c3, b31, b41, b01); \ + KHI_XA(c4, b41, b01, b11); \ + MOV64(b01, c0); \ + MOV64(b11, c1); \ + MOV64(b21, c2); \ + MOV64(b31, c3); \ + MOV64(b41, c4); \ + NOT64(bnn, b32); \ + KHI_XO(c0, b02, b12, b22); \ + KHI_XA(c1, b12, b22, b32); \ + KHI_XA(c2, b22, bnn, b42); \ + KHI_XO(c3, bnn, b42, b02); \ + KHI_XA(c4, b42, b02, b12); \ + MOV64(b02, c0); \ + MOV64(b12, c1); \ + MOV64(b22, c2); \ + MOV64(b32, c3); \ + MOV64(b42, c4); \ + NOT64(bnn, b33); \ + KHI_XA(c0, b03, b13, b23); \ + KHI_XO(c1, b13, b23, b33); \ + KHI_XO(c2, b23, bnn, b43); \ + KHI_XA(c3, bnn, b43, b03); \ + KHI_XO(c4, b43, b03, b13); \ + MOV64(b03, c0); \ + MOV64(b13, c1); \ + MOV64(b23, c2); \ + MOV64(b33, c3); \ + MOV64(b43, c4); \ + NOT64(bnn, b14); \ + KHI_XA(c0, b04, bnn, b24); \ + KHI_XO(c1, bnn, b24, b34); \ + KHI_XA(c2, b24, b34, b44); \ + KHI_XO(c3, b34, b44, b04); \ + KHI_XA(c4, b44, b04, b14); \ + MOV64(b04, c0); \ + MOV64(b14, c1); \ + MOV64(b24, c2); \ + MOV64(b34, c3); \ + MOV64(b44, c4); \ + } while (0) +/* IOTA: XORs the round constant r into lane a00. */ +#define IOTA(r) XOR64_IOTA(a00, a00, r) +/* P0..P23: the 25 lane names a00..a44 listed in 24 different orders; KF_ELT pastes one of these lists in as the argument list of THETA, RHO and KHI. */ +#define P0 a00, a01, a02, a03, a04, a10, a11, a12, a13, a14, a20, a21, \ + a22,
a23, a24, a30, a31, a32, a33, a34, a40, a41, a42, a43, a44 +/* Each Pk with k >= 1 is the lane order that KF_ELT(k - 1, k, ...) passes to KHI and that the following KF_ELT(k, ...) passes to THETA and RHO. */#define P1 a00, a30, a10, a40, a20, a11, a41, a21, a01, a31, a22, a02, \ + a32, a12, a42, a33, a13, a43, a23, a03, a44, a24, a04, a34, a14 +#define P2 a00, a33, a11, a44, a22, a41, a24, a02, a30, a13, a32, a10, \ + a43, a21, a04, a23, a01, a34, a12, a40, a14, a42, a20, a03, a31 +#define P3 a00, a23, a41, a14, a32, a24, a42, a10, a33, a01, a43, a11, \ + a34, a02, a20, a12, a30, a03, a21, a44, a31, a04, a22, a40, a13 +#define P4 a00, a12, a24, a31, a43, a42, a04, a11, a23, a30, a34, a41, \ + a03, a10, a22, a21, a33, a40, a02, a14, a13, a20, a32, a44, a01 +#define P5 a00, a21, a42, a13, a34, a04, a20, a41, a12, a33, a03, a24, \ + a40, a11, a32, a02, a23, a44, a10, a31, a01, a22, a43, a14, a30 +#define P6 a00, a02, a04, a01, a03, a20, a22, a24, a21, a23, a40, a42, \ + a44, a41, a43, a10, a12, a14, a11, a13, a30, a32, a34, a31, a33 +#define P7 a00, a10, a20, a30, a40, a22, a32, a42, a02, a12, a44, a04, \ + a14, a24, a34, a11, a21, a31, a41, a01, a33, a43, a03, a13, a23 +#define P8 a00, a11, a22, a33, a44, a32, a43, a04, a10, a21, a14, a20, \ + a31, a42, a03, a41, a02, a13, a24, a30, a23, a34, a40, a01, a12 +#define P9 a00, a41, a32, a23, a14, a43, a34, a20, a11, a02, a31, a22, \ + a13, a04, a40, a24, a10, a01, a42, a33, a12, a03, a44, a30, a21 +#define P10 a00, a24, a43, a12, a31, a34, a03, a22, a41, a10, a13, a32, \ + a01, a20, a44, a42, a11, a30, a04, a23, a21, a40, a14, a33, a02 +#define P11 a00, a42, a34, a21, a13, a03, a40, a32, a24, a11, a01, a43, \ + a30, a22, a14, a04, a41, a33, a20, a12, a02, a44, a31, a23, a10 +#define P12 a00, a04, a03, a02, a01, a40, a44, a43, a42, a41, a30, a34, \ + a33, a32, a31, a20, a24, a23, a22, a21, a10, a14, a13, a12, a11 +#define P13 a00, a20, a40, a10, a30, a44, a14, a34, a04, a24, a33, a03, \ + a23, a43, a13, a22, a42, a12, a32, a02, a11, a31, a01, a21, a41 +#define P14 a00, a22, a44, a11, a33, a14, a31, a03, a20, a42, a23, a40, \ + a12, a34, a01, a32, a04, a21, a43, a10,
a41, a13, a30, a02, a24 +#define P15 a00, a32, a14, a41, a23, a31, a13, a40, a22, a04, a12, a44, \ + a21, a03, a30, a43, a20, a02, a34, a11, a24, a01, a33, a10, a42 +#define P16 a00, a43, a31, a24, a12, a13, a01, a44, a32, a20, a21, a14, \ + a02, a40, a33, a34, a22, a10, a03, a41, a42, a30, a23, a11, a04 +#define P17 a00, a34, a13, a42, a21, a01, a30, a14, a43, a22, a02, a31, \ + a10, a44, a23, a03, a32, a11, a40, a24, a04, a33, a12, a41, a20 +#define P18 a00, a03, a01, a04, a02, a30, a33, a31, a34, a32, a10, a13, \ + a11, a14, a12, a40, a43, a41, a44, a42, a20, a23, a21, a24, a22 +#define P19 a00, a40, a30, a20, a10, a33, a23, a13, a03, a43, a11, a01, \ + a41, a31, a21, a44, a34, a24, a14, a04, a22, a12, a02, a42, a32 +#define P20 a00, a44, a33, a22, a11, a23, a12, a01, a40, a34, a41, a30, \ + a24, a13, a02, a14, a03, a42, a31, a20, a32, a21, a10, a04, a43 +#define P21 a00, a14, a23, a32, a41, a12, a21, a30, a44, a03, a24, a33, \ + a42, a01, a10, a31, a40, a04, a13, a22, a43, a02, a11, a20, a34 +#define P22 a00, a31, a12, a43, a24, a21, a02, a33, a14, a40, a42, a23, \ + a04, a30, a11, a13, a44, a20, a01, a32, a34, a10, a41, a22, a03 +#define P23 a00, a13, a21, a34, a42, a02, a10, a23, a31, a44, a04, a12, \ + a20, a33, a41, a01, a14, a22, a30, a43, a03, a11, a24, a32, a40 +/* Pn_TO_P0: moves lane values between the a?? variables through the temporary t, one cycle at a time (MOV64 takes destination first); the partially unrolled KECCAK_F_1600_ variants run it once per loop iteration, after n rounds. P1_TO_P0 is a single cycle over all 24 lanes other than a00. NOTE(review): presumably this puts every lane back under its P0 name so that the next iteration can start from P0 again -- confirm. */ +#define P1_TO_P0 do { \ + DECL64(t); \ + MOV64(t, a01); \ + MOV64(a01, a30); \ + MOV64(a30, a33); \ + MOV64(a33, a23); \ + MOV64(a23, a12); \ + MOV64(a12, a21); \ + MOV64(a21, a02); \ + MOV64(a02, a10); \ + MOV64(a10, a11); \ + MOV64(a11, a41); \ + MOV64(a41, a24); \ + MOV64(a24, a42); \ + MOV64(a42, a04); \ + MOV64(a04, a20); \ + MOV64(a20, a22); \ + MOV64(a22, a32); \ + MOV64(a32, a43); \ + MOV64(a43, a34); \ + MOV64(a34, a03); \ + MOV64(a03, a40); \ + MOV64(a40, a44); \ + MOV64(a44, a14); \ + MOV64(a14, a31); \ + MOV64(a31, a13); \ + MOV64(a13, t); \ + } while (0) +/* P2_TO_P0: the same kind of shuffle, written as two cycles of twelve lanes each. */ +#define P2_TO_P0 do { \ + DECL64(t); \ + MOV64(t, a01); \ + MOV64(a01, a33); \ + MOV64(a33, a12); \ + MOV64(a12, a02); \ +
MOV64(a02, a11); \ + MOV64(a11, a24); \ + MOV64(a24, a04); \ + MOV64(a04, a22); \ + MOV64(a22, a43); \ + MOV64(a43, a03); \ + MOV64(a03, a44); \ + MOV64(a44, a31); \ + MOV64(a31, t); \ + MOV64(t, a10); \ + MOV64(a10, a41); \ + MOV64(a41, a42); \ + MOV64(a42, a20); \ + MOV64(a20, a32); \ + MOV64(a32, a34); \ + MOV64(a34, a40); \ + MOV64(a40, a14); \ + MOV64(a14, a13); \ + MOV64(a13, a30); \ + MOV64(a30, a23); \ + MOV64(a23, a21); \ + MOV64(a21, t); \ + } while (0) +/* P4_TO_P0: lane shuffle run after four rounds; four cycles of six lanes, each routed through the temporary t. */ +#define P4_TO_P0 do { \ + DECL64(t); \ + MOV64(t, a01); \ + MOV64(a01, a12); \ + MOV64(a12, a11); \ + MOV64(a11, a04); \ + MOV64(a04, a43); \ + MOV64(a43, a44); \ + MOV64(a44, t); \ + MOV64(t, a02); \ + MOV64(a02, a24); \ + MOV64(a24, a22); \ + MOV64(a22, a03); \ + MOV64(a03, a31); \ + MOV64(a31, a33); \ + MOV64(a33, t); \ + MOV64(t, a10); \ + MOV64(a10, a42); \ + MOV64(a42, a32); \ + MOV64(a32, a40); \ + MOV64(a40, a13); \ + MOV64(a13, a23); \ + MOV64(a23, t); \ + MOV64(t, a14); \ + MOV64(a14, a30); \ + MOV64(a30, a21); \ + MOV64(a21, a41); \ + MOV64(a41, a20); \ + MOV64(a20, a34); \ + MOV64(a34, t); \ + } while (0) +/* P6_TO_P0: lane shuffle run after six rounds; six cycles of four lanes. */ +#define P6_TO_P0 do { \ + DECL64(t); \ + MOV64(t, a01); \ + MOV64(a01, a02); \ + MOV64(a02, a04); \ + MOV64(a04, a03); \ + MOV64(a03, t); \ + MOV64(t, a10); \ + MOV64(a10, a20); \ + MOV64(a20, a40); \ + MOV64(a40, a30); \ + MOV64(a30, t); \ + MOV64(t, a11); \ + MOV64(a11, a22); \ + MOV64(a22, a44); \ + MOV64(a44, a33); \ + MOV64(a33, t); \ + MOV64(t, a12); \ + MOV64(a12, a24); \ + MOV64(a24, a43); \ + MOV64(a43, a31); \ + MOV64(a31, t); \ + MOV64(t, a13); \ + MOV64(a13, a21); \ + MOV64(a21, a42); \ + MOV64(a42, a34); \ + MOV64(a34, t); \ + MOV64(t, a14); \ + MOV64(a14, a23); \ + MOV64(a23, a41); \ + MOV64(a41, a32); \ + MOV64(a32, t); \ + } while (0) +/* P8_TO_P0: lane shuffle run after eight rounds; cycles of three lanes. */ +#define P8_TO_P0 do { \ + DECL64(t); \ + MOV64(t, a01); \ + MOV64(a01, a11); \ + MOV64(a11, a43); \ + MOV64(a43, t); \ + MOV64(t, a02); \ + MOV64(a02, a22); \ + MOV64(a22, a31); \ + MOV64(a31, t); \ + MOV64(t, a03); \ + MOV64(a03, a33); \
+ MOV64(a33, a24); \ + MOV64(a24, t); \ + MOV64(t, a04); \ + MOV64(a04, a44); \ + MOV64(a44, a12); \ + MOV64(a12, t); \ + MOV64(t, a10); \ + MOV64(a10, a32); \ + MOV64(a32, a13); \ + MOV64(a13, t); \ + MOV64(t, a14); \ + MOV64(a14, a21); \ + MOV64(a21, a20); \ + MOV64(a20, t); \ + MOV64(t, a23); \ + MOV64(a23, a42); \ + MOV64(a42, a40); \ + MOV64(a40, t); \ + MOV64(t, a30); \ + MOV64(a30, a41); \ + MOV64(a41, a34); \ + MOV64(a34, t); \ + } while (0) +/* P12_TO_P0: lane shuffle run after twelve rounds; twelve pairwise swaps through the temporary t. */ +#define P12_TO_P0 do { \ + DECL64(t); \ + MOV64(t, a01); \ + MOV64(a01, a04); \ + MOV64(a04, t); \ + MOV64(t, a02); \ + MOV64(a02, a03); \ + MOV64(a03, t); \ + MOV64(t, a10); \ + MOV64(a10, a40); \ + MOV64(a40, t); \ + MOV64(t, a11); \ + MOV64(a11, a44); \ + MOV64(a44, t); \ + MOV64(t, a12); \ + MOV64(a12, a43); \ + MOV64(a43, t); \ + MOV64(t, a13); \ + MOV64(a13, a42); \ + MOV64(a42, t); \ + MOV64(t, a14); \ + MOV64(a14, a41); \ + MOV64(a41, t); \ + MOV64(t, a20); \ + MOV64(a20, a30); \ + MOV64(a30, t); \ + MOV64(t, a21); \ + MOV64(a21, a34); \ + MOV64(a34, t); \ + MOV64(t, a22); \ + MOV64(a22, a33); \ + MOV64(a33, t); \ + MOV64(t, a23); \ + MOV64(a23, a32); \ + MOV64(a32, t); \ + MOV64(t, a24); \ + MOV64(a24, a31); \ + MOV64(a31, t); \ + } while (0) +/* LPAR / RPAR stand in for the parentheses of the THETA / RHO / KHI invocations inside KF_ELT. NOTE(review): presumably so that P##r is expanded into its 25 lane names before those macros collect their arguments -- confirm. */ +#define LPAR ( +#define RPAR ) +/* KF_ELT(r, s, k): one round -- THETA and RHO on lane order P<r>, KHI on lane order P<s>, then IOTA with the constant k. */ +#define KF_ELT(r, s, k) do { \ + THETA LPAR P ## r RPAR; \ + RHO LPAR P ## r RPAR; \ + KHI LPAR P ## s RPAR; \ + IOTA(k); \ + } while (0) + +#define DO(x) x + +#define KECCAK_F_1600 DO(KECCAK_F_1600_) +/* KECCAK_F_1600_ variants: SPH_KECCAK_UNROLL = n in {1, 2, 4, 6, 8, 12} runs 24 / n loop iterations of n rounds, each iteration ending with Pn_TO_P0; the value 0 writes out all 24 rounds without a loop; any other value stops at #error. */ +#if SPH_KECCAK_UNROLL == 1 + +#define KECCAK_F_1600_ do { \ + int j; \ + for (j = 0; j < 24; j ++) { \ + KF_ELT( 0, 1, RC[j + 0]); \ + P1_TO_P0; \ + } \ + } while (0) + +#elif SPH_KECCAK_UNROLL == 2 + +#define KECCAK_F_1600_ do { \ + int j; \ + for (j = 0; j < 24; j += 2) { \ + KF_ELT( 0, 1, RC[j + 0]); \ + KF_ELT( 1, 2, RC[j + 1]); \ + P2_TO_P0; \ + } \ + } while (0) + +#elif SPH_KECCAK_UNROLL == 4 + +#define KECCAK_F_1600_ do { \ + int j; \ + for (j = 0; j < 24; j += 4) { \ + KF_ELT( 0, 1, RC[j + 0]); \ + KF_ELT( 1, 2, RC[j + 1]);
\ + KF_ELT( 2, 3, RC[j + 2]); \ + KF_ELT( 3, 4, RC[j + 3]); \ + P4_TO_P0; \ + } \ + } while (0) + +#elif SPH_KECCAK_UNROLL == 6 +/* Six rounds per iteration, four iterations. */ +#define KECCAK_F_1600_ do { \ + int j; \ + for (j = 0; j < 24; j += 6) { \ + KF_ELT( 0, 1, RC[j + 0]); \ + KF_ELT( 1, 2, RC[j + 1]); \ + KF_ELT( 2, 3, RC[j + 2]); \ + KF_ELT( 3, 4, RC[j + 3]); \ + KF_ELT( 4, 5, RC[j + 4]); \ + KF_ELT( 5, 6, RC[j + 5]); \ + P6_TO_P0; \ + } \ + } while (0) + +#elif SPH_KECCAK_UNROLL == 8 +/* Eight rounds per iteration, three iterations. */ +#define KECCAK_F_1600_ do { \ + int j; \ + for (j = 0; j < 24; j += 8) { \ + KF_ELT( 0, 1, RC[j + 0]); \ + KF_ELT( 1, 2, RC[j + 1]); \ + KF_ELT( 2, 3, RC[j + 2]); \ + KF_ELT( 3, 4, RC[j + 3]); \ + KF_ELT( 4, 5, RC[j + 4]); \ + KF_ELT( 5, 6, RC[j + 5]); \ + KF_ELT( 6, 7, RC[j + 6]); \ + KF_ELT( 7, 8, RC[j + 7]); \ + P8_TO_P0; \ + } \ + } while (0) + +#elif SPH_KECCAK_UNROLL == 12 +/* Twelve rounds per iteration, two iterations. */ +#define KECCAK_F_1600_ do { \ + int j; \ + for (j = 0; j < 24; j += 12) { \ + KF_ELT( 0, 1, RC[j + 0]); \ + KF_ELT( 1, 2, RC[j + 1]); \ + KF_ELT( 2, 3, RC[j + 2]); \ + KF_ELT( 3, 4, RC[j + 3]); \ + KF_ELT( 4, 5, RC[j + 4]); \ + KF_ELT( 5, 6, RC[j + 5]); \ + KF_ELT( 6, 7, RC[j + 6]); \ + KF_ELT( 7, 8, RC[j + 7]); \ + KF_ELT( 8, 9, RC[j + 8]); \ + KF_ELT( 9, 10, RC[j + 9]); \ + KF_ELT(10, 11, RC[j + 10]); \ + KF_ELT(11, 12, RC[j + 11]); \ + P12_TO_P0; \ + } \ + } while (0) + +#elif SPH_KECCAK_UNROLL == 0 +/* Fully unrolled form: the 24 KF_ELT rounds are written out in sequence, with no loop counter and constant RC indices. */ +#define KECCAK_F_1600_ do { \ + KF_ELT( 0, 1, RC[ 0]); \ + KF_ELT( 1, 2, RC[ 1]); \ + KF_ELT( 2, 3, RC[ 2]); \ + KF_ELT( 3, 4, RC[ 3]); \ + KF_ELT( 4, 5, RC[ 4]); \ + KF_ELT( 5, 6, RC[ 5]); \ + KF_ELT( 6, 7, RC[ 6]); \ + KF_ELT( 7, 8, RC[ 7]); \ + KF_ELT( 8, 9, RC[ 8]); \ + KF_ELT( 9, 10, RC[ 9]); \ + KF_ELT(10, 11, RC[10]); \ + KF_ELT(11, 12, RC[11]); \ + KF_ELT(12, 13, RC[12]); \ + KF_ELT(13, 14, RC[13]); \ + KF_ELT(14, 15, RC[14]); \ + KF_ELT(15, 16, RC[15]); \ + KF_ELT(16, 17, RC[16]); \ + KF_ELT(17, 18, RC[17]); \ + KF_ELT(18, 19, RC[18]); \ + KF_ELT(19, 20, RC[19]); \ + KF_ELT(20, 21, RC[20]); \ + KF_ELT(21, 22, RC[21]); \ +
KF_ELT(22, 23, RC[22]); \ + KF_ELT(23, 0, RC[23]); \ + } while (0) + +#else + +#error Unimplemented unroll count for Keccak. + +#endif +/* keccak_init: zeroes the state words, sets the lanes listed below to all-ones (the initial "lane complement" mask), clears kc->ptr and stores 200 - out_size / 4 in kc->lim. out_size is the digest size in bits: the callers in this file pass 224, 256, 384 or 512. */ +static void +keccak_init(sph_keccak_context *kc, unsigned out_size) +{ + int i; + +#if SPH_KECCAK_64 + for (i = 0; i < 25; i ++) + kc->u.wide[i] = 0; + /* + * Initialization for the "lane complement". + */ + kc->u.wide[ 1] = SPH_C64(0xFFFFFFFFFFFFFFFF); + kc->u.wide[ 2] = SPH_C64(0xFFFFFFFFFFFFFFFF); + kc->u.wide[ 8] = SPH_C64(0xFFFFFFFFFFFFFFFF); + kc->u.wide[12] = SPH_C64(0xFFFFFFFFFFFFFFFF); + kc->u.wide[17] = SPH_C64(0xFFFFFFFFFFFFFFFF); + kc->u.wide[20] = SPH_C64(0xFFFFFFFFFFFFFFFF); +#else + + for (i = 0; i < 50; i ++) + kc->u.narrow[i] = 0; + /* + * Initialization for the "lane complement". + * Note: since we set to all-one full 64-bit words, + * interleaving (if applicable) is a no-op. + */ + kc->u.narrow[ 2] = SPH_C32(0xFFFFFFFF); + kc->u.narrow[ 3] = SPH_C32(0xFFFFFFFF); + kc->u.narrow[ 4] = SPH_C32(0xFFFFFFFF); + kc->u.narrow[ 5] = SPH_C32(0xFFFFFFFF); + kc->u.narrow[16] = SPH_C32(0xFFFFFFFF); + kc->u.narrow[17] = SPH_C32(0xFFFFFFFF); + kc->u.narrow[24] = SPH_C32(0xFFFFFFFF); + kc->u.narrow[25] = SPH_C32(0xFFFFFFFF); + kc->u.narrow[34] = SPH_C32(0xFFFFFFFF); + kc->u.narrow[35] = SPH_C32(0xFFFFFFFF); + kc->u.narrow[40] = SPH_C32(0xFFFFFFFF); + kc->u.narrow[41] = SPH_C32(0xFFFFFFFF); +#endif + kc->ptr = 0; + kc->lim = 200 - (out_size >> 2); +} +/* keccak_core: absorbs len bytes from data using blocks of lim bytes. Input that does not fill the current block is only copied into kc->buf. Otherwise the state is loaded with READ_STATE, every completed block goes through INPUT_BUF(lim) and KECCAK_F_1600, and the state and the new fill level are stored back at the end. */ +static void +keccak_core(sph_keccak_context *kc, const void *data, size_t len, size_t lim) +{ + unsigned char *buf; + size_t ptr; + DECL_STATE + + buf = kc->buf; + ptr = kc->ptr; + + if (len < (lim - ptr)) { + memcpy(buf + ptr, data, len); + kc->ptr = ptr + len; + return; + } + + READ_STATE(kc); + while (len > 0) { + size_t clen; + + clen = (lim - ptr); + if (clen > len) + clen = len; + memcpy(buf + ptr, data, clen); + ptr += clen; + data = (const unsigned char *)data + clen; + len -= clen; + if (ptr == lim) { + INPUT_BUF(lim); + KECCAK_F_1600; + ptr = 0; + } + } +
WRITE_STATE(kc); + kc->ptr = ptr; +} + +#if SPH_KECCAK_64 +/* DEFCLOSE(d, lim): defines keccak_close<d>() for a d-byte digest and a lim-byte block. eb holds the top n bits of ub with a 1 bit placed just above them. The padding is eb, zero bytes, and a final 0x80 byte that completes the block; when only one byte of the block is free, 0x80 is merged into eb, or, for n == 7, placed at the end of an extra all-zero block. The padding is absorbed with keccak_core, the complemented lanes are inverted back, d bytes are encoded little-endian into dst, and the context is re-initialised for the same digest size. */ +#define DEFCLOSE(d, lim) \ + static void keccak_close ## d( \ + sph_keccak_context *kc, unsigned ub, unsigned n, void *dst) \ + { \ + unsigned eb; \ + union { \ + unsigned char tmp[lim + 1]; \ + sph_u64 dummy; /* for alignment */ \ + } u; \ + size_t j; \ + \ + eb = (0x100 | (ub & 0xFF)) >> (8 - n); \ + if (kc->ptr == (lim - 1)) { \ + if (n == 7) { \ + u.tmp[0] = eb; \ + memset(u.tmp + 1, 0, lim - 1); \ + u.tmp[lim] = 0x80; \ + j = 1 + lim; \ + } else { \ + u.tmp[0] = eb | 0x80; \ + j = 1; \ + } \ + } else { \ + j = lim - kc->ptr; \ + u.tmp[0] = eb; \ + memset(u.tmp + 1, 0, j - 2); \ + u.tmp[j - 1] = 0x80; \ + } \ + keccak_core(kc, u.tmp, j, lim); \ + /* Finalize the "lane complement" */ \ + kc->u.wide[ 1] = ~kc->u.wide[ 1]; \ + kc->u.wide[ 2] = ~kc->u.wide[ 2]; \ + kc->u.wide[ 8] = ~kc->u.wide[ 8]; \ + kc->u.wide[12] = ~kc->u.wide[12]; \ + kc->u.wide[17] = ~kc->u.wide[17]; \ + kc->u.wide[20] = ~kc->u.wide[20]; \ + for (j = 0; j < d; j += 8) \ + sph_enc64le_aligned(u.tmp + j, kc->u.wide[j >> 3]); \ + memcpy(dst, u.tmp, d); \ + keccak_init(kc, (unsigned)d << 3); \ + } \ + +#else +/* Same padding and finalisation as the variant above, for the 32-bit (narrow) state: twelve 32-bit words are inverted instead of six 64-bit ones, every word pair goes through UNINTERLEAVE, and the output is encoded four bytes at a time. */ +#define DEFCLOSE(d, lim) \ + static void keccak_close ## d( \ + sph_keccak_context *kc, unsigned ub, unsigned n, void *dst) \ + { \ + unsigned eb; \ + union { \ + unsigned char tmp[lim + 1]; \ + sph_u64 dummy; /* for alignment */ \ + } u; \ + size_t j; \ + \ + eb = (0x100 | (ub & 0xFF)) >> (8 - n); \ + if (kc->ptr == (lim - 1)) { \ + if (n == 7) { \ + u.tmp[0] = eb; \ + memset(u.tmp + 1, 0, lim - 1); \ + u.tmp[lim] = 0x80; \ + j = 1 + lim; \ + } else { \ + u.tmp[0] = eb | 0x80; \ + j = 1; \ + } \ + } else { \ + j = lim - kc->ptr; \ + u.tmp[0] = eb; \ + memset(u.tmp + 1, 0, j - 2); \ + u.tmp[j - 1] = 0x80; \ + } \ + keccak_core(kc, u.tmp, j, lim); \ + /* Finalize the "lane complement" */ \ + kc->u.narrow[ 2] = ~kc->u.narrow[ 2]; \ + kc->u.narrow[ 3] = ~kc->u.narrow[ 3]; \ + kc->u.narrow[ 4] = ~kc->u.narrow[ 4]; \ + kc->u.narrow[
5] = ~kc->u.narrow[ 5]; \ + kc->u.narrow[16] = ~kc->u.narrow[16]; \ + kc->u.narrow[17] = ~kc->u.narrow[17]; \ + kc->u.narrow[24] = ~kc->u.narrow[24]; \ + kc->u.narrow[25] = ~kc->u.narrow[25]; \ + kc->u.narrow[34] = ~kc->u.narrow[34]; \ + kc->u.narrow[35] = ~kc->u.narrow[35]; \ + kc->u.narrow[40] = ~kc->u.narrow[40]; \ + kc->u.narrow[41] = ~kc->u.narrow[41]; \ + /* un-interleave */ \ + for (j = 0; j < 50; j += 2) \ + UNINTERLEAVE(kc->u.narrow[j], kc->u.narrow[j + 1]); \ + for (j = 0; j < d; j += 4) \ + sph_enc32le_aligned(u.tmp + j, kc->u.narrow[j >> 2]); \ + memcpy(dst, u.tmp, d); \ + keccak_init(kc, (unsigned)d << 3); \ + } \ + +#endif +/* Instantiates keccak_close28 / 32 / 48 / 64; the arguments are (digest length in bytes, block size in bytes). */ +DEFCLOSE(28, 144) +DEFCLOSE(32, 136) +DEFCLOSE(48, 104) +DEFCLOSE(64, 72) +/* Public entry points. For each digest size, init passes the size in bits to keccak_init, the update function passes the matching block size to keccak_core, close is addbits_and_close with no extra bits, and addbits_and_close forwards to the keccak_close<bytes> function generated above. */ +/* see sph_keccak.h */ +void +sph_keccak224_init(void *cc) +{ + keccak_init(cc, 224); +} + +/* see sph_keccak.h */ +void +sph_keccak224(void *cc, const void *data, size_t len) +{ + keccak_core(cc, data, len, 144); +} + +/* see sph_keccak.h */ +void +sph_keccak224_close(void *cc, void *dst) +{ + sph_keccak224_addbits_and_close(cc, 0, 0, dst); +} + +/* see sph_keccak.h */ +void +sph_keccak224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + keccak_close28(cc, ub, n, dst); +} + +/* see sph_keccak.h */ +void +sph_keccak256_init(void *cc) +{ + keccak_init(cc, 256); +} + +/* see sph_keccak.h */ +void +sph_keccak256(void *cc, const void *data, size_t len) +{ + keccak_core(cc, data, len, 136); +} + +/* see sph_keccak.h */ +void +sph_keccak256_close(void *cc, void *dst) +{ + sph_keccak256_addbits_and_close(cc, 0, 0, dst); +} + +/* see sph_keccak.h */ +void +sph_keccak256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + keccak_close32(cc, ub, n, dst); +} + +/* see sph_keccak.h */ +void +sph_keccak384_init(void *cc) +{ + keccak_init(cc, 384); +} + +/* see sph_keccak.h */ +void +sph_keccak384(void *cc, const void *data, size_t len) +{ + keccak_core(cc, data, len, 104); +} + +/* see sph_keccak.h */ +void
+sph_keccak384_close(void *cc, void *dst) +{ + sph_keccak384_addbits_and_close(cc, 0, 0, dst); +} + +/* see sph_keccak.h */ +void +sph_keccak384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + keccak_close48(cc, ub, n, dst); +} + +/* see sph_keccak.h */ +void +sph_keccak512_init(void *cc) +{ + keccak_init(cc, 512); +} + +/* see sph_keccak.h */ +void +sph_keccak512(void *cc, const void *data, size_t len) +{ + keccak_core(cc, data, len, 72); +} + +/* see sph_keccak.h */ +void +sph_keccak512_close(void *cc, void *dst) +{ + sph_keccak512_addbits_and_close(cc, 0, 0, dst); +} + +/* see sph_keccak.h */ +void +sph_keccak512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + keccak_close64(cc, ub, n, dst); +} + + +#ifdef __cplusplus +} +#endif diff --git a/miner.h b/miner.h new file mode 100644 index 0000000..0a743e7 --- /dev/null +++ b/miner.h @@ -0,0 +1,315 @@ +#ifndef __MINER_H__ +#define __MINER_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "cpuminer-config.h" +/* NOTE(review): the six header names below were empty in this copy of the patch. They are filled in from the types this header uses (bool, uint32_t, struct timeval, pthread_t, json_t, CURL) -- confirm against the upstream miner.h. */ +#include <stdbool.h> +#include <inttypes.h> +#include <sys/time.h> +#include <pthread.h> +#include <jansson.h> +#include <curl/curl.h> + +#ifdef WIN32 +#define snprintf(...)
_snprintf(__VA_ARGS__) +#define strdup(x) _strdup(x) +#define strncasecmp(x,y,z) _strnicmp(x,y,z) +#define strcasecmp(x,y) _stricmp(x,y) +typedef int ssize_t; +#endif +/* Locates a declaration of alloca(). NOTE(review): every #include in this fragment had lost its header name in this copy of the patch; the names are filled in from the guarding HAVE_*_H / compiler macros and the usual Autoconf alloca snippet -- confirm against upstream. */ +#ifdef STDC_HEADERS +# include <stdlib.h> +# include <stddef.h> +#else +# ifdef HAVE_STDLIB_H +# include <stdlib.h> +# endif +#endif +#ifdef HAVE_ALLOCA_H +# include <alloca.h> +#elif !defined alloca +# ifdef __GNUC__ +# define alloca __builtin_alloca +# elif defined _AIX +# define alloca __alloca +# elif defined _MSC_VER +# include <malloc.h> +# define alloca _alloca +# elif !defined HAVE_ALLOCA +# ifdef __cplusplus +extern "C" +# endif +void *alloca (size_t); +# endif +#endif +/* Without <syslog.h> the LOG_* priority names are supplied as a local enum. */ +#ifdef HAVE_SYSLOG_H +#include <syslog.h> +#else +enum { + LOG_ERR, + LOG_WARNING, + LOG_NOTICE, + LOG_INFO, + LOG_DEBUG, +}; +#endif +/* likely / unlikely: __builtin_expect hints on GCC > 2 with optimisation enabled, plain pass-through otherwise. */ +#undef unlikely +#undef likely +#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__) +#define unlikely(expr) (__builtin_expect(!!(expr), 0)) +#define likely(expr) (__builtin_expect(!!(expr), 1)) +#else +#define unlikely(expr) (expr) +#define likely(expr) (expr) +#endif + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#endif +/* GCC 4.3 and later use __builtin_bswap32; other compilers get the shift-and-mask bswap_32 macro. */ +#if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) +#define WANT_BUILTIN_BSWAP +#else +#define bswap_32(x) ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) \ + | (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu)) +#endif +/* swab32: returns v with its four bytes in reverse order. */ +static inline uint32_t swab32(uint32_t v) +{ +#ifdef WANT_BUILTIN_BSWAP + return __builtin_bswap32(v); +#else + return bswap_32(v); +#endif +} + +#ifdef HAVE_SYS_ENDIAN_H +#include <sys/endian.h> +#endif +/* be32dec: reads four bytes at pp, most significant byte first. Compiled only when HAVE_DECL_BE32DEC is unset or 0. */ +#if !HAVE_DECL_BE32DEC +static inline uint32_t be32dec(const void *pp) +{ + const uint8_t *p = (uint8_t const *)pp; + return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) + + ((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24)); +} +#endif +/* le32dec: reads four bytes at pp, least significant byte first. */ +#if !HAVE_DECL_LE32DEC +static inline uint32_t le32dec(const void *pp) +{ + const uint8_t *p = (uint8_t const *)pp; + return ((uint32_t)(p[0]) + ((uint32_t)(p[1]) << 8) + + ((uint32_t)(p[2]) << 16) +
((uint32_t)(p[3]) << 24)); +} +#endif +/* be32enc: stores x at pp as four bytes, most significant byte first. */ +#if !HAVE_DECL_BE32ENC +static inline void be32enc(void *pp, uint32_t x) +{ + uint8_t *p = (uint8_t *)pp; + p[3] = x & 0xff; + p[2] = (x >> 8) & 0xff; + p[1] = (x >> 16) & 0xff; + p[0] = (x >> 24) & 0xff; +} +#endif +/* le32enc: stores x at pp as four bytes, least significant byte first. */ +#if !HAVE_DECL_LE32ENC +static inline void le32enc(void *pp, uint32_t x) +{ + uint8_t *p = (uint8_t *)pp; + p[0] = x & 0xff; + p[1] = (x >> 8) & 0xff; + p[2] = (x >> 16) & 0xff; + p[3] = (x >> 24) & 0xff; +} +#endif +/* be16dec: reads two bytes at pp, most significant byte first. */ +#if !HAVE_DECL_BE16DEC +static inline uint16_t be16dec(const void *pp) +{ + const uint8_t *p = (uint8_t const *)pp; + return ((uint16_t)(p[1]) + ((uint16_t)(p[0]) << 8)); +} +#endif +/* be16enc: stores x at pp as two bytes, most significant byte first. */ +#if !HAVE_DECL_BE16ENC +static inline void be16enc(void *pp, uint16_t x) +{ + uint8_t *p = (uint8_t *)pp; + p[1] = x & 0xff; + p[0] = (x >> 8) & 0xff; +} +#endif +/* le16dec: reads two bytes at pp, least significant byte first. */ +#if !HAVE_DECL_LE16DEC +static inline uint16_t le16dec(const void *pp) +{ + const uint8_t *p = (uint8_t const *)pp; + return ((uint16_t)(p[0]) + ((uint16_t)(p[1]) << 8)); +} +#endif +/* le16enc: stores x at pp as two bytes, least significant byte first. */ +#if !HAVE_DECL_LE16ENC +static inline void le16enc(void *pp, uint16_t x) +{ + uint8_t *p = (uint8_t *)pp; + p[0] = x & 0xff; + p[1] = (x >> 8) & 0xff; +} +#endif +/* JSON_LOADS: calls json_loads with an extra flags argument (0) when JANSSON_MAJOR_VERSION is 2 or higher, and with the two-argument form otherwise. */ +#if JANSSON_MAJOR_VERSION >= 2 +#define JSON_LOADS(str, err_ptr) json_loads((str), 0, (err_ptr)) +#else +#define JSON_LOADS(str, err_ptr) json_loads((str), (err_ptr)) +#endif + +#define USER_AGENT PACKAGE_NAME "/" PACKAGE_VERSION + +void sha256_init(uint32_t *state); +void sha256_transform(uint32_t *state, const uint32_t *block, int swap); +void sha256d(unsigned char *hash, const unsigned char *data, int len); +/* The 4-way and 8-way prototypes are declared on the matching targets, but HAVE_SHA256_4WAY and HAVE_SHA256_8WAY are both defined as 0 here. */ +#if defined(__ARM_NEON__) || defined(__i386__) || defined(__x86_64__) +#define HAVE_SHA256_4WAY 0 +int sha256_use_4way(); +void sha256_init_4way(uint32_t *state); +void sha256_transform_4way(uint32_t *state, const uint32_t *block, int swap); +#endif + +#if defined(__x86_64__) && defined(USE_AVX2) +#define HAVE_SHA256_8WAY 0 +int sha256_use_8way(); +void sha256_init_8way(uint32_t *state); +void
sha256_transform_8way(uint32_t *state, const uint32_t *block, int swap); +#endif +/* Nonce-scanning entry points, one per algorithm; their definitions are not part of this header. */ +extern int scanhash_sha256d(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done); + +extern unsigned char *scrypt_buffer_alloc(); + +extern int scanhash_scrypt(int thr_id, uint32_t *pdata, + unsigned char *scratchbuf, const uint32_t *ptarget, + uint32_t max_nonce, unsigned long *hashes_done); + +extern int scanhash_heavy(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done, uint32_t maxvote); + +extern int scanhash_fugue256(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done); + +extern void heavycoin_hash(unsigned char* output, const unsigned char* input, int len); +/* Per-thread record: a numeric id, the pthread handle and a pointer to a struct thread_q. */ +struct thr_info { + int id; + pthread_t pth; + struct thread_q *q; +}; +/* The restart flag is padded so that the two members together occupy 128 bytes. NOTE(review): presumably this keeps each thread's flag on its own cache line -- confirm. */ +struct work_restart { + volatile unsigned long restart; + char padding[128 - sizeof(unsigned long)]; +}; + +extern bool opt_debug; +extern bool opt_protocol; +extern int opt_timeout; +extern bool want_longpoll; +extern bool have_longpoll; +extern bool want_stratum; +extern bool have_stratum; +extern char *opt_cert; +extern char *opt_proxy; +extern long opt_proxy_type; +extern bool use_syslog; +extern pthread_mutex_t applog_lock; +extern struct thr_info *thr_info; +extern int longpoll_thr_id; +extern int stratum_thr_id; +extern struct work_restart *work_restart; +extern bool opt_trust_pool; +extern uint16_t opt_vote; + +extern void applog(int prio, const char *fmt, ...); +extern json_t *json_rpc_call(CURL *curl, const char *url, const char *userpass, + const char *rpc_req, bool, bool, int *); +extern char *bin2hex(const unsigned char *p, size_t len); +extern bool hex2bin(unsigned char *p, const char *hexstr, size_t len); +extern int timeval_subtract(struct timeval *result, struct timeval *x, + struct timeval *y); +extern bool fulltest(const uint32_t *hash, const uint32_t *target); +extern void
diff_to_target(uint32_t *target, double diff); +/* One stratum job. The byte arrays have fixed sizes (prevhash 32, version / nbits / ntime 4 each, nreward 2); coinbase, xnonce2 and the merkle entries are pointers whose ownership is not visible in this header. */ +struct stratum_job { + char *job_id; + unsigned char prevhash[32]; + size_t coinbase_size; + unsigned char *coinbase; + unsigned char *xnonce2; + int merkle_count; + unsigned char **merkle; + unsigned char version[4]; + unsigned char nbits[4]; + unsigned char ntime[4]; + bool clean; + unsigned char nreward[2]; + double diff; +}; +/* State of one stratum connection: URL, libcurl handle with its error buffer and socket, a socket buffer guarded by sock_lock, the session / extranonce fields, and the current job guarded by work_lock. NOTE(review): which lock covers which field is inferred from the member order -- confirm against the implementation. */ +struct stratum_ctx { + char *url; + + CURL *curl; + char *curl_url; + char curl_err_str[CURL_ERROR_SIZE]; + curl_socket_t sock; + size_t sockbuf_size; + char *sockbuf; + pthread_mutex_t sock_lock; + + double next_diff; + + char *session_id; + size_t xnonce1_size; + unsigned char *xnonce1; + size_t xnonce2_size; + struct stratum_job job; + pthread_mutex_t work_lock; +}; + +bool stratum_socket_full(struct stratum_ctx *sctx, int timeout); +bool stratum_send_line(struct stratum_ctx *sctx, char *s); +char *stratum_recv_line(struct stratum_ctx *sctx); +bool stratum_connect(struct stratum_ctx *sctx, const char *url); +void stratum_disconnect(struct stratum_ctx *sctx); +bool stratum_subscribe(struct stratum_ctx *sctx); +bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass); +bool stratum_handle_method(struct stratum_ctx *sctx, const char *s); +/* struct thread_q is only declared here, not defined; code including this header can use it only through the tq_* functions below. */ +struct thread_q; + +extern struct thread_q *tq_new(void); +extern void tq_free(struct thread_q *tq); +extern bool tq_push(struct thread_q *tq, void *data); +extern void *tq_pop(struct thread_q *tq, const struct timespec *abstime); +extern void tq_freeze(struct thread_q *tq); +extern void tq_thaw(struct thread_q *tq); + +#ifdef __cplusplus +} +#endif + +#endif /* __MINER_H__ */ diff --git a/missing b/missing new file mode 100644 index 0000000..1c8ff70 --- /dev/null +++ b/missing @@ -0,0 +1,367 @@ +#! /bin/sh +# Common stub for a few missing GNU programs while installing. + +scriptversion=2006-05-10.23 + +# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005, 2006 +# Free Software Foundation, Inc.
+# Originally by Fran,cois Pinard , 1996. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +if test $# -eq 0; then + echo 1>&2 "Try \`$0 --help' for more information" + exit 1 +fi + +run=: +sed_output='s/.* --output[ =]\([^ ]*\).*/\1/p' +sed_minuso='s/.* -o \([^ ]*\).*/\1/p' + +# In the cases where this matters, `missing' is being run in the +# srcdir already. +if test -f configure.ac; then + configure_ac=configure.ac +else + configure_ac=configure.in +fi + +msg="missing on your system" + +case $1 in +--run) + # Try to run requested program, and just exit if it succeeds. + run= + shift + "$@" && exit 0 + # Exit code 63 means version mismatch. This often happens + # when the user try to use an ancient version of a tool on + # a file that requires a minimum version. In this case we + # we should proceed has if the program had been absent, or + # if --run hadn't been passed. + if test $? = 63; then + run=: + msg="probably too old" + fi + ;; + + -h|--h|--he|--hel|--help) + echo "\ +$0 [OPTION]... PROGRAM [ARGUMENT]... + +Handle \`PROGRAM [ARGUMENT]...' 
for when PROGRAM is missing, or return an +error status if there is no known handling for PROGRAM. + +Options: + -h, --help display this help and exit + -v, --version output version information and exit + --run try to run the given command, and emulate it if it fails + +Supported PROGRAM values: + aclocal touch file \`aclocal.m4' + autoconf touch file \`configure' + autoheader touch file \`config.h.in' + autom4te touch the output file, or create a stub one + automake touch all \`Makefile.in' files + bison create \`y.tab.[ch]', if possible, from existing .[ch] + flex create \`lex.yy.c', if possible, from existing .c + help2man touch the output file + lex create \`lex.yy.c', if possible, from existing .c + makeinfo touch the output file + tar try tar, gnutar, gtar, then tar without non-portable flags + yacc create \`y.tab.[ch]', if possible, from existing .[ch] + +Send bug reports to ." + exit $? + ;; + + -v|--v|--ve|--ver|--vers|--versi|--versio|--version) + echo "missing $scriptversion (GNU Automake)" + exit $? + ;; + + -*) + echo 1>&2 "$0: Unknown \`$1' option" + echo 1>&2 "Try \`$0 --help' for more information" + exit 1 + ;; + +esac + +# Now exit if we have it, but it failed. Also exit now if we +# don't have it and --version was passed (most likely to detect +# the program). +case $1 in + lex|yacc) + # Not GNU programs, they don't have --version. + ;; + + tar) + if test -n "$run"; then + echo 1>&2 "ERROR: \`tar' requires --run" + exit 1 + elif test "x$2" = "x--version" || test "x$2" = "x--help"; then + exit 1 + fi + ;; + + *) + if test -z "$run" && ($1 --version) > /dev/null 2>&1; then + # We have it, but it failed. + exit 1 + elif test "x$2" = "x--version" || test "x$2" = "x--help"; then + # Could not run --version or --help. This is probably someone + # running `$TOOL --version' or `$TOOL --help' to check whether + # $TOOL exists and not knowing $TOOL uses missing. 
+ exit 1 + fi + ;; +esac + +# If it does not exist, or fails to run (possibly an outdated version), +# try to emulate it. +case $1 in + aclocal*) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified \`acinclude.m4' or \`${configure_ac}'. You might want + to install the \`Automake' and \`Perl' packages. Grab them from + any GNU archive site." + touch aclocal.m4 + ;; + + autoconf) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified \`${configure_ac}'. You might want to install the + \`Autoconf' and \`GNU m4' packages. Grab them from any GNU + archive site." + touch configure + ;; + + autoheader) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified \`acconfig.h' or \`${configure_ac}'. You might want + to install the \`Autoconf' and \`GNU m4' packages. Grab them + from any GNU archive site." + files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}` + test -z "$files" && files="config.h" + touch_files= + for f in $files; do + case $f in + *:*) touch_files="$touch_files "`echo "$f" | + sed -e 's/^[^:]*://' -e 's/:.*//'`;; + *) touch_files="$touch_files $f.in";; + esac + done + touch $touch_files + ;; + + automake*) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'. + You might want to install the \`Automake' and \`Perl' packages. + Grab them from any GNU archive site." + find . -type f -name Makefile.am -print | + sed 's/\.am$/.in/' | + while read f; do touch "$f"; done + ;; + + autom4te) + echo 1>&2 "\ +WARNING: \`$1' is needed, but is $msg. + You might have modified some files without having the + proper tools for further handling them. + You can get \`$1' as part of \`Autoconf' from any GNU + archive site." 
+ + file=`echo "$*" | sed -n "$sed_output"` + test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` + if test -f "$file"; then + touch $file + else + test -z "$file" || exec >$file + echo "#! /bin/sh" + echo "# Created by GNU Automake missing as a replacement of" + echo "# $ $@" + echo "exit 0" + chmod +x $file + exit 1 + fi + ;; + + bison|yacc) + echo 1>&2 "\ +WARNING: \`$1' $msg. You should only need it if + you modified a \`.y' file. You may need the \`Bison' package + in order for those modifications to take effect. You can get + \`Bison' from any GNU archive site." + rm -f y.tab.c y.tab.h + if test $# -ne 1; then + eval LASTARG="\${$#}" + case $LASTARG in + *.y) + SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` + if test -f "$SRCFILE"; then + cp "$SRCFILE" y.tab.c + fi + SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` + if test -f "$SRCFILE"; then + cp "$SRCFILE" y.tab.h + fi + ;; + esac + fi + if test ! -f y.tab.h; then + echo >y.tab.h + fi + if test ! -f y.tab.c; then + echo 'main() { return 0; }' >y.tab.c + fi + ;; + + lex|flex) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified a \`.l' file. You may need the \`Flex' package + in order for those modifications to take effect. You can get + \`Flex' from any GNU archive site." + rm -f lex.yy.c + if test $# -ne 1; then + eval LASTARG="\${$#}" + case $LASTARG in + *.l) + SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` + if test -f "$SRCFILE"; then + cp "$SRCFILE" lex.yy.c + fi + ;; + esac + fi + if test ! -f lex.yy.c; then + echo 'main() { return 0; }' >lex.yy.c + fi + ;; + + help2man) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified a dependency of a manual page. You may need the + \`Help2man' package in order for those modifications to take + effect. You can get \`Help2man' from any GNU archive site." 
+ + file=`echo "$*" | sed -n "$sed_output"` + test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` + if test -f "$file"; then + touch $file + else + test -z "$file" || exec >$file + echo ".ab help2man is required to generate this page" + exit 1 + fi + ;; + + makeinfo) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified a \`.texi' or \`.texinfo' file, or any other file + indirectly affecting the aspect of the manual. The spurious + call might also be the consequence of using a buggy \`make' (AIX, + DU, IRIX). You might want to install the \`Texinfo' package or + the \`GNU make' package. Grab either from any GNU archive site." + # The file to touch is that specified with -o ... + file=`echo "$*" | sed -n "$sed_output"` + test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` + if test -z "$file"; then + # ... or it is the one specified with @setfilename ... + infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` + file=`sed -n ' + /^@setfilename/{ + s/.* \([^ ]*\) *$/\1/ + p + q + }' $infile` + # ... or it is derived from the source name (dir/f.texi becomes f.info) + test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info + fi + # If the file does not exist, the user really needs makeinfo; + # let's fail without touching anything. + test -f $file || exit 1 + touch $file + ;; + + tar) + shift + + # We have already tried tar in the generic part. + # Look for gnutar/gtar before invocation to avoid ugly error + # messages. + if (gnutar --version > /dev/null 2>&1); then + gnutar "$@" && exit 0 + fi + if (gtar --version > /dev/null 2>&1); then + gtar "$@" && exit 0 + fi + firstarg="$1" + if shift; then + case $firstarg in + *o*) + firstarg=`echo "$firstarg" | sed s/o//` + tar "$firstarg" "$@" && exit 0 + ;; + esac + case $firstarg in + *h*) + firstarg=`echo "$firstarg" | sed s/h//` + tar "$firstarg" "$@" && exit 0 + ;; + esac + fi + + echo 1>&2 "\ +WARNING: I can't seem to be able to run \`tar' with the given arguments. 
+ You may want to install GNU tar or Free paxutils, or check the + command line arguments." + exit 1 + ;; + + *) + echo 1>&2 "\ +WARNING: \`$1' is needed, and is $msg. + You might have modified some files without having the + proper tools for further handling them. Check the \`README' file, + it often tells you about the needed prerequisites for installing + this package. You may also peek at any GNU archive site, in case + some other package would contain this missing \`$1' program." + exit 1 + ;; +esac + +exit 0 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-end: "$" +# End: diff --git a/scrypt.c b/scrypt.c new file mode 100644 index 0000000..5efd0e2 --- /dev/null +++ b/scrypt.c @@ -0,0 +1,756 @@ +/* + * Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2013 pooler + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file was originally written by Colin Percival as part of the Tarsnap + * online backup system. + */ + +#include "cpuminer-config.h" +#include "miner.h" + +#include +#include +#include + +static const uint32_t keypad[12] = { + 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00000280 +}; +static const uint32_t innerpad[11] = { + 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x000004a0 +}; +static const uint32_t outerpad[8] = { + 0x80000000, 0, 0, 0, 0, 0, 0, 0x00000300 +}; +static const uint32_t finalblk[16] = { + 0x00000001, 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00000620 +}; + +static inline void HMAC_SHA256_80_init(const uint32_t *key, + uint32_t *tstate, uint32_t *ostate) +{ + uint32_t ihash[8]; + uint32_t pad[16]; + int i; + + /* tstate is assumed to contain the midstate of key */ + memcpy(pad, key + 16, 16); + memcpy(pad + 4, keypad, 48); + sha256_transform(tstate, pad, 0); + memcpy(ihash, tstate, 32); + + sha256_init(ostate); + for (i = 0; i < 8; i++) + pad[i] = ihash[i] ^ 0x5c5c5c5c; + for (; i < 16; i++) + pad[i] = 0x5c5c5c5c; + sha256_transform(ostate, pad, 0); + + sha256_init(tstate); + for (i = 0; i < 8; i++) + pad[i] = ihash[i] ^ 0x36363636; + for (; i < 16; i++) + pad[i] = 0x36363636; + sha256_transform(tstate, pad, 0); +} + +static inline void PBKDF2_SHA256_80_128(const uint32_t *tstate, + const uint32_t *ostate, const uint32_t *salt, uint32_t *output) +{ + uint32_t istate[8], ostate2[8]; + uint32_t ibuf[16], 
obuf[16]; + int i, j; + + memcpy(istate, tstate, 32); + sha256_transform(istate, salt, 0); + + memcpy(ibuf, salt + 16, 16); + memcpy(ibuf + 5, innerpad, 44); + memcpy(obuf + 8, outerpad, 32); + + for (i = 0; i < 4; i++) { + memcpy(obuf, istate, 32); + ibuf[4] = i + 1; + sha256_transform(obuf, ibuf, 0); + + memcpy(ostate2, ostate, 32); + sha256_transform(ostate2, obuf, 0); + for (j = 0; j < 8; j++) + output[8 * i + j] = swab32(ostate2[j]); + } +} + +static inline void PBKDF2_SHA256_128_32(uint32_t *tstate, uint32_t *ostate, + const uint32_t *salt, uint32_t *output) +{ + uint32_t buf[16]; + int i; + + sha256_transform(tstate, salt, 1); + sha256_transform(tstate, salt + 16, 1); + sha256_transform(tstate, finalblk, 0); + memcpy(buf, tstate, 32); + memcpy(buf + 8, outerpad, 32); + + sha256_transform(ostate, buf, 0); + for (i = 0; i < 8; i++) + output[i] = swab32(ostate[i]); +} + + +#if HAVE_SHA256_4WAY + +static const uint32_t keypad_4way[4 * 12] = { + 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000280, 0x00000280, 0x00000280, 0x00000280 +}; +static const uint32_t innerpad_4way[4 * 11] = { + 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 
0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x000004a0, 0x000004a0, 0x000004a0, 0x000004a0 +}; +static const uint32_t outerpad_4way[4 * 8] = { + 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000300, 0x00000300, 0x00000300, 0x00000300 +}; +static const uint32_t finalblk_4way[4 * 16] __attribute__((aligned(16))) = { + 0x00000001, 0x00000001, 0x00000001, 0x00000001, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000620, 0x00000620, 0x00000620, 0x00000620 +}; + +static inline void HMAC_SHA256_80_init_4way(const uint32_t *key, + uint32_t *tstate, uint32_t *ostate) +{ + uint32_t ihash[4 * 8] __attribute__((aligned(16))); + uint32_t pad[4 * 16] __attribute__((aligned(16))); + int i; + + /* tstate is assumed to contain the midstate of key */ + memcpy(pad, key + 4 * 16, 4 * 16); + memcpy(pad + 4 * 4, keypad_4way, 4 * 48); + sha256_transform_4way(tstate, pad, 0); + memcpy(ihash, tstate, 4 * 32); + + sha256_init_4way(ostate); + for (i = 0; i < 4 * 8; i++) + 
pad[i] = ihash[i] ^ 0x5c5c5c5c; + for (; i < 4 * 16; i++) + pad[i] = 0x5c5c5c5c; + sha256_transform_4way(ostate, pad, 0); + + sha256_init_4way(tstate); + for (i = 0; i < 4 * 8; i++) + pad[i] = ihash[i] ^ 0x36363636; + for (; i < 4 * 16; i++) + pad[i] = 0x36363636; + sha256_transform_4way(tstate, pad, 0); +} + +static inline void PBKDF2_SHA256_80_128_4way(const uint32_t *tstate, + const uint32_t *ostate, const uint32_t *salt, uint32_t *output) +{ + uint32_t istate[4 * 8] __attribute__((aligned(16))); + uint32_t ostate2[4 * 8] __attribute__((aligned(16))); + uint32_t ibuf[4 * 16] __attribute__((aligned(16))); + uint32_t obuf[4 * 16] __attribute__((aligned(16))); + int i, j; + + memcpy(istate, tstate, 4 * 32); + sha256_transform_4way(istate, salt, 0); + + memcpy(ibuf, salt + 4 * 16, 4 * 16); + memcpy(ibuf + 4 * 5, innerpad_4way, 4 * 44); + memcpy(obuf + 4 * 8, outerpad_4way, 4 * 32); + + for (i = 0; i < 4; i++) { + memcpy(obuf, istate, 4 * 32); + ibuf[4 * 4 + 0] = i + 1; + ibuf[4 * 4 + 1] = i + 1; + ibuf[4 * 4 + 2] = i + 1; + ibuf[4 * 4 + 3] = i + 1; + sha256_transform_4way(obuf, ibuf, 0); + + memcpy(ostate2, ostate, 4 * 32); + sha256_transform_4way(ostate2, obuf, 0); + for (j = 0; j < 4 * 8; j++) + output[4 * 8 * i + j] = swab32(ostate2[j]); + } +} + +static inline void PBKDF2_SHA256_128_32_4way(uint32_t *tstate, + uint32_t *ostate, const uint32_t *salt, uint32_t *output) +{ + uint32_t buf[4 * 16] __attribute__((aligned(16))); + int i; + + sha256_transform_4way(tstate, salt, 1); + sha256_transform_4way(tstate, salt + 4 * 16, 1); + sha256_transform_4way(tstate, finalblk_4way, 0); + memcpy(buf, tstate, 4 * 32); + memcpy(buf + 4 * 8, outerpad_4way, 4 * 32); + + sha256_transform_4way(ostate, buf, 0); + for (i = 0; i < 4 * 8; i++) + output[i] = swab32(ostate[i]); +} + +#endif /* HAVE_SHA256_4WAY */ + + +#if HAVE_SHA256_8WAY + +static const uint32_t finalblk_8way[8 * 16] __attribute__((aligned(32))) = { + 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 
0x00000001, 0x00000001, 0x00000001, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620 +}; + +static inline void HMAC_SHA256_80_init_8way(const uint32_t *key, + uint32_t *tstate, uint32_t *ostate) +{ + uint32_t ihash[8 * 8] __attribute__((aligned(32))); + uint32_t pad[8 * 16] __attribute__((aligned(32))); + int i; + + /* tstate is assumed to contain the midstate of key */ + memcpy(pad, key + 8 * 16, 8 * 16); + for (i = 0; i < 8; i++) + pad[8 * 4 + i] = 0x80000000; + memset(pad + 8 * 5, 0x00, 8 * 40); + for (i = 0; i < 8; i++) + pad[8 * 15 + i] = 0x00000280; + 
/*
 * 8-lane PBKDF2 expansion: from the prepared inner (tstate) and outer
 * (ostate) HMAC states and an 80-byte-per-lane salt, produce four 32-byte
 * blocks per lane (8 * 32 words in total) in output.
 *
 * All buffers are lane-interleaved (word w of lane k lives at index
 * 8 * w + k).  The salt is fully consumed before the first write to output.
 */
static inline void PBKDF2_SHA256_80_128_8way(const uint32_t *tstate,
	const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
	uint32_t inner_mid[8 * 8] __attribute__((aligned(32)));
	uint32_t outer[8 * 8] __attribute__((aligned(32)));
	uint32_t ibuf[8 * 16] __attribute__((aligned(32)));
	uint32_t obuf[8 * 16] __attribute__((aligned(32)));
	uint32_t *dst = output;
	int blk, lane, w;

	/* Inner state after the first 64 salt bytes of every lane. */
	memcpy(inner_mid, tstate, sizeof(inner_mid));
	sha256_transform_8way(inner_mid, salt, 0);

	/* Second inner block starts with the last 4 salt words of each lane. */
	memcpy(&ibuf[0], &salt[8 * 16], 8 * 4 * sizeof(uint32_t));

	/* Constant padding rows of the inner and outer blocks. */
	for (lane = 0; lane < 8; lane++) {
		ibuf[8 * 5 + lane] = 0x80000000;
		ibuf[8 * 15 + lane] = 0x000004a0;
		obuf[8 * 8 + lane] = 0x80000000;
		obuf[8 * 15 + lane] = 0x00000300;
	}
	memset(&ibuf[8 * 6], 0x00, 8 * 9 * sizeof(uint32_t));
	memset(&obuf[8 * 9], 0x00, 8 * 6 * sizeof(uint32_t));

	for (blk = 1; blk <= 4; blk++, dst += 8 * 8) {
		memcpy(obuf, inner_mid, sizeof(inner_mid));
		for (lane = 0; lane < 8; lane++)
			ibuf[8 * 4 + lane] = blk;
		sha256_transform_8way(obuf, ibuf, 0);

		memcpy(outer, ostate, sizeof(outer));
		sha256_transform_8way(outer, obuf, 0);
		for (w = 0; w < 8 * 8; w++)
			dst[w] = swab32(outer[w]);
	}
}
/*
 * XOR the 16-word block Bx into B, run eight Salsa20 rounds (four
 * column/row double-rounds) over the result and add the permuted words back
 * into B.  B is updated in place; Bx is only read.
 */
static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
{
	uint32_t x[16];
	int w, pass;

	for (w = 0; w < 16; w++) {
		B[w] ^= Bx[w];
		x[w] = B[w];
	}

#define SALSA_ROTL(v, c) (((v) << (c)) | ((v) >> (32 - (c))))
#define SALSA_QR(a, b, c, d) \
	do { \
		x[b] ^= SALSA_ROTL(x[a] + x[d],  7); \
		x[c] ^= SALSA_ROTL(x[b] + x[a],  9); \
		x[d] ^= SALSA_ROTL(x[c] + x[b], 13); \
		x[a] ^= SALSA_ROTL(x[d] + x[c], 18); \
	} while (0)

	for (pass = 0; pass < 4; pass++) {
		/* Operate on columns. */
		SALSA_QR( 0,  4,  8, 12);
		SALSA_QR( 5,  9, 13,  1);
		SALSA_QR(10, 14,  2,  6);
		SALSA_QR(15,  3,  7, 11);

		/* Operate on rows. */
		SALSA_QR( 0,  1,  2,  3);
		SALSA_QR( 5,  6,  7,  4);
		SALSA_QR(10, 11,  8,  9);
		SALSA_QR(15, 12, 13, 14);
	}

#undef SALSA_QR
#undef SALSA_ROTL

	for (w = 0; w < 16; w++)
		B[w] += x[w];
}

/*
 * Memory-hard mixing of the 32-word state X using the scratch area V.
 *
 * Pass 1 stores 1024 successive states in V (32 words each, so V must hold
 * at least 1024 * 32 words) while stepping X forward.  Pass 2 runs another
 * 1024 steps, each time XOR-ing in the stored state selected by the low
 * 10 bits of X[16] before stepping.
 */
static inline void scrypt_core(uint32_t *X, uint32_t *V)
{
	uint32_t *slot;
	uint32_t step, w;

	for (step = 0, slot = V; step < 1024; step++, slot += 32) {
		memcpy(slot, X, 32 * sizeof(uint32_t));
		xor_salsa8(X, X + 16);
		xor_salsa8(X + 16, X);
	}

	for (step = 0; step < 1024; step++) {
		slot = V + 32 * (X[16] & 1023);
		for (w = 0; w < 32; w++)
			X[w] ^= slot[w];
		xor_salsa8(X, X + 16);
		xor_salsa8(X + 16, X);
	}
}
/*
 * Hash four 80-byte inputs at once.  The SHA-256 based stages run 4-wide on
 * lane-interleaved data; the mixing stage runs scrypt_core() once per lane.
 *
 * input      4 consecutive 20-word headers
 * output     receives 4 consecutive 8-word hashes
 * midstate   8-word SHA-256 midstate, copied into every lane
 * scratchpad scratch memory; rounded up to a 64-byte boundary before use
 */
static void scrypt_1024_1_1_256_4way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
	uint32_t tstate[4 * 8] __attribute__((aligned(128)));
	uint32_t ostate[4 * 8] __attribute__((aligned(128)));
	uint32_t W[4 * 32] __attribute__((aligned(128)));
	uint32_t X[4 * 32] __attribute__((aligned(128)));
	uint32_t *V;
	int lane, w;

	V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

	/* Interleave the four headers and replicate the midstate per lane. */
	for (lane = 0; lane < 4; lane++) {
		for (w = 0; w < 20; w++)
			W[4 * w + lane] = input[20 * lane + w];
		for (w = 0; w < 8; w++)
			tstate[4 * w + lane] = midstate[w];
	}

	HMAC_SHA256_80_init_4way(W, tstate, ostate);
	PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);

	/* De-interleave so each lane is a contiguous 32-word block. */
	for (lane = 0; lane < 4; lane++)
		for (w = 0; w < 32; w++)
			X[32 * lane + w] = W[4 * w + lane];

	for (lane = 0; lane < 4; lane++)
		scrypt_core(X + 32 * lane, V);

	/* Back to the interleaved layout for the final 4-wide stage. */
	for (lane = 0; lane < 4; lane++)
		for (w = 0; w < 32; w++)
			W[4 * w + lane] = X[32 * lane + w];

	PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);

	for (lane = 0; lane < 4; lane++)
		for (w = 0; w < 8; w++)
			output[8 * lane + w] = W[4 * w + lane];
}
/*
 * Hash twelve 80-byte inputs at once: three groups of four lanes go through
 * the 4-wide SHA-256 stages, and the mixing stage is done by four calls to
 * scrypt_core_3way(), each covering three consecutive lanes.
 *
 * input      12 consecutive 20-word headers
 * output     receives 12 consecutive 8-word hashes
 * midstate   8-word SHA-256 midstate, copied into every lane
 * scratchpad scratch memory; rounded up to a 64-byte boundary before use
 */
static void scrypt_1024_1_1_256_12way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
	uint32_t tstate[12 * 8] __attribute__((aligned(128)));
	uint32_t ostate[12 * 8] __attribute__((aligned(128)));
	uint32_t W[12 * 32] __attribute__((aligned(128)));
	uint32_t X[12 * 32] __attribute__((aligned(128)));
	uint32_t *V;
	int grp, lane, w, call;

	V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

	/* Interleave each group of four headers; replicate the midstate. */
	for (grp = 0; grp < 3; grp++) {
		const uint32_t *in = input + 80 * grp;
		uint32_t *Wg = W + 128 * grp;
		uint32_t *tg = tstate + 32 * grp;

		for (lane = 0; lane < 4; lane++) {
			for (w = 0; w < 20; w++)
				Wg[4 * w + lane] = in[20 * lane + w];
			for (w = 0; w < 8; w++)
				tg[4 * w + lane] = midstate[w];
		}
	}

	for (grp = 0; grp < 3; grp++)
		HMAC_SHA256_80_init_4way(W + 128 * grp,
			tstate + 32 * grp, ostate + 32 * grp);
	for (grp = 0; grp < 3; grp++)
		PBKDF2_SHA256_80_128_4way(tstate + 32 * grp, ostate + 32 * grp,
			W + 128 * grp, W + 128 * grp);

	/* De-interleave: lane (4 * grp + lane) becomes a contiguous block. */
	for (grp = 0; grp < 3; grp++) {
		const uint32_t *Wg = W + 128 * grp;
		uint32_t *Xg = X + 128 * grp;

		for (lane = 0; lane < 4; lane++)
			for (w = 0; w < 32; w++)
				Xg[32 * lane + w] = Wg[4 * w + lane];
	}

	/* 12 lanes = 4 calls of 3 lanes (96 words) each. */
	for (call = 0; call < 4; call++)
		scrypt_core_3way(X + 96 * call, V);

	for (grp = 0; grp < 3; grp++) {
		const uint32_t *Xg = X + 128 * grp;
		uint32_t *Wg = W + 128 * grp;

		for (lane = 0; lane < 4; lane++)
			for (w = 0; w < 32; w++)
				Wg[4 * w + lane] = Xg[32 * lane + w];
	}

	for (grp = 0; grp < 3; grp++)
		PBKDF2_SHA256_128_32_4way(tstate + 32 * grp, ostate + 32 * grp,
			W + 128 * grp, W + 128 * grp);

	for (grp = 0; grp < 3; grp++) {
		const uint32_t *Wg = W + 128 * grp;
		uint32_t *out = output + 32 * grp;

		for (lane = 0; lane < 4; lane++)
			for (w = 0; w < 8; w++)
				out[8 * lane + w] = Wg[4 * w + lane];
	}
}
PBKDF2_SHA256_128_32_8way(tstate + 0, ostate + 0, W + 0, W + 0); + PBKDF2_SHA256_128_32_8way(tstate + 64, ostate + 64, W + 256, W + 256); + PBKDF2_SHA256_128_32_8way(tstate + 128, ostate + 128, W + 512, W + 512); + for (j = 0; j < 3; j++) + for (i = 0; i < 8; i++) + for (k = 0; k < 8; k++) + output[8 * 8 * j + k * 8 + i] = W[8 * 32 * j + 8 * i + k]; +} +#endif /* HAVE_SCRYPT_6WAY */ + +int scanhash_scrypt(int thr_id, uint32_t *pdata, + unsigned char *scratchbuf, const uint32_t *ptarget, + uint32_t max_nonce, unsigned long *hashes_done) +{ + uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8]; + uint32_t midstate[8]; + uint32_t n = pdata[19] - 1; + const uint32_t Htarg = ptarget[7]; + int throughput = scrypt_best_throughput(); + int i; + +#if HAVE_SHA256_4WAY + if (sha256_use_4way()) + throughput *= 4; +#endif + + for (i = 0; i < throughput; i++) + memcpy(data + i * 20, pdata, 80); + + sha256_init(midstate); + sha256_transform(midstate, data, 0); + + do { + for (i = 0; i < throughput; i++) + data[i * 20 + 19] = ++n; + +#if defined(HAVE_SHA256_4WAY) + if (throughput == 4) + scrypt_1024_1_1_256_4way(data, hash, midstate, scratchbuf); + else +#endif +#if defined(HAVE_SCRYPT_3WAY) && defined(HAVE_SHA256_4WAY) + if (throughput == 12) + scrypt_1024_1_1_256_12way(data, hash, midstate, scratchbuf); + else +#endif +#if defined(HAVE_SCRYPT_6WAY) + if (throughput == 24) + scrypt_1024_1_1_256_24way(data, hash, midstate, scratchbuf); + else +#endif +#if defined(HAVE_SCRYPT_3WAY) + if (throughput == 3) + scrypt_1024_1_1_256_3way(data, hash, midstate, scratchbuf); + else +#endif + scrypt_1024_1_1_256(data, hash, midstate, scratchbuf); + + for (i = 0; i < throughput; i++) { + if (hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget)) { + *hashes_done = n - pdata[19] + 1; + pdata[19] = data[i * 20 + 19]; + return 1; + } + } + } while (n < max_nonce && !work_restart[thr_id].restart); + + *hashes_done = n - pdata[19] + 1; + pdata[19] = n; + return 0; +} diff --git 
a/sha2.c b/sha2.c new file mode 100644 index 0000000..4bfdcc7 --- /dev/null +++ b/sha2.c @@ -0,0 +1,630 @@ +/* + * Copyright 2011 ArtForz + * Copyright 2011-2013 pooler + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. See COPYING for more details. + */ + +#include "cpuminer-config.h" +#include "miner.h" + +#include +#include + +#if defined(__arm__) && defined(__APCS_32__) +#define EXTERN_SHA256 +#endif + +static const uint32_t sha256_h[8] = { + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, + 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 +}; + +static const uint32_t sha256_k[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +void sha256_init(uint32_t *state) +{ + memcpy(state, sha256_h, 32); +} + +/* Elementary functions used by SHA256 */ +#define Ch(x, y, z) ((x & (y ^ z)) ^ z) +#define Maj(x, y, z) ((x & (y | z)) | (y & z)) +#define ROTR(x, n) ((x >> n) | (x << (32 - n))) +#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) +#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) +#define s0(x) (ROTR(x, 
7) ^ ROTR(x, 18) ^ (x >> 3)) +#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ (x >> 10)) + +/* SHA256 round function */ +#define RND(a, b, c, d, e, f, g, h, k) \ + do { \ + t0 = h + S1(e) + Ch(e, f, g) + k; \ + t1 = S0(a) + Maj(a, b, c); \ + d += t0; \ + h = t0 + t1; \ + } while (0) + +/* Adjusted round function for rotating state */ +#define RNDr(S, W, i) \ + RND(S[(64 - i) % 8], S[(65 - i) % 8], \ + S[(66 - i) % 8], S[(67 - i) % 8], \ + S[(68 - i) % 8], S[(69 - i) % 8], \ + S[(70 - i) % 8], S[(71 - i) % 8], \ + W[i] + sha256_k[i]) + +#ifndef EXTERN_SHA256 + +/* + * SHA256 block compression function. The 256-bit state is transformed via + * the 512-bit input block to produce a new state. + */ +void sha256_transform(uint32_t *state, const uint32_t *block, int swap) +{ + uint32_t W[64]; + uint32_t S[8]; + uint32_t t0, t1; + int i; + + /* 1. Prepare message schedule W. */ + if (swap) { + for (i = 0; i < 16; i++) + W[i] = swab32(block[i]); + } else + memcpy(W, block, 64); + for (i = 16; i < 64; i += 2) { + W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16]; + W[i+1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15]; + } + + /* 2. Initialize working variables. */ + memcpy(S, state, 32); + + /* 3. Mix. 
*/ + RNDr(S, W, 0); + RNDr(S, W, 1); + RNDr(S, W, 2); + RNDr(S, W, 3); + RNDr(S, W, 4); + RNDr(S, W, 5); + RNDr(S, W, 6); + RNDr(S, W, 7); + RNDr(S, W, 8); + RNDr(S, W, 9); + RNDr(S, W, 10); + RNDr(S, W, 11); + RNDr(S, W, 12); + RNDr(S, W, 13); + RNDr(S, W, 14); + RNDr(S, W, 15); + RNDr(S, W, 16); + RNDr(S, W, 17); + RNDr(S, W, 18); + RNDr(S, W, 19); + RNDr(S, W, 20); + RNDr(S, W, 21); + RNDr(S, W, 22); + RNDr(S, W, 23); + RNDr(S, W, 24); + RNDr(S, W, 25); + RNDr(S, W, 26); + RNDr(S, W, 27); + RNDr(S, W, 28); + RNDr(S, W, 29); + RNDr(S, W, 30); + RNDr(S, W, 31); + RNDr(S, W, 32); + RNDr(S, W, 33); + RNDr(S, W, 34); + RNDr(S, W, 35); + RNDr(S, W, 36); + RNDr(S, W, 37); + RNDr(S, W, 38); + RNDr(S, W, 39); + RNDr(S, W, 40); + RNDr(S, W, 41); + RNDr(S, W, 42); + RNDr(S, W, 43); + RNDr(S, W, 44); + RNDr(S, W, 45); + RNDr(S, W, 46); + RNDr(S, W, 47); + RNDr(S, W, 48); + RNDr(S, W, 49); + RNDr(S, W, 50); + RNDr(S, W, 51); + RNDr(S, W, 52); + RNDr(S, W, 53); + RNDr(S, W, 54); + RNDr(S, W, 55); + RNDr(S, W, 56); + RNDr(S, W, 57); + RNDr(S, W, 58); + RNDr(S, W, 59); + RNDr(S, W, 60); + RNDr(S, W, 61); + RNDr(S, W, 62); + RNDr(S, W, 63); + + /* 4. 
Mix local working variables into global state */ + for (i = 0; i < 8; i++) + state[i] += S[i]; +} + +#endif /* EXTERN_SHA256 */ + + +static const uint32_t sha256d_hash1[16] = { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x80000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000100 +}; + +static void sha256d_80_swap(uint32_t *hash, const uint32_t *data) +{ + uint32_t S[16]; + int i; + + sha256_init(S); + sha256_transform(S, data, 0); + sha256_transform(S, data + 16, 0); + memcpy(S + 8, sha256d_hash1 + 8, 32); + sha256_init(hash); + sha256_transform(hash, S, 0); + for (i = 0; i < 8; i++) + hash[i] = swab32(hash[i]); +} + +void sha256d(unsigned char *hash, const unsigned char *data, int len) +{ + uint32_t S[16], T[16]; + int i, r; + + sha256_init(S); + for (r = len; r > -9; r -= 64) { + if (r < 64) + memset(T, 0, 64); + memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 0 : r)); + if (r >= 0 && r < 64) + ((unsigned char *)T)[r] = 0x80; + for (i = 0; i < 16; i++) + T[i] = be32dec(T + i); + if (r < 56) + T[15] = 8 * len; + sha256_transform(S, T, 0); + } + memcpy(S + 8, sha256d_hash1 + 8, 32); + sha256_init(T); + sha256_transform(T, S, 0); + for (i = 0; i < 8; i++) + be32enc((uint32_t *)hash + i, T[i]); +} + +static inline void sha256d_preextend(uint32_t *W) +{ + W[16] = s1(W[14]) + W[ 9] + s0(W[ 1]) + W[ 0]; + W[17] = s1(W[15]) + W[10] + s0(W[ 2]) + W[ 1]; + W[18] = s1(W[16]) + W[11] + W[ 2]; + W[19] = s1(W[17]) + W[12] + s0(W[ 4]); + W[20] = W[13] + s0(W[ 5]) + W[ 4]; + W[21] = W[14] + s0(W[ 6]) + W[ 5]; + W[22] = W[15] + s0(W[ 7]) + W[ 6]; + W[23] = W[16] + s0(W[ 8]) + W[ 7]; + W[24] = W[17] + s0(W[ 9]) + W[ 8]; + W[25] = s0(W[10]) + W[ 9]; + W[26] = s0(W[11]) + W[10]; + W[27] = s0(W[12]) + W[11]; + W[28] = s0(W[13]) + W[12]; + W[29] = s0(W[14]) + W[13]; + W[30] = s0(W[15]) + W[14]; + W[31] = s0(W[16]) + W[15]; +} + +static inline void sha256d_prehash(uint32_t *S, const 
uint32_t *W) +{ + uint32_t t0, t1; + RNDr(S, W, 0); + RNDr(S, W, 1); + RNDr(S, W, 2); +} + +#ifdef EXTERN_SHA256 + +void sha256d_ms(uint32_t *hash, uint32_t *W, + const uint32_t *midstate, const uint32_t *prehash); + +#else + +static inline void sha256d_ms(uint32_t *hash, uint32_t *W, + const uint32_t *midstate, const uint32_t *prehash) +{ + uint32_t S[64]; + uint32_t t0, t1; + int i; + + S[18] = W[18]; + S[19] = W[19]; + S[20] = W[20]; + S[22] = W[22]; + S[23] = W[23]; + S[24] = W[24]; + S[30] = W[30]; + S[31] = W[31]; + + W[18] += s0(W[3]); + W[19] += W[3]; + W[20] += s1(W[18]); + W[21] = s1(W[19]); + W[22] += s1(W[20]); + W[23] += s1(W[21]); + W[24] += s1(W[22]); + W[25] = s1(W[23]) + W[18]; + W[26] = s1(W[24]) + W[19]; + W[27] = s1(W[25]) + W[20]; + W[28] = s1(W[26]) + W[21]; + W[29] = s1(W[27]) + W[22]; + W[30] += s1(W[28]) + W[23]; + W[31] += s1(W[29]) + W[24]; + for (i = 32; i < 64; i += 2) { + W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16]; + W[i+1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15]; + } + + memcpy(S, prehash, 32); + + RNDr(S, W, 3); + RNDr(S, W, 4); + RNDr(S, W, 5); + RNDr(S, W, 6); + RNDr(S, W, 7); + RNDr(S, W, 8); + RNDr(S, W, 9); + RNDr(S, W, 10); + RNDr(S, W, 11); + RNDr(S, W, 12); + RNDr(S, W, 13); + RNDr(S, W, 14); + RNDr(S, W, 15); + RNDr(S, W, 16); + RNDr(S, W, 17); + RNDr(S, W, 18); + RNDr(S, W, 19); + RNDr(S, W, 20); + RNDr(S, W, 21); + RNDr(S, W, 22); + RNDr(S, W, 23); + RNDr(S, W, 24); + RNDr(S, W, 25); + RNDr(S, W, 26); + RNDr(S, W, 27); + RNDr(S, W, 28); + RNDr(S, W, 29); + RNDr(S, W, 30); + RNDr(S, W, 31); + RNDr(S, W, 32); + RNDr(S, W, 33); + RNDr(S, W, 34); + RNDr(S, W, 35); + RNDr(S, W, 36); + RNDr(S, W, 37); + RNDr(S, W, 38); + RNDr(S, W, 39); + RNDr(S, W, 40); + RNDr(S, W, 41); + RNDr(S, W, 42); + RNDr(S, W, 43); + RNDr(S, W, 44); + RNDr(S, W, 45); + RNDr(S, W, 46); + RNDr(S, W, 47); + RNDr(S, W, 48); + RNDr(S, W, 49); + RNDr(S, W, 50); + RNDr(S, W, 51); + RNDr(S, W, 52); + RNDr(S, W, 53); + RNDr(S, W, 
54); + RNDr(S, W, 55); + RNDr(S, W, 56); + RNDr(S, W, 57); + RNDr(S, W, 58); + RNDr(S, W, 59); + RNDr(S, W, 60); + RNDr(S, W, 61); + RNDr(S, W, 62); + RNDr(S, W, 63); + + for (i = 0; i < 8; i++) + S[i] += midstate[i]; + + W[18] = S[18]; + W[19] = S[19]; + W[20] = S[20]; + W[22] = S[22]; + W[23] = S[23]; + W[24] = S[24]; + W[30] = S[30]; + W[31] = S[31]; + + memcpy(S + 8, sha256d_hash1 + 8, 32); + S[16] = s1(sha256d_hash1[14]) + sha256d_hash1[ 9] + s0(S[ 1]) + S[ 0]; + S[17] = s1(sha256d_hash1[15]) + sha256d_hash1[10] + s0(S[ 2]) + S[ 1]; + S[18] = s1(S[16]) + sha256d_hash1[11] + s0(S[ 3]) + S[ 2]; + S[19] = s1(S[17]) + sha256d_hash1[12] + s0(S[ 4]) + S[ 3]; + S[20] = s1(S[18]) + sha256d_hash1[13] + s0(S[ 5]) + S[ 4]; + S[21] = s1(S[19]) + sha256d_hash1[14] + s0(S[ 6]) + S[ 5]; + S[22] = s1(S[20]) + sha256d_hash1[15] + s0(S[ 7]) + S[ 6]; + S[23] = s1(S[21]) + S[16] + s0(sha256d_hash1[ 8]) + S[ 7]; + S[24] = s1(S[22]) + S[17] + s0(sha256d_hash1[ 9]) + sha256d_hash1[ 8]; + S[25] = s1(S[23]) + S[18] + s0(sha256d_hash1[10]) + sha256d_hash1[ 9]; + S[26] = s1(S[24]) + S[19] + s0(sha256d_hash1[11]) + sha256d_hash1[10]; + S[27] = s1(S[25]) + S[20] + s0(sha256d_hash1[12]) + sha256d_hash1[11]; + S[28] = s1(S[26]) + S[21] + s0(sha256d_hash1[13]) + sha256d_hash1[12]; + S[29] = s1(S[27]) + S[22] + s0(sha256d_hash1[14]) + sha256d_hash1[13]; + S[30] = s1(S[28]) + S[23] + s0(sha256d_hash1[15]) + sha256d_hash1[14]; + S[31] = s1(S[29]) + S[24] + s0(S[16]) + sha256d_hash1[15]; + for (i = 32; i < 60; i += 2) { + S[i] = s1(S[i - 2]) + S[i - 7] + s0(S[i - 15]) + S[i - 16]; + S[i+1] = s1(S[i - 1]) + S[i - 6] + s0(S[i - 14]) + S[i - 15]; + } + S[60] = s1(S[58]) + S[53] + s0(S[45]) + S[44]; + + sha256_init(hash); + + RNDr(hash, S, 0); + RNDr(hash, S, 1); + RNDr(hash, S, 2); + RNDr(hash, S, 3); + RNDr(hash, S, 4); + RNDr(hash, S, 5); + RNDr(hash, S, 6); + RNDr(hash, S, 7); + RNDr(hash, S, 8); + RNDr(hash, S, 9); + RNDr(hash, S, 10); + RNDr(hash, S, 11); + RNDr(hash, S, 12); + RNDr(hash, S, 
13); + RNDr(hash, S, 14); + RNDr(hash, S, 15); + RNDr(hash, S, 16); + RNDr(hash, S, 17); + RNDr(hash, S, 18); + RNDr(hash, S, 19); + RNDr(hash, S, 20); + RNDr(hash, S, 21); + RNDr(hash, S, 22); + RNDr(hash, S, 23); + RNDr(hash, S, 24); + RNDr(hash, S, 25); + RNDr(hash, S, 26); + RNDr(hash, S, 27); + RNDr(hash, S, 28); + RNDr(hash, S, 29); + RNDr(hash, S, 30); + RNDr(hash, S, 31); + RNDr(hash, S, 32); + RNDr(hash, S, 33); + RNDr(hash, S, 34); + RNDr(hash, S, 35); + RNDr(hash, S, 36); + RNDr(hash, S, 37); + RNDr(hash, S, 38); + RNDr(hash, S, 39); + RNDr(hash, S, 40); + RNDr(hash, S, 41); + RNDr(hash, S, 42); + RNDr(hash, S, 43); + RNDr(hash, S, 44); + RNDr(hash, S, 45); + RNDr(hash, S, 46); + RNDr(hash, S, 47); + RNDr(hash, S, 48); + RNDr(hash, S, 49); + RNDr(hash, S, 50); + RNDr(hash, S, 51); + RNDr(hash, S, 52); + RNDr(hash, S, 53); + RNDr(hash, S, 54); + RNDr(hash, S, 55); + RNDr(hash, S, 56); + + hash[2] += hash[6] + S1(hash[3]) + Ch(hash[3], hash[4], hash[5]) + + S[57] + sha256_k[57]; + hash[1] += hash[5] + S1(hash[2]) + Ch(hash[2], hash[3], hash[4]) + + S[58] + sha256_k[58]; + hash[0] += hash[4] + S1(hash[1]) + Ch(hash[1], hash[2], hash[3]) + + S[59] + sha256_k[59]; + hash[7] += hash[3] + S1(hash[0]) + Ch(hash[0], hash[1], hash[2]) + + S[60] + sha256_k[60] + + sha256_h[7]; +} + +#endif /* EXTERN_SHA256 */ + +#if HAVE_SHA256_4WAY + +void sha256d_ms_4way(uint32_t *hash, uint32_t *data, + const uint32_t *midstate, const uint32_t *prehash); + +static inline int scanhash_sha256d_4way(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done) +{ + uint32_t data[4 * 64] __attribute__((aligned(128))); + uint32_t hash[4 * 8] __attribute__((aligned(32))); + uint32_t midstate[4 * 8] __attribute__((aligned(32))); + uint32_t prehash[4 * 8] __attribute__((aligned(32))); + uint32_t n = pdata[19] - 1; + const uint32_t first_nonce = pdata[19]; + const uint32_t Htarg = ptarget[7]; + int i, j; + + memcpy(data, pdata + 16, 64); + 
sha256d_preextend(data); + for (i = 31; i >= 0; i--) + for (j = 0; j < 4; j++) + data[i * 4 + j] = data[i]; + + sha256_init(midstate); + sha256_transform(midstate, pdata, 0); + memcpy(prehash, midstate, 32); + sha256d_prehash(prehash, pdata + 16); + for (i = 7; i >= 0; i--) { + for (j = 0; j < 4; j++) { + midstate[i * 4 + j] = midstate[i]; + prehash[i * 4 + j] = prehash[i]; + } + } + + do { + for (i = 0; i < 4; i++) + data[4 * 3 + i] = ++n; + + sha256d_ms_4way(hash, data, midstate, prehash); + + for (i = 0; i < 4; i++) { + if (swab32(hash[4 * 7 + i]) <= Htarg) { + pdata[19] = data[4 * 3 + i]; + sha256d_80_swap(hash, pdata); + if (fulltest(hash, ptarget)) { + *hashes_done = n - first_nonce + 1; + return 1; + } + } + } + } while (n < max_nonce && !work_restart[thr_id].restart); + + *hashes_done = n - first_nonce + 1; + pdata[19] = n; + return 0; +} + +#endif /* HAVE_SHA256_4WAY */ + +#if HAVE_SHA256_8WAY + +void sha256d_ms_8way(uint32_t *hash, uint32_t *data, + const uint32_t *midstate, const uint32_t *prehash); + +static inline int scanhash_sha256d_8way(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done) +{ + uint32_t data[8 * 64] __attribute__((aligned(128))); + uint32_t hash[8 * 8] __attribute__((aligned(32))); + uint32_t midstate[8 * 8] __attribute__((aligned(32))); + uint32_t prehash[8 * 8] __attribute__((aligned(32))); + uint32_t n = pdata[19] - 1; + const uint32_t first_nonce = pdata[19]; + const uint32_t Htarg = ptarget[7]; + int i, j; + + memcpy(data, pdata + 16, 64); + sha256d_preextend(data); + for (i = 31; i >= 0; i--) + for (j = 0; j < 8; j++) + data[i * 8 + j] = data[i]; + + sha256_init(midstate); + sha256_transform(midstate, pdata, 0); + memcpy(prehash, midstate, 32); + sha256d_prehash(prehash, pdata + 16); + for (i = 7; i >= 0; i--) { + for (j = 0; j < 8; j++) { + midstate[i * 8 + j] = midstate[i]; + prehash[i * 8 + j] = prehash[i]; + } + } + + do { + for (i = 0; i < 8; i++) + data[8 * 3 + i] = ++n; 
+ + sha256d_ms_8way(hash, data, midstate, prehash); + + for (i = 0; i < 8; i++) { + if (swab32(hash[8 * 7 + i]) <= Htarg) { + pdata[19] = data[8 * 3 + i]; + sha256d_80_swap(hash, pdata); + if (fulltest(hash, ptarget)) { + *hashes_done = n - first_nonce + 1; + return 1; + } + } + } + } while (n < max_nonce && !work_restart[thr_id].restart); + + *hashes_done = n - first_nonce + 1; + pdata[19] = n; + return 0; +} + +#endif /* HAVE_SHA256_8WAY */ + +int scanhash_sha256d(int thr_id, uint32_t *pdata, const uint32_t *ptarget, + uint32_t max_nonce, unsigned long *hashes_done) +{ + uint32_t data[64] /* __attribute__((aligned(128))) */; + uint32_t hash[8] /* __attribute__((aligned(32))) */; + uint32_t midstate[8] /* __attribute__((aligned(32))) */; + uint32_t prehash[8] /* __attribute__((aligned(32))) */; + uint32_t n = pdata[19] - 1; + const uint32_t first_nonce = pdata[19]; + const uint32_t Htarg = ptarget[7]; + +#if HAVE_SHA256_8WAY + if (sha256_use_8way()) + return scanhash_sha256d_8way(thr_id, pdata, ptarget, + max_nonce, hashes_done); +#endif +#if HAVE_SHA256_4WAY + if (sha256_use_4way()) + return scanhash_sha256d_4way(thr_id, pdata, ptarget, + max_nonce, hashes_done); +#endif + + memcpy(data, pdata + 16, 64); + sha256d_preextend(data); + + sha256_init(midstate); + sha256_transform(midstate, pdata, 0); + memcpy(prehash, midstate, 32); + sha256d_prehash(prehash, pdata + 16); + + do { + data[3] = ++n; + sha256d_ms(hash, data, midstate, prehash); + if (swab32(hash[7]) <= Htarg) { + pdata[19] = data[3]; + sha256d_80_swap(hash, pdata); + if (fulltest(hash, ptarget)) { + *hashes_done = n - first_nonce + 1; + return 1; + } + } + } while (n < max_nonce && !work_restart[thr_id].restart); + + *hashes_done = n - first_nonce + 1; + pdata[19] = n; + return 0; +} diff --git a/sph_blake.h b/sph_blake.h new file mode 100644 index 0000000..0fc4295 --- /dev/null +++ b/sph_blake.h @@ -0,0 +1,327 @@ +/* $Id: sph_blake.h 252 2011-06-07 17:55:14Z tp $ */ +/** + * BLAKE interface. 
BLAKE is a family of functions which differ by their + * output size; this implementation defines BLAKE for output sizes 224, + * 256, 384 and 512 bits. This implementation conforms to the "third + * round" specification. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @file sph_blake.h + * @author Thomas Pornin + */ + +#ifndef SPH_BLAKE_H__ +#define SPH_BLAKE_H__ + +#ifdef __cplusplus +extern "C"{ +#endif + +#include <stddef.h> +#include "sph_types.h" + +/** + * Output size (in bits) for BLAKE-224. + */ +#define SPH_SIZE_blake224 224 + +/** + * Output size (in bits) for BLAKE-256. + */ +#define SPH_SIZE_blake256 256 + +#if SPH_64 + +/** + * Output size (in bits) for BLAKE-384. 
+ */ +#define SPH_SIZE_blake384 384 + +/** + * Output size (in bits) for BLAKE-512. + */ +#define SPH_SIZE_blake512 512 + +#endif + +/** + * This structure is a context for BLAKE-224 and BLAKE-256 computations: + * it contains the intermediate values and some data from the last + * entered block. Once a BLAKE computation has been performed, the + * context can be reused for another computation. + * + * The contents of this structure are private. A running BLAKE + * computation can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + size_t ptr; + sph_u32 H[8]; + sph_u32 S[4]; + sph_u32 T0, T1; +#endif +} sph_blake_small_context; + +/** + * This structure is a context for BLAKE-224 computations. It is + * identical to the common sph_blake_small_context. + */ +typedef sph_blake_small_context sph_blake224_context; + +/** + * This structure is a context for BLAKE-256 computations. It is + * identical to the common sph_blake_small_context. + */ +typedef sph_blake_small_context sph_blake256_context; + +#if SPH_64 + +/** + * This structure is a context for BLAKE-384 and BLAKE-512 computations: + * it contains the intermediate values and some data from the last + * entered block. Once a BLAKE computation has been performed, the + * context can be reused for another computation. + * + * The contents of this structure are private. A running BLAKE + * computation can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[128]; /* first field, for alignment */ + size_t ptr; + sph_u64 H[8]; + sph_u64 S[4]; + sph_u64 T0, T1; +#endif +} sph_blake_big_context; + +/** + * This structure is a context for BLAKE-384 computations. It is + * identical to the common sph_blake_big_context. 
+ */ +typedef sph_blake_big_context sph_blake384_context; + +/** + * This structure is a context for BLAKE-512 computations. It is + * identical to the common sph_blake_big_context. + */ +typedef sph_blake_big_context sph_blake512_context; + +#endif + +/** + * Initialize a BLAKE-224 context. This process performs no memory allocation. + * + * @param cc the BLAKE-224 context (pointer to a + * sph_blake224_context) + */ +void sph_blake224_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the BLAKE-224 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_blake224(void *cc, const void *data, size_t len); + +/** + * Terminate the current BLAKE-224 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accommodate the result (28 bytes). The context is automatically + * reinitialized. + * + * @param cc the BLAKE-224 context + * @param dst the destination buffer + */ +void sph_blake224_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accommodate the result (28 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the BLAKE-224 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_blake224_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Initialize a BLAKE-256 context. This process performs no memory allocation. 
+ * + * @param cc the BLAKE-256 context (pointer to a + * sph_blake256_context) + */ +void sph_blake256_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the BLAKE-256 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_blake256(void *cc, const void *data, size_t len); + +/** + * Terminate the current BLAKE-256 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accommodate the result (32 bytes). The context is automatically + * reinitialized. + * + * @param cc the BLAKE-256 context + * @param dst the destination buffer + */ +void sph_blake256_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accommodate the result (32 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the BLAKE-256 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_blake256_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +#if SPH_64 + +/** + * Initialize a BLAKE-384 context. This process performs no memory allocation. + * + * @param cc the BLAKE-384 context (pointer to a + * sph_blake384_context) + */ +void sph_blake384_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). 
+ * + * @param cc the BLAKE-384 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_blake384(void *cc, const void *data, size_t len); + +/** + * Terminate the current BLAKE-384 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accommodate the result (48 bytes). The context is automatically + * reinitialized. + * + * @param cc the BLAKE-384 context + * @param dst the destination buffer + */ +void sph_blake384_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accommodate the result (48 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the BLAKE-384 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_blake384_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Initialize a BLAKE-512 context. This process performs no memory allocation. + * + * @param cc the BLAKE-512 context (pointer to a + * sph_blake512_context) + */ +void sph_blake512_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the BLAKE-512 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_blake512(void *cc, const void *data, size_t len); + +/** + * Terminate the current BLAKE-512 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accommodate the result (64 bytes). The context is automatically + * reinitialized. 
+ * + * @param cc the BLAKE-512 context + * @param dst the destination buffer + */ +void sph_blake512_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accommodate the result (64 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the BLAKE-512 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_blake512_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/sph_fugue.h b/sph_fugue.h new file mode 100644 index 0000000..c8ff395 --- /dev/null +++ b/sph_fugue.h @@ -0,0 +1,81 @@ +#ifndef SPH_FUGUE_H__ +#define SPH_FUGUE_H__ + +#include <stddef.h> +#include "sph_types.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +#define SPH_SIZE_fugue224 224 + +#define SPH_SIZE_fugue256 256 + +#define SPH_SIZE_fugue384 384 + +#define SPH_SIZE_fugue512 512 + +typedef struct { +#ifndef DOXYGEN_IGNORE + sph_u32 partial; + unsigned partial_len; + unsigned round_shift; + sph_u32 S[36]; +#if SPH_64 + sph_u64 bit_count; +#else + sph_u32 bit_count_high, bit_count_low; +#endif +#endif +} sph_fugue_context; + +typedef sph_fugue_context sph_fugue224_context; + +typedef sph_fugue_context sph_fugue256_context; + +typedef sph_fugue_context sph_fugue384_context; + +typedef sph_fugue_context sph_fugue512_context; + +void sph_fugue224_init(void *cc); + +void sph_fugue224(void *cc, const void *data, size_t len); + +void sph_fugue224_close(void *cc, void *dst); + +void sph_fugue224_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +void sph_fugue256_init(void *cc); + +void sph_fugue256(void *cc, const void 
*data, size_t len); + +void sph_fugue256_close(void *cc, void *dst); + +void sph_fugue256_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +void sph_fugue384_init(void *cc); + +void sph_fugue384(void *cc, const void *data, size_t len); + +void sph_fugue384_close(void *cc, void *dst); + +void sph_fugue384_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +void sph_fugue512_init(void *cc); + +void sph_fugue512(void *cc, const void *data, size_t len); + +void sph_fugue512_close(void *cc, void *dst); + +void sph_fugue512_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/sph_groestl.h b/sph_groestl.h new file mode 100644 index 0000000..a997431 --- /dev/null +++ b/sph_groestl.h @@ -0,0 +1,329 @@ +/* $Id: sph_groestl.h 216 2010-06-08 09:46:57Z tp $ */ +/** + * Groestl interface. This code implements Groestl with the recommended + * parameters for SHA-3, with outputs of 224, 256, 384 and 512 bits. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @file sph_groestl.h + * @author Thomas Pornin + */ + +#ifndef SPH_GROESTL_H__ +#define SPH_GROESTL_H__ + +#ifdef __cplusplus +extern "C"{ +#endif + +#include <stddef.h> +#include "sph_types.h" + +/** + * Output size (in bits) for Groestl-224. + */ +#define SPH_SIZE_groestl224 224 + +/** + * Output size (in bits) for Groestl-256. + */ +#define SPH_SIZE_groestl256 256 + +/** + * Output size (in bits) for Groestl-384. + */ +#define SPH_SIZE_groestl384 384 + +/** + * Output size (in bits) for Groestl-512. + */ +#define SPH_SIZE_groestl512 512 + +/** + * This structure is a context for Groestl-224 and Groestl-256 computations: + * it contains the intermediate values and some data from the last + * entered block. Once a Groestl computation has been performed, the + * context can be reused for another computation. + * + * The contents of this structure are private. A running Groestl + * computation can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + size_t ptr; + union { +#if SPH_64 + sph_u64 wide[8]; +#endif + sph_u32 narrow[16]; + } state; +#if SPH_64 + sph_u64 count; +#else + sph_u32 count_high, count_low; +#endif +#endif +} sph_groestl_small_context; + +/** + * This structure is a context for Groestl-224 computations. It is + * identical to the common sph_groestl_small_context. + */ +typedef sph_groestl_small_context sph_groestl224_context; + +/** + * This structure is a context for Groestl-256 computations. It is + * identical to the common sph_groestl_small_context. 
+ */ +typedef sph_groestl_small_context sph_groestl256_context; + +/** + * This structure is a context for Groestl-384 and Groestl-512 computations: + * it contains the intermediate values and some data from the last + * entered block. Once a Groestl computation has been performed, the + * context can be reused for another computation. + * + * The contents of this structure are private. A running Groestl + * computation can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[128]; /* first field, for alignment */ + size_t ptr; + union { +#if SPH_64 + sph_u64 wide[16]; +#endif + sph_u32 narrow[32]; + } state; +#if SPH_64 + sph_u64 count; +#else + sph_u32 count_high, count_low; +#endif +#endif +} sph_groestl_big_context; + +/** + * This structure is a context for Groestl-384 computations. It is + * identical to the common sph_groestl_big_context. + */ +typedef sph_groestl_big_context sph_groestl384_context; + +/** + * This structure is a context for Groestl-512 computations. It is + * identical to the common sph_groestl_big_context. + */ +typedef sph_groestl_big_context sph_groestl512_context; + +/** + * Initialize a Groestl-224 context. This process performs no memory allocation. + * + * @param cc the Groestl-224 context (pointer to a + * sph_groestl224_context) + */ +void sph_groestl224_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the Groestl-224 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_groestl224(void *cc, const void *data, size_t len); + +/** + * Terminate the current Groestl-224 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accommodate the result (28 bytes). The context is automatically + * reinitialized. 
+ * + * @param cc the Groestl-224 context + * @param dst the destination buffer + */ +void sph_groestl224_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (28 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the Groestl-224 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_groestl224_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Initialize a Groestl-256 context. This process performs no memory allocation. + * + * @param cc the Groestl-256 context (pointer to a + * sph_groestl256_context) + */ +void sph_groestl256_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the Groestl-256 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_groestl256(void *cc, const void *data, size_t len); + +/** + * Terminate the current Groestl-256 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accomodate the result (32 bytes). The context is automatically + * reinitialized. + * + * @param cc the Groestl-256 context + * @param dst the destination buffer + */ +void sph_groestl256_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (32 bytes). 
If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the Groestl-256 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_groestl256_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Initialize a Groestl-384 context. This process performs no memory allocation. + * + * @param cc the Groestl-384 context (pointer to a + * sph_groestl384_context) + */ +void sph_groestl384_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the Groestl-384 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_groestl384(void *cc, const void *data, size_t len); + +/** + * Terminate the current Groestl-384 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accomodate the result (48 bytes). The context is automatically + * reinitialized. + * + * @param cc the Groestl-384 context + * @param dst the destination buffer + */ +void sph_groestl384_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (48 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. 
+ * + * @param cc the Groestl-384 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_groestl384_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Initialize a Groestl-512 context. This process performs no memory allocation. + * + * @param cc the Groestl-512 context (pointer to a + * sph_groestl512_context) + */ +void sph_groestl512_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the Groestl-512 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_groestl512(void *cc, const void *data, size_t len); + +/** + * Terminate the current Groestl-512 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accomodate the result (64 bytes). The context is automatically + * reinitialized. + * + * @param cc the Groestl-512 context + * @param dst the destination buffer + */ +void sph_groestl512_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (64 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. 
+ * + * @param cc the Groestl-512 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_groestl512_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/sph_keccak.h b/sph_keccak.h new file mode 100644 index 0000000..8760598 --- /dev/null +++ b/sph_keccak.h @@ -0,0 +1,293 @@ +/* $Id: sph_keccak.h 216 2010-06-08 09:46:57Z tp $ */ +/** + * Keccak interface. This is the interface for Keccak with the + * recommended parameters for SHA-3, with output lengths 224, 256, + * 384 and 512 bits. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @file sph_keccak.h + * @author Thomas Pornin + */ + +#ifndef SPH_KECCAK_H__ +#define SPH_KECCAK_H__ + +#ifdef __cplusplus +extern "C"{ +#endif + +#include <stddef.h> +#include "sph_types.h" + +/** + * Output size (in bits) for Keccak-224. + */ +#define SPH_SIZE_keccak224 224 + +/** + * Output size (in bits) for Keccak-256. + */ +#define SPH_SIZE_keccak256 256 + +/** + * Output size (in bits) for Keccak-384. + */ +#define SPH_SIZE_keccak384 384 + +/** + * Output size (in bits) for Keccak-512. + */ +#define SPH_SIZE_keccak512 512 + +/** + * This structure is a context for Keccak computations: it contains the + * intermediate values and some data from the last entered block. Once a + * Keccak computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running Keccak computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[144]; /* first field, for alignment */ + size_t ptr, lim; + union { +#if SPH_64 + sph_u64 wide[25]; +#endif + sph_u32 narrow[50]; + } u; +#endif +} sph_keccak_context; + +/** + * Type for a Keccak-224 context (identical to the common context). + */ +typedef sph_keccak_context sph_keccak224_context; + +/** + * Type for a Keccak-256 context (identical to the common context). + */ +typedef sph_keccak_context sph_keccak256_context; + +/** + * Type for a Keccak-384 context (identical to the common context). + */ +typedef sph_keccak_context sph_keccak384_context; + +/** + * Type for a Keccak-512 context (identical to the common context). + */ +typedef sph_keccak_context sph_keccak512_context; + +/** + * Initialize a Keccak-224 context. This process performs no memory allocation. 
+ * + * @param cc the Keccak-224 context (pointer to a + * sph_keccak224_context) + */ +void sph_keccak224_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the Keccak-224 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_keccak224(void *cc, const void *data, size_t len); + +/** + * Terminate the current Keccak-224 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accomodate the result (28 bytes). The context is automatically + * reinitialized. + * + * @param cc the Keccak-224 context + * @param dst the destination buffer + */ +void sph_keccak224_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (28 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the Keccak-224 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_keccak224_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Initialize a Keccak-256 context. This process performs no memory allocation. + * + * @param cc the Keccak-256 context (pointer to a + * sph_keccak256_context) + */ +void sph_keccak256_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). 
+ * + * @param cc the Keccak-256 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_keccak256(void *cc, const void *data, size_t len); + +/** + * Terminate the current Keccak-256 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accomodate the result (32 bytes). The context is automatically + * reinitialized. + * + * @param cc the Keccak-256 context + * @param dst the destination buffer + */ +void sph_keccak256_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (32 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the Keccak-256 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_keccak256_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Initialize a Keccak-384 context. This process performs no memory allocation. + * + * @param cc the Keccak-384 context (pointer to a + * sph_keccak384_context) + */ +void sph_keccak384_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the Keccak-384 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_keccak384(void *cc, const void *data, size_t len); + +/** + * Terminate the current Keccak-384 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accomodate the result (48 bytes). The context is automatically + * reinitialized. 
+ * + * @param cc the Keccak-384 context + * @param dst the destination buffer + */ +void sph_keccak384_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (48 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the Keccak-384 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_keccak384_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Initialize a Keccak-512 context. This process performs no memory allocation. + * + * @param cc the Keccak-512 context (pointer to a + * sph_keccak512_context) + */ +void sph_keccak512_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the Keccak-512 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_keccak512(void *cc, const void *data, size_t len); + +/** + * Terminate the current Keccak-512 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accomodate the result (64 bytes). The context is automatically + * reinitialized. + * + * @param cc the Keccak-512 context + * @param dst the destination buffer + */ +void sph_keccak512_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (64 bytes). 
If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the Keccak-512 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_keccak512_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/sph_types.h b/sph_types.h new file mode 100644 index 0000000..054c96f --- /dev/null +++ b/sph_types.h @@ -0,0 +1,1976 @@ +/* $Id: sph_types.h 260 2011-07-21 01:02:38Z tp $ */ +/** + * Basic type definitions. + * + * This header file defines the generic integer types that will be used + * for the implementation of hash functions; it also contains helper + * functions which encode and decode multi-byte integer values, using + * either little-endian or big-endian conventions. + * + * This file contains a compile-time test on the size of a byte + * (the unsigned char C type). If bytes are not octets, + * i.e. if they do not have a size of exactly 8 bits, then compilation + * is aborted. Architectures where bytes are not octets are relatively + * rare, even in the embedded devices market. We forbid non-octet bytes + * because there is no clear convention on how octet streams are encoded + * on such systems. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @file sph_types.h + * @author Thomas Pornin + */ + +#ifndef SPH_TYPES_H__ +#define SPH_TYPES_H__ + +#include <limits.h> + +/* + * All our I/O functions are defined over octet streams. We do not know + * how to handle input data if bytes are not octets. + */ +#if CHAR_BIT != 8 +#error This code requires 8-bit bytes +#endif + +/* ============= BEGIN documentation block for Doxygen ============ */ + +#ifdef DOXYGEN_IGNORE + +/** @mainpage sphlib C code documentation + * + * @section overview Overview + * + * sphlib is a library which contains implementations of + * various cryptographic hash functions. 
These pages have been generated + * with doxygen and + * document the API for the C implementations. + * + * The API is described in appropriate header files, which are available + * in the "Files" section. Each hash function family has its own header, + * whose name begins with "sph_" and contains the family + * name. For instance, the API for the RIPEMD hash functions is available + * in the header file sph_ripemd.h. + * + * @section principles API structure and conventions + * + * @subsection io Input/output conventions + * + * In all generality, hash functions operate over strings of bits. + * Individual bits are rarely encountered in C programming or actual + * communication protocols; most protocols converge on the ubiquitous + * "octet" which is a group of eight bits. Data is thus expressed as a + * stream of octets. The C programming language contains the notion of a + * "byte", which is a data unit managed under the type "unsigned + * char". The C standard prescribes that a byte should hold at + * least eight bits, but possibly more. Most modern architectures, even + * in the embedded world, feature eight-bit bytes, i.e. map bytes to + * octets. + * + * Nevertheless, for some of the implemented hash functions, an extra + * API has been added, which allows the input of arbitrary sequences of + * bits: when the computation is about to be closed, 1 to 7 extra bits + * can be added. The functions for which this API is implemented include + * the SHA-2 functions and all SHA-3 candidates. + * + * sphlib defines hash function which may hash octet streams, + * i.e. streams of bits where the number of bits is a multiple of eight. + * The data input functions in the sphlib API expect data + * as anonymous pointers ("const void *") with a length + * (of type "size_t") which gives the input data chunk length + * in bytes. 
A byte is assumed to be an octet; the sph_types.h + * header contains a compile-time test which prevents compilation on + * architectures where this property is not met. + * + * The hash function output is also converted into bytes. All currently + * implemented hash functions have an output width which is a multiple of + * eight, and this is likely to remain true for new designs. + * + * Most hash functions internally convert input data into 32-bit or 64-bit + * words, using either little-endian or big-endian conversion. The hash + * output also often consists of such words, which are encoded into output + * bytes with a similar endianness convention. Some hash functions have + * been only loosely specified on that subject; when necessary, + * sphlib has been tested against published "reference" + * implementations in order to use the same conventions. + * + * @subsection shortname Function short name + * + * Each implemented hash function has a "short name" which is used + * internally to derive the identifiers for the functions and context + * structures which the function uses. For instance, MD5 has the short + * name "md5". Short names are listed in the next section, + * for the implemented hash functions. In subsequent sections, the + * short name will be assumed to be "XXX": replace with the + * actual hash function name to get the C identifier. + * + * Note: some functions within the same family share the same core + * elements, such as update function or context structure. Correspondingly, + * some of the defined types or functions may actually be macros which + * transparently evaluate to another type or function name. + * + * @subsection context Context structure + * + * Each implemented hash function has its own context structure, available + * under the type name "sph_XXX_context" for the hash function + * with short name "XXX". This structure holds all needed + * state for a running hash computation. 
+ * + * The contents of these structures are meant to be opaque, and private + * to the implementation. However, these contents are specified in the + * header files so that application code which uses sphlib + * may access the size of those structures. + * + * The caller is responsible for allocating the context structure, + * whether by dynamic allocation (malloc() or equivalent), + * static allocation (a global permanent variable), as an automatic + * variable ("on the stack"), or by any other means which ensures proper + * structure alignment. sphlib code performs no dynamic + * allocation by itself. + * + * The context must be initialized before use, using the + * sph_XXX_init() function. This function sets the context + * state to proper initial values for hashing. + * + * Since all state data is contained within the context structure, + * sphlib is thread-safe and reentrant: several hash + * computations may be performed in parallel, provided that they do not + * operate on the same context. Moreover, a running computation can be + * cloned by copying the context (with a simple memcpy()): + * the context and its clone are then independent and may be updated + * with new data and/or closed without interfering with each other. + * Similarly, a context structure can be moved in memory at will: + * context structures contain no pointer, in particular no pointer to + * themselves. + * + * @subsection dataio Data input + * + * Hashed data is input with the sph_XXX() function, which + * takes as parameters a pointer to the context, a pointer to the data + * to hash, and the number of data bytes to hash. The context is updated + * with the new data. + * + * Data can be input in one or several calls, with arbitrary input lengths. + * However, it is best, performance wise, to input data by relatively big + * chunks (say a few kilobytes), because this allows sphlib to + * optimize things and avoid internal copying. 
+ * + * When all data has been input, the context can be closed with + * sph_XXX_close(). The hash output is computed and written + * into the provided buffer. The caller must take care to provide a + * buffer of appropriate length; e.g., when using SHA-1, the output is + * a 20-byte word, therefore the output buffer must be at least 20-byte + * long. + * + * For some hash functions, the sph_XXX_addbits_and_close() + * function can be used instead of sph_XXX_close(). This + * function can take a few extra bits to be added at + * the end of the input message. This allows hashing messages with a + * bit length which is not a multiple of 8. The extra bits are provided + * as an unsigned integer value, and a bit count. The bit count must be + * between 0 and 7, inclusive. The extra bits are provided as bits 7 to + * 0 (bits of numerical value 128, 64, 32... downto 0), in that order. + * For instance, to add three bits of value 1, 1 and 0, the unsigned + * integer will have value 192 (1*128 + 1*64 + 0*32) and the bit count + * will be 3. + * + * The SPH_SIZE_XXX macro is defined for each hash function; + * it evaluates to the function output size, expressed in bits. For instance, + * SPH_SIZE_sha1 evaluates to 160. + * + * When closed, the context is automatically reinitialized and can be + * immediately used for another computation. It is not necessary to call + * sph_XXX_init() after a close. Note that + * sph_XXX_init() can still be called to "reset" a context, + * i.e. forget previously input data, and get back to the initial state. + * + * @subsection alignment Data alignment + * + * "Alignment" is a property of data, which is said to be "properly + * aligned" when its emplacement in memory is such that the data can + * be optimally read by full words. This depends on the type of access; + * basically, some hash functions will read data by 32-bit or 64-bit + * words. 
sphlib does not mandate such alignment for input + * data, but using aligned data can substantially improve performance. + * + * As a rule, it is best to input data by chunks whose length (in bytes) + * is a multiple of eight, and which begins at "generally aligned" + * addresses, such as the base address returned by a call to + * malloc(). + * + * @section functions Implemented functions + * + * We give here the list of implemented functions. They are grouped by + * family; to each family corresponds a specific header file. Each + * individual function has its associated "short name". Please refer to + * the documentation for that header file to get details on the hash + * function denomination and provenance. + * + * Note: the functions marked with a '(64)' in the list below are + * available only if the C compiler provides an integer type of length + * 64 bits or more. Such a type is mandatory in the latest C standard + * (ISO 9899:1999, aka "C99") and is present in several older compilers + * as well, so chances are that such a type is available. 
+ * + * - HAVAL family: file sph_haval.h + * - HAVAL-128/3 (128-bit, 3 passes): short name: haval128_3 + * - HAVAL-128/4 (128-bit, 4 passes): short name: haval128_4 + * - HAVAL-128/5 (128-bit, 5 passes): short name: haval128_5 + * - HAVAL-160/3 (160-bit, 3 passes): short name: haval160_3 + * - HAVAL-160/4 (160-bit, 4 passes): short name: haval160_4 + * - HAVAL-160/5 (160-bit, 5 passes): short name: haval160_5 + * - HAVAL-192/3 (192-bit, 3 passes): short name: haval192_3 + * - HAVAL-192/4 (192-bit, 4 passes): short name: haval192_4 + * - HAVAL-192/5 (192-bit, 5 passes): short name: haval192_5 + * - HAVAL-224/3 (224-bit, 3 passes): short name: haval224_3 + * - HAVAL-224/4 (224-bit, 4 passes): short name: haval224_4 + * - HAVAL-224/5 (224-bit, 5 passes): short name: haval224_5 + * - HAVAL-256/3 (256-bit, 3 passes): short name: haval256_3 + * - HAVAL-256/4 (256-bit, 4 passes): short name: haval256_4 + * - HAVAL-256/5 (256-bit, 5 passes): short name: haval256_5 + * - MD2: file sph_md2.h, short name: md2 + * - MD4: file sph_md4.h, short name: md4 + * - MD5: file sph_md5.h, short name: md5 + * - PANAMA: file sph_panama.h, short name: panama + * - RadioGatun family: file sph_radiogatun.h + * - RadioGatun[32]: short name: radiogatun32 + * - RadioGatun[64]: short name: radiogatun64 (64) + * - RIPEMD family: file sph_ripemd.h + * - RIPEMD: short name: ripemd + * - RIPEMD-128: short name: ripemd128 + * - RIPEMD-160: short name: ripemd160 + * - SHA-0: file sph_sha0.h, short name: sha0 + * - SHA-1: file sph_sha1.h, short name: sha1 + * - SHA-2 family, 32-bit hashes: file sph_sha2.h + * - SHA-224: short name: sha224 + * - SHA-256: short name: sha256 + * - SHA-384: short name: sha384 (64) + * - SHA-512: short name: sha512 (64) + * - Tiger family: file sph_tiger.h + * - Tiger: short name: tiger (64) + * - Tiger2: short name: tiger2 (64) + * - WHIRLPOOL family: file sph_whirlpool.h + * - WHIRLPOOL-0: short name: whirlpool0 (64) + * - WHIRLPOOL-1: short name: whirlpool1 (64) + * - 
WHIRLPOOL: short name: whirlpool (64) + * + * The fourteen second-round SHA-3 candidates are also implemented; + * when applicable, the implementations follow the "final" specifications + * as published for the third round of the SHA-3 competition (BLAKE, + * Groestl, JH, Keccak and Skein have been tweaked for third round). + * + * - BLAKE family: file sph_blake.h + * - BLAKE-224: short name: blake224 + * - BLAKE-256: short name: blake256 + * - BLAKE-384: short name: blake384 + * - BLAKE-512: short name: blake512 + * - BMW (Blue Midnight Wish) family: file sph_bmw.h + * - BMW-224: short name: bmw224 + * - BMW-256: short name: bmw256 + * - BMW-384: short name: bmw384 (64) + * - BMW-512: short name: bmw512 (64) + * - CubeHash family: file sph_cubehash.h (specified as + * CubeHash16/32 in the CubeHash specification) + * - CubeHash-224: short name: cubehash224 + * - CubeHash-256: short name: cubehash256 + * - CubeHash-384: short name: cubehash384 + * - CubeHash-512: short name: cubehash512 + * - ECHO family: file sph_echo.h + * - ECHO-224: short name: echo224 + * - ECHO-256: short name: echo256 + * - ECHO-384: short name: echo384 + * - ECHO-512: short name: echo512 + * - Fugue family: file sph_fugue.h + * - Fugue-224: short name: fugue224 + * - Fugue-256: short name: fugue256 + * - Fugue-384: short name: fugue384 + * - Fugue-512: short name: fugue512 + * - Groestl family: file sph_groestl.h + * - Groestl-224: short name: groestl224 + * - Groestl-256: short name: groestl256 + * - Groestl-384: short name: groestl384 + * - Groestl-512: short name: groestl512 + * - Hamsi family: file sph_hamsi.h + * - Hamsi-224: short name: hamsi224 + * - Hamsi-256: short name: hamsi256 + * - Hamsi-384: short name: hamsi384 + * - Hamsi-512: short name: hamsi512 + * - JH family: file sph_jh.h + * - JH-224: short name: jh224 + * - JH-256: short name: jh256 + * - JH-384: short name: jh384 + * - JH-512: short name: jh512 + * - Keccak family: file sph_keccak.h + * - Keccak-224: short name: 
keccak224 + * - Keccak-256: short name: keccak256 + * - Keccak-384: short name: keccak384 + * - Keccak-512: short name: keccak512 + * - Luffa family: file sph_luffa.h + * - Luffa-224: short name: luffa224 + * - Luffa-256: short name: luffa256 + * - Luffa-384: short name: luffa384 + * - Luffa-512: short name: luffa512 + * - Shabal family: file sph_shabal.h + * - Shabal-192: short name: shabal192 + * - Shabal-224: short name: shabal224 + * - Shabal-256: short name: shabal256 + * - Shabal-384: short name: shabal384 + * - Shabal-512: short name: shabal512 + * - SHAvite-3 family: file sph_shavite.h + * - SHAvite-224 (nominally "SHAvite-3 with 224-bit output"): + * short name: shavite224 + * - SHAvite-256 (nominally "SHAvite-3 with 256-bit output"): + * short name: shavite256 + * - SHAvite-384 (nominally "SHAvite-3 with 384-bit output"): + * short name: shavite384 + * - SHAvite-512 (nominally "SHAvite-3 with 512-bit output"): + * short name: shavite512 + * - SIMD family: file sph_simd.h + * - SIMD-224: short name: simd224 + * - SIMD-256: short name: simd256 + * - SIMD-384: short name: simd384 + * - SIMD-512: short name: simd512 + * - Skein family: file sph_skein.h + * - Skein-224 (nominally specified as Skein-512-224): short name: + * skein224 (64) + * - Skein-256 (nominally specified as Skein-512-256): short name: + * skein256 (64) + * - Skein-384 (nominally specified as Skein-512-384): short name: + * skein384 (64) + * - Skein-512 (nominally specified as Skein-512-512): short name: + * skein512 (64) + * + * For the second-round SHA-3 candidates, the functions are as specified + * for round 2, i.e. with the "tweaks" that some candidates added + * between round 1 and round 2. Also, some of the submitted packages for + * round 2 contained errors, in the specification, reference code, or + * both. sphlib implements the corrected versions.
+ */ + +/** @hideinitializer + * Unsigned integer type whose length is at least 32 bits; on most + * architectures, it will have a width of exactly 32 bits. Unsigned C + * types implement arithmetics modulo a power of 2; use the + * SPH_T32() macro to ensure that the value is truncated + * to exactly 32 bits. Unless otherwise specified, all macros and + * functions which accept sph_u32 values assume that these + * values fit on 32 bits, i.e. do not exceed 2^32-1, even on architectures + * where sph_u32 is larger than that. + */ +typedef __arch_dependant__ sph_u32; + +/** @hideinitializer + * Signed integer type corresponding to sph_u32; it has + * width 32 bits or more. + */ +typedef __arch_dependant__ sph_s32; + +/** @hideinitializer + * Unsigned integer type whose length is at least 64 bits; on most + * architectures which feature such a type, it will have a width of + * exactly 64 bits. C99-compliant platform will have this type; it + * is also defined when the GNU compiler (gcc) is used, and on + * platforms where unsigned long is large enough. If this + * type is not available, then some hash functions which depends on + * a 64-bit type will not be available (most notably SHA-384, SHA-512, + * Tiger and WHIRLPOOL). + */ +typedef __arch_dependant__ sph_u64; + +/** @hideinitializer + * Signed integer type corresponding to sph_u64; it has + * width 64 bits or more. + */ +typedef __arch_dependant__ sph_s64; + +/** + * This macro expands the token x into a suitable + * constant expression of type sph_u32. Depending on + * how this type is defined, a suffix such as UL may + * be appended to the argument. + * + * @param x the token to expand into a suitable constant expression + */ +#define SPH_C32(x) + +/** + * Truncate a 32-bit value to exactly 32 bits. On most systems, this is + * a no-op, recognized as such by the compiler. 
+ * + * @param x the value to truncate (of type sph_u32) + */ +#define SPH_T32(x) + +/** + * Rotate a 32-bit value by a number of bits to the left. The rotate + * count must reside between 1 and 31. This macro assumes that its + * first argument fits in 32 bits (no extra bit allowed on machines where + * sph_u32 is wider); both arguments may be evaluated + * several times. + * + * @param x the value to rotate (of type sph_u32) + * @param n the rotation count (between 1 and 31, inclusive) + */ +#define SPH_ROTL32(x, n) + +/** + * Rotate a 32-bit value by a number of bits to the right. The rotate + * count must reside between 1 and 31. This macro assumes that its + * first argument fits in 32 bits (no extra bit allowed on machines where + * sph_u32 is wider); both arguments may be evaluated + * several times. + * + * @param x the value to rotate (of type sph_u32) + * @param n the rotation count (between 1 and 31, inclusive) + */ +#define SPH_ROTR32(x, n) + +/** + * This macro is defined on systems for which a 64-bit type has been + * detected, and is used for sph_u64. + */ +#define SPH_64 + +/** + * This macro is defined on systems for which the "native" integer size is + * 64 bits (64-bit values fit in one register). + */ +#define SPH_64_TRUE + +/** + * This macro expands the token x into a suitable + * constant expression of type sph_u64. Depending on + * how this type is defined, a suffix such as ULL may + * be appended to the argument. This macro is defined only if a + * 64-bit type was detected and used for sph_u64. + * + * @param x the token to expand into a suitable constant expression + */ +#define SPH_C64(x) + +/** + * Truncate a 64-bit value to exactly 64 bits. On most systems, this is + * a no-op, recognized as such by the compiler. This macro is defined only + * if a 64-bit type was detected and used for sph_u64. + * + * @param x the value to truncate (of type sph_u64) + */ +#define SPH_T64(x) + +/** + * Rotate a 64-bit value by a number of bits to the left.
The rotate + * count must reside between 1 and 63. This macro assumes that its + * first argument fits in 64 bits (no extra bit allowed on machines where + * sph_u64 is wider); both arguments may be evaluated + * several times. This macro is defined only if a 64-bit type was detected + * and used for sph_u64. + * + * @param x the value to rotate (of type sph_u64) + * @param n the rotation count (between 1 and 63, inclusive) + */ +#define SPH_ROTL64(x, n) + +/** + * Rotate a 64-bit value by a number of bits to the right. The rotate + * count must reside between 1 and 63. This macro assumes that its + * first argument fits in 64 bits (no extra bit allowed on machines where + * sph_u64 is wider); both arguments may be evaluated + * several times. This macro is defined only if a 64-bit type was detected + * and used for sph_u64. + * + * @param x the value to rotate (of type sph_u64) + * @param n the rotation count (between 1 and 63, inclusive) + */ +#define SPH_ROTR64(x, n) + +/** + * This macro evaluates to inline or an equivalent construction, + * if available on the compilation platform, or to nothing otherwise. This + * is used to declare inline functions, for which the compiler should + * endeavour to include the code directly in the caller. Inline functions + * are typically defined in header files as replacement for macros. + */ +#define SPH_INLINE + +/** + * This macro is defined if the platform has been detected as using + * little-endian convention. This implies that the sph_u32 + * type (and the sph_u64 type also, if it is defined) has + * an exact width (i.e. exactly 32-bit, respectively 64-bit). + */ +#define SPH_LITTLE_ENDIAN + +/** + * This macro is defined if the platform has been detected as using + * big-endian convention. This implies that the sph_u32 + * type (and the sph_u64 type also, if it is defined) has + * an exact width (i.e. exactly 32-bit, respectively 64-bit).
+ */ +#define SPH_BIG_ENDIAN + +/** + * This macro is defined if 32-bit words (and 64-bit words, if defined) + * can be read from and written to memory efficiently in little-endian + * convention. This is the case for little-endian platforms, and also + * for the big-endian platforms which have special little-endian access + * opcodes (e.g. Ultrasparc). + */ +#define SPH_LITTLE_FAST + +/** + * This macro is defined if 32-bit words (and 64-bit words, if defined) + * can be read from and written to memory efficiently in big-endian + * convention. This is the case for big-endian platforms, and also + * for the little-endian platforms which have special big-endian access + * opcodes. + */ +#define SPH_BIG_FAST + +/** + * On some platforms, this macro is defined to an unsigned integer type + * into which pointer values may be cast. The resulting value can then + * be tested for being a multiple of 2, 4 or 8, indicating an aligned + * pointer for, respectively, 16-bit, 32-bit or 64-bit memory accesses. + */ +#define SPH_UPTR + +/** + * When defined, this macro indicates that unaligned memory accesses + * are possible with only a minor penalty, and thus should be preferred + * over strategies which first copy data to an aligned buffer. + */ +#define SPH_UNALIGNED + +/** + * Byte-swap a 32-bit word (i.e. 0x12345678 becomes + * 0x78563412). This is an inline function which resorts + * to inline assembly on some platforms, for better performance. + * + * @param x the 32-bit value to byte-swap + * @return the byte-swapped value + */ +static inline sph_u32 sph_bswap32(sph_u32 x); + +/** + * Byte-swap a 64-bit word. This is an inline function which resorts + * to inline assembly on some platforms, for better performance.
This + * function is defined only if a suitable 64-bit type was found for + * sph_u64 + * + * @param x the 64-bit value to byte-swap + * @return the byte-swapped value + */ +static inline sph_u64 sph_bswap64(sph_u64 x); + +/** + * Decode a 16-bit unsigned value from memory, in little-endian convention + * (least significant byte comes first). + * + * @param src the source address + * @return the decoded value + */ +static inline unsigned sph_dec16le(const void *src); + +/** + * Encode a 16-bit unsigned value into memory, in little-endian convention + * (least significant byte comes first). + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc16le(void *dst, unsigned val); + +/** + * Decode a 16-bit unsigned value from memory, in big-endian convention + * (most significant byte comes first). + * + * @param src the source address + * @return the decoded value + */ +static inline unsigned sph_dec16be(const void *src); + +/** + * Encode a 16-bit unsigned value into memory, in big-endian convention + * (most significant byte comes first). + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc16be(void *dst, unsigned val); + +/** + * Decode a 32-bit unsigned value from memory, in little-endian convention + * (least significant byte comes first). + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u32 sph_dec32le(const void *src); + +/** + * Decode a 32-bit unsigned value from memory, in little-endian convention + * (least significant byte comes first). This function assumes that the + * source address is suitably aligned for a direct access, if the platform + * supports such things; it can thus be marginally faster than the generic + * sph_dec32le() function. 
+ * + * @param src the source address + * @return the decoded value + */ +static inline sph_u32 sph_dec32le_aligned(const void *src); + +/** + * Encode a 32-bit unsigned value into memory, in little-endian convention + * (least significant byte comes first). + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc32le(void *dst, sph_u32 val); + +/** + * Encode a 32-bit unsigned value into memory, in little-endian convention + * (least significant byte comes first). This function assumes that the + * destination address is suitably aligned for a direct access, if the + * platform supports such things; it can thus be marginally faster than + * the generic sph_enc32le() function. + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc32le_aligned(void *dst, sph_u32 val); + +/** + * Decode a 32-bit unsigned value from memory, in big-endian convention + * (most significant byte comes first). + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u32 sph_dec32be(const void *src); + +/** + * Decode a 32-bit unsigned value from memory, in big-endian convention + * (most significant byte comes first). This function assumes that the + * source address is suitably aligned for a direct access, if the platform + * supports such things; it can thus be marginally faster than the generic + * sph_dec32be() function. + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u32 sph_dec32be_aligned(const void *src); + +/** + * Encode a 32-bit unsigned value into memory, in big-endian convention + * (most significant byte comes first). + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc32be(void *dst, sph_u32 val); + +/** + * Encode a 32-bit unsigned value into memory, in big-endian convention + * (most significant byte comes first). 
This function assumes that the + * destination address is suitably aligned for a direct access, if the + * platform supports such things; it can thus be marginally faster than + * the generic sph_enc32be() function. + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc32be_aligned(void *dst, sph_u32 val); + +/** + * Decode a 64-bit unsigned value from memory, in little-endian convention + * (least significant byte comes first). This function is defined only + * if a suitable 64-bit type was detected and used for sph_u64. + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u64 sph_dec64le(const void *src); + +/** + * Decode a 64-bit unsigned value from memory, in little-endian convention + * (least significant byte comes first). This function assumes that the + * source address is suitably aligned for a direct access, if the platform + * supports such things; it can thus be marginally faster than the generic + * sph_dec64le() function. This function is defined only + * if a suitable 64-bit type was detected and used for sph_u64. + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u64 sph_dec64le_aligned(const void *src); + +/** + * Encode a 64-bit unsigned value into memory, in little-endian convention + * (least significant byte comes first). This function is defined only + * if a suitable 64-bit type was detected and used for sph_u64. + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc64le(void *dst, sph_u64 val); + +/** + * Encode a 64-bit unsigned value into memory, in little-endian convention + * (least significant byte comes first). This function assumes that the + * destination address is suitably aligned for a direct access, if the + * platform supports such things; it can thus be marginally faster than + * the generic sph_enc64le() function. 
This function is defined + * only if a suitable 64-bit type was detected and used for + * sph_u64. + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc64le_aligned(void *dst, sph_u64 val); + +/** + * Decode a 64-bit unsigned value from memory, in big-endian convention + * (most significant byte comes first). This function is defined only + * if a suitable 64-bit type was detected and used for sph_u64. + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u64 sph_dec64be(const void *src); + +/** + * Decode a 64-bit unsigned value from memory, in big-endian convention + * (most significant byte comes first). This function assumes that the + * source address is suitably aligned for a direct access, if the platform + * supports such things; it can thus be marginally faster than the generic + * sph_dec64be() function. This function is defined only + * if a suitable 64-bit type was detected and used for sph_u64. + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u64 sph_dec64be_aligned(const void *src); + +/** + * Encode a 64-bit unsigned value into memory, in big-endian convention + * (most significant byte comes first). This function is defined only + * if a suitable 64-bit type was detected and used for sph_u64. + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc64be(void *dst, sph_u64 val); + +/** + * Encode a 64-bit unsigned value into memory, in big-endian convention + * (most significant byte comes first). This function assumes that the + * destination address is suitably aligned for a direct access, if the + * platform supports such things; it can thus be marginally faster than + * the generic sph_enc64be() function. This function is defined + * only if a suitable 64-bit type was detected and used for + * sph_u64. 
+ * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc64be_aligned(void *dst, sph_u64 val); + +#endif + +/* ============== END documentation block for Doxygen ============= */ + +#ifndef DOXYGEN_IGNORE + +/* + * We want to define the types "sph_u32" and "sph_u64" which hold + * unsigned values of at least, respectively, 32 and 64 bits. These + * tests should select appropriate types for most platforms. The + * macro "SPH_64" is defined if a 64-bit type is supported. + */ + +#undef SPH_64 +#undef SPH_64_TRUE + +#if defined __STDC__ && __STDC_VERSION__ >= 199901L + +/* + * On C99 implementations, we can use <stdint.h> to get an exact 64-bit + * type, if any, or otherwise use a wider type (which must exist, for + * C99 conformance). + */ + +#include <stdint.h> + +#ifdef UINT32_MAX +typedef uint32_t sph_u32; +typedef int32_t sph_s32; +#else +typedef uint_fast32_t sph_u32; +typedef int_fast32_t sph_s32; +#endif +#if !SPH_NO_64 +#ifdef UINT64_MAX +typedef uint64_t sph_u64; +typedef int64_t sph_s64; +#else +typedef uint_fast64_t sph_u64; +typedef int_fast64_t sph_s64; +#endif +#endif + +#define SPH_C32(x) ((sph_u32)(x)) +#if !SPH_NO_64 +#define SPH_C64(x) ((sph_u64)(x)) +#define SPH_64 1 +#endif + +#else + +/* + * On non-C99 systems, we use "unsigned int" if it is wide enough, + * "unsigned long" otherwise. This supports all "reasonable" architectures. + * We have to be cautious: pre-C99 preprocessors handle constants + * differently in '#if' expressions. Hence the shifts to test UINT_MAX. + */ + +#if ((UINT_MAX >> 11) >> 11) >= 0x3FF + +typedef unsigned int sph_u32; +typedef int sph_s32; + +#define SPH_C32(x) ((sph_u32)(x ## U)) + +#else + +typedef unsigned long sph_u32; +typedef long sph_s32; + +#define SPH_C32(x) ((sph_u32)(x ## UL)) + +#endif + +#if !SPH_NO_64 + +/* + * We want a 64-bit type.
We use "unsigned long" if it is wide enough (as + * is common on 64-bit architectures such as AMD64, Alpha or Sparcv9), + * "unsigned long long" otherwise, if available. We use ULLONG_MAX to + * test whether "unsigned long long" is available; we also know that + * gcc features this type, even if the libc headers do not know it. + */ + +#if ((ULONG_MAX >> 31) >> 31) >= 3 + +typedef unsigned long sph_u64; +typedef long sph_s64; + +#define SPH_C64(x) ((sph_u64)(x ## UL)) + +#define SPH_64 1 + +#elif ((ULLONG_MAX >> 31) >> 31) >= 3 || defined __GNUC__ + +typedef unsigned long long sph_u64; +typedef long long sph_s64; + +#define SPH_C64(x) ((sph_u64)(x ## ULL)) + +#define SPH_64 1 + +#else + +/* + * No 64-bit type... + */ + +#endif + +#endif + +#endif + +/* + * If the "unsigned long" type has length 64 bits or more, then this is + * a "true" 64-bit architecture. This is also true with Visual C on + * amd64, even though the "long" type is limited to 32 bits. + */ +#if SPH_64 && (((ULONG_MAX >> 31) >> 31) >= 3 || defined _M_X64) +#define SPH_64_TRUE 1 +#endif + +/* + * Implementation note: some processors have specific opcodes to perform + * a rotation. Recent versions of gcc recognize the expressions below and + * use the relevant opcodes, when appropriate. + */ + +#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) +#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) +#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) + +#if SPH_64 + +#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) +#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n)))) +#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) + +#endif + +#ifndef DOXYGEN_IGNORE +/* + * Define SPH_INLINE to be an "inline" qualifier, if available. We define + * some small macro-like functions which benefit greatly from being inlined.
+ */ +#if (defined __STDC__ && __STDC_VERSION__ >= 199901L) || defined __GNUC__ +#define SPH_INLINE inline +#elif defined _MSC_VER +#define SPH_INLINE __inline +#else +#define SPH_INLINE +#endif +#endif + +/* + * We define some macros which qualify the architecture. These macros + * may be explicit set externally (e.g. as compiler parameters). The + * code below sets those macros if they are not already defined. + * + * Most macros are boolean, thus evaluate to either zero or non-zero. + * The SPH_UPTR macro is special, in that it evaluates to a C type, + * or is not defined. + * + * SPH_UPTR if defined: unsigned type to cast pointers into + * + * SPH_UNALIGNED non-zero if unaligned accesses are efficient + * SPH_LITTLE_ENDIAN non-zero if architecture is known to be little-endian + * SPH_BIG_ENDIAN non-zero if architecture is known to be big-endian + * SPH_LITTLE_FAST non-zero if little-endian decoding is fast + * SPH_BIG_FAST non-zero if big-endian decoding is fast + * + * If SPH_UPTR is defined, then encoding and decoding of 32-bit and 64-bit + * values will try to be "smart". Either SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN + * _must_ be non-zero in those situations. The 32-bit and 64-bit types + * _must_ also have an exact width. + * + * SPH_SPARCV9_GCC_32 UltraSPARC-compatible with gcc, 32-bit mode + * SPH_SPARCV9_GCC_64 UltraSPARC-compatible with gcc, 64-bit mode + * SPH_SPARCV9_GCC UltraSPARC-compatible with gcc + * SPH_I386_GCC x86-compatible (32-bit) with gcc + * SPH_I386_MSVC x86-compatible (32-bit) with Microsoft Visual C + * SPH_AMD64_GCC x86-compatible (64-bit) with gcc + * SPH_AMD64_MSVC x86-compatible (64-bit) with Microsoft Visual C + * SPH_PPC32_GCC PowerPC, 32-bit, with gcc + * SPH_PPC64_GCC PowerPC, 64-bit, with gcc + * + * TODO: enhance automatic detection, for more architectures and compilers. + * Endianness is the most important. SPH_UNALIGNED and SPH_UPTR help with + * some very fast functions (e.g. MD4) when using unaligned input data. 
+ * The CPU-specific-with-GCC macros are useful only for inline assembly, + * normally restrained to this header file. + */ + +/* + * 32-bit x86, aka "i386 compatible". + */ +#if defined __i386__ || defined _M_IX86 + +#define SPH_DETECT_UNALIGNED 1 +#define SPH_DETECT_LITTLE_ENDIAN 1 +#define SPH_DETECT_UPTR sph_u32 +#ifdef __GNUC__ +#define SPH_DETECT_I386_GCC 1 +#endif +#ifdef _MSC_VER +#define SPH_DETECT_I386_MSVC 1 +#endif + +/* + * 64-bit x86, hereafter known as "amd64". + */ +#elif defined __x86_64 || defined _M_X64 + +#define SPH_DETECT_UNALIGNED 1 +#define SPH_DETECT_LITTLE_ENDIAN 1 +#define SPH_DETECT_UPTR sph_u64 +#ifdef __GNUC__ +#define SPH_DETECT_AMD64_GCC 1 +#endif +#ifdef _MSC_VER +#define SPH_DETECT_AMD64_MSVC 1 +#endif + +/* + * 64-bit Sparc architecture (implies v9). + */ +#elif ((defined __sparc__ || defined __sparc) && defined __arch64__) \ + || defined __sparcv9 + +#define SPH_DETECT_BIG_ENDIAN 1 +#define SPH_DETECT_UPTR sph_u64 +#ifdef __GNUC__ +#define SPH_DETECT_SPARCV9_GCC_64 1 +#define SPH_DETECT_LITTLE_FAST 1 +#endif + +/* + * 32-bit Sparc. + */ +#elif (defined __sparc__ || defined __sparc) \ + && !(defined __sparcv9 || defined __arch64__) + +#define SPH_DETECT_BIG_ENDIAN 1 +#define SPH_DETECT_UPTR sph_u32 +#if defined __GNUC__ && defined __sparc_v9__ +#define SPH_DETECT_SPARCV9_GCC_32 1 +#define SPH_DETECT_LITTLE_FAST 1 +#endif + +/* + * ARM, little-endian. + */ +#elif defined __arm__ && __ARMEL__ + +#define SPH_DETECT_LITTLE_ENDIAN 1 + +/* + * MIPS, little-endian. + */ +#elif MIPSEL || _MIPSEL || __MIPSEL || __MIPSEL__ + +#define SPH_DETECT_LITTLE_ENDIAN 1 + +/* + * MIPS, big-endian. + */ +#elif MIPSEB || _MIPSEB || __MIPSEB || __MIPSEB__ + +#define SPH_DETECT_BIG_ENDIAN 1 + +/* + * PowerPC. 
+ */ +#elif defined __powerpc__ || defined __POWERPC__ || defined __ppc__ \ + || defined _ARCH_PPC + +/* + * Note: we do not declare cross-endian access to be "fast": even if + * using inline assembly, implementation should still assume that + * keeping the decoded word in a temporary is faster than decoding + * it again. + */ +#if defined __GNUC__ +#if SPH_64_TRUE +#define SPH_DETECT_PPC64_GCC 1 +#else +#define SPH_DETECT_PPC32_GCC 1 +#endif +#endif + +#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN +#define SPH_DETECT_BIG_ENDIAN 1 +#elif defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN +#define SPH_DETECT_LITTLE_ENDIAN 1 +#endif + +/* + * Itanium, 64-bit. + */ +#elif defined __ia64 || defined __ia64__ \ + || defined __itanium__ || defined _M_IA64 + +#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN +#define SPH_DETECT_BIG_ENDIAN 1 +#else +#define SPH_DETECT_LITTLE_ENDIAN 1 +#endif +#if defined __LP64__ || defined _LP64 +#define SPH_DETECT_UPTR sph_u64 +#else +#define SPH_DETECT_UPTR sph_u32 +#endif + +#endif + +#if defined SPH_DETECT_SPARCV9_GCC_32 || defined SPH_DETECT_SPARCV9_GCC_64 +#define SPH_DETECT_SPARCV9_GCC 1 +#endif + +#if defined SPH_DETECT_UNALIGNED && !defined SPH_UNALIGNED +#define SPH_UNALIGNED SPH_DETECT_UNALIGNED +#endif +#if defined SPH_DETECT_UPTR && !defined SPH_UPTR +#define SPH_UPTR SPH_DETECT_UPTR +#endif +#if defined SPH_DETECT_LITTLE_ENDIAN && !defined SPH_LITTLE_ENDIAN +#define SPH_LITTLE_ENDIAN SPH_DETECT_LITTLE_ENDIAN +#endif +#if defined SPH_DETECT_BIG_ENDIAN && !defined SPH_BIG_ENDIAN +#define SPH_BIG_ENDIAN SPH_DETECT_BIG_ENDIAN +#endif +#if defined SPH_DETECT_LITTLE_FAST && !defined SPH_LITTLE_FAST +#define SPH_LITTLE_FAST SPH_DETECT_LITTLE_FAST +#endif +#if defined SPH_DETECT_BIG_FAST && !defined SPH_BIG_FAST +#define SPH_BIG_FAST SPH_DETECT_BIG_FAST +#endif +#if defined SPH_DETECT_SPARCV9_GCC_32 && !defined SPH_SPARCV9_GCC_32 +#define SPH_SPARCV9_GCC_32 SPH_DETECT_SPARCV9_GCC_32 +#endif +#if defined SPH_DETECT_SPARCV9_GCC_64 
&& !defined SPH_SPARCV9_GCC_64 +#define SPH_SPARCV9_GCC_64 SPH_DETECT_SPARCV9_GCC_64 +#endif +#if defined SPH_DETECT_SPARCV9_GCC && !defined SPH_SPARCV9_GCC +#define SPH_SPARCV9_GCC SPH_DETECT_SPARCV9_GCC +#endif +#if defined SPH_DETECT_I386_GCC && !defined SPH_I386_GCC +#define SPH_I386_GCC SPH_DETECT_I386_GCC +#endif +#if defined SPH_DETECT_I386_MSVC && !defined SPH_I386_MSVC +#define SPH_I386_MSVC SPH_DETECT_I386_MSVC +#endif +#if defined SPH_DETECT_AMD64_GCC && !defined SPH_AMD64_GCC +#define SPH_AMD64_GCC SPH_DETECT_AMD64_GCC +#endif +#if defined SPH_DETECT_AMD64_MSVC && !defined SPH_AMD64_MSVC +#define SPH_AMD64_MSVC SPH_DETECT_AMD64_MSVC +#endif +#if defined SPH_DETECT_PPC32_GCC && !defined SPH_PPC32_GCC +#define SPH_PPC32_GCC SPH_DETECT_PPC32_GCC +#endif +#if defined SPH_DETECT_PPC64_GCC && !defined SPH_PPC64_GCC +#define SPH_PPC64_GCC SPH_DETECT_PPC64_GCC +#endif + +#if SPH_LITTLE_ENDIAN && !defined SPH_LITTLE_FAST +#define SPH_LITTLE_FAST 1 +#endif +#if SPH_BIG_ENDIAN && !defined SPH_BIG_FAST +#define SPH_BIG_FAST 1 +#endif + +#if defined SPH_UPTR && !(SPH_LITTLE_ENDIAN || SPH_BIG_ENDIAN) +#error SPH_UPTR defined, but endianness is not known. +#endif + +#if SPH_I386_GCC && !SPH_NO_ASM + +/* + * On x86 32-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit + * values. + */ + +static SPH_INLINE sph_u32 +sph_bswap32(sph_u32 x) +{ + __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x)); + return x; +} + +#if SPH_64 + +static SPH_INLINE sph_u64 +sph_bswap64(sph_u64 x) +{ + return ((sph_u64)sph_bswap32((sph_u32)x) << 32) + | (sph_u64)sph_bswap32((sph_u32)(x >> 32)); +} + +#endif + +#elif SPH_AMD64_GCC && !SPH_NO_ASM + +/* + * On x86 64-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit + * and 64-bit values. 
+ */ + +static SPH_INLINE sph_u32 +sph_bswap32(sph_u32 x) +{ + __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x)); + return x; +} + +#if SPH_64 + +static SPH_INLINE sph_u64 +sph_bswap64(sph_u64 x) +{ + __asm__ __volatile__ ("bswapq %0" : "=r" (x) : "0" (x)); + return x; +} + +#endif + +/* + * Disabled code. Apparently, Microsoft Visual C 2005 is smart enough + * to generate proper opcodes for endianness swapping with the pure C + * implementation below. + * + +#elif SPH_I386_MSVC && !SPH_NO_ASM + +static __inline sph_u32 __declspec(naked) __fastcall +sph_bswap32(sph_u32 x) +{ + __asm { + bswap ecx + mov eax,ecx + ret + } +} + +#if SPH_64 + +static SPH_INLINE sph_u64 +sph_bswap64(sph_u64 x) +{ + return ((sph_u64)sph_bswap32((sph_u32)x) << 32) + | (sph_u64)sph_bswap32((sph_u32)(x >> 32)); +} + +#endif + + * + * [end of disabled code] + */ + +#else + +static SPH_INLINE sph_u32 +sph_bswap32(sph_u32 x) +{ + x = SPH_T32((x << 16) | (x >> 16)); + x = ((x & SPH_C32(0xFF00FF00)) >> 8) + | ((x & SPH_C32(0x00FF00FF)) << 8); + return x; +} + +#if SPH_64 + +/** + * Byte-swap a 64-bit value. + * + * @param x the input value + * @return the byte-swapped value + */ +static SPH_INLINE sph_u64 +sph_bswap64(sph_u64 x) +{ + x = SPH_T64((x << 32) | (x >> 32)); + x = ((x & SPH_C64(0xFFFF0000FFFF0000)) >> 16) + | ((x & SPH_C64(0x0000FFFF0000FFFF)) << 16); + x = ((x & SPH_C64(0xFF00FF00FF00FF00)) >> 8) + | ((x & SPH_C64(0x00FF00FF00FF00FF)) << 8); + return x; +} + +#endif + +#endif + +#if SPH_SPARCV9_GCC && !SPH_NO_ASM + +/* + * On UltraSPARC systems, native ordering is big-endian, but it is + * possible to perform little-endian read accesses by specifying the + * address space 0x88 (ASI_PRIMARY_LITTLE). Basically, either we use + * the opcode "lda [%reg]0x88,%dst", where %reg is the register which + * contains the source address and %dst is the destination register, + * or we use "lda [%reg+imm]%asi,%dst", which uses the %asi register + * to get the address space name. 
The latter format is better since it + * combines an addition and the actual access in a single opcode; but + * it requires the setting (and subsequent resetting) of %asi, which is + * slow. Some operations (i.e. MD5 compression function) combine many + * successive little-endian read accesses, which may share the same + * %asi setting. The macros below contain the appropriate inline + * assembly. + */ + +#define SPH_SPARCV9_SET_ASI \ + sph_u32 sph_sparcv9_asi; \ + __asm__ __volatile__ ( \ + "rd %%asi,%0\n\twr %%g0,0x88,%%asi" : "=r" (sph_sparcv9_asi)); + +#define SPH_SPARCV9_RESET_ASI \ + __asm__ __volatile__ ("wr %%g0,%0,%%asi" : : "r" (sph_sparcv9_asi)); + +#define SPH_SPARCV9_DEC32LE(base, idx) ({ \ + sph_u32 sph_sparcv9_tmp; \ + __asm__ __volatile__ ("lda [%1+" #idx "*4]%%asi,%0" \ + : "=r" (sph_sparcv9_tmp) : "r" (base)); \ + sph_sparcv9_tmp; \ + }) + +#endif + +static SPH_INLINE void +sph_enc16be(void *dst, unsigned val) +{ + ((unsigned char *)dst)[0] = (val >> 8); + ((unsigned char *)dst)[1] = val; +} + +static SPH_INLINE unsigned +sph_dec16be(const void *src) +{ + return ((unsigned)(((const unsigned char *)src)[0]) << 8) + | (unsigned)(((const unsigned char *)src)[1]); +} + +static SPH_INLINE void +sph_enc16le(void *dst, unsigned val) +{ + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = val >> 8; +} + +static SPH_INLINE unsigned +sph_dec16le(const void *src) +{ + return (unsigned)(((const unsigned char *)src)[0]) + | ((unsigned)(((const unsigned char *)src)[1]) << 8); +} + +/** + * Encode a 32-bit value into the provided buffer (big endian convention). 
+ * + * @param dst the destination buffer + * @param val the 32-bit value to encode + */ +static SPH_INLINE void +sph_enc32be(void *dst, sph_u32 val) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_LITTLE_ENDIAN + val = sph_bswap32(val); +#endif + *(sph_u32 *)dst = val; +#else + if (((SPH_UPTR)dst & 3) == 0) { +#if SPH_LITTLE_ENDIAN + val = sph_bswap32(val); +#endif + *(sph_u32 *)dst = val; + } else { + ((unsigned char *)dst)[0] = (val >> 24); + ((unsigned char *)dst)[1] = (val >> 16); + ((unsigned char *)dst)[2] = (val >> 8); + ((unsigned char *)dst)[3] = val; + } +#endif +#else + ((unsigned char *)dst)[0] = (val >> 24); + ((unsigned char *)dst)[1] = (val >> 16); + ((unsigned char *)dst)[2] = (val >> 8); + ((unsigned char *)dst)[3] = val; +#endif +} + +/** + * Encode a 32-bit value into the provided buffer (big endian convention). + * The destination buffer must be properly aligned. + * + * @param dst the destination buffer (32-bit aligned) + * @param val the value to encode + */ +static SPH_INLINE void +sph_enc32be_aligned(void *dst, sph_u32 val) +{ +#if SPH_LITTLE_ENDIAN + *(sph_u32 *)dst = sph_bswap32(val); +#elif SPH_BIG_ENDIAN + *(sph_u32 *)dst = val; +#else + ((unsigned char *)dst)[0] = (val >> 24); + ((unsigned char *)dst)[1] = (val >> 16); + ((unsigned char *)dst)[2] = (val >> 8); + ((unsigned char *)dst)[3] = val; +#endif +} + +/** + * Decode a 32-bit value from the provided buffer (big endian convention). 
+ * + * @param src the source buffer + * @return the decoded value + */ +static SPH_INLINE sph_u32 +sph_dec32be(const void *src) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_LITTLE_ENDIAN + return sph_bswap32(*(const sph_u32 *)src); +#else + return *(const sph_u32 *)src; +#endif +#else + if (((SPH_UPTR)src & 3) == 0) { +#if SPH_LITTLE_ENDIAN + return sph_bswap32(*(const sph_u32 *)src); +#else + return *(const sph_u32 *)src; +#endif + } else { + return ((sph_u32)(((const unsigned char *)src)[0]) << 24) + | ((sph_u32)(((const unsigned char *)src)[1]) << 16) + | ((sph_u32)(((const unsigned char *)src)[2]) << 8) + | (sph_u32)(((const unsigned char *)src)[3]); + } +#endif +#else + return ((sph_u32)(((const unsigned char *)src)[0]) << 24) + | ((sph_u32)(((const unsigned char *)src)[1]) << 16) + | ((sph_u32)(((const unsigned char *)src)[2]) << 8) + | (sph_u32)(((const unsigned char *)src)[3]); +#endif +} + +/** + * Decode a 32-bit value from the provided buffer (big endian convention). + * The source buffer must be properly aligned. + * + * @param src the source buffer (32-bit aligned) + * @return the decoded value + */ +static SPH_INLINE sph_u32 +sph_dec32be_aligned(const void *src) +{ +#if SPH_LITTLE_ENDIAN + return sph_bswap32(*(const sph_u32 *)src); +#elif SPH_BIG_ENDIAN + return *(const sph_u32 *)src; +#else + return ((sph_u32)(((const unsigned char *)src)[0]) << 24) + | ((sph_u32)(((const unsigned char *)src)[1]) << 16) + | ((sph_u32)(((const unsigned char *)src)[2]) << 8) + | (sph_u32)(((const unsigned char *)src)[3]); +#endif +} + +/** + * Encode a 32-bit value into the provided buffer (little endian convention). 
+ * + * @param dst the destination buffer + * @param val the 32-bit value to encode + */ +static SPH_INLINE void +sph_enc32le(void *dst, sph_u32 val) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_BIG_ENDIAN + val = sph_bswap32(val); +#endif + *(sph_u32 *)dst = val; +#else + if (((SPH_UPTR)dst & 3) == 0) { +#if SPH_BIG_ENDIAN + val = sph_bswap32(val); +#endif + *(sph_u32 *)dst = val; + } else { + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = (val >> 8); + ((unsigned char *)dst)[2] = (val >> 16); + ((unsigned char *)dst)[3] = (val >> 24); + } +#endif +#else + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = (val >> 8); + ((unsigned char *)dst)[2] = (val >> 16); + ((unsigned char *)dst)[3] = (val >> 24); +#endif +} + +/** + * Encode a 32-bit value into the provided buffer (little endian convention). + * The destination buffer must be properly aligned. + * + * @param dst the destination buffer (32-bit aligned) + * @param val the value to encode + */ +static SPH_INLINE void +sph_enc32le_aligned(void *dst, sph_u32 val) +{ +#if SPH_LITTLE_ENDIAN + *(sph_u32 *)dst = val; +#elif SPH_BIG_ENDIAN + *(sph_u32 *)dst = sph_bswap32(val); +#else + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = (val >> 8); + ((unsigned char *)dst)[2] = (val >> 16); + ((unsigned char *)dst)[3] = (val >> 24); +#endif +} + +/** + * Decode a 32-bit value from the provided buffer (little endian convention). 
+ * + * @param src the source buffer + * @return the decoded value + */ +static SPH_INLINE sph_u32 +sph_dec32le(const void *src) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_BIG_ENDIAN + return sph_bswap32(*(const sph_u32 *)src); +#else + return *(const sph_u32 *)src; +#endif +#else + if (((SPH_UPTR)src & 3) == 0) { +#if SPH_BIG_ENDIAN +#if SPH_SPARCV9_GCC && !SPH_NO_ASM + sph_u32 tmp; + + /* + * "__volatile__" is needed here because without it, + * gcc-3.4.3 miscompiles the code and performs the + * access before the test on the address, thus triggering + * a bus error... + */ + __asm__ __volatile__ ( + "lda [%1]0x88,%0" : "=r" (tmp) : "r" (src)); + return tmp; +/* + * On PowerPC, this turns out not to be worth the effort: the inline + * assembly makes GCC optimizer uncomfortable, which tends to nullify + * the decoding gains. + * + * For most hash functions, using this inline assembly trick changes + * hashing speed by less than 5% and often _reduces_ it. The biggest + * gains are for MD4 (+11%) and CubeHash (+30%). For all others, it is + * less then 10%. The speed gain on CubeHash is probably due to the + * chronic shortage of registers that CubeHash endures; for the other + * functions, the generic code appears to be efficient enough already. 
+ * +#elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM + sph_u32 tmp; + + __asm__ __volatile__ ( + "lwbrx %0,0,%1" : "=r" (tmp) : "r" (src)); + return tmp; + */ +#else + return sph_bswap32(*(const sph_u32 *)src); +#endif +#else + return *(const sph_u32 *)src; +#endif + } else { + return (sph_u32)(((const unsigned char *)src)[0]) + | ((sph_u32)(((const unsigned char *)src)[1]) << 8) + | ((sph_u32)(((const unsigned char *)src)[2]) << 16) + | ((sph_u32)(((const unsigned char *)src)[3]) << 24); + } +#endif +#else + return (sph_u32)(((const unsigned char *)src)[0]) + | ((sph_u32)(((const unsigned char *)src)[1]) << 8) + | ((sph_u32)(((const unsigned char *)src)[2]) << 16) + | ((sph_u32)(((const unsigned char *)src)[3]) << 24); +#endif +} + +/** + * Decode a 32-bit value from the provided buffer (little endian convention). + * The source buffer must be properly aligned. + * + * @param src the source buffer (32-bit aligned) + * @return the decoded value + */ +static SPH_INLINE sph_u32 +sph_dec32le_aligned(const void *src) +{ +#if SPH_LITTLE_ENDIAN + return *(const sph_u32 *)src; +#elif SPH_BIG_ENDIAN +#if SPH_SPARCV9_GCC && !SPH_NO_ASM + sph_u32 tmp; + + __asm__ __volatile__ ("lda [%1]0x88,%0" : "=r" (tmp) : "r" (src)); + return tmp; +/* + * Not worth it generally. + * +#elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM + sph_u32 tmp; + + __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (tmp) : "r" (src)); + return tmp; + */ +#else + return sph_bswap32(*(const sph_u32 *)src); +#endif +#else + return (sph_u32)(((const unsigned char *)src)[0]) + | ((sph_u32)(((const unsigned char *)src)[1]) << 8) + | ((sph_u32)(((const unsigned char *)src)[2]) << 16) + | ((sph_u32)(((const unsigned char *)src)[3]) << 24); +#endif +} + +#if SPH_64 + +/** + * Encode a 64-bit value into the provided buffer (big endian convention). 
+ * + * @param dst the destination buffer + * @param val the 64-bit value to encode + */ +static SPH_INLINE void +sph_enc64be(void *dst, sph_u64 val) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_LITTLE_ENDIAN + val = sph_bswap64(val); +#endif + *(sph_u64 *)dst = val; +#else + if (((SPH_UPTR)dst & 7) == 0) { +#if SPH_LITTLE_ENDIAN + val = sph_bswap64(val); +#endif + *(sph_u64 *)dst = val; + } else { + ((unsigned char *)dst)[0] = (val >> 56); + ((unsigned char *)dst)[1] = (val >> 48); + ((unsigned char *)dst)[2] = (val >> 40); + ((unsigned char *)dst)[3] = (val >> 32); + ((unsigned char *)dst)[4] = (val >> 24); + ((unsigned char *)dst)[5] = (val >> 16); + ((unsigned char *)dst)[6] = (val >> 8); + ((unsigned char *)dst)[7] = val; + } +#endif +#else + ((unsigned char *)dst)[0] = (val >> 56); + ((unsigned char *)dst)[1] = (val >> 48); + ((unsigned char *)dst)[2] = (val >> 40); + ((unsigned char *)dst)[3] = (val >> 32); + ((unsigned char *)dst)[4] = (val >> 24); + ((unsigned char *)dst)[5] = (val >> 16); + ((unsigned char *)dst)[6] = (val >> 8); + ((unsigned char *)dst)[7] = val; +#endif +} + +/** + * Encode a 64-bit value into the provided buffer (big endian convention). + * The destination buffer must be properly aligned. + * + * @param dst the destination buffer (64-bit aligned) + * @param val the value to encode + */ +static SPH_INLINE void +sph_enc64be_aligned(void *dst, sph_u64 val) +{ +#if SPH_LITTLE_ENDIAN + *(sph_u64 *)dst = sph_bswap64(val); +#elif SPH_BIG_ENDIAN + *(sph_u64 *)dst = val; +#else + ((unsigned char *)dst)[0] = (val >> 56); + ((unsigned char *)dst)[1] = (val >> 48); + ((unsigned char *)dst)[2] = (val >> 40); + ((unsigned char *)dst)[3] = (val >> 32); + ((unsigned char *)dst)[4] = (val >> 24); + ((unsigned char *)dst)[5] = (val >> 16); + ((unsigned char *)dst)[6] = (val >> 8); + ((unsigned char *)dst)[7] = val; +#endif +} + +/** + * Decode a 64-bit value from the provided buffer (big endian convention). 
+ * + * @param src the source buffer + * @return the decoded value + */ +static SPH_INLINE sph_u64 +sph_dec64be(const void *src) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_LITTLE_ENDIAN + return sph_bswap64(*(const sph_u64 *)src); +#else + return *(const sph_u64 *)src; +#endif +#else + if (((SPH_UPTR)src & 7) == 0) { +#if SPH_LITTLE_ENDIAN + return sph_bswap64(*(const sph_u64 *)src); +#else + return *(const sph_u64 *)src; +#endif + } else { + return ((sph_u64)(((const unsigned char *)src)[0]) << 56) + | ((sph_u64)(((const unsigned char *)src)[1]) << 48) + | ((sph_u64)(((const unsigned char *)src)[2]) << 40) + | ((sph_u64)(((const unsigned char *)src)[3]) << 32) + | ((sph_u64)(((const unsigned char *)src)[4]) << 24) + | ((sph_u64)(((const unsigned char *)src)[5]) << 16) + | ((sph_u64)(((const unsigned char *)src)[6]) << 8) + | (sph_u64)(((const unsigned char *)src)[7]); + } +#endif +#else + return ((sph_u64)(((const unsigned char *)src)[0]) << 56) + | ((sph_u64)(((const unsigned char *)src)[1]) << 48) + | ((sph_u64)(((const unsigned char *)src)[2]) << 40) + | ((sph_u64)(((const unsigned char *)src)[3]) << 32) + | ((sph_u64)(((const unsigned char *)src)[4]) << 24) + | ((sph_u64)(((const unsigned char *)src)[5]) << 16) + | ((sph_u64)(((const unsigned char *)src)[6]) << 8) + | (sph_u64)(((const unsigned char *)src)[7]); +#endif +} + +/** + * Decode a 64-bit value from the provided buffer (big endian convention). + * The source buffer must be properly aligned. 
+ * + * @param src the source buffer (64-bit aligned) + * @return the decoded value + */ +static SPH_INLINE sph_u64 +sph_dec64be_aligned(const void *src) +{ +#if SPH_LITTLE_ENDIAN + return sph_bswap64(*(const sph_u64 *)src); +#elif SPH_BIG_ENDIAN + return *(const sph_u64 *)src; +#else + return ((sph_u64)(((const unsigned char *)src)[0]) << 56) + | ((sph_u64)(((const unsigned char *)src)[1]) << 48) + | ((sph_u64)(((const unsigned char *)src)[2]) << 40) + | ((sph_u64)(((const unsigned char *)src)[3]) << 32) + | ((sph_u64)(((const unsigned char *)src)[4]) << 24) + | ((sph_u64)(((const unsigned char *)src)[5]) << 16) + | ((sph_u64)(((const unsigned char *)src)[6]) << 8) + | (sph_u64)(((const unsigned char *)src)[7]); +#endif +} + +/** + * Encode a 64-bit value into the provided buffer (little endian convention). + * + * @param dst the destination buffer + * @param val the 64-bit value to encode + */ +static SPH_INLINE void +sph_enc64le(void *dst, sph_u64 val) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_BIG_ENDIAN + val = sph_bswap64(val); +#endif + *(sph_u64 *)dst = val; +#else + if (((SPH_UPTR)dst & 7) == 0) { +#if SPH_BIG_ENDIAN + val = sph_bswap64(val); +#endif + *(sph_u64 *)dst = val; + } else { + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = (val >> 8); + ((unsigned char *)dst)[2] = (val >> 16); + ((unsigned char *)dst)[3] = (val >> 24); + ((unsigned char *)dst)[4] = (val >> 32); + ((unsigned char *)dst)[5] = (val >> 40); + ((unsigned char *)dst)[6] = (val >> 48); + ((unsigned char *)dst)[7] = (val >> 56); + } +#endif +#else + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = (val >> 8); + ((unsigned char *)dst)[2] = (val >> 16); + ((unsigned char *)dst)[3] = (val >> 24); + ((unsigned char *)dst)[4] = (val >> 32); + ((unsigned char *)dst)[5] = (val >> 40); + ((unsigned char *)dst)[6] = (val >> 48); + ((unsigned char *)dst)[7] = (val >> 56); +#endif +} + +/** + * Encode a 64-bit value into the provided buffer (little endian 
convention). + * The destination buffer must be properly aligned. + * + * @param dst the destination buffer (64-bit aligned) + * @param val the value to encode + */ +static SPH_INLINE void +sph_enc64le_aligned(void *dst, sph_u64 val) +{ +#if SPH_LITTLE_ENDIAN + *(sph_u64 *)dst = val; +#elif SPH_BIG_ENDIAN + *(sph_u64 *)dst = sph_bswap64(val); +#else + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = (val >> 8); + ((unsigned char *)dst)[2] = (val >> 16); + ((unsigned char *)dst)[3] = (val >> 24); + ((unsigned char *)dst)[4] = (val >> 32); + ((unsigned char *)dst)[5] = (val >> 40); + ((unsigned char *)dst)[6] = (val >> 48); + ((unsigned char *)dst)[7] = (val >> 56); +#endif +} + +/** + * Decode a 64-bit value from the provided buffer (little endian convention). + * + * @param src the source buffer + * @return the decoded value + */ +static SPH_INLINE sph_u64 +sph_dec64le(const void *src) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_BIG_ENDIAN + return sph_bswap64(*(const sph_u64 *)src); +#else + return *(const sph_u64 *)src; +#endif +#else + if (((SPH_UPTR)src & 7) == 0) { +#if SPH_BIG_ENDIAN +#if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM + sph_u64 tmp; + + __asm__ __volatile__ ( + "ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src)); + return tmp; +/* + * Not worth it generally. 
+ * +#elif SPH_PPC32_GCC && !SPH_NO_ASM + return (sph_u64)sph_dec32le_aligned(src) + | ((sph_u64)sph_dec32le_aligned( + (const char *)src + 4) << 32); +#elif SPH_PPC64_GCC && !SPH_NO_ASM + sph_u64 tmp; + + __asm__ __volatile__ ( + "ldbrx %0,0,%1" : "=r" (tmp) : "r" (src)); + return tmp; + */ +#else + return sph_bswap64(*(const sph_u64 *)src); +#endif +#else + return *(const sph_u64 *)src; +#endif + } else { + return (sph_u64)(((const unsigned char *)src)[0]) + | ((sph_u64)(((const unsigned char *)src)[1]) << 8) + | ((sph_u64)(((const unsigned char *)src)[2]) << 16) + | ((sph_u64)(((const unsigned char *)src)[3]) << 24) + | ((sph_u64)(((const unsigned char *)src)[4]) << 32) + | ((sph_u64)(((const unsigned char *)src)[5]) << 40) + | ((sph_u64)(((const unsigned char *)src)[6]) << 48) + | ((sph_u64)(((const unsigned char *)src)[7]) << 56); + } +#endif +#else + return (sph_u64)(((const unsigned char *)src)[0]) + | ((sph_u64)(((const unsigned char *)src)[1]) << 8) + | ((sph_u64)(((const unsigned char *)src)[2]) << 16) + | ((sph_u64)(((const unsigned char *)src)[3]) << 24) + | ((sph_u64)(((const unsigned char *)src)[4]) << 32) + | ((sph_u64)(((const unsigned char *)src)[5]) << 40) + | ((sph_u64)(((const unsigned char *)src)[6]) << 48) + | ((sph_u64)(((const unsigned char *)src)[7]) << 56); +#endif +} + +/** + * Decode a 64-bit value from the provided buffer (little endian convention). + * The source buffer must be properly aligned. + * + * @param src the source buffer (64-bit aligned) + * @return the decoded value + */ +static SPH_INLINE sph_u64 +sph_dec64le_aligned(const void *src) +{ +#if SPH_LITTLE_ENDIAN + return *(const sph_u64 *)src; +#elif SPH_BIG_ENDIAN +#if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM + sph_u64 tmp; + + __asm__ __volatile__ ("ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src)); + return tmp; +/* + * Not worth it generally. 
+ * +#elif SPH_PPC32_GCC && !SPH_NO_ASM + return (sph_u64)sph_dec32le_aligned(src) + | ((sph_u64)sph_dec32le_aligned((const char *)src + 4) << 32); +#elif SPH_PPC64_GCC && !SPH_NO_ASM + sph_u64 tmp; + + __asm__ __volatile__ ("ldbrx %0,0,%1" : "=r" (tmp) : "r" (src)); + return tmp; + */ +#else + return sph_bswap64(*(const sph_u64 *)src); +#endif +#else + return (sph_u64)(((const unsigned char *)src)[0]) + | ((sph_u64)(((const unsigned char *)src)[1]) << 8) + | ((sph_u64)(((const unsigned char *)src)[2]) << 16) + | ((sph_u64)(((const unsigned char *)src)[3]) << 24) + | ((sph_u64)(((const unsigned char *)src)[4]) << 32) + | ((sph_u64)(((const unsigned char *)src)[5]) << 40) + | ((sph_u64)(((const unsigned char *)src)[6]) << 48) + | ((sph_u64)(((const unsigned char *)src)[7]) << 56); +#endif +} + +#endif + +#endif /* Doxygen excluded block */ + +#endif diff --git a/uint256.h b/uint256.h new file mode 100644 index 0000000..2a252c9 --- /dev/null +++ b/uint256.h @@ -0,0 +1,784 @@ +// Copyright (c) 2009-2010 Satoshi Nakamoto +// Copyright (c) 2009-2012 The Bitcoin developers +// Distributed under the MIT/X11 software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. +#ifndef BITCOIN_UINT256_H +#define BITCOIN_UINT256_H + +#include +#include +#include +#include +#include +#include + +typedef long long int64; +typedef unsigned long long uint64; + + +inline int Testuint256AdHoc(std::vector vArg); + + + +/** Base class without constructors for uint256 and uint160. + * This makes the compiler let you use it in a union. 
+ */ +template +class base_uint +{ +protected: + enum { WIDTH=BITS/32 }; + uint32_t pn[WIDTH]; +public: + + bool operator!() const + { + for (int i = 0; i < WIDTH; i++) + if (pn[i] != 0) + return false; + return true; + } + + const base_uint operator~() const + { + base_uint ret; + for (int i = 0; i < WIDTH; i++) + ret.pn[i] = ~pn[i]; + return ret; + } + + const base_uint operator-() const + { + base_uint ret; + for (int i = 0; i < WIDTH; i++) + ret.pn[i] = ~pn[i]; + ret++; + return ret; + } + + double getdouble() const + { + double ret = 0.0; + double fact = 1.0; + for (int i = 0; i < WIDTH; i++) { + ret += fact * pn[i]; + fact *= 4294967296.0; + } + return ret; + } + + base_uint& operator=(uint64 b) + { + pn[0] = (unsigned int)b; + pn[1] = (unsigned int)(b >> 32); + for (int i = 2; i < WIDTH; i++) + pn[i] = 0; + return *this; + } + + base_uint& operator^=(const base_uint& b) + { + for (int i = 0; i < WIDTH; i++) + pn[i] ^= b.pn[i]; + return *this; + } + + base_uint& operator&=(const base_uint& b) + { + for (int i = 0; i < WIDTH; i++) + pn[i] &= b.pn[i]; + return *this; + } + + base_uint& operator|=(const base_uint& b) + { + for (int i = 0; i < WIDTH; i++) + pn[i] |= b.pn[i]; + return *this; + } + + base_uint& operator^=(uint64 b) + { + pn[0] ^= (unsigned int)b; + pn[1] ^= (unsigned int)(b >> 32); + return *this; + } + + base_uint& operator|=(uint64 b) + { + pn[0] |= (unsigned int)b; + pn[1] |= (unsigned int)(b >> 32); + return *this; + } + + base_uint& operator<<=(unsigned int shift) + { + base_uint a(*this); + for (int i = 0; i < WIDTH; i++) + pn[i] = 0; + int k = shift / 32; + shift = shift % 32; + for (int i = 0; i < WIDTH; i++) + { + if (i+k+1 < WIDTH && shift != 0) + pn[i+k+1] |= (a.pn[i] >> (32-shift)); + if (i+k < WIDTH) + pn[i+k] |= (a.pn[i] << shift); + } + return *this; + } + + base_uint& operator>>=(unsigned int shift) + { + base_uint a(*this); + for (int i = 0; i < WIDTH; i++) + pn[i] = 0; + int k = shift / 32; + shift = shift % 32; + for (int i = 0; 
i < WIDTH; i++) + { + if (i-k-1 >= 0 && shift != 0) + pn[i-k-1] |= (a.pn[i] << (32-shift)); + if (i-k >= 0) + pn[i-k] |= (a.pn[i] >> shift); + } + return *this; + } + + base_uint& operator+=(const base_uint& b) + { + uint64 carry = 0; + for (int i = 0; i < WIDTH; i++) + { + uint64 n = carry + pn[i] + b.pn[i]; + pn[i] = n & 0xffffffff; + carry = n >> 32; + } + return *this; + } + + base_uint& operator-=(const base_uint& b) + { + *this += -b; + return *this; + } + + base_uint& operator+=(uint64 b64) + { + base_uint b; + b = b64; + *this += b; + return *this; + } + + base_uint& operator-=(uint64 b64) + { + base_uint b; + b = b64; + *this += -b; + return *this; + } + + + base_uint& operator++() + { + // prefix operator + int i = 0; + while (++pn[i] == 0 && i < WIDTH-1) + i++; + return *this; + } + + const base_uint operator++(int) + { + // postfix operator + const base_uint ret = *this; + ++(*this); + return ret; + } + + base_uint& operator--() + { + // prefix operator + int i = 0; + while (--pn[i] == -1 && i < WIDTH-1) + i++; + return *this; + } + + const base_uint operator--(int) + { + // postfix operator + const base_uint ret = *this; + --(*this); + return ret; + } + + + friend inline bool operator<(const base_uint& a, const base_uint& b) + { + for (int i = base_uint::WIDTH-1; i >= 0; i--) + { + if (a.pn[i] < b.pn[i]) + return true; + else if (a.pn[i] > b.pn[i]) + return false; + } + return false; + } + + friend inline bool operator<=(const base_uint& a, const base_uint& b) + { + for (int i = base_uint::WIDTH-1; i >= 0; i--) + { + if (a.pn[i] < b.pn[i]) + return true; + else if (a.pn[i] > b.pn[i]) + return false; + } + return true; + } + + friend inline bool operator>(const base_uint& a, const base_uint& b) + { + for (int i = base_uint::WIDTH-1; i >= 0; i--) + { + if (a.pn[i] > b.pn[i]) + return true; + else if (a.pn[i] < b.pn[i]) + return false; + } + return false; + } + + friend inline bool operator>=(const base_uint& a, const base_uint& b) + { + for (int i = 
base_uint::WIDTH-1; i >= 0; i--) + { + if (a.pn[i] > b.pn[i]) + return true; + else if (a.pn[i] < b.pn[i]) + return false; + } + return true; + } + + friend inline bool operator==(const base_uint& a, const base_uint& b) + { + for (int i = 0; i < base_uint::WIDTH; i++) + if (a.pn[i] != b.pn[i]) + return false; + return true; + } + + friend inline bool operator==(const base_uint& a, uint64 b) + { + if (a.pn[0] != (unsigned int)b) + return false; + if (a.pn[1] != (unsigned int)(b >> 32)) + return false; + for (int i = 2; i < base_uint::WIDTH; i++) + if (a.pn[i] != 0) + return false; + return true; + } + + friend inline bool operator!=(const base_uint& a, const base_uint& b) + { + return (!(a == b)); + } + + friend inline bool operator!=(const base_uint& a, uint64 b) + { + return (!(a == b)); + } + + + + std::string GetHex() const + { + char psz[sizeof(pn)*2 + 1]; + for (unsigned int i = 0; i < sizeof(pn); i++) + sprintf(psz + i*2, "%02x", ((unsigned char*)pn)[sizeof(pn) - i - 1]); + return std::string(psz, psz + sizeof(pn)*2); + } + + void SetHex(const char* psz) + { + for (int i = 0; i < WIDTH; i++) + pn[i] = 0; + + // skip leading spaces + while (isspace(*psz)) + psz++; + + // skip 0x + if (psz[0] == '0' && tolower(psz[1]) == 'x') + psz += 2; + + // hex string to uint + static const unsigned char phexdigit[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, 0,0xa,0xb,0xc,0xd,0xe,0xf,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0xa,0xb,0xc,0xd,0xe,0xf,0,0,0,0,0,0,0,0,0 }; + const char* pbegin = psz; + while (phexdigit[(unsigned char)*psz] || *psz == '0') + psz++; + psz--; + unsigned char* p1 = (unsigned char*)pn; + unsigned char* pend = p1 + WIDTH * 4; + while (psz >= pbegin && p1 < pend) + { + *p1 = phexdigit[(unsigned char)*psz--]; + if (psz >= pbegin) + { + *p1 |= (phexdigit[(unsigned char)*psz--] << 4); + p1++; + } + } + } + + void SetHex(const std::string& str) + { 
+ SetHex(str.c_str()); + } + + std::string ToString() const + { + return (GetHex()); + } + + unsigned char* begin() + { + return (unsigned char*)&pn[0]; + } + + unsigned char* end() + { + return (unsigned char*)&pn[WIDTH]; + } + + const unsigned char* begin() const + { + return (unsigned char*)&pn[0]; + } + + const unsigned char* end() const + { + return (unsigned char*)&pn[WIDTH]; + } + + unsigned int size() const + { + return sizeof(pn); + } + + uint64 Get64(int n=0) const + { + return pn[2*n] | (uint64)pn[2*n+1] << 32; + } + +// unsigned int GetSerializeSize(int nType=0, int nVersion=PROTOCOL_VERSION) const + unsigned int GetSerializeSize(int nType, int nVersion) const + { + return sizeof(pn); + } + + template +// void Serialize(Stream& s, int nType=0, int nVersion=PROTOCOL_VERSION) const + void Serialize(Stream& s, int nType, int nVersion) const + { + s.write((char*)pn, sizeof(pn)); + } + + template +// void Unserialize(Stream& s, int nType=0, int nVersion=PROTOCOL_VERSION) + void Unserialize(Stream& s, int nType, int nVersion) + { + s.read((char*)pn, sizeof(pn)); + } + + + friend class uint160; + friend class uint256; + friend inline int Testuint256AdHoc(std::vector vArg); +}; + +typedef base_uint<160> base_uint160; +typedef base_uint<256> base_uint256; + + + +// +// uint160 and uint256 could be implemented as templates, but to keep +// compile errors and debugging cleaner, they're copy and pasted. 
+// + + + +////////////////////////////////////////////////////////////////////////////// +// +// uint160 +// + +/** 160-bit unsigned integer */ +class uint160 : public base_uint160 +{ +public: + typedef base_uint160 basetype; + + uint160() + { + for (int i = 0; i < WIDTH; i++) + pn[i] = 0; + } + + uint160(const basetype& b) + { + for (int i = 0; i < WIDTH; i++) + pn[i] = b.pn[i]; + } + + uint160& operator=(const basetype& b) + { + for (int i = 0; i < WIDTH; i++) + pn[i] = b.pn[i]; + return *this; + } + + uint160(uint64 b) + { + pn[0] = (unsigned int)b; + pn[1] = (unsigned int)(b >> 32); + for (int i = 2; i < WIDTH; i++) + pn[i] = 0; + } + + uint160& operator=(uint64 b) + { + pn[0] = (unsigned int)b; + pn[1] = (unsigned int)(b >> 32); + for (int i = 2; i < WIDTH; i++) + pn[i] = 0; + return *this; + } + + explicit uint160(const std::string& str) + { + SetHex(str); + } + + explicit uint160(const std::vector& vch) + { + if (vch.size() == sizeof(pn)) + memcpy(pn, &vch[0], sizeof(pn)); + else + *this = 0; + } +}; + +inline bool operator==(const uint160& a, uint64 b) { return (base_uint160)a == b; } +inline bool operator!=(const uint160& a, uint64 b) { return (base_uint160)a != b; } +inline const uint160 operator<<(const base_uint160& a, unsigned int shift) { return uint160(a) <<= shift; } +inline const uint160 operator>>(const base_uint160& a, unsigned int shift) { return uint160(a) >>= shift; } +inline const uint160 operator<<(const uint160& a, unsigned int shift) { return uint160(a) <<= shift; } +inline const uint160 operator>>(const uint160& a, unsigned int shift) { return uint160(a) >>= shift; } + +inline const uint160 operator^(const base_uint160& a, const base_uint160& b) { return uint160(a) ^= b; } +inline const uint160 operator&(const base_uint160& a, const base_uint160& b) { return uint160(a) &= b; } +inline const uint160 operator|(const base_uint160& a, const base_uint160& b) { return uint160(a) |= b; } +inline const uint160 operator+(const base_uint160& a, 
const base_uint160& b) { return uint160(a) += b; } +inline const uint160 operator-(const base_uint160& a, const base_uint160& b) { return uint160(a) -= b; } + +inline bool operator<(const base_uint160& a, const uint160& b) { return (base_uint160)a < (base_uint160)b; } +inline bool operator<=(const base_uint160& a, const uint160& b) { return (base_uint160)a <= (base_uint160)b; } +inline bool operator>(const base_uint160& a, const uint160& b) { return (base_uint160)a > (base_uint160)b; } +inline bool operator>=(const base_uint160& a, const uint160& b) { return (base_uint160)a >= (base_uint160)b; } +inline bool operator==(const base_uint160& a, const uint160& b) { return (base_uint160)a == (base_uint160)b; } +inline bool operator!=(const base_uint160& a, const uint160& b) { return (base_uint160)a != (base_uint160)b; } +inline const uint160 operator^(const base_uint160& a, const uint160& b) { return (base_uint160)a ^ (base_uint160)b; } +inline const uint160 operator&(const base_uint160& a, const uint160& b) { return (base_uint160)a & (base_uint160)b; } +inline const uint160 operator|(const base_uint160& a, const uint160& b) { return (base_uint160)a | (base_uint160)b; } +inline const uint160 operator+(const base_uint160& a, const uint160& b) { return (base_uint160)a + (base_uint160)b; } +inline const uint160 operator-(const base_uint160& a, const uint160& b) { return (base_uint160)a - (base_uint160)b; } + +inline bool operator<(const uint160& a, const base_uint160& b) { return (base_uint160)a < (base_uint160)b; } +inline bool operator<=(const uint160& a, const base_uint160& b) { return (base_uint160)a <= (base_uint160)b; } +inline bool operator>(const uint160& a, const base_uint160& b) { return (base_uint160)a > (base_uint160)b; } +inline bool operator>=(const uint160& a, const base_uint160& b) { return (base_uint160)a >= (base_uint160)b; } +inline bool operator==(const uint160& a, const base_uint160& b) { return (base_uint160)a == (base_uint160)b; } +inline bool 
operator!=(const uint160& a, const base_uint160& b) { return (base_uint160)a != (base_uint160)b; } +inline const uint160 operator^(const uint160& a, const base_uint160& b) { return (base_uint160)a ^ (base_uint160)b; } +inline const uint160 operator&(const uint160& a, const base_uint160& b) { return (base_uint160)a & (base_uint160)b; } +inline const uint160 operator|(const uint160& a, const base_uint160& b) { return (base_uint160)a | (base_uint160)b; } +inline const uint160 operator+(const uint160& a, const base_uint160& b) { return (base_uint160)a + (base_uint160)b; } +inline const uint160 operator-(const uint160& a, const base_uint160& b) { return (base_uint160)a - (base_uint160)b; } + +inline bool operator<(const uint160& a, const uint160& b) { return (base_uint160)a < (base_uint160)b; } +inline bool operator<=(const uint160& a, const uint160& b) { return (base_uint160)a <= (base_uint160)b; } +inline bool operator>(const uint160& a, const uint160& b) { return (base_uint160)a > (base_uint160)b; } +inline bool operator>=(const uint160& a, const uint160& b) { return (base_uint160)a >= (base_uint160)b; } +inline bool operator==(const uint160& a, const uint160& b) { return (base_uint160)a == (base_uint160)b; } +inline bool operator!=(const uint160& a, const uint160& b) { return (base_uint160)a != (base_uint160)b; } +inline const uint160 operator^(const uint160& a, const uint160& b) { return (base_uint160)a ^ (base_uint160)b; } +inline const uint160 operator&(const uint160& a, const uint160& b) { return (base_uint160)a & (base_uint160)b; } +inline const uint160 operator|(const uint160& a, const uint160& b) { return (base_uint160)a | (base_uint160)b; } +inline const uint160 operator+(const uint160& a, const uint160& b) { return (base_uint160)a + (base_uint160)b; } +inline const uint160 operator-(const uint160& a, const uint160& b) { return (base_uint160)a - (base_uint160)b; } + + + + + + +////////////////////////////////////////////////////////////////////////////// +// 
+// uint256 +// + +/** 256-bit unsigned integer */ +class uint256 : public base_uint256 +{ +public: + typedef base_uint256 basetype; + + uint256() + { + for (int i = 0; i < WIDTH; i++) + pn[i] = 0; + } + + uint256(const basetype& b) + { + for (int i = 0; i < WIDTH; i++) + pn[i] = b.pn[i]; + } + + uint256& operator=(const basetype& b) + { + for (int i = 0; i < WIDTH; i++) + pn[i] = b.pn[i]; + return *this; + } + + uint256(uint64 b) + { + pn[0] = (unsigned int)b; + pn[1] = (unsigned int)(b >> 32); + for (int i = 2; i < WIDTH; i++) + pn[i] = 0; + } + + uint256& operator=(uint64 b) + { + pn[0] = (unsigned int)b; + pn[1] = (unsigned int)(b >> 32); + for (int i = 2; i < WIDTH; i++) + pn[i] = 0; + return *this; + } + + explicit uint256(const std::string& str) + { + SetHex(str); + } + + explicit uint256(const std::vector& vch) + { + if (vch.size() == sizeof(pn)) + memcpy(pn, &vch[0], sizeof(pn)); + else + *this = 0; + } +}; + +inline bool operator==(const uint256& a, uint64 b) { return (base_uint256)a == b; } +inline bool operator!=(const uint256& a, uint64 b) { return (base_uint256)a != b; } +inline const uint256 operator<<(const base_uint256& a, unsigned int shift) { return uint256(a) <<= shift; } +inline const uint256 operator>>(const base_uint256& a, unsigned int shift) { return uint256(a) >>= shift; } +inline const uint256 operator<<(const uint256& a, unsigned int shift) { return uint256(a) <<= shift; } +inline const uint256 operator>>(const uint256& a, unsigned int shift) { return uint256(a) >>= shift; } + +inline const uint256 operator^(const base_uint256& a, const base_uint256& b) { return uint256(a) ^= b; } +inline const uint256 operator&(const base_uint256& a, const base_uint256& b) { return uint256(a) &= b; } +inline const uint256 operator|(const base_uint256& a, const base_uint256& b) { return uint256(a) |= b; } +inline const uint256 operator+(const base_uint256& a, const base_uint256& b) { return uint256(a) += b; } +inline const uint256 operator-(const 
base_uint256& a, const base_uint256& b) { return uint256(a) -= b; } + +inline bool operator<(const base_uint256& a, const uint256& b) { return (base_uint256)a < (base_uint256)b; } +inline bool operator<=(const base_uint256& a, const uint256& b) { return (base_uint256)a <= (base_uint256)b; } +inline bool operator>(const base_uint256& a, const uint256& b) { return (base_uint256)a > (base_uint256)b; } +inline bool operator>=(const base_uint256& a, const uint256& b) { return (base_uint256)a >= (base_uint256)b; } +inline bool operator==(const base_uint256& a, const uint256& b) { return (base_uint256)a == (base_uint256)b; } +inline bool operator!=(const base_uint256& a, const uint256& b) { return (base_uint256)a != (base_uint256)b; } +inline const uint256 operator^(const base_uint256& a, const uint256& b) { return (base_uint256)a ^ (base_uint256)b; } +inline const uint256 operator&(const base_uint256& a, const uint256& b) { return (base_uint256)a & (base_uint256)b; } +inline const uint256 operator|(const base_uint256& a, const uint256& b) { return (base_uint256)a | (base_uint256)b; } +inline const uint256 operator+(const base_uint256& a, const uint256& b) { return (base_uint256)a + (base_uint256)b; } +inline const uint256 operator-(const base_uint256& a, const uint256& b) { return (base_uint256)a - (base_uint256)b; } + +inline bool operator<(const uint256& a, const base_uint256& b) { return (base_uint256)a < (base_uint256)b; } +inline bool operator<=(const uint256& a, const base_uint256& b) { return (base_uint256)a <= (base_uint256)b; } +inline bool operator>(const uint256& a, const base_uint256& b) { return (base_uint256)a > (base_uint256)b; } +inline bool operator>=(const uint256& a, const base_uint256& b) { return (base_uint256)a >= (base_uint256)b; } +inline bool operator==(const uint256& a, const base_uint256& b) { return (base_uint256)a == (base_uint256)b; } +inline bool operator!=(const uint256& a, const base_uint256& b) { return (base_uint256)a != 
(base_uint256)b; } +inline const uint256 operator^(const uint256& a, const base_uint256& b) { return (base_uint256)a ^ (base_uint256)b; } +inline const uint256 operator&(const uint256& a, const base_uint256& b) { return (base_uint256)a & (base_uint256)b; } +inline const uint256 operator|(const uint256& a, const base_uint256& b) { return (base_uint256)a | (base_uint256)b; } +inline const uint256 operator+(const uint256& a, const base_uint256& b) { return (base_uint256)a + (base_uint256)b; } +inline const uint256 operator-(const uint256& a, const base_uint256& b) { return (base_uint256)a - (base_uint256)b; } + +inline bool operator<(const uint256& a, const uint256& b) { return (base_uint256)a < (base_uint256)b; } +inline bool operator<=(const uint256& a, const uint256& b) { return (base_uint256)a <= (base_uint256)b; } +inline bool operator>(const uint256& a, const uint256& b) { return (base_uint256)a > (base_uint256)b; } +inline bool operator>=(const uint256& a, const uint256& b) { return (base_uint256)a >= (base_uint256)b; } +inline bool operator==(const uint256& a, const uint256& b) { return (base_uint256)a == (base_uint256)b; } +inline bool operator!=(const uint256& a, const uint256& b) { return (base_uint256)a != (base_uint256)b; } +inline const uint256 operator^(const uint256& a, const uint256& b) { return (base_uint256)a ^ (base_uint256)b; } +inline const uint256 operator&(const uint256& a, const uint256& b) { return (base_uint256)a & (base_uint256)b; } +inline const uint256 operator|(const uint256& a, const uint256& b) { return (base_uint256)a | (base_uint256)b; } +inline const uint256 operator+(const uint256& a, const uint256& b) { return (base_uint256)a + (base_uint256)b; } +inline const uint256 operator-(const uint256& a, const uint256& b) { return (base_uint256)a - (base_uint256)b; } + + + + + + + + + + +#ifdef TEST_UINT256 + +inline int Testuint256AdHoc(std::vector vArg) +{ + uint256 g(0); + + + printf("%s\n", g.ToString().c_str()); + g--; 
printf("g--\n"); + printf("%s\n", g.ToString().c_str()); + g--; printf("g--\n"); + printf("%s\n", g.ToString().c_str()); + g++; printf("g++\n"); + printf("%s\n", g.ToString().c_str()); + g++; printf("g++\n"); + printf("%s\n", g.ToString().c_str()); + g++; printf("g++\n"); + printf("%s\n", g.ToString().c_str()); + g++; printf("g++\n"); + printf("%s\n", g.ToString().c_str()); + + + + uint256 a(7); + printf("a=7\n"); + printf("%s\n", a.ToString().c_str()); + + uint256 b; + printf("b undefined\n"); + printf("%s\n", b.ToString().c_str()); + int c = 3; + + a = c; + a.pn[3] = 15; + printf("%s\n", a.ToString().c_str()); + uint256 k(c); + + a = 5; + a.pn[3] = 15; + printf("%s\n", a.ToString().c_str()); + b = 1; + b <<= 52; + + a |= b; + + a ^= 0x500; + + printf("a %s\n", a.ToString().c_str()); + + a = a | b | (uint256)0x1000; + + + printf("a %s\n", a.ToString().c_str()); + printf("b %s\n", b.ToString().c_str()); + + a = 0xfffffffe; + a.pn[4] = 9; + + printf("%s\n", a.ToString().c_str()); + a++; + printf("%s\n", a.ToString().c_str()); + a++; + printf("%s\n", a.ToString().c_str()); + a++; + printf("%s\n", a.ToString().c_str()); + a++; + printf("%s\n", a.ToString().c_str()); + + a--; + printf("%s\n", a.ToString().c_str()); + a--; + printf("%s\n", a.ToString().c_str()); + a--; + printf("%s\n", a.ToString().c_str()); + uint256 d = a--; + printf("%s\n", d.ToString().c_str()); + printf("%s\n", a.ToString().c_str()); + a--; + printf("%s\n", a.ToString().c_str()); + a--; + printf("%s\n", a.ToString().c_str()); + + d = a; + + printf("%s\n", d.ToString().c_str()); + for (int i = uint256::WIDTH-1; i >= 0; i--) printf("%08x", d.pn[i]); printf("\n"); + + uint256 neg = d; + neg = ~neg; + printf("%s\n", neg.ToString().c_str()); + + + uint256 e = uint256("0xABCDEF123abcdef12345678909832180000011111111"); + printf("\n"); + printf("%s\n", e.ToString().c_str()); + + + printf("\n"); + uint256 x1 = uint256("0xABCDEF123abcdef12345678909832180000011111111"); + uint256 x2; + printf("%s\n", 
x1.ToString().c_str()); + for (int i = 0; i < 270; i += 4) + { + x2 = x1 << i; + printf("%s\n", x2.ToString().c_str()); + } + + printf("\n"); + printf("%s\n", x1.ToString().c_str()); + for (int i = 0; i < 270; i += 4) + { + x2 = x1; + x2 >>= i; + printf("%s\n", x2.ToString().c_str()); + } + + + for (int i = 0; i < 100; i++) + { + uint256 k = (~uint256(0) >> i); + printf("%s\n", k.ToString().c_str()); + } + + for (int i = 0; i < 100; i++) + { + uint256 k = (~uint256(0) << i); + printf("%s\n", k.ToString().c_str()); + } + + return (0); +} + +#endif + +#endif diff --git a/util.c b/util.c new file mode 100644 index 0000000..3f392bf --- /dev/null +++ b/util.c @@ -0,0 +1,1316 @@ +/* + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. See COPYING for more details. + */ + +#define _GNU_SOURCE +#include "cpuminer-config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(WIN32) +#include +#include +#else +#include +#include +#include +#include +#endif +#include "compat.h" +#include "miner.h" +#include "elist.h" + +struct data_buffer { + void *buf; + size_t len; +}; + +struct upload_buffer { + const void *buf; + size_t len; + size_t pos; +}; + +struct header_info { + char *lp_path; + char *reason; + char *stratum_url; +}; + +struct tq_ent { + void *data; + struct list_head q_node; +}; + +struct thread_q { + struct list_head q; + + bool frozen; + + pthread_mutex_t mutex; + pthread_cond_t cond; +}; + +void applog(int prio, const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + +#ifdef HAVE_SYSLOG_H + if (use_syslog) { + va_list ap2; + char *buf; + int len; + + va_copy(ap2, ap); + len = vsnprintf(NULL, 0, fmt, ap2) + 1; + va_end(ap2); + buf = alloca(len); + if (vsnprintf(buf, len, fmt, ap) >= 0) + syslog(prio, "%s", buf); + } +#else + if (0) {} +#endif + else { + char *f; + int len; + time_t now; + struct tm tm, *tm_p; + + time(&now); + + pthread_mutex_lock(&applog_lock); + tm_p = localtime(&now); + memcpy(&tm, tm_p, sizeof(tm)); + pthread_mutex_unlock(&applog_lock); + + len = (int)(40 + strlen(fmt) + 2); + f = (char*)alloca(len); + sprintf(f, "[%d-%02d-%02d %02d:%02d:%02d] %s\n", + tm.tm_year + 1900, + tm.tm_mon + 1, + tm.tm_mday, + tm.tm_hour, + tm.tm_min, + tm.tm_sec, + fmt); + pthread_mutex_lock(&applog_lock); + vfprintf(stderr, f, ap); /* atomic write to stderr */ + fflush(stderr); + pthread_mutex_unlock(&applog_lock); + } + va_end(ap); +} + +static void databuf_free(struct data_buffer *db) +{ + if (!db) + return; + + free(db->buf); + + memset(db, 0, sizeof(*db)); +} + +static size_t all_data_cb(const void *ptr, size_t size, size_t nmemb, + void *user_data) +{ + struct data_buffer *db = (struct data_buffer *)user_data; + size_t len = size * nmemb; + size_t oldlen, newlen; + void *newmem; + static const unsigned char zero = 0; + + oldlen = db->len; + newlen = oldlen + len; + + newmem = realloc(db->buf, newlen + 1); + if (!newmem) + return 0; + + db->buf = newmem; + db->len = newlen; + memcpy((char*)db->buf + oldlen, ptr, len); + memcpy((char*)db->buf + newlen, &zero, 1); /* null terminate */ + + return len; +} + +static size_t upload_data_cb(void *ptr, size_t size, size_t nmemb, + void *user_data) +{ + struct upload_buffer *ub = (struct upload_buffer *)user_data; + unsigned int len = (unsigned int)(size * nmemb); + + if (len > ub->len - ub->pos) + len = (unsigned int)(ub->len - ub->pos); + + if (len) { + memcpy(ptr, (char*)ub->buf + ub->pos, len); + ub->pos += len; + } + + return len; +} + 
+#if LIBCURL_VERSION_NUM >= 0x071200 +static int seek_data_cb(void *user_data, curl_off_t offset, int origin) +{ + struct upload_buffer *ub = (struct upload_buffer *)user_data; + + switch (origin) { + case SEEK_SET: + ub->pos = (size_t)offset; + break; + case SEEK_CUR: + ub->pos += (size_t)offset; + break; + case SEEK_END: + ub->pos = ub->len + (size_t)offset; + break; + default: + return 1; /* CURL_SEEKFUNC_FAIL */ + } + + return 0; /* CURL_SEEKFUNC_OK */ +} +#endif + +static size_t resp_hdr_cb(void *ptr, size_t size, size_t nmemb, void *user_data) +{ + struct header_info *hi = (struct header_info *)user_data; + size_t remlen, slen, ptrlen = size * nmemb; + char *rem, *val = NULL, *key = NULL; + void *tmp; + + val = (char*)calloc(1, ptrlen); + key = (char*)calloc(1, ptrlen); + if (!key || !val) + goto out; + + tmp = memchr(ptr, ':', ptrlen); + if (!tmp || (tmp == ptr)) /* skip empty keys / blanks */ + goto out; + slen = (size_t)((char*)tmp - (char*)ptr); + if ((slen + 1) == ptrlen) /* skip key w/ no value */ + goto out; + memcpy(key, ptr, slen); /* store & nul term key */ + key[slen] = 0; + + rem = (char*)ptr + slen + 1; /* trim value's leading whitespace */ + remlen = ptrlen - slen - 1; + while ((remlen > 0) && (isspace(*rem))) { + remlen--; + rem++; + } + + memcpy(val, rem, remlen); /* store value, trim trailing ws */ + val[remlen] = 0; + while ((*val) && (isspace(val[strlen(val) - 1]))) { + val[strlen(val) - 1] = 0; + } + if (!*val) /* skip blank value */ + goto out; + + if (!strcasecmp("X-Long-Polling", key)) { + hi->lp_path = val; /* steal memory reference */ + val = NULL; + } + + if (!strcasecmp("X-Reject-Reason", key)) { + hi->reason = val; /* steal memory reference */ + val = NULL; + } + + if (!strcasecmp("X-Stratum", key)) { + hi->stratum_url = val; /* steal memory reference */ + val = NULL; + } + +out: + free(key); + free(val); + return ptrlen; +} + +#if LIBCURL_VERSION_NUM >= 0x070f06 +static int sockopt_keepalive_cb(void *userdata, curl_socket_t fd, + 
curlsocktype purpose) +{ + int keepalive = 1; + int tcp_keepcnt = 3; + int tcp_keepidle = 50; + int tcp_keepintvl = 50; +#ifdef WIN32 + DWORD outputBytes; +#endif + +#ifndef WIN32 + if (unlikely(setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &keepalive, + sizeof(keepalive)))) + return 1; +#ifdef __linux + if (unlikely(setsockopt(fd, SOL_TCP, TCP_KEEPCNT, + &tcp_keepcnt, sizeof(tcp_keepcnt)))) + return 1; + if (unlikely(setsockopt(fd, SOL_TCP, TCP_KEEPIDLE, + &tcp_keepidle, sizeof(tcp_keepidle)))) + return 1; + if (unlikely(setsockopt(fd, SOL_TCP, TCP_KEEPINTVL, + &tcp_keepintvl, sizeof(tcp_keepintvl)))) + return 1; +#endif /* __linux */ +#ifdef __APPLE_CC__ + if (unlikely(setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, + &tcp_keepintvl, sizeof(tcp_keepintvl)))) + return 1; +#endif /* __APPLE_CC__ */ +#else /* WIN32 */ + struct tcp_keepalive vals; + vals.onoff = 1; + vals.keepalivetime = tcp_keepidle * 1000; + vals.keepaliveinterval = tcp_keepintvl * 1000; + if (unlikely(WSAIoctl(fd, SIO_KEEPALIVE_VALS, &vals, sizeof(vals), + NULL, 0, &outputBytes, NULL, NULL))) + return 1; +#endif /* WIN32 */ + + return 0; +} +#endif + +json_t *json_rpc_call(CURL *curl, const char *url, + const char *userpass, const char *rpc_req, + bool longpoll_scan, bool longpoll, int *curl_err) +{ + json_t *val, *err_val, *res_val; + int rc; + struct data_buffer all_data = {0}; + struct upload_buffer upload_data; + json_error_t err; + struct curl_slist *headers = NULL; + char len_hdr[64]; + char curl_err_str[CURL_ERROR_SIZE]; + long timeout = longpoll ? 
opt_timeout : 30; + struct header_info hi = {0}; + bool lp_scanning = longpoll_scan && !have_longpoll; + + /* it is assumed that 'curl' is freshly [re]initialized at this pt */ + + if (opt_protocol) + curl_easy_setopt(curl, CURLOPT_VERBOSE, 1); + curl_easy_setopt(curl, CURLOPT_URL, url); + if (opt_cert) + curl_easy_setopt(curl, CURLOPT_CAINFO, opt_cert); + curl_easy_setopt(curl, CURLOPT_ENCODING, ""); + curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1); + curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1); + curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, all_data_cb); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &all_data); + curl_easy_setopt(curl, CURLOPT_READFUNCTION, upload_data_cb); + curl_easy_setopt(curl, CURLOPT_READDATA, &upload_data); +#if LIBCURL_VERSION_NUM >= 0x071200 + curl_easy_setopt(curl, CURLOPT_SEEKFUNCTION, &seek_data_cb); + curl_easy_setopt(curl, CURLOPT_SEEKDATA, &upload_data); +#endif + curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_err_str); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); + curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout); + curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, resp_hdr_cb); + curl_easy_setopt(curl, CURLOPT_HEADERDATA, &hi); + if (opt_proxy) { + curl_easy_setopt(curl, CURLOPT_PROXY, opt_proxy); + curl_easy_setopt(curl, CURLOPT_PROXYTYPE, opt_proxy_type); + } + if (userpass) { + curl_easy_setopt(curl, CURLOPT_USERPWD, userpass); + curl_easy_setopt(curl, CURLOPT_HTTPAUTH, CURLAUTH_BASIC); + } +#if LIBCURL_VERSION_NUM >= 0x070f06 + if (longpoll) + curl_easy_setopt(curl, CURLOPT_SOCKOPTFUNCTION, sockopt_keepalive_cb); +#endif + curl_easy_setopt(curl, CURLOPT_POST, 1); + + if (opt_protocol) + applog(LOG_DEBUG, "JSON protocol request:\n%s\n", rpc_req); + + upload_data.buf = rpc_req; + upload_data.len = strlen(rpc_req); + upload_data.pos = 0; + sprintf(len_hdr, "Content-Length: %lu", + (unsigned long) upload_data.len); + + headers = curl_slist_append(headers, "Content-Type: 
application/json"); + headers = curl_slist_append(headers, len_hdr); + headers = curl_slist_append(headers, "User-Agent: " USER_AGENT); + headers = curl_slist_append(headers, "X-Mining-Extensions: midstate"); + headers = curl_slist_append(headers, "Accept:"); /* disable Accept hdr*/ + headers = curl_slist_append(headers, "Expect:"); /* disable Expect hdr*/ + + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + + rc = curl_easy_perform(curl); + if (curl_err != NULL) + *curl_err = rc; + if (rc) { + if (!(longpoll && rc == CURLE_OPERATION_TIMEDOUT)) + applog(LOG_ERR, "HTTP request failed: %s", curl_err_str); + goto err_out; + } + + /* If X-Stratum was found, activate Stratum */ + if (want_stratum && hi.stratum_url && + !strncasecmp(hi.stratum_url, "stratum+tcp://", 14) && + !(opt_proxy && opt_proxy_type == CURLPROXY_HTTP)) { + have_stratum = true; + tq_push(thr_info[stratum_thr_id].q, hi.stratum_url); + hi.stratum_url = NULL; + } + + /* If X-Long-Polling was found, activate long polling */ + if (lp_scanning && hi.lp_path && !have_stratum) { + have_longpoll = true; + tq_push(thr_info[longpoll_thr_id].q, hi.lp_path); + hi.lp_path = NULL; + } + + if (!all_data.buf) { + applog(LOG_ERR, "Empty data received in json_rpc_call."); + goto err_out; + } + + val = JSON_LOADS((const char*)all_data.buf, &err); + if (!val) { + applog(LOG_ERR, "JSON decode failed(%d): %s", err.line, err.text); + goto err_out; + } + + if (opt_protocol) { + char *s = json_dumps(val, JSON_INDENT(3)); + applog(LOG_DEBUG, "JSON protocol response:\n%s", s); + free(s); + } + + /* JSON-RPC valid response returns a non-null 'result', + * and a null 'error'. 
*/ + res_val = json_object_get(val, "result"); + err_val = json_object_get(val, "error"); + + if (!res_val || json_is_null(res_val) || + (err_val && !json_is_null(err_val))) { + char *s; + + if (err_val) + s = json_dumps(err_val, JSON_INDENT(3)); + else + s = strdup("(unknown reason)"); + + applog(LOG_ERR, "JSON-RPC call failed: %s", s); + + free(s); + + goto err_out; + } + + if (hi.reason) + json_object_set_new(val, "reject-reason", json_string(hi.reason)); + + databuf_free(&all_data); + curl_slist_free_all(headers); + curl_easy_reset(curl); + return val; + +err_out: + free(hi.lp_path); + free(hi.reason); + free(hi.stratum_url); + databuf_free(&all_data); + curl_slist_free_all(headers); + curl_easy_reset(curl); + return NULL; +} + +char *bin2hex(const unsigned char *p, size_t len) +{ + unsigned int i; + char *s = (char*)malloc((len * 2) + 1); + if (!s) + return NULL; + + for (i = 0; i < len; i++) + sprintf(s + (i * 2), "%02x", (unsigned int) p[i]); + + return s; +} + +bool hex2bin(unsigned char *p, const char *hexstr, size_t len) +{ + char hex_byte[3]; + char *ep; + + hex_byte[2] = '\0'; + + while (*hexstr && len) { + if (!hexstr[1]) { + applog(LOG_ERR, "hex2bin str truncated"); + return false; + } + hex_byte[0] = hexstr[0]; + hex_byte[1] = hexstr[1]; + *p = (unsigned char) strtol(hex_byte, &ep, 16); + if (*ep) { + applog(LOG_ERR, "hex2bin failed on '%s'", hex_byte); + return false; + } + p++; + hexstr += 2; + len--; + } + + return (len == 0 && *hexstr == 0) ? true : false; +} + +/* Subtract the `struct timeval' values X and Y, + storing the result in RESULT. + Return 1 if the difference is negative, otherwise 0. */ +int timeval_subtract(struct timeval *result, struct timeval *x, + struct timeval *y) +{ + /* Perform the carry for the later subtraction by updating Y. 
*/ + if (x->tv_usec < y->tv_usec) { + int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1; + y->tv_usec -= 1000000 * nsec; + y->tv_sec += nsec; + } + if (x->tv_usec - y->tv_usec > 1000000) { + int nsec = (x->tv_usec - y->tv_usec) / 1000000; + y->tv_usec += 1000000 * nsec; + y->tv_sec -= nsec; + } + + /* Compute the time remaining to wait. + * `tv_usec' is certainly positive. */ + result->tv_sec = x->tv_sec - y->tv_sec; + result->tv_usec = x->tv_usec - y->tv_usec; + + /* Return 1 if result is negative. */ + return x->tv_sec < y->tv_sec; +} + +bool fulltest(const uint32_t *hash, const uint32_t *target) +{ + int i; + bool rc = true; + + for (i = 7; i >= 0; i--) { + if (hash[i] > target[i]) { + rc = false; + break; + } + if (hash[i] < target[i]) { + rc = true; + break; + } + } + + if (opt_debug) { + uint32_t hash_be[8], target_be[8]; + char *hash_str, *target_str; + + for (i = 0; i < 8; i++) { + be32enc(hash_be + i, hash[7 - i]); + be32enc(target_be + i, target[7 - i]); + } + hash_str = bin2hex((unsigned char *)hash_be, 32); + target_str = bin2hex((unsigned char *)target_be, 32); + + applog(LOG_DEBUG, "DEBUG: %s\nHash: %s\nTarget: %s", + rc ? 
"hash <= target" + : "hash > target (false positive)", + hash_str, + target_str); + + free(hash_str); + free(target_str); + } + + return rc; +} + +void diff_to_target(uint32_t *target, double diff) +{ + uint64_t m; + int k; + + for (k = 6; k > 0 && diff > 1.0; k--) + diff /= 4294967296.0; + m = (uint64_t)(4294901760.0 / diff); + if (m == 0 && k == 6) + memset(target, 0xff, 32); + else { + memset(target, 0, 32); + target[k] = (uint32_t)m; + target[k + 1] = (uint32_t)(m >> 32); + } +} + +#ifdef WIN32 +#define socket_blocks() (WSAGetLastError() == WSAEWOULDBLOCK) +#else +#define socket_blocks() (errno == EAGAIN || errno == EWOULDBLOCK) +#endif + +static bool send_line(curl_socket_t sock, char *s) +{ + ssize_t len, sent = 0; + + len = (ssize_t)strlen(s); + s[len++] = '\n'; + + while (len > 0) { + struct timeval timeout = {0, 0}; + ssize_t n; + fd_set wd; + + FD_ZERO(&wd); + FD_SET(sock, &wd); + if (select((int)sock + 1, NULL, &wd, NULL, &timeout) < 1) + return false; + n = send(sock, s + sent, len, 0); + if (n < 0) { + if (!socket_blocks()) + return false; + n = 0; + } + sent += n; + len -= n; + } + + return true; +} + +bool stratum_send_line(struct stratum_ctx *sctx, char *s) +{ + bool ret = false; + + if (opt_protocol) + applog(LOG_DEBUG, "> %s", s); + + pthread_mutex_lock(&sctx->sock_lock); + ret = send_line(sctx->sock, s); + pthread_mutex_unlock(&sctx->sock_lock); + + return ret; +} + +static bool socket_full(curl_socket_t sock, int timeout) +{ + struct timeval tv; + fd_set rd; + + FD_ZERO(&rd); + FD_SET(sock, &rd); + tv.tv_sec = timeout; + tv.tv_usec = 0; + if (select((int)sock + 1, &rd, NULL, NULL, &tv) > 0) + return true; + return false; +} + +bool stratum_socket_full(struct stratum_ctx *sctx, int timeout) +{ + return strlen(sctx->sockbuf) || socket_full(sctx->sock, timeout); +} + +#define RBUFSIZE 2048 +#define RECVSIZE (RBUFSIZE - 4) + +static void stratum_buffer_append(struct stratum_ctx *sctx, const char *s) +{ + size_t old, snew; + + old = 
strlen(sctx->sockbuf); + snew = old + strlen(s) + 1; + if (snew >= sctx->sockbuf_size) { + sctx->sockbuf_size = snew + (RBUFSIZE - (snew % RBUFSIZE)); + sctx->sockbuf = (char*)realloc(sctx->sockbuf, sctx->sockbuf_size); + } + strcpy(sctx->sockbuf + old, s); +} + +char *stratum_recv_line(struct stratum_ctx *sctx) +{ + ssize_t len, buflen; + char *tok, *sret = NULL; + + if (!strstr(sctx->sockbuf, "\n")) { + bool ret = true; + time_t rstart; + + time(&rstart); + if (!socket_full(sctx->sock, 60)) { + applog(LOG_ERR, "stratum_recv_line timed out"); + goto out; + } + do { + char s[RBUFSIZE]; + ssize_t n; + + memset(s, 0, RBUFSIZE); + n = recv(sctx->sock, s, RECVSIZE, 0); + if (!n) { + ret = false; + break; + } + if (n < 0) { + if (!socket_blocks() || !socket_full(sctx->sock, 1)) { + ret = false; + break; + } + } else + stratum_buffer_append(sctx, s); + } while (time(NULL) - rstart < 60 && !strstr(sctx->sockbuf, "\n")); + + if (!ret) { + applog(LOG_ERR, "stratum_recv_line failed"); + goto out; + } + } + + buflen = (ssize_t)strlen(sctx->sockbuf); + tok = strtok(sctx->sockbuf, "\n"); + if (!tok) { + applog(LOG_ERR, "stratum_recv_line failed to parse a newline-terminated string"); + goto out; + } + sret = strdup(tok); + len = (ssize_t)strlen(sret); + + if (buflen > len + 1) + memmove(sctx->sockbuf, sctx->sockbuf + len + 1, buflen - len + 1); + else + sctx->sockbuf[0] = '\0'; + +out: + if (sret && opt_protocol) + applog(LOG_DEBUG, "< %s", sret); + return sret; +} + +#if LIBCURL_VERSION_NUM >= 0x071101 +static curl_socket_t opensocket_grab_cb(void *clientp, curlsocktype purpose, + struct curl_sockaddr *addr) +{ + curl_socket_t *sock = (curl_socket_t *)clientp; + *sock = socket(addr->family, addr->socktype, addr->protocol); + return *sock; +} +#endif + +bool stratum_connect(struct stratum_ctx *sctx, const char *url) +{ + CURL *curl; + int rc; + + pthread_mutex_lock(&sctx->sock_lock); + if (sctx->curl) + curl_easy_cleanup(sctx->curl); + sctx->curl = curl_easy_init(); + if 
(!sctx->curl) { + applog(LOG_ERR, "CURL initialization failed"); + pthread_mutex_unlock(&sctx->sock_lock); + return false; + } + curl = sctx->curl; + if (!sctx->sockbuf) { + sctx->sockbuf = (char*)calloc(RBUFSIZE, 1); + sctx->sockbuf_size = RBUFSIZE; + } + sctx->sockbuf[0] = '\0'; + pthread_mutex_unlock(&sctx->sock_lock); + + if (url != sctx->url) { + free(sctx->url); + sctx->url = strdup(url); + } + free(sctx->curl_url); + sctx->curl_url = (char*)malloc(strlen(url)); + sprintf(sctx->curl_url, "http%s", strstr(url, "://")); + + if (opt_protocol) + curl_easy_setopt(curl, CURLOPT_VERBOSE, 1); + curl_easy_setopt(curl, CURLOPT_URL, sctx->curl_url); + curl_easy_setopt(curl, CURLOPT_FRESH_CONNECT, 1); + curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 30); + curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, sctx->curl_err_str); + curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1); + curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1); + if (opt_proxy && opt_proxy_type != CURLPROXY_HTTP) { + curl_easy_setopt(curl, CURLOPT_PROXY, opt_proxy); + curl_easy_setopt(curl, CURLOPT_PROXYTYPE, opt_proxy_type); + } else if (getenv("http_proxy")) { + if (getenv("all_proxy")) + curl_easy_setopt(curl, CURLOPT_PROXY, getenv("all_proxy")); + else if (getenv("ALL_PROXY")) + curl_easy_setopt(curl, CURLOPT_PROXY, getenv("ALL_PROXY")); + else + curl_easy_setopt(curl, CURLOPT_PROXY, ""); + } +#if LIBCURL_VERSION_NUM >= 0x070f06 + curl_easy_setopt(curl, CURLOPT_SOCKOPTFUNCTION, sockopt_keepalive_cb); +#endif +#if LIBCURL_VERSION_NUM >= 0x071101 + curl_easy_setopt(curl, CURLOPT_OPENSOCKETFUNCTION, opensocket_grab_cb); + curl_easy_setopt(curl, CURLOPT_OPENSOCKETDATA, &sctx->sock); +#endif + curl_easy_setopt(curl, CURLOPT_CONNECT_ONLY, 1); + + rc = curl_easy_perform(curl); + if (rc) { + applog(LOG_ERR, "Stratum connection failed: %s", sctx->curl_err_str); + curl_easy_cleanup(curl); + sctx->curl = NULL; + return false; + } + +#if LIBCURL_VERSION_NUM < 0x071101 + /* CURLINFO_LASTSOCKET is broken on Win64; only use it 
as a last resort */ + curl_easy_getinfo(curl, CURLINFO_LASTSOCKET, (long *)&sctx->sock); +#endif + + return true; +} + +void stratum_disconnect(struct stratum_ctx *sctx) +{ + pthread_mutex_lock(&sctx->sock_lock); + if (sctx->curl) { + curl_easy_cleanup(sctx->curl); + sctx->curl = NULL; + sctx->sockbuf[0] = '\0'; + } + pthread_mutex_unlock(&sctx->sock_lock); +} + +static const char *get_stratum_session_id(json_t *val) +{ + json_t *arr_val; + int i, n; + + arr_val = json_array_get(val, 0); + if (!arr_val || !json_is_array(arr_val)) + return NULL; + n = json_array_size(arr_val); + for (i = 0; i < n; i++) { + const char *notify; + json_t *arr = json_array_get(arr_val, i); + + if (!arr || !json_is_array(arr)) + break; + notify = json_string_value(json_array_get(arr, 0)); + if (!notify) + continue; + if (!strcasecmp(notify, "mining.notify")) + return json_string_value(json_array_get(arr, 1)); + } + return NULL; +} + +bool stratum_subscribe(struct stratum_ctx *sctx) +{ + char *s, *sret = NULL; + const char *sid, *xnonce1; + int xn2_size; + json_t *val = NULL, *res_val, *err_val; + json_error_t err; + bool ret = false, retry = false; + +start: + s = (char*)malloc(128 + (sctx->session_id ? 
strlen(sctx->session_id) : 0)); + if (retry) + sprintf(s, "{\"id\": 1, \"method\": \"mining.subscribe\", \"params\": []}"); + else if (sctx->session_id) + sprintf(s, "{\"id\": 1, \"method\": \"mining.subscribe\", \"params\": [\"" USER_AGENT "\", \"%s\"]}", sctx->session_id); + else + sprintf(s, "{\"id\": 1, \"method\": \"mining.subscribe\", \"params\": [\"" USER_AGENT "\"]}"); + + if (!stratum_send_line(sctx, s)) + goto out; + + if (!socket_full(sctx->sock, 30)) { + applog(LOG_ERR, "stratum_subscribe timed out"); + goto out; + } + + sret = stratum_recv_line(sctx); + if (!sret) + goto out; + + val = JSON_LOADS(sret, &err); + free(sret); + if (!val) { + applog(LOG_ERR, "JSON decode failed(%d): %s", err.line, err.text); + goto out; + } + + res_val = json_object_get(val, "result"); + err_val = json_object_get(val, "error"); + + if (!res_val || json_is_null(res_val) || + (err_val && !json_is_null(err_val))) { + if (opt_debug || retry) { + free(s); + if (err_val) + s = json_dumps(err_val, JSON_INDENT(3)); + else + s = strdup("(unknown reason)"); + applog(LOG_ERR, "JSON-RPC call failed: %s", s); + } + goto out; + } + + sid = get_stratum_session_id(res_val); + if (opt_debug && !sid) + applog(LOG_DEBUG, "Failed to get Stratum session id"); + xnonce1 = json_string_value(json_array_get(res_val, 1)); + if (!xnonce1) { + applog(LOG_ERR, "Failed to get extranonce1"); + goto out; + } + xn2_size = json_integer_value(json_array_get(res_val, 2)); + if (!xn2_size) { + applog(LOG_ERR, "Failed to get extranonce2_size"); + goto out; + } + + pthread_mutex_lock(&sctx->work_lock); + free(sctx->session_id); + free(sctx->xnonce1); + sctx->session_id = sid ? 
strdup(sid) : NULL; + sctx->xnonce1_size = strlen(xnonce1) / 2; + sctx->xnonce1 = (unsigned char*)malloc(sctx->xnonce1_size); + hex2bin(sctx->xnonce1, xnonce1, sctx->xnonce1_size); + sctx->xnonce2_size = xn2_size; + sctx->next_diff = 1.0; + pthread_mutex_unlock(&sctx->work_lock); + + if (opt_debug && sid) + applog(LOG_DEBUG, "Stratum session id: %s", sctx->session_id); + + ret = true; + +out: + free(s); + if (val) + json_decref(val); + + if (!ret) { + if (sret && !retry) { + retry = true; + goto start; + } + } + + return ret; +} + +bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass) +{ + json_t *val = NULL, *res_val, *err_val; + char *s, *sret; + json_error_t err; + bool ret = false; + + s = (char*)malloc(80 + strlen(user) + strlen(pass)); + sprintf(s, "{\"id\": 2, \"method\": \"mining.authorize\", \"params\": [\"%s\", \"%s\"]}", + user, pass); + + if (!stratum_send_line(sctx, s)) + goto out; + + while (1) { + sret = stratum_recv_line(sctx); + if (!sret) + goto out; + if (!stratum_handle_method(sctx, sret)) + break; + free(sret); + } + + val = JSON_LOADS(sret, &err); + free(sret); + if (!val) { + applog(LOG_ERR, "JSON decode failed(%d): %s", err.line, err.text); + goto out; + } + + res_val = json_object_get(val, "result"); + err_val = json_object_get(val, "error"); + + if (!res_val || json_is_false(res_val) || + (err_val && !json_is_null(err_val))) { + applog(LOG_ERR, "Stratum authentication failed"); + goto out; + } + + ret = true; + +out: + free(s); + if (val) + json_decref(val); + + return ret; +} + +static bool stratum_notify(struct stratum_ctx *sctx, json_t *params) +{ + const char *job_id, *prevhash, *coinb1, *coinb2, *version, *nbits, *ntime, *nreward; + size_t coinb1_size, coinb2_size; + bool clean, ret = false; + int merkle_count, i; + json_t *merkle_arr; + unsigned char **merkle; + + job_id = json_string_value(json_array_get(params, 0)); + prevhash = json_string_value(json_array_get(params, 1)); + coinb1 = 
json_string_value(json_array_get(params, 2)); + coinb2 = json_string_value(json_array_get(params, 3)); + merkle_arr = json_array_get(params, 4); + if (!merkle_arr || !json_is_array(merkle_arr)) + goto out; + merkle_count = json_array_size(merkle_arr); + version = json_string_value(json_array_get(params, 5)); + nbits = json_string_value(json_array_get(params, 6)); + ntime = json_string_value(json_array_get(params, 7)); + clean = json_is_true(json_array_get(params, 8)); + nreward = json_string_value(json_array_get(params, 9)); + + if (!job_id || !prevhash || !coinb1 || !coinb2 || !version || !nbits || !ntime || + strlen(prevhash) != 64 || strlen(version) != 8 || + strlen(nbits) != 8 || strlen(ntime) != 8 || strlen(nreward) != 4) { + applog(LOG_ERR, "Stratum notify: invalid parameters"); + goto out; + } + merkle = (unsigned char**)malloc(merkle_count * sizeof(char *)); + for (i = 0; i < merkle_count; i++) { + const char *s = json_string_value(json_array_get(merkle_arr, i)); + if (!s || strlen(s) != 64) { + while (i--) + free(merkle[i]); + free(merkle); + applog(LOG_ERR, "Stratum notify: invalid Merkle branch"); + goto out; + } + merkle[i] = (unsigned char*)malloc(32); + hex2bin(merkle[i], s, 32); + } + + pthread_mutex_lock(&sctx->work_lock); + + coinb1_size = strlen(coinb1) / 2; + coinb2_size = strlen(coinb2) / 2; + sctx->job.coinbase_size = coinb1_size + sctx->xnonce1_size + + sctx->xnonce2_size + coinb2_size; + sctx->job.coinbase = (unsigned char*)realloc(sctx->job.coinbase, sctx->job.coinbase_size); + sctx->job.xnonce2 = sctx->job.coinbase + coinb1_size + sctx->xnonce1_size; + hex2bin(sctx->job.coinbase, coinb1, coinb1_size); + memcpy(sctx->job.coinbase + coinb1_size, sctx->xnonce1, sctx->xnonce1_size); + if (!sctx->job.job_id || strcmp(sctx->job.job_id, job_id)) + memset(sctx->job.xnonce2, 0, sctx->xnonce2_size); + hex2bin(sctx->job.xnonce2 + sctx->xnonce2_size, coinb2, coinb2_size); + + free(sctx->job.job_id); + sctx->job.job_id = strdup(job_id); + 
hex2bin(sctx->job.prevhash, prevhash, 32);
+
+	for (i = 0; i < sctx->job.merkle_count; i++)
+		free(sctx->job.merkle[i]);
+	free(sctx->job.merkle);
+	sctx->job.merkle = merkle;
+	sctx->job.merkle_count = merkle_count;
+
+	hex2bin(sctx->job.version, version, 4);
+	hex2bin(sctx->job.nbits, nbits, 4);
+	hex2bin(sctx->job.ntime, ntime, 4);
+	hex2bin(sctx->job.nreward, nreward, 2);
+	sctx->job.clean = clean;
+
+	sctx->job.diff = sctx->next_diff;
+
+	pthread_mutex_unlock(&sctx->work_lock);
+
+	ret = true;
+
+out:
+	return ret;
+}
+
+/*
+ * Handle "mining.set_difficulty": params[0] is the new difficulty.
+ *
+ * The value is stored in sctx->next_diff under work_lock; stratum_notify
+ * copies next_diff into the job when the next notification is processed.
+ * Returns false when the parameter is missing, not a number, or zero
+ * (json_number_value yields 0 for anything that is not a JSON number).
+ */
+static bool stratum_set_difficulty(struct stratum_ctx *sctx, json_t *params)
+{
+	double diff;
+
+	diff = json_number_value(json_array_get(params, 0));
+	if (diff == 0)
+		return false;
+
+	pthread_mutex_lock(&sctx->work_lock);
+	sctx->next_diff = diff;
+	pthread_mutex_unlock(&sctx->work_lock);
+
+	if (opt_debug)
+		applog(LOG_DEBUG, "Stratum difficulty set to %g", diff);
+
+	return true;
+}
+
+/*
+ * Handle "client.reconnect": params[0] is the new host, params[1] the new
+ * port (accepted either as a JSON string or a JSON integer).
+ *
+ * On success sctx->url is replaced by "stratum+tcp://host:port" and
+ * stratum_disconnect() is called on the context.
+ *
+ * Returns false, leaving sctx->url untouched, when the host is missing,
+ * the port is outside 1..65535, or the new URL cannot be allocated.
+ */
+static bool stratum_reconnect(struct stratum_ctx *sctx, json_t *params)
+{
+	json_t *port_val;
+	const char *host;
+	char *url;
+	long long port;
+
+	host = json_string_value(json_array_get(params, 0));
+	port_val = json_array_get(params, 1);
+	if (json_is_string(port_val))
+		/* strtol saturates on overflow; the range check below rejects it */
+		port = strtol(json_string_value(port_val), NULL, 10);
+	else
+		port = json_integer_value(port_val);
+	if (!host || port < 1 || port > 65535)
+		return false;
+
+	/*
+	 * Build the new URL before releasing the old one, so that a failed
+	 * allocation cannot leave sctx->url pointing at freed memory.
+	 * "stratum+tcp://" (14) + ':' (1) + at most 5 port digits + NUL (1)
+	 * is 21 bytes on top of the host, so 32 extra bytes are sufficient.
+	 */
+	url = (char*)malloc(32 + strlen(host));
+	if (!url)
+		return false;
+	sprintf(url, "stratum+tcp://%s:%d", host, (int)port);
+
+	free(sctx->url);
+	sctx->url = url;
+
+	applog(LOG_NOTICE, "Server requested reconnection to %s", sctx->url);
+
+	stratum_disconnect(sctx);
+
+	return true;
+}
+
+/*
+ * Handle "client.get_version": reply to the request identified by `id`
+ * with {"id": id, "error": null, "result": USER_AGENT}.
+ *
+ * Returns false when `id` is missing or JSON null, when the reply cannot
+ * be built or serialized, or when stratum_send_line() fails.
+ */
+static bool stratum_get_version(struct stratum_ctx *sctx, json_t *id)
+{
+	char *s;
+	json_t *val;
+	bool ret;
+
+	if (!id || json_is_null(id))
+		return false;
+
+	val = json_object();
+	if (!val)
+		return false;
+	json_object_set(val, "id", id);
+	json_object_set_new(val, "error", json_null());
+	json_object_set_new(val, "result", json_string(USER_AGENT));
+	s = json_dumps(val, 0);
+	json_decref(val);
+	if (!s)		/* serialization failed: nothing to send */
+		return false;
+
+	ret = stratum_send_line(sctx, s);
+	free(s);
+
+	return ret;
+}
+
+/*
+ * Handle "client.show_message": log params[0] when it is a string and,
+ * if the request carries a non-null `id`, acknowledge it with
+ * {"id": id, "error": null, "result": true}.
+ *
+ * Returns true when no acknowledgement is requested; otherwise returns
+ * false if the reply cannot be built or serialized, or the result of
+ * stratum_send_line().
+ */
+static bool stratum_show_message(struct stratum_ctx *sctx, json_t *id, json_t *params)
+{
+	char *s;
+	json_t *val;
+	const char *msg;
+	bool ret;
+
+	/* json_string_value() is NULL for a missing or non-string element;
+	 * never hand that to a "%s" conversion. */
+	msg = json_string_value(json_array_get(params, 0));
+	if (msg)
+		applog(LOG_NOTICE, "MESSAGE FROM SERVER: %s", msg);
+
+	if (!id || json_is_null(id))
+		return true;
+
+	val = json_object();
+	if (!val)
+		return false;
+	json_object_set(val, "id", id);
+	json_object_set_new(val, "error", json_null());
+	json_object_set_new(val, "result", json_true());
+	s = json_dumps(val, 0);
+	json_decref(val);
+	if (!s)		/* serialization failed: nothing to send */
+		return false;
+
+	ret = stratum_send_line(sctx, s);
+	free(s);
+
+	return ret;
+}
+
+/*
+ * Parse one line received from the server and, if it is a method call,
+ * dispatch it to the matching handler.
+ *
+ * Recognized methods (compared case-insensitively): mining.notify,
+ * mining.set_difficulty, client.reconnect, client.get_version and
+ * client.show_message.
+ *
+ * Returns the handler's result, or false when the line is not valid JSON,
+ * has no string "method" member, or names an unrecognized method.
+ */
+bool stratum_handle_method(struct stratum_ctx *sctx, const char *s)
+{
+	json_t *val, *id, *params;
+	json_error_t err;
+	const char *method;
+	bool ret = false;
+
+	val = JSON_LOADS(s, &err);
+	if (!val) {
+		applog(LOG_ERR, "JSON decode failed(%d): %s", err.line, err.text);
+		goto out;
+	}
+
+	method = json_string_value(json_object_get(val, "method"));
+	if (!method)
+		goto out;
+	/* id and params are borrowed from val and stay valid until the
+	 * json_decref() at the end of this function */
+	id = json_object_get(val, "id");
+	params = json_object_get(val, "params");
+
+	if (!strcasecmp(method, "mining.notify")) {
+		ret = stratum_notify(sctx, params);
+		goto out;
+	}
+	if (!strcasecmp(method, "mining.set_difficulty")) {
+		ret = stratum_set_difficulty(sctx, params);
+		goto out;
+	}
+	if (!strcasecmp(method, "client.reconnect")) {
+		ret = stratum_reconnect(sctx, params);
+		goto out;
+	}
+	if (!strcasecmp(method, "client.get_version")) {
+		ret = stratum_get_version(sctx, id);
+		goto out;
+	}
+	if (!strcasecmp(method, "client.show_message")) {
+		ret = stratum_show_message(sctx, id, params);
+		goto out;
+	}
+
+out:
+	if (val)
+		json_decref(val);
+
+	return ret;
+}
+
+/*
+ * Allocate an empty thread queue with its list head, mutex and condition
+ * variable initialized.  Returns NULL if the allocation fails.  Release
+ * the queue with tq_free().
+ */
+struct thread_q *tq_new(void)
+{
+	struct thread_q *tq;
+
+	tq = (struct thread_q *)calloc(1, sizeof(*tq));
+	if (!tq)
+		return NULL;
+
+	INIT_LIST_HEAD(&tq->q);
+	pthread_mutex_init(&tq->mutex, NULL);
+	pthread_cond_init(&tq->cond, NULL);
+
+	return tq;
+}
+
+/*
+ * Destroy a queue created by tq_new().  Entries still queued are unlinked
+ * and freed; the data pointers they carry are not freed here.  Passing
+ * NULL is a no-op.
+ */
+void tq_free(struct thread_q *tq)
+{
+	struct tq_ent *ent, *iter;
+
+	if (!tq)
+		return;
+
+	list_for_each_entry_safe(ent, iter, &tq->q, q_node, struct tq_ent, struct tq_ent) {
+		list_del(&ent->q_node);
+		free(ent);
+	}
+
+	pthread_cond_destroy(&tq->cond);
+	pthread_mutex_destroy(&tq->mutex);
+
+	memset(tq, 0, sizeof(*tq));	/* poison */
+	free(tq);
+}
+
+/*
+ * Set the queue's frozen flag under the mutex and signal the condition
+ * variable, which wakes a thread blocked in tq_pop().
+ */
+static void tq_freezethaw(struct thread_q *tq, bool frozen)
+{
+	pthread_mutex_lock(&tq->mutex);
+
+	tq->frozen = frozen;
+
+	pthread_cond_signal(&tq->cond);
+	pthread_mutex_unlock(&tq->mutex);
+}
+
+/* Freeze the queue: tq_push() rejects new entries until tq_thaw(). */
+void tq_freeze(struct thread_q *tq)
+{
+	tq_freezethaw(tq, true);
+}
+
+/* Thaw the queue: tq_push() accepts entries again. */
+void tq_thaw(struct thread_q *tq)
+{
+	tq_freezethaw(tq, false);
+}
+
+/*
+ * Append `data` to the tail of the queue and signal the condition
+ * variable.
+ *
+ * Returns false, without queuing anything, if the entry cannot be
+ * allocated or the queue is frozen.  `data` itself is never freed by the
+ * queue.
+ */
+bool tq_push(struct thread_q *tq, void *data)
+{
+	struct tq_ent *ent;
+	bool rc = true;
+
+	ent = (struct tq_ent *)calloc(1, sizeof(*ent));
+	if (!ent)
+		return false;
+
+	ent->data = data;
+	INIT_LIST_HEAD(&ent->q_node);
+
+	pthread_mutex_lock(&tq->mutex);
+
+	if (!tq->frozen) {
+		list_add_tail(&ent->q_node, &tq->q);
+	} else {
+		free(ent);
+		rc = false;
+	}
+
+	pthread_cond_signal(&tq->cond);
+	pthread_mutex_unlock(&tq->mutex);
+
+	return rc;
+}
+
+/*
+ * Remove the entry at the head of the queue and return its data pointer.
+ *
+ * If the queue is empty the caller waits ONCE on the condition variable:
+ * until `abstime` when it is non-NULL, indefinitely otherwise.  NULL is
+ * returned when the wait fails or times out, and also when the wakeup
+ * finds the queue still empty (for example the signal sent by
+ * tq_freeze()/tq_thaw()).
+ */
+void *tq_pop(struct thread_q *tq, const struct timespec *abstime)
+{
+	struct tq_ent *ent;
+	void *rval = NULL;
+	int rc;
+
+	pthread_mutex_lock(&tq->mutex);
+
+	if (!list_empty(&tq->q))
+		goto pop;
+
+	if (abstime)
+		rc = pthread_cond_timedwait(&tq->cond, &tq->mutex, abstime);
+	else
+		rc = pthread_cond_wait(&tq->cond, &tq->mutex);
+	if (rc)
+		goto out;
+	if (list_empty(&tq->q))
+		goto out;
+
+pop:
+	ent = list_entry(tq->q.next, struct tq_ent, q_node);
+	rval = ent->data;
+
+	list_del(&ent->q_node);
+	free(ent);
+
+out:
+	pthread_mutex_unlock(&tq->mutex);
+	return rval;
+}