diff --git a/README.rst b/README.rst index a21895f952..916f4f39bb 100644 --- a/README.rst +++ b/README.rst @@ -9,13 +9,92 @@ Welcome to uszipcode Documentation ================================== -``uszipcode`` is the most powerful and easy to use zipcode information searchengine in Python. Besides geometry data (also boundary info), several useful census data points are also served: `population`, `population density`, `total wage`, `average annual wage`, `house of units`, `land area`, `water area`. The geometry and geocoding data I am using is from google map API on Oct 2015. To know more about the data, `click here `_. `Another pupolar zipcode Python extension `_ has lat, lng accuracy issue, which doesn't give me reliable results of searching by coordinate and radius. +``uszipcode`` is the **most powerful and easy to use programmable zipcode database, and also a searchengine** in Python. Besides geometry data (also boundary info), several useful census data points are also served: `population`, `population density`, `total wage`, `average annual wage`, `house of units`, `land area`, `water area`. The geometry and geocoding data I am using is from google map API on Mar 2016. `To know more about the data, click here `_. Another `popular zipcode Python extension `_ has lat, lng accuracy issue, which doesn't give me reliable results of searching by coordinate and radius. **Highlight**: -1. `Rich methods `_ are provided for getting zipcode anyway you want. -2. `Fuzzy city name and state name `_ allows you to search **WITHOUT using exactly accurate input**. **This is very helpful if you need to build a web app with it**. -3. You can easily `sort your results `_ by `population`, `area`, `wealthy` and etc... +- `Rich information `_ of zipcode is available. + +.. 
code-block:: python + + >>> from uszipcode import ZipcodeSearchEngine + >>> search = ZipcodeSearchEngine() + >>> zipcode = search.by_zipcode("10001") + >>> print(zipcode) + { + "City": "New York", + "Density": 34035.48387096774, + "HouseOfUnits": 12476, + "LandArea": 0.62, + "Latitude": 40.75368539999999, + "Longitude": -73.9991637, + "NEBoundLatitude": 40.8282129, + "NEBoundLongitude": -73.9321059, + "Population": 21102, + "SWBoundLatitude": 40.743451, + "SWBoungLongitude": -74.00794499999998, + "State": "NY", + "TotalWages": 1031960117.0, + "WaterArea": 0.0, + "Wealthy": 48903.42702113544, + "Zipcode": "10001", + "ZipcodeType": "Standard" + } + +- `Rich search methods `_ are provided for getting zipcode in the way you want. + +.. code-block:: python + + # Search zipcode within 30 miles, ordered from closest to farthest + >>> res = search.by_coordinate(39.122229, -77.133578, radius=30, returns=5) + >>> len(res) # by default 5 results returned + 5 + >>> for zipcode in res: + ... # do whatever you want... + + # Find top 10 population zipcode + >>> res = search.by_population(lower=0, upper=999999999, + ... sort_by="Population", ascending=False, returns=10) + + # Find top 10 largest land area zipcode + >>> res = search.by_landarea(lower=0, upper=999999999, + ... sort_by="LandArea", ascending=False, returns=10) + + # Find top 10 most wealthy zipcode in new york + >>> res = search.find(city="newyork", wealthy_lower=100000, + ... sort_by="Wealthy", returns=10) # at least $100,000 annual income + +- `Fuzzy city name and state name search `_ **enables case, space insensitive, typo tolerant input**. **You don't have to know the correct spelling of the city or state**. This is very helpful if you need to build a web app with it. + +.. code-block:: python + + # Looking for Chicago and IL, but entered wrong spelling. 
+ >>> res = search.by_city_and_state("cicago", "il") + >>> len(res) # 56 zipcodes in Chicago + 56 + >>> zipcode = res[0] + >>> zipcode.City + 'Chicago' + >>> zipcode.State + 'IL' + +- You can easily `sort your results `_ by `population`, `area`, `wealthy` and etc... + +.. code-block:: python + + # Find top 10 population zipcode + >>> res = search.by_population(lower=0, upper=999999999, + ... sort_by="Population", ascending=False, returns=10) + >>> for zipcode in res: + ... # do whatever you want... + +- Easy export to csv. Result set can be easily export to csv. + +.. code-block:: python + + # Find all zipcode in new york + >>> res = search.by_city(city="New York", returns=0) + >>> search.export_to_csv(res, "result.csv") **Quick Links** diff --git a/create_doctree.py b/create_doctree.py index 3594923d0b..4c52307f4e 100644 --- a/create_doctree.py +++ b/create_doctree.py @@ -1,23 +1,22 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from __future__ import print_function -from docfly import Docfly -import os, shutil +import docfly +# Uncomment this if you follow Sanhe's Sphinx Doc Style Guide +#--- Manually Made Doc --- +# doc = docfly.DocTree("source") +# doc.fly(table_of_content_header="Table of Content (目录)") + +#--- Api Reference Doc --- package_name = "uszipcode" -try: - shutil.rmtree(os.path.join("source", package_name)) -except Exception as e: - print(e) - -docfly = Docfly( - package_name, +doc = docfly.ApiReferenceDoc( + package_name, dst="source", ignore=[ - "%s.zzz_manual_install.py" % package_name, "%s.packages" % package_name, + "%s.zzz_manual_install.py" % package_name, ] ) -docfly.fly() \ No newline at end of file +doc.fly() \ No newline at end of file diff --git a/source/index.rst b/source/index.rst index 3db2728585..6a770dcf61 100644 --- a/source/index.rst +++ b/source/index.rst @@ -37,7 +37,7 @@ Start the search engine, do some basic search:: False -Context manager works too (to keep connection safe, RECOMMENDED):: +Context manager works too 
(automatically disconnects from the database. RECOMMENDED):: >>> with ZipcodeSearchEngine() as search: ... zipcode = search.by_zipcode(10030) @@ -62,13 +62,13 @@ Context manager works too (to keep connection safe, RECOMMENDED):: "ZipcodeType": "Standard" } -For all available zipcode attributes, :class:`click here `. - There are two method you may need: -1. You can use ``to_json()`` method to return json encoded string. -2. You can use ``to_dict()`` method to return dictionary data. - +- You can use :meth:`~Zipcode.to_json()` method to return json encoded string. +- You can use :meth:`~Zipcode.to_dict()` method to return dictionary data. +- You can use :meth:`~Zipcode.to_OrderedDict()` method to return ordered dictionary data. +- You can use :meth:`~Zipcode.keys()` method to return available attribute list. +- You can use :meth:`~Zipcode.values()` method to return attributes' values. .. _search_way: @@ -88,6 +88,7 @@ Here's the list of the ways you can search zipcode: - `by estimated total annual wage `_ - `by estimated average total annual wage `_ - `by estimated house of units `_ +- `advanced search `_ You also should know `this trick `_ to sort your results. @@ -115,12 +116,11 @@ Short state name also works: .. code-block:: python >>> res = search.by_city_and_state("cicago", "il") # smartly guess what you are looking for - >>> len(res) + >>> len(res) # 56 zipcodes in Chicago 56 >>> zipcode = res[0] >>> zipcode.City 'Chicago' - >>> zipcode.State 'IL' @@ -280,35 +280,98 @@ You can search all zipcode by defining its total house of units lower bound or u .. code-block:: python >>> res = search.by_house(lower=20000) - >>> for zipcode in res: - ... # do whatever you want... -.. _keyword: +.. 
_find: -Sortby, Descending and Returns Keyword +Advanced Search ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``by_prefix``, ``by_population``, ``by_density``, ``by_totalwages``, ``by_wealthy``, ``by_house`` methods support ``sortby``, ``descending`` and ``returns`` keyword. +In addition, the above methods can be combined to implement very advanced searches: + +**Find the most populous zipcodes in New York** + +.. code-block:: python + + res = search.find( + city="new york", + sort_by="Population", ascending=False, + ) + +**Find all zipcodes in California whose prefix is "95"** + +.. code-block:: python + + res = search.find( + state="califor", + prefix="95", + sort_by="HouseOfUnits", ascending=False, + returns=100, + ) + +**Find top 10 richest zipcode near Silicon Valley** -- ``sortby``: string, default ``"Zipcode"``,the order of attributes that query results been returned -- ``descending``: boolean, default False, is in descending order -- ``returns``: maxiumum number of zipcode can be returned, use 0 for unlimited +.. code-block:: python + + # Find top 10 richest zipcode near Silicon Valley + lat, lng = 37.391184, -122.082235 + radius = 100 + res = search.find( + lat=lat, + lng=lng, + radius=radius, + sort_by="Wealthy", ascending=False, + returns=10, + ) + +**Find zipcodes whose average personal annual income is greater than $60,000 near Silicon Valley, ordered by distance** + +.. code-block:: python + + lat, lng = 37.391184, -122.082235 + radius = 100 + res = search.find( + lat=lat, + lng=lng, + radius=radius, + wealthy_lower=60000, + sort_by="Dist", + ascending=True, + returns=0, + ) + + +.. _sort: + +Sort result +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``by_city_and_state``, ``by_city``, ``by_state``, ``by_prefix``, ``by_population``, ``by_density``, ``by_totalwages``, ``by_wealthy``, ``by_house`` methods all support the ``sort_by`` and ``ascending`` keywords. 
-Here's an example to find the top 100 richest zipcode, sorted by average annual wage: +- ``sort_by``: attribute name(s), case insensitive. Accepts an attribute name or a list for a nested sort. By default ordered by ``Zipcode``. All valid attribute names are :class:`listed here ` +- ``ascending``: boolean or list, default ``True``, sort ascending vs. descending. Specify list for multiple sort orders .. code-block:: python - >>> res = search.by_wealthy(lower=100000, sortby="Wealthy", descending=True, returns=100) + # Search zipcode that average annual income per person greater than $100,000 + >>> res = search.by_wealthy(lower=100000, sort_by="Wealthy", ascending=True) >>> for zipcode in res: - ... # do whatever you want... + ... print(zipcode.Wealthy) # should be in ascending order + -.. include:: about.rst +.. _limit: +Restrict number of results to return +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Every search method supports the ``returns`` keyword to limit the number of results returned. Zero means unlimited. The default limit is 5. -Indices and tables -================== +Here's an example to find the 10 most populous zipcodes, sorted by population: -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` +.. code-block:: python + + # Find the top 10 population zipcode + >>> res = search.by_population(upper=999999999, sort_by="population", ascending=False, returns=10) + >>> len(res) + 10 + >>> for zipcode in res: + ... print(zipcode.Population) # should be in descending order +.. include:: about.rst \ No newline at end of file diff --git a/source/uszipcode/__init__.rst b/source/uszipcode/__init__.rst index b339cc9dcd..636226b6bc 100644 --- a/source/uszipcode/__init__.rst +++ b/source/uszipcode/__init__.rst @@ -2,13 +2,14 @@ uszipcode ========= .. automodule:: uszipcode - :members: + :members: -subpackage and modules ----------------------- +sub packages and modules +------------------------ .. 
toctree:: :maxdepth: 1 - data - searchengine \ No newline at end of file + data + searchengine + \ No newline at end of file diff --git a/source/uszipcode/data/__init__.rst b/source/uszipcode/data/__init__.rst index f6cc7bdbae..4f151e4ce5 100644 --- a/source/uszipcode/data/__init__.rst +++ b/source/uszipcode/data/__init__.rst @@ -2,11 +2,12 @@ data ==== .. automodule:: uszipcode.data - :members: + :members: -subpackage and modules ----------------------- +sub packages and modules +------------------------ .. toctree:: :maxdepth: 1 + \ No newline at end of file diff --git a/source/uszipcode/searchengine.rst b/source/uszipcode/searchengine.rst index 65e4b2c460..08a4817381 100644 --- a/source/uszipcode/searchengine.rst +++ b/source/uszipcode/searchengine.rst @@ -2,4 +2,4 @@ searchengine ============ .. automodule:: uszipcode.searchengine - :members: \ No newline at end of file + :members: \ No newline at end of file diff --git a/tests/test_all.py b/tests/test_all.py new file mode 100644 index 0000000000..7cbb6ab351 --- /dev/null +++ b/tests/test_all.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +elementary_path unittest. 
+""" + +if __name__ == "__main__": + import py + py.test.cmdline.main("--tb=native") # use native python trace back \ No newline at end of file diff --git a/tests/test_fuzzywuzzy.py b/tests/test_fuzzywuzzy.py index eb23236c10..bbf68f8429 100644 --- a/tests/test_fuzzywuzzy.py +++ b/tests/test_fuzzywuzzy.py @@ -13,6 +13,8 @@ def test_all(): assert res[2][0] == "a cow boy" +#--- Unittest --- if __name__ == "__main__": import py - py.test.cmdline.main("--tb=native -s") \ No newline at end of file + import os + py.test.cmdline.main("%s --tb=native -s" % os.path.basename(__file__)) \ No newline at end of file diff --git a/tests/test_haversine.py b/tests/test_haversine.py index 7c30892a97..1e13fc9177 100644 --- a/tests/test_haversine.py +++ b/tests/test_haversine.py @@ -13,6 +13,8 @@ def test_all(): assert abs(great_circle(lyon, paris, miles=True)/243.589575 - 1.0) <= delta +#--- Unittest --- if __name__ == "__main__": import py - py.test.cmdline.main("--tb=native -s") \ No newline at end of file + import os + py.test.cmdline.main("%s --tb=native -s" % os.path.basename(__file__)) \ No newline at end of file diff --git a/tests/test_magic_method.py b/tests/test_magic_method.py new file mode 100644 index 0000000000..2400471de0 --- /dev/null +++ b/tests/test_magic_method.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os +import pytest +from uszipcode.searchengine import Zipcode, ZipcodeSearchEngine + + +def test_result_to_csv(): + with ZipcodeSearchEngine() as search: + res = search.by_prefix("100") + search.export_to_csv(res, "result.csv") + + try: + os.remove("result.csv") + except: + pass + + +def test_all(): + with ZipcodeSearchEngine() as search: + res = search.all() + search.export_to_csv(res, "result.csv") + + try: + os.remove("result.csv") + except: + pass + +#--- Unittest --- +if __name__ == "__main__": + import py + import os + py.test.cmdline.main("%s --tb=native -s" % os.path.basename(__file__)) diff --git 
a/tests/test_searchengine.py b/tests/test_searchengine.py index d972ec8157..0ff4dd23e9 100644 --- a/tests/test_searchengine.py +++ b/tests/test_searchengine.py @@ -1,114 +1,349 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from uszipcode.searchengine import ZipcodeSearchEngine -from pprint import pprint as ppt - - -def test_ZipcodeSearchEngine(): - with ZipcodeSearchEngine() as search: - zipcode = search.by_zipcode(20876) - assert zipcode.Zipcode == "20876" - assert zipcode.City == "Germantown" - assert zipcode.State == "MD" - - res = search.by_coordinate(39.122229, -77.133578, radius=30) - zipcode = res[0] - assert zipcode.Zipcode == "20855" - assert zipcode.City == "Derwood" - assert zipcode.State == "MD" - - res = search.by_coordinate(39.122229, -77.133578, radius=100, returns=0) - assert len(res) == 3531 - - res = search.by_city_and_state("kersen", "kensas") - zipcode = res[0] - assert zipcode.Zipcode == "67561" - assert zipcode.City == "Nickerson" - assert zipcode.State == "KS" - - res = search.by_state("RI") - zipcode = res[0] - assert zipcode.Zipcode == "02804" - assert zipcode.City == "Ashaway" - assert zipcode.State == "RI" - - res = search.by_city("Vienna") - zipcode = res[0] - assert zipcode.Zipcode == "04360" - assert zipcode.City == "Vienna" - assert zipcode.State == "ME" - - res = search.by_prefix("208", - sortby="Zipcode", descending=True, returns=0) - assert len(res) == 34 - - res = search.by_pattern("100", - sortby="Zipcode", descending=True, returns=0) - assert len(res) == 97 - - res = search.by_population(lower=100000, - sortby="Population", descending=False, returns=0) - assert len(res) == 10 - - res = search.by_density(lower=10000, - sortby="Density", descending=False, returns=0) - assert len(res) == 631 - - res = search.by_landarea(lower=1000, - sortby="LandArea", descending=False, returns=0) - assert len(res) == 181 - - res = search.by_waterarea(lower=100, - sortby="WaterArea", descending=False, returns=0) - assert len(res) == 30 - - res 
= search.by_totalwages(lower=1000**3, - sortby="Population", descending=True, returns=0) - assert len(res) == 155 - - res = search.by_wealthy(lower=100000, - sortby="Population", descending=True, returns=0) - assert len(res) == 41 - - res = search.by_house(lower=20000, - sortby="Population", descending=True, returns=0) - assert len(res) == 741 - - -def test_edge_case(): - with ZipcodeSearchEngine() as search: - zipcode = search.by_zipcode(00000) - assert bool(zipcode) is False - - res = search.by_coordinate(39.122229, -77.133578, radius=0.01) - assert res == [] - - res = search.by_city_and_state("unknown", "MD") - assert res == [] - - res = search.by_prefix("00000") - assert res == [] - - res = search.by_pattern("00000") - assert res == [] - - res = search.by_population(upper=-1) - assert res == [] - - res = search.by_density(upper=-1) - assert res == [] - - res = search.by_totalwages(upper=-1) - assert res == [] - - res = search.by_wealthy(upper=-1) - assert res == [] - - res = search.by_house(upper=-1) - assert res == [] +import pytest +from uszipcode.searchengine import Zipcode, ZipcodeSearchEngine +from uszipcode.packages.haversine import great_circle +def is_all_ascending(array): + """Assert that this is a strictly asceding array. + """ + for i, j in zip(array[1:], array[:-1]): + if (i is not None) and (j is not None): + assert i - j >= 0 + +def is_all_descending(array): + """Assert that this is a strictly desceding array. 
+ """ + for i, j in zip(array[1:], array[:-1]): + if (i is not None) and (j is not None): + assert i - j <= 0 + + +class TestZipcode(object): + def test_init(self): + z = Zipcode(Zipcode="10001") + assert z.Zipcode == "10001" + assert z.ZipcodeType is None + + def test_make(self): + z = Zipcode._make(["Zipcode", ], ["10001", ]) + assert z.Zipcode == "10001" + assert z.ZipcodeType is None + + def test_hash(self): + z1 = Zipcode(Zipcode="10001") + z2 = Zipcode(Zipcode="10001") + assert hash(z1) == hash(z2) + assert hash(Zipcode()) + + s = set([z1, z2]) + assert len(s) == 1 + + def test_compare(self): + z1 = Zipcode(Zipcode="10001") + z2 = Zipcode(Zipcode="10002") + z3 = Zipcode() + + assert z1 == z1 + assert z1 != z2 + assert z1 < z2 + assert z2 >= z1 + + with pytest.raises(ValueError): + z1 > z3 + with pytest.raises(ValueError): + z3 > z3 + + def test_iter(self): + z = Zipcode() + for i in list(z): + assert i is None + + for i, j in zip(z.keys(), Zipcode.__keys__): + assert i == j + + def test_output(self): + z = Zipcode(Zipcode="10001", ZipcodeType="Standard") + + +class TestZipcodeSearchEngine(object): + def test_sql_create_order_by(self): + with ZipcodeSearchEngine() as search: + sql = search._sql_create_order_by("Zipcode", True) + assert sql == "\n\tORDER BY Zipcode ASC" + + sql = search._sql_create_order_by( + ["latitude", "longitude"], [False, False]) + assert sql == "\n\tORDER BY Latitude DESC, Longitude DESC" + + sql = search._sql_create_order_by("Hello", True) + assert sql == "" + + def test_sql_create_limit(self): + with ZipcodeSearchEngine() as search: + sql = search._sql_create_limit(1) + assert sql == "\n\tLIMIT 1" + + sql = search._sql_create_limit(0) + assert sql == "" + + def test_sql_create_lower_upper(self): + with ZipcodeSearchEngine() as search: + with pytest.raises(ValueError): + sql = search._sql_create_lower_upper("Population", None, None) + with pytest.raises(ValueError): + sql = search._sql_create_lower_upper("Population", "SQL", "SQL") + + 
sql = search._sql_create_lower_upper("Population", 0, None) + assert sql == "Population >= 0" + + sql = search._sql_create_lower_upper("Population", None, 999999) + assert sql == "Population <= 999999" + + sql = search._sql_create_lower_upper("Population", 0, 999999) + assert sql == "Population >= 0 AND Population <= 999999" + + def test_search_by_zipcode(self): + with ZipcodeSearchEngine() as search: + for zipcode in [10001, "10001"]: + z = search.by_zipcode(zipcode) + assert z.Zipcode == "10001" + assert z.State == "NY" + assert z.City == "New York" + + z = search.by_zipcode(99999) + assert bool(z) is False + + def test_search_by_coordinate(self): + with ZipcodeSearchEngine() as search: + # 在马里兰选一个坐标, 返回1000条, 但实际上不到1000条 + lat, lng = 39.114407, -77.205758 + + # 返回的结果必须按照距离是从小到大的 + res1 = search.by_coordinate(lat, lng, ascending=True, returns=1000) + len(res1) < 1000 + dist_array = [great_circle((lat, lng), (z.Latitude, z.Longitude), miles=True) for z in res1] + is_all_ascending(dist_array) + + res2 = search.by_coordinate(lat, lng, ascending=False, returns=1000) + dist_array = [great_circle((lat, lng), (z.Latitude, z.Longitude), miles=True) for z in res2] + is_all_descending(dist_array) + + # 当returns = 0时, 返回所有符合条件的 + res3 = search.by_coordinate(lat, lng, returns=0) + assert len(res1) == len(res3) + + # 当没有符合条件的zipcode时, 返回空列表 + res3 = search.by_coordinate(lat, lng, radius=-1) + assert len(res3) == 0 + + def test_find_state(self): + with ZipcodeSearchEngine() as search: + assert search._find_state("mary", best_match=True) == ["MD", ] + + result = set(search._find_state("virgin", best_match=False)) + assert result == set(["VI", "WV", "VA"]) + + assert search._find_state("newyork", best_match=False) == ["NY", ] + + with pytest.raises(ValueError): + search._find_state("THIS IS NOT A STATE!", best_match=True) + + with pytest.raises(ValueError): + search._find_state("THIS IS NOT A STATE!", best_match=False) + + def test_find_city(self): + with ZipcodeSearchEngine() 
as search: + assert search._find_city("phonix", best_match=True) == [ + "Phoenix", ] + assert search._find_city("kerson", best_match=False) == [ + "Dickerson Run", "Dickerson", "Nickerson", "Emerson", "Everson" + ] + assert search._find_city("kersen", state="kensas", best_match=False) == [ + "Nickerson", ] + + def test_by_city_and_state(self): + with ZipcodeSearchEngine() as search: + # Arlington, VA + res = search.by_city_and_state(city="arlingten", state="virgnea") + for z in res: + z.City == "Arlington" + z.State == "VA" + assert len(res) == 5 + + # There's no city in VI + with pytest.raises(ValueError): + search.by_city_and_state(city="Arlington", state="vi") + + def test_by_city(self): + with ZipcodeSearchEngine() as search: + res = search.by_city("vienna") + s = set() + for z in res: + assert z.City == "Vienna" + s.add(z.State) + assert s == set(["ME", "MD", "VA"]) + + def test_by_state(self): + with ZipcodeSearchEngine() as search: + res = search.by_state("RI") + z = res[0] + assert z.Zipcode == "02804" + assert z.City == "Ashaway" + assert z.State == "RI" + + def test_by_prefix(self): + """Test sort_by, ascending keyword. 
+ """ + with ZipcodeSearchEngine() as search: + prefix = "208" + sort_key = "Population" + res = search.by_prefix(prefix, + sort_by=sort_key, ascending=True, returns=0) + l = list() + for z in res: + assert z.Zipcode.startswith(prefix) # example prefix + l.append(z[sort_key]) + l_sorted = list(l) + l_sorted.sort() + assert l == l_sorted + + res = search.by_prefix("100", + sort_by=["Wealthy", ], ascending=[False, ]) + + def test_by_pattern(self): + with ZipcodeSearchEngine() as search: + res = search.by_pattern("100", returns=0) + assert len(res) == 97 + + def test_by_density(self): + with ZipcodeSearchEngine() as search: + res = search.by_density(lower=10000, + sort_by="Density", ascending=False, returns=0) + assert len(res) == 631 + + def test_by_landarea(self): + with ZipcodeSearchEngine() as search: + res = search.by_landarea(lower=1000, + sort_by="LandArea", ascending=False, returns=0) + assert len(res) == 181 + + def test_by_waterarea(self): + with ZipcodeSearchEngine() as search: + res = search.by_waterarea(lower=100, + sort_by="WaterArea", ascending=False, returns=0) + assert len(res) == 30 + + def test_by_totalwages(self): + with ZipcodeSearchEngine() as search: + res = search.by_totalwages(lower=1000**3, + sort_by="TotalWages", ascending=False, returns=0) + assert len(res) == 155 + + def test_by_wealthy(self): + with ZipcodeSearchEngine() as search: + res = search.by_wealthy(lower=100000, + sort_by="Wealthy", ascending=False, returns=0) + assert len(res) == 41 + + def test_by_house(self): + with ZipcodeSearchEngine() as search: + res = search.by_house(lower=20000, + sort_by="HouseOfUnits", ascending=False, returns=0) + assert len(res) == 741 + + def test_find(self): + with ZipcodeSearchEngine() as search: + # Find most people living zipcode in New York + res = search.find( + city="new york", + sort_by="Population", ascending=False, + ) + is_all_descending([z.Population for z in res]) + + # Find all zipcode in California that prefix is "999" + res = 
search.find( + state="califor", + prefix="95", + sort_by="HouseOfUnits", ascending=False, + returns=100, + ) + assert len(res) == 100 + for z in res: + assert z.State == "CA" + assert z.Zipcode.startswith("95") + is_all_descending([z.HouseOfUnits for z in res]) + + # Find top 10 richest zipcode near Silicon Valley + lat, lng = 37.391184, -122.082235 + radius = 100 + res = search.find( + lat=lat, + lng=lng, + radius=radius, + sort_by="Wealthy", ascending=False, + returns=10, + ) + assert len(res) == 10 + for z in res: + assert great_circle((lat, lng), (z.Latitude, z.Longitude)) <= radius + is_all_descending([z.Wealthy for z in res]) + + # Find zipcode that average personal annual income greater than + # 100000 near Silicon Valley, order by distance + lat, lng = 37.391184, -122.082235 + radius = 100 + res = search.find( + lat=lat, + lng=lng, + radius=radius, + wealthy_lower=60000, + sort_by=None, + returns=0, + ) + assert len(res) > 5 + for z in res: + assert z.Wealthy >= 60000 + is_all_ascending([ + great_circle((lat, lng), (z.Latitude, z.Longitude)) for z in res + ]) + + def test_edge_case(self): + with ZipcodeSearchEngine() as search: + zipcode = search.by_zipcode(00000) + assert bool(zipcode) is False + + res = search.by_coordinate(39.122229, -77.133578, radius=0.01) + assert res == [] + + res = search.by_city_and_state("unknown", "MD") + assert res == [] + + res = search.by_prefix("00000") + assert res == [] + + res = search.by_pattern("00000") + assert res == [] + + res = search.by_population(upper=-1) + assert res == [] + + res = search.by_density(upper=-1) + assert res == [] + + res = search.by_totalwages(upper=-1) + assert res == [] + + res = search.by_wealthy(upper=-1) + assert res == [] + + res = search.by_house(upper=-1) + assert res == [] + +#--- Unittest --- if __name__ == "__main__": import py - py.test.cmdline.main("--tb=native -s") \ No newline at end of file + import os + py.test.cmdline.main("%s --tb=native -s" % os.path.basename(__file__)) diff 
--git a/uszipcode/__init__.py b/uszipcode/__init__.py index 6d30d3b5a4..5e7647cee4 100644 --- a/uszipcode/__init__.py +++ b/uszipcode/__init__.py @@ -1,9 +1,14 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from .searchengine import ZipcodeSearchEngine +from __future__ import print_function +try: + from .searchengine import ZipcodeSearchEngine +except Exception as e: + print(e) + -__version__ = "0.1.1" +__version__ = "0.1.2" __short_description__ = ("USA zipcode programmable database, includes " "up-to-date census and geometry information.") __license__ = "MIT" diff --git a/uszipcode/packages/six.py b/uszipcode/packages/six.py new file mode 100644 index 0000000000..ffa3fe166a --- /dev/null +++ b/uszipcode/packages/six.py @@ -0,0 +1,838 @@ +"""Utilities for writing code that runs on Python 2 and 3""" + +# Copyright (c) 2010-2015 Benjamin Peterson +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import absolute_import + +import functools +import itertools +import operator +import sys +import types + +__author__ = "Benjamin Peterson " +__version__ = "1.9.0" + + +# Useful for very coarse version differentiation. +PY2 = sys.version_info[0] == 2 +PY3 = sys.version_info[0] == 3 + +if PY3: + string_types = str, + integer_types = int, + class_types = type, + text_type = str + binary_type = bytes + + MAXSIZE = sys.maxsize +else: + string_types = basestring, + integer_types = (int, long) + class_types = (type, types.ClassType) + text_type = unicode + binary_type = str + + if sys.platform.startswith("java"): + # Jython always uses 32 bits. + MAXSIZE = int((1 << 31) - 1) + else: + # It's possible to have sizeof(long) != sizeof(Py_ssize_t). + class X(object): + def __len__(self): + return 1 << 31 + try: + len(X()) + except OverflowError: + # 32-bit + MAXSIZE = int((1 << 31) - 1) + else: + # 64-bit + MAXSIZE = int((1 << 63) - 1) + del X + + +def _add_doc(func, doc): + """Add documentation to a function.""" + func.__doc__ = doc + + +def _import_module(name): + """Import module, returning the module after the last dot.""" + __import__(name) + return sys.modules[name] + + +class _LazyDescr(object): + + def __init__(self, name): + self.name = name + + def __get__(self, obj, tp): + result = self._resolve() + setattr(obj, self.name, result) # Invokes __set__. + try: + # This is a bit ugly, but it avoids running this again by + # removing this descriptor. 
+ delattr(obj.__class__, self.name) + except AttributeError: + pass + return result + + +class MovedModule(_LazyDescr): + + def __init__(self, name, old, new=None): + super(MovedModule, self).__init__(name) + if PY3: + if new is None: + new = name + self.mod = new + else: + self.mod = old + + def _resolve(self): + return _import_module(self.mod) + + def __getattr__(self, attr): + _module = self._resolve() + value = getattr(_module, attr) + setattr(self, attr, value) + return value + + +class _LazyModule(types.ModuleType): + + def __init__(self, name): + super(_LazyModule, self).__init__(name) + self.__doc__ = self.__class__.__doc__ + + def __dir__(self): + attrs = ["__doc__", "__name__"] + attrs += [attr.name for attr in self._moved_attributes] + return attrs + + # Subclasses should override this + _moved_attributes = [] + + +class MovedAttribute(_LazyDescr): + + def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): + super(MovedAttribute, self).__init__(name) + if PY3: + if new_mod is None: + new_mod = name + self.mod = new_mod + if new_attr is None: + if old_attr is None: + new_attr = name + else: + new_attr = old_attr + self.attr = new_attr + else: + self.mod = old_mod + if old_attr is None: + old_attr = name + self.attr = old_attr + + def _resolve(self): + module = _import_module(self.mod) + return getattr(module, self.attr) + + +class _SixMetaPathImporter(object): + """ + A meta path importer to import six.moves and its submodules. + + This class implements a PEP302 finder and loader. It should be compatible + with Python 2.5 and all existing versions of Python3 + """ + def __init__(self, six_module_name): + self.name = six_module_name + self.known_modules = {} + + def _add_module(self, mod, *fullnames): + for fullname in fullnames: + self.known_modules[self.name + "." + fullname] = mod + + def _get_module(self, fullname): + return self.known_modules[self.name + "." 
+ fullname] + + def find_module(self, fullname, path=None): + if fullname in self.known_modules: + return self + return None + + def __get_module(self, fullname): + try: + return self.known_modules[fullname] + except KeyError: + raise ImportError("This loader does not know module " + fullname) + + def load_module(self, fullname): + try: + # in case of a reload + return sys.modules[fullname] + except KeyError: + pass + mod = self.__get_module(fullname) + if isinstance(mod, MovedModule): + mod = mod._resolve() + else: + mod.__loader__ = self + sys.modules[fullname] = mod + return mod + + def is_package(self, fullname): + """ + Return true, if the named module is a package. + + We need this method to get correct spec objects with + Python 3.4 (see PEP451) + """ + return hasattr(self.__get_module(fullname), "__path__") + + def get_code(self, fullname): + """Return None + + Required, if is_package is implemented""" + self.__get_module(fullname) # eventually raises ImportError + return None + get_source = get_code # same as get_code + +_importer = _SixMetaPathImporter(__name__) + + +class _MovedItems(_LazyModule): + """Lazy loading of moved objects""" + __path__ = [] # mark as package + + +_moved_attributes = [ + MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), + MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), + MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"), + MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"), + MovedAttribute("intern", "__builtin__", "sys"), + MovedAttribute("map", "itertools", "builtins", "imap", "map"), + MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"), + MovedAttribute("reload_module", "__builtin__", "imp", "reload"), + MovedAttribute("reduce", "__builtin__", "functools"), + MovedAttribute("shlex_quote", "pipes", "shlex", "quote"), + MovedAttribute("StringIO", "StringIO", "io"), + MovedAttribute("UserDict", "UserDict", 
"collections"), + MovedAttribute("UserList", "UserList", "collections"), + MovedAttribute("UserString", "UserString", "collections"), + MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"), + MovedAttribute("zip", "itertools", "builtins", "izip", "zip"), + MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"), + + MovedModule("builtins", "__builtin__"), + MovedModule("configparser", "ConfigParser"), + MovedModule("copyreg", "copy_reg"), + MovedModule("dbm_gnu", "gdbm", "dbm.gnu"), + MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread"), + MovedModule("http_cookiejar", "cookielib", "http.cookiejar"), + MovedModule("http_cookies", "Cookie", "http.cookies"), + MovedModule("html_entities", "htmlentitydefs", "html.entities"), + MovedModule("html_parser", "HTMLParser", "html.parser"), + MovedModule("http_client", "httplib", "http.client"), + MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"), + MovedModule("email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart"), + MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"), + MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), + MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), + MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), + MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), + MovedModule("cPickle", "cPickle", "pickle"), + MovedModule("queue", "Queue"), + MovedModule("reprlib", "repr"), + MovedModule("socketserver", "SocketServer"), + MovedModule("_thread", "thread", "_thread"), + MovedModule("tkinter", "Tkinter"), + MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"), + MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"), + MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"), + MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"), + MovedModule("tkinter_tix", 
"Tix", "tkinter.tix"), + MovedModule("tkinter_ttk", "ttk", "tkinter.ttk"), + MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"), + MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"), + MovedModule("tkinter_colorchooser", "tkColorChooser", + "tkinter.colorchooser"), + MovedModule("tkinter_commondialog", "tkCommonDialog", + "tkinter.commondialog"), + MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"), + MovedModule("tkinter_font", "tkFont", "tkinter.font"), + MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"), + MovedModule("tkinter_tksimpledialog", "tkSimpleDialog", + "tkinter.simpledialog"), + MovedModule("urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse"), + MovedModule("urllib_error", __name__ + ".moves.urllib_error", "urllib.error"), + MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"), + MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"), + MovedModule("xmlrpc_client", "xmlrpclib", "xmlrpc.client"), + MovedModule("xmlrpc_server", "SimpleXMLRPCServer", "xmlrpc.server"), + MovedModule("winreg", "_winreg"), +] +for attr in _moved_attributes: + setattr(_MovedItems, attr.name, attr) + if isinstance(attr, MovedModule): + _importer._add_module(attr, "moves." 
+ attr.name) +del attr + +_MovedItems._moved_attributes = _moved_attributes + +moves = _MovedItems(__name__ + ".moves") +_importer._add_module(moves, "moves") + + +class Module_six_moves_urllib_parse(_LazyModule): + """Lazy loading of moved objects in six.moves.urllib_parse""" + + +_urllib_parse_moved_attributes = [ + MovedAttribute("ParseResult", "urlparse", "urllib.parse"), + MovedAttribute("SplitResult", "urlparse", "urllib.parse"), + MovedAttribute("parse_qs", "urlparse", "urllib.parse"), + MovedAttribute("parse_qsl", "urlparse", "urllib.parse"), + MovedAttribute("urldefrag", "urlparse", "urllib.parse"), + MovedAttribute("urljoin", "urlparse", "urllib.parse"), + MovedAttribute("urlparse", "urlparse", "urllib.parse"), + MovedAttribute("urlsplit", "urlparse", "urllib.parse"), + MovedAttribute("urlunparse", "urlparse", "urllib.parse"), + MovedAttribute("urlunsplit", "urlparse", "urllib.parse"), + MovedAttribute("quote", "urllib", "urllib.parse"), + MovedAttribute("quote_plus", "urllib", "urllib.parse"), + MovedAttribute("unquote", "urllib", "urllib.parse"), + MovedAttribute("unquote_plus", "urllib", "urllib.parse"), + MovedAttribute("urlencode", "urllib", "urllib.parse"), + MovedAttribute("splitquery", "urllib", "urllib.parse"), + MovedAttribute("splittag", "urllib", "urllib.parse"), + MovedAttribute("splituser", "urllib", "urllib.parse"), + MovedAttribute("uses_fragment", "urlparse", "urllib.parse"), + MovedAttribute("uses_netloc", "urlparse", "urllib.parse"), + MovedAttribute("uses_params", "urlparse", "urllib.parse"), + MovedAttribute("uses_query", "urlparse", "urllib.parse"), + MovedAttribute("uses_relative", "urlparse", "urllib.parse"), +] +for attr in _urllib_parse_moved_attributes: + setattr(Module_six_moves_urllib_parse, attr.name, attr) +del attr + +Module_six_moves_urllib_parse._moved_attributes = _urllib_parse_moved_attributes + +_importer._add_module(Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse"), + "moves.urllib_parse", 
"moves.urllib.parse") + + +class Module_six_moves_urllib_error(_LazyModule): + """Lazy loading of moved objects in six.moves.urllib_error""" + + +_urllib_error_moved_attributes = [ + MovedAttribute("URLError", "urllib2", "urllib.error"), + MovedAttribute("HTTPError", "urllib2", "urllib.error"), + MovedAttribute("ContentTooShortError", "urllib", "urllib.error"), +] +for attr in _urllib_error_moved_attributes: + setattr(Module_six_moves_urllib_error, attr.name, attr) +del attr + +Module_six_moves_urllib_error._moved_attributes = _urllib_error_moved_attributes + +_importer._add_module(Module_six_moves_urllib_error(__name__ + ".moves.urllib.error"), + "moves.urllib_error", "moves.urllib.error") + + +class Module_six_moves_urllib_request(_LazyModule): + """Lazy loading of moved objects in six.moves.urllib_request""" + + +_urllib_request_moved_attributes = [ + MovedAttribute("urlopen", "urllib2", "urllib.request"), + MovedAttribute("install_opener", "urllib2", "urllib.request"), + MovedAttribute("build_opener", "urllib2", "urllib.request"), + MovedAttribute("pathname2url", "urllib", "urllib.request"), + MovedAttribute("url2pathname", "urllib", "urllib.request"), + MovedAttribute("getproxies", "urllib", "urllib.request"), + MovedAttribute("Request", "urllib2", "urllib.request"), + MovedAttribute("OpenerDirector", "urllib2", "urllib.request"), + MovedAttribute("HTTPDefaultErrorHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPCookieProcessor", "urllib2", "urllib.request"), + MovedAttribute("ProxyHandler", "urllib2", "urllib.request"), + MovedAttribute("BaseHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"), + MovedAttribute("HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request"), + MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"), + 
MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"), + MovedAttribute("FileHandler", "urllib2", "urllib.request"), + MovedAttribute("FTPHandler", "urllib2", "urllib.request"), + MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"), + MovedAttribute("UnknownHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"), + MovedAttribute("urlretrieve", "urllib", "urllib.request"), + MovedAttribute("urlcleanup", "urllib", "urllib.request"), + MovedAttribute("URLopener", "urllib", "urllib.request"), + MovedAttribute("FancyURLopener", "urllib", "urllib.request"), + MovedAttribute("proxy_bypass", "urllib", "urllib.request"), +] +for attr in _urllib_request_moved_attributes: + setattr(Module_six_moves_urllib_request, attr.name, attr) +del attr + +Module_six_moves_urllib_request._moved_attributes = _urllib_request_moved_attributes + +_importer._add_module(Module_six_moves_urllib_request(__name__ + ".moves.urllib.request"), + "moves.urllib_request", "moves.urllib.request") + + +class Module_six_moves_urllib_response(_LazyModule): + """Lazy loading of moved objects in six.moves.urllib_response""" + + +_urllib_response_moved_attributes = [ + MovedAttribute("addbase", "urllib", "urllib.response"), + MovedAttribute("addclosehook", "urllib", "urllib.response"), + MovedAttribute("addinfo", "urllib", "urllib.response"), + MovedAttribute("addinfourl", "urllib", "urllib.response"), +] +for attr in _urllib_response_moved_attributes: + setattr(Module_six_moves_urllib_response, attr.name, attr) +del attr + +Module_six_moves_urllib_response._moved_attributes = 
_urllib_response_moved_attributes + +_importer._add_module(Module_six_moves_urllib_response(__name__ + ".moves.urllib.response"), + "moves.urllib_response", "moves.urllib.response") + + +class Module_six_moves_urllib_robotparser(_LazyModule): + """Lazy loading of moved objects in six.moves.urllib_robotparser""" + + +_urllib_robotparser_moved_attributes = [ + MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"), +] +for attr in _urllib_robotparser_moved_attributes: + setattr(Module_six_moves_urllib_robotparser, attr.name, attr) +del attr + +Module_six_moves_urllib_robotparser._moved_attributes = _urllib_robotparser_moved_attributes + +_importer._add_module(Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib.robotparser"), + "moves.urllib_robotparser", "moves.urllib.robotparser") + + +class Module_six_moves_urllib(types.ModuleType): + """Create a six.moves.urllib namespace that resembles the Python 3 namespace""" + __path__ = [] # mark as package + parse = _importer._get_module("moves.urllib_parse") + error = _importer._get_module("moves.urllib_error") + request = _importer._get_module("moves.urllib_request") + response = _importer._get_module("moves.urllib_response") + robotparser = _importer._get_module("moves.urllib_robotparser") + + def __dir__(self): + return ['parse', 'error', 'request', 'response', 'robotparser'] + +_importer._add_module(Module_six_moves_urllib(__name__ + ".moves.urllib"), + "moves.urllib") + + +def add_move(move): + """Add an item to six.moves.""" + setattr(_MovedItems, move.name, move) + + +def remove_move(name): + """Remove item from six.moves.""" + try: + delattr(_MovedItems, name) + except AttributeError: + try: + del moves.__dict__[name] + except KeyError: + raise AttributeError("no such move, %r" % (name,)) + + +if PY3: + _meth_func = "__func__" + _meth_self = "__self__" + + _func_closure = "__closure__" + _func_code = "__code__" + _func_defaults = "__defaults__" + _func_globals = "__globals__" +else: + 
_meth_func = "im_func" + _meth_self = "im_self" + + _func_closure = "func_closure" + _func_code = "func_code" + _func_defaults = "func_defaults" + _func_globals = "func_globals" + + +try: + advance_iterator = next +except NameError: + def advance_iterator(it): + return it.next() +next = advance_iterator + + +try: + callable = callable +except NameError: + def callable(obj): + return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) + + +if PY3: + def get_unbound_function(unbound): + return unbound + + create_bound_method = types.MethodType + + Iterator = object +else: + def get_unbound_function(unbound): + return unbound.im_func + + def create_bound_method(func, obj): + return types.MethodType(func, obj, obj.__class__) + + class Iterator(object): + + def next(self): + return type(self).__next__(self) + + callable = callable +_add_doc(get_unbound_function, + """Get the function out of a possibly unbound function""") + + +get_method_function = operator.attrgetter(_meth_func) +get_method_self = operator.attrgetter(_meth_self) +get_function_closure = operator.attrgetter(_func_closure) +get_function_code = operator.attrgetter(_func_code) +get_function_defaults = operator.attrgetter(_func_defaults) +get_function_globals = operator.attrgetter(_func_globals) + + +if PY3: + def iterkeys(d, **kw): + return iter(d.keys(**kw)) + + def itervalues(d, **kw): + return iter(d.values(**kw)) + + def iteritems(d, **kw): + return iter(d.items(**kw)) + + def iterlists(d, **kw): + return iter(d.lists(**kw)) + + viewkeys = operator.methodcaller("keys") + + viewvalues = operator.methodcaller("values") + + viewitems = operator.methodcaller("items") +else: + def iterkeys(d, **kw): + return iter(d.iterkeys(**kw)) + + def itervalues(d, **kw): + return iter(d.itervalues(**kw)) + + def iteritems(d, **kw): + return iter(d.iteritems(**kw)) + + def iterlists(d, **kw): + return iter(d.iterlists(**kw)) + + viewkeys = operator.methodcaller("viewkeys") + + viewvalues = 
operator.methodcaller("viewvalues") + + viewitems = operator.methodcaller("viewitems") + +_add_doc(iterkeys, "Return an iterator over the keys of a dictionary.") +_add_doc(itervalues, "Return an iterator over the values of a dictionary.") +_add_doc(iteritems, + "Return an iterator over the (key, value) pairs of a dictionary.") +_add_doc(iterlists, + "Return an iterator over the (key, [values]) pairs of a dictionary.") + + +if PY3: + def b(s): + return s.encode("latin-1") + def u(s): + return s + unichr = chr + if sys.version_info[1] <= 1: + def int2byte(i): + return bytes((i,)) + else: + # This is about 2x faster than the implementation above on 3.2+ + int2byte = operator.methodcaller("to_bytes", 1, "big") + byte2int = operator.itemgetter(0) + indexbytes = operator.getitem + iterbytes = iter + import io + StringIO = io.StringIO + BytesIO = io.BytesIO + _assertCountEqual = "assertCountEqual" + _assertRaisesRegex = "assertRaisesRegex" + _assertRegex = "assertRegex" +else: + def b(s): + return s + # Workaround for standalone backslash + def u(s): + return unicode(s.replace(r'\\', r'\\\\'), "unicode_escape") + unichr = unichr + int2byte = chr + def byte2int(bs): + return ord(bs[0]) + def indexbytes(buf, i): + return ord(buf[i]) + iterbytes = functools.partial(itertools.imap, ord) + import StringIO + StringIO = BytesIO = StringIO.StringIO + _assertCountEqual = "assertItemsEqual" + _assertRaisesRegex = "assertRaisesRegexp" + _assertRegex = "assertRegexpMatches" +_add_doc(b, """Byte literal""") +_add_doc(u, """Text literal""") + + +def assertCountEqual(self, *args, **kwargs): + return getattr(self, _assertCountEqual)(*args, **kwargs) + + +def assertRaisesRegex(self, *args, **kwargs): + return getattr(self, _assertRaisesRegex)(*args, **kwargs) + + +def assertRegex(self, *args, **kwargs): + return getattr(self, _assertRegex)(*args, **kwargs) + + +if PY3: + exec_ = getattr(moves.builtins, "exec") + + + def reraise(tp, value, tb=None): + if value is None: + value = tp() + if 
value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + +else: + def exec_(_code_, _globs_=None, _locs_=None): + """Execute code in a namespace.""" + if _globs_ is None: + frame = sys._getframe(1) + _globs_ = frame.f_globals + if _locs_ is None: + _locs_ = frame.f_locals + del frame + elif _locs_ is None: + _locs_ = _globs_ + exec("""exec _code_ in _globs_, _locs_""") + + + exec_("""def reraise(tp, value, tb=None): + raise tp, value, tb +""") + + +if sys.version_info[:2] == (3, 2): + exec_("""def raise_from(value, from_value): + if from_value is None: + raise value + raise value from from_value +""") +elif sys.version_info[:2] > (3, 2): + exec_("""def raise_from(value, from_value): + raise value from from_value +""") +else: + def raise_from(value, from_value): + raise value + + +print_ = getattr(moves.builtins, "print", None) +if print_ is None: + def print_(*args, **kwargs): + """The new-style print function for Python 2.4 and 2.5.""" + fp = kwargs.pop("file", sys.stdout) + if fp is None: + return + def write(data): + if not isinstance(data, basestring): + data = str(data) + # If the file has an encoding, encode unicode with it. 
+ if (isinstance(fp, file) and + isinstance(data, unicode) and + fp.encoding is not None): + errors = getattr(fp, "errors", None) + if errors is None: + errors = "strict" + data = data.encode(fp.encoding, errors) + fp.write(data) + want_unicode = False + sep = kwargs.pop("sep", None) + if sep is not None: + if isinstance(sep, unicode): + want_unicode = True + elif not isinstance(sep, str): + raise TypeError("sep must be None or a string") + end = kwargs.pop("end", None) + if end is not None: + if isinstance(end, unicode): + want_unicode = True + elif not isinstance(end, str): + raise TypeError("end must be None or a string") + if kwargs: + raise TypeError("invalid keyword arguments to print()") + if not want_unicode: + for arg in args: + if isinstance(arg, unicode): + want_unicode = True + break + if want_unicode: + newline = unicode("\n") + space = unicode(" ") + else: + newline = "\n" + space = " " + if sep is None: + sep = space + if end is None: + end = newline + for i, arg in enumerate(args): + if i: + write(sep) + write(arg) + write(end) +if sys.version_info[:2] < (3, 3): + _print = print_ + def print_(*args, **kwargs): + fp = kwargs.get("file", sys.stdout) + flush = kwargs.pop("flush", False) + _print(*args, **kwargs) + if flush and fp is not None: + fp.flush() + +_add_doc(reraise, """Reraise an exception.""") + +if sys.version_info[0:2] < (3, 4): + def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS, + updated=functools.WRAPPER_UPDATES): + def wrapper(f): + f = functools.wraps(wrapped, assigned, updated)(f) + f.__wrapped__ = wrapped + return f + return wrapper +else: + wraps = functools.wraps + +def with_metaclass(meta, *bases): + """Create a base class with a metaclass.""" + # This requires a bit of explanation: the basic idea is to make a dummy + # metaclass for one level of class instantiation that replaces itself with + # the actual metaclass. 
+ class metaclass(meta): + def __new__(cls, name, this_bases, d): + return meta(name, bases, d) + return type.__new__(metaclass, 'temporary_class', (), {}) + + +def add_metaclass(metaclass): + """Class decorator for creating a class with a metaclass.""" + def wrapper(cls): + orig_vars = cls.__dict__.copy() + slots = orig_vars.get('__slots__') + if slots is not None: + if isinstance(slots, str): + slots = [slots] + for slots_var in slots: + orig_vars.pop(slots_var) + orig_vars.pop('__dict__', None) + orig_vars.pop('__weakref__', None) + return metaclass(cls.__name__, cls.__bases__, orig_vars) + return wrapper + + +def python_2_unicode_compatible(klass): + """ + A decorator that defines __unicode__ and __str__ methods under Python 2. + Under Python 3 it does nothing. + + To support Python 2 and 3 with a single code base, define a __str__ method + returning text and apply this decorator to the class. + """ + if PY2: + if '__str__' not in klass.__dict__: + raise ValueError("@python_2_unicode_compatible cannot be applied " + "to %s because it doesn't define __str__()." % + klass.__name__) + klass.__unicode__ = klass.__str__ + klass.__str__ = lambda self: self.__unicode__().encode('utf-8') + return klass + + +# Complete the moves implementation. +# This code is at the end of this module to speed up module loading. +# Turn this module into a package. +__path__ = [] # required for PEP 302 and PEP 451 +__package__ = __name__ # see PEP 366 @ReservedAssignment +if globals().get("__spec__") is not None: + __spec__.submodule_search_locations = [] # PEP 451 @UndefinedVariable +# Remove other six meta path importers, since they cause problems. This can +# happen if six is removed from sys.modules and then reloaded. (Setuptools does +# this for some reason.) +if sys.meta_path: + for i, importer in enumerate(sys.meta_path): + # Here's some real nastiness: Another "instance" of the six module might + # be floating around. 
Therefore, we can't use isinstance() to check for + # the six meta path importer, since the other six instance will have + # inserted an importer with different class. + if (type(importer).__name__ == "_SixMetaPathImporter" and + importer.name == __name__): + del sys.meta_path[i] + break + del i, importer +# Finally, add the importer to the meta path import hook. +sys.meta_path.append(_importer) diff --git a/uszipcode/searchengine.py b/uszipcode/searchengine.py index 4e15f9fb81..0c067b050b 100644 --- a/uszipcode/searchengine.py +++ b/uszipcode/searchengine.py @@ -4,32 +4,38 @@ import json import math import sqlite3 -from heapq import * +from heapq import heappush, heappop +from functools import total_ordering from collections import OrderedDict try: from .data import ( DB_FILE, STATE_ABBR_SHORT_TO_LONG, STATE_ABBR_LONG_TO_SHORT) from .packages.haversine import great_circle - from .packages.fuzzywuzzy.process import extractOne + from .packages.fuzzywuzzy.process import extract, extractOne + from .packages.six import integer_types, string_types except: from uszipcode.data import ( DB_FILE, STATE_ABBR_SHORT_TO_LONG, STATE_ABBR_LONG_TO_SHORT) from uszipcode.packages.haversine import great_circle - from uszipcode.packages.fuzzywuzzy.process import extractOne + from uszipcode.packages.fuzzywuzzy.process import extract, extractOne + from uszipcode.packages.six import integer_types, string_types +@total_ordering class Zipcode(object): - """Zipcode class. Attributes includes: + """Zipcode data container class. 
+ + Attributes: - Zipcode: 5 digits string zipcode - ZipcodeType: Standard or Po Box - City: city full name - State: 2 letter short state name - Population: estimate population - - Density: estimate population per square miles (on land only) + - Density: estimate population per square miles (on land only) - TotalWages: estimate annual total wage - - Wealthy: estimate average annual wage + - Wealthy: estimate average annual wage = TotalWages/Population - HouseOfUnits: estimate number of house unit - LandArea: land area in square miles - WaterArea: marine area in square miles @@ -40,27 +46,82 @@ class Zipcode(object): - SWBoundLatitude: south west bound latitude - SWBoungLongitude: south west bound longitude - There are two method you may need: + Data type converter methods: - - You can use :meth:`~Zipcode.to_json` method to return json encoded string. - - You can use :meth:`~Zipcode.to_dict` method to return dictionary data. - """ + - You can use :meth:`~Zipcode.to_json()` method to return json encoded string. + - You can use :meth:`~Zipcode.to_dict()` method to return dictionary data. + - You can use :meth:`~Zipcode.to_OrderedDict()` method to return ordered dictionary data. + - You can use :meth:`~Zipcode.keys()` method to return available attribute list. + - You can use :meth:`~Zipcode.values()` method to return attributes' values. - def __init__(self, keys, values): - for k, v in zip(keys, values): - object.__setattr__(self, k, v) - try: - self.Density = self.Population / self.LandArea - except: - self.Density = None - try: - self.Wealthy = self.TotalWages / self.Population - except: - self.Wealthy = None + It is hashable, sortable. So ``sort`` and ``set`` method is supported.
+ """ + __keys__ = [ + "Zipcode", + "ZipcodeType", + "City", + "State", + "Population", + "Density", + "TotalWages", + "Wealthy", + "HouseOfUnits", + "LandArea", + "WaterArea", + "Latitude", + "Longitude", + "NEBoundLatitude", + "NEBoundLongitude", + "SWBoundLatitude", + "SWBoungLongitude", + ] + + def __init__(self, + Zipcode=None, # 5 digits string zipcode + ZipcodeType=None, # Standard or Po Box + City=None, # city full name + State=None, # 2 letter short state name + Population=None, # estimate population + # estimate population per square miles (on land only) + Density=None, + TotalWages=None, # estimate annual total wage + Wealthy=None, # estimate average annual wage = TotalWages/Population + HouseOfUnits=None, # estimate number of house unit + LandArea=None, # land area in square miles + WaterArea=None, # marine area in square miles + Latitude=None, # latitude + Longitude=None, # longitude + NEBoundLatitude=None, # north east bound latitude + NEBoundLongitude=None, # north east bound longitude + SWBoundLatitude=None, # south west bound latitude + SWBoungLongitude=None, # south west bound longitude + *args, + **kwargs + ): + self.Zipcode = Zipcode + self.ZipcodeType = ZipcodeType + self.City = City + self.State = State + self.Population = Population + self.Density = Density + self.TotalWages = TotalWages + self.Wealthy = Wealthy + self.HouseOfUnits = HouseOfUnits + self.LandArea = LandArea + self.WaterArea = WaterArea + self.Latitude = Latitude + self.Longitude = Longitude + self.NEBoundLatitude = NEBoundLatitude + self.NEBoundLongitude = NEBoundLongitude + self.SWBoundLatitude = SWBoundLatitude + self.SWBoungLongitude = SWBoungLongitude + + @classmethod + def _make(cls, keys, values): + return cls(**dict(zip(keys, values))) def __str__(self): - return json.dumps(self.__dict__, - sort_keys=True, indent=4, separators=(",", ": ")) + return json.dumps(self.__dict__, sort_keys=True, indent=4) def __repr__(self): return json.dumps(self.__dict__, sort_keys=True) @@ 
-69,22 +130,76 @@ def __getitem__(self, key): return self.__dict__[key] def to_dict(self): + """To Python Dictionary. + """ return self.__dict__ + def to_OrderedDict(self): + """To Python OrderedDict. + """ + od = OrderedDict() + for key in Zipcode.__keys__: + od[key] = self.__dict__.get(key) + return od + + def __iter__(self): + return iter(self.values()) + + def keys(self): + """Return Zipcode's available attributes' name in list. + """ + return list(Zipcode.__keys__) + + def values(self): + """Return Zipcode's available attributes' value in list. + """ + values = list() + for key in Zipcode.__keys__: + values.append(self.__dict__.get(key)) + return values + + def items(self): + """Return Zipcode's available attributes' name value pair in list. + """ + items = list() + for key in Zipcode.__keys__: + items.append((key, self.__dict__.get(key))) + return items + def to_json(self): + """To json string. + """ return self.__str__() def __nonzero__(self): - if "Zipcode" in self.__dict__: - return True - else: - return False + """For Python2 bool() method. + """ + return self.Zipcode is not None def __bool__(self): - if "Zipcode" in self.__dict__: - return True + """For Python3 bool() method. + """ + return self.Zipcode is not None + + def __lt__(self, other): + """For < comparison operator. + """ + if (self.Zipcode is None) or (other.Zipcode is None): + raise ValueError( + "Empty Zipcode instance doesn't support comparison.") else: - return False + return self.Zipcode < other.Zipcode + + def __eq__(self, other): + """For == comparison operator.
+ """ + return self.Zipcode is other.Zipcode + + def __hash__(self): + """For hash() method + """ + return hash(self.__dict__["Zipcode"]) + _DEFAULT_LIMIT = 5 @@ -109,7 +224,8 @@ class ZipcodeSearchEngine(object): - :meth:`ZipcodeSearchEngine.by_wealthy` - :meth:`ZipcodeSearchEngine.by_house` """ - _standard_only_param = "AND ZipcodeType = 'Standard'" + DEFAULT_SQL = "SELECT * FROM zipcode" + DEFAULT_LIMIT = 5 def __init__(self): self.connect = sqlite3.connect(DB_FILE) @@ -118,6 +234,8 @@ def __init__(self): self.all_column = [record[1] for record in self.cursor.execute( "PRAGMA table_info(zipcode)")] + self.all_column_lowercase = [column.lower() + for column in self.all_column] self.all_state_short = [key for key in STATE_ABBR_SHORT_TO_LONG] self.all_state_long = [value for value in STATE_ABBR_LONG_TO_SHORT] @@ -129,57 +247,237 @@ def __exit__(self, *exc_info): def close(self): """Closs engine. + + **中文文档** + + 断开与数据库的连接。 """ self.connect.close() - def get_sortby_sql(self, sortby, descending): - """Construct an ORDER BY SQL. + def _find_column(self, name): + """Find matching column name. If failed, return None. + + **中文文档** + + 找到与之匹配的正确的column name。 """ - if sortby in self.all_column: - if descending: - sortby_sql = " ORDER BY %s DESC" % sortby - else: - sortby_sql = " ORDER BY %s ASC" % sortby + name = name.lower().strip() + for column, column_lower in zip(self.all_column, self.all_column_lowercase): + if name == column_lower: + return column + return None + + #--- SQL Helper --- + def _sql_create_order_by(self, sort_by, ascending): + """Construct an 'ORDER BY' sql clause. 
+ + :param sort_by: str or list of str, the column you want to sort + :param ascending: bool or list of bool + + **中文文档** + + 创建sql中的ORDER BY的部分。 + """ + if sort_by is None: + return "" + + chunks = list() + if isinstance(sort_by, string_types) and isinstance(ascending, bool): + column = self._find_column(sort_by) + if column is not None: + if ascending: + chunks.append("%s ASC" % column) + else: + chunks.append("%s DESC" % column) + elif isinstance(sort_by, (tuple, list)) and isinstance(ascending, (tuple, list)) and (len(sort_by) == len(ascending)): + for column, order in zip(sort_by, ascending): + column = self._find_column(column) + if column is not None: + if order: + chunks.append("%s ASC" % column) + else: + chunks.append("%s DESC" % column) + else: + raise ValueError("invalid 'sort_by', 'descending' input.") + + if len(chunks): + return "\n\tORDER BY %s" % ", ".join(chunks) else: - sortby_sql = "" - return sortby_sql + return "" + + def _sql_create_limit(self, returns): + """Construct a 'LIMIT' sql clause. + + :param returns: int - def get_limit_sql(self, returns): - """Construct an LIMIT XXX SQL. + **中文文档** + + 创建sql中的LIMIT的部分。 """ - if not isinstance(returns, int): - raise TypeError("returns argument has to be an integer.") + if not isinstance(returns, integer_types): + raise TypeError("'returns' argument has to be an integer.") if returns >= 1: - limit_sql = "LIMIT %s" % returns + return "\n\tLIMIT %s" % returns else: - limit_sql = "" + return "" + + def _sql_create_lower_upper(self, column, lower, upper): + """Return >= and <= sql part.
- return limit_sql + **中文文档** + + 返回SQL中用于比较值的部分。 + """ + if (lower is None) and (upper is None): + raise ValueError("'lower' and 'upper' cannot both be None!") + + sql_chunks = list() + + if isinstance(lower, (integer_types, float)): + sql_chunks.append("%s >= %s" % (column, lower)) + elif lower is None: + pass + else: + raise ValueError("'lower' and 'upper' has to be number or None!") + if (upper is not None) and isinstance(upper, (integer_types, float)): + sql_chunks.append("%s <= %s" % (column, upper)) + elif upper is None: + pass + else: + raise ValueError("'lower' and 'upper' has to be number or None!") + + return " AND ".join(sql_chunks) + + def _sql_modify_order_by(self, sql, sort_by, ascending): + return sql + self._sql_create_order_by(sort_by, ascending) + + def _sql_modify_limit(self, sql, returns): + return sql + self._sql_create_limit(returns) + + def _sql_modify_standard_only(self, sql, standard_only): + if standard_only: + if "WHERE" in sql: + return sql.replace("WHERE", "WHERE ZipcodeType = 'Standard' AND") + else: + return sql + "\n\tWHERE ZipcodeType = 'Standard'" + else: + return sql + + def export_to_csv(self, res, abspath): + """Write result to csv file. + + **中文文档** + + 将查询到的Zipcode结果写入csv文件。 + """ + import csv + + with open(abspath, "w") as csvfile: + writer = csv.DictWriter(csvfile, + delimiter=',', lineterminator="\n", + fieldnames=Zipcode.__keys__, + ) + writer.writeheader() + for z in res: + writer.writerow(z.to_dict()) + + #--- Search MetaData --- + def _find_state(self, state, best_match=True): + """Fuzzy search correct state. + + :param multiple: bool, when False, only one state will return. + otherwise, will return all matching states. 
+ """ + result = list() + + # check if it is a abbreviate name + if state.upper() in self.all_state_short: + result.append(state.upper()) + # if not, find out what is the state that user looking for + else: + if best_match: + choice, confidence = extractOne( + state.lower(), self.all_state_long) + if confidence >= 70: + result.append(STATE_ABBR_LONG_TO_SHORT[choice]) + else: + for choice, confidence in extract(state.lower(), self.all_state_long): + if confidence >= 70: + result.append(STATE_ABBR_LONG_TO_SHORT[choice]) + + if len(result) == 0: + message = ("'%s' is not a valid state name, use 2 letter " + "short name or correct full name please.") + raise ValueError(message % state) + + return result + + def _find_city(self, city, state=None, best_match=True): + """Fuzzy search correct city. + + **中文文档** + + 如果给定了state, 则只在state里的城市中寻找, 否则, 在全国所有的城市中 + 寻找。 + """ + # find out what is the city that user looking for + if state: + state = self._find_state(state, best_match=True)[0] + select_sql = "SELECT DISTINCT City FROM zipcode WHERE State == '%s'" % state + else: + select_sql = "SELECT DISTINCT City FROM zipcode" + + all_city = [row[0] for row in self.cursor.execute(select_sql)] + if len(all_city) == 0: + raise ValueError("No city is available in state('%s')" % state) + + result = list() + + if best_match: + choice, confidence = extractOne(city.lower(), all_city) + if confidence >= 70: + result.append(choice) + else: + for choice, confidence in extract(city.lower(), all_city): + if confidence >= 70: + result.append(choice) + + if len(result) == 0: + raise ValueError("'%s' is not a valid city name" % city) + + return result + + #--- Search --- def by_zipcode(self, zipcode, standard_only=True): """Search zipcode information. 
:param zipcode: integer or string zipcode, no zero pad needed - :param standard_only: boolean, default True, only returns standard + :param standard_only: bool, default True, only returns standard type zipcode + + **中文文档** + + 查询某一个Zipcode的具体信息。 """ # convert zipcode to 5 digits string zipcode = ("%s" % zipcode).zfill(5) # execute query select_sql = "SELECT * FROM zipcode WHERE Zipcode = '%s'" % zipcode - if standard_only: - select_sql = select_sql + self._standard_only_param + select_sql = self._sql_modify_standard_only(select_sql, standard_only) - res = list(self.cursor.execute(select_sql)) - if len(res) > 0: - return Zipcode(self.all_column, list(res[0])) + res = self.cursor.execute(select_sql).fetchall() + if len(res) == 1: + return Zipcode(**res[0]) + elif len(res) == 0: + return Zipcode() else: - return Zipcode([], []) + raise Exception("by_zipcode can not return multiple zipcode!") - def by_coordinate(self, lat, lng, radius=20, standard_only=True, - returns=_DEFAULT_LIMIT): + def by_coordinate(self, lat, lng, radius=50.0, ascending=True, standard_only=True, + returns=DEFAULT_LIMIT): """Search zipcode information near a coordinate on a map. May return multiple results. 
@@ -187,59 +485,25 @@ def by_coordinate(self, lat, lng, radius=20, standard_only=True, :param lng: center lngitude :param radius: for the inside implementation only, search zipcode within #radius units of lat, lng - :param standard_only: boolean, default True, only returns standard + :param standard_only: bool, default True, only returns standard type zipcode :param returns: returns at most how many results - """ - # define lat lng boundary - dist_btwn_lat_deg = 69.172 - dist_btwn_lon_deg = math.cos(lat) * 69.172 - lat_degr_rad = abs(radius * 1.0 / dist_btwn_lat_deg) - lon_degr_rad = abs(radius * 1.0 / dist_btwn_lon_deg) - lat_lower = lat - lat_degr_rad - lat_upper = lat + lat_degr_rad - lng_lower = lng - lon_degr_rad - lng_upper = lng + lon_degr_rad + **中文文档** - # execute query - select_sql = \ - """ - SELECT * FROM zipcode - WHERE - Latitude >= %s - AND Latitude <= %s - AND Longitude >= %s - AND Longitude <= %s - """ % (lat_lower, lat_upper, lng_lower, lng_upper) - if standard_only: - select_sql = select_sql + self._standard_only_param - - # use heap sort find 5 closest zipcode - heap = list() - for row in self.cursor.execute(select_sql): - dist = great_circle( - (row["Latitude"], row["Longitude"]), (lat, lng)) - heappush(heap, [dist, ] + list(row)) - - # generate results - res = list() - if returns >= 1: - for i in range(returns): - try: - res.append( - Zipcode(self.all_column, heappop(heap)[1:]) - ) - except: - pass - elif returns == 0: - while heap: - res.append( - Zipcode(self.all_column, heappop(heap)[1:]) - ) - return res + 1. 计算出在中心坐标处, 每一经度和纬度分别代表多少miles。 + 2. 以给定坐标为中心, 画出一个矩形, 长宽分别为半径的1.2倍左右, 找到该 + 矩形内所有的Zipcode。 + 3. 
对这些Zipcode计算出他们的距离, 然后按照距离远近排序。距离超过我们 + 限定的半径的直接丢弃。 + """ + return self.find(lat=lat, lng=lng, radius=radius, standard_only=standard_only, sort_by=None, ascending=ascending, returns=returns) - def by_city_and_state(self, city, state, standard_only=True): + def by_city_and_state(self, city, state, + standard_only=True, + sort_by="ZipCode", ascending=True, + returns=DEFAULT_LIMIT, + ): """Search zipcode information by City and State name. You can use either short state name and long state name. My engine use @@ -247,358 +511,508 @@ def by_city_and_state(self, city, state, standard_only=True): :param city: city name. :param state: 2 letter short name or long name. - :param standard_only: boolean, default True, only returns standard + :param standard_only: bool, default True, only returns standard type zipcode - """ - # check if it is a abbreviate name - if state.upper() in self.all_state_short: - state = state.upper() - # if not, find out what is the state that user looking for - else: - choice, confidence = extractOne(state.lower(), self.all_state_long) - if confidence < 70: - raise Exception("'%s' is not a valid statename, use 2 letter " - "short name or correct full name please." % state) - state = STATE_ABBR_LONG_TO_SHORT[choice] - - # find out what is the city that user looking for - select_sql = "SELECT City FROM zipcode WHERE State == '%s'" % state - all_city = [record[0] for record in self.cursor.execute(select_sql)] - - choice, confidence = extractOne(city.lower(), all_city) - if confidence < 70: - raise Exception("Cannot found '%s' in '%s'." 
% (city, state)) - else: - city = choice - - # execute query - select_sql = \ - """ - SELECT * FROM zipcode - WHERE - City = '%s' - AND State = '%s' - """ % (city, state) - if standard_only: - select_sql = select_sql + self._standard_only_param - - res = list() - for row in self.cursor.execute(select_sql): - res.append(Zipcode(self.all_column, list(row))) - - return res + :param sortby: str or list of str, default ``"Zipcode"`` + :param ascending: bool or list of bool, default True + :param returns: int, default 5 - def by_city(self, city, standard_only=True): - """Search zipcode information by City and State name. + **中文文档** - My engine use fuzzy match and guess what is you want. + 根据城市和州, 模糊查询。 + """ + return self.find(city=city, + state=state, + standard_only=standard_only, + sort_by=sort_by, + ascending=ascending, + returns=returns) + + def by_city(self, city, + standard_only=True, + sort_by="ZipCode", ascending=True, + returns=DEFAULT_LIMIT, + ): + """Search zipcode information by City name. + + My engine use fuzzy match and guess what is the city you want. :param city: city name. - :param standard_only: boolean, default True, only returns standard + :param standard_only: bool, default True, only returns standard type zipcode - """ - # find out what is the city that user looking for - select_sql = "SELECT City FROM zipcode WHERE City == '%s'" % city - all_city = [record[0] for record in self.cursor.execute(select_sql)] - - choice, confidence = extractOne(city.lower(), all_city) - if confidence < 70: - raise Exception("Cannot found '%s' in '%s'." 
% (city, state)) - else: - city = choice - - # execute query - select_sql = \ - """ - SELECT * FROM zipcode - WHERE City = '%s' - """ % (city,) - if standard_only: - select_sql = select_sql + self._standard_only_param - - res = list() - for row in self.cursor.execute(select_sql): - res.append(Zipcode(self.all_column, list(row))) + :param sortby: str or list of str, default ``"Zipcode"`` + :param ascending: bool or list of bool, default True + :param returns: int, default 5 - return res + **中文文档** - def by_state(self, state, standard_only=True): + 根据城市, 模糊查询。 + """ + return self.find(city=city, + standard_only=standard_only, + sort_by=sort_by, + ascending=ascending, + returns=returns) + + def by_state(self, state, + standard_only=True, + sort_by="ZipCode", ascending=True, + returns=DEFAULT_LIMIT, + ): """Search zipcode information by State name. You can use either short state name and long state name. My engine use - fuzzy match and guess what is you want. + fuzzy match and guess what is the state you want. :param state: 2 letter short name or long name. - :param standard_only: boolean, default True, only returns standard + :param standard_only: bool, default True, only returns standard type zipcode - """ - # check if it is a abbreviate name - if state.upper() in self.all_state_short: - state = state.upper() - # if not, find out what is the state that user looking for - else: - choice, confidence = extractOne(state.lower(), self.all_state_long) - if confidence < 70: - raise Exception("'%s' is not a valid statename, use 2 letter " - "short name or correct full name please." 
% state) - state = STATE_ABBR_LONG_TO_SHORT[choice] - - # execute query - select_sql = \ - """ - SELECT * FROM zipcode - WHERE State = '%s' - """ % (state,) - if standard_only: - select_sql = select_sql + self._standard_only_param - - res = list() - for row in self.cursor.execute(select_sql): - res.append(Zipcode(self.all_column, list(row))) + :param sortby: str or list of str, default ``"Zipcode"`` + :param ascending: bool or list of bool, default True + :param returns: int, default 5 - return res + **中文文档** - def by_prefix(self, prefix, standard_only=True, - sortby="ZipCode", descending=False, returns=_DEFAULT_LIMIT): + 根据州, 模糊查询。 + """ + return self.find(state=state, + standard_only=standard_only, + sort_by=sort_by, + ascending=ascending, + returns=returns) + + def by_prefix(self, prefix, + standard_only=True, + sort_by="ZipCode", ascending=True, + returns=DEFAULT_LIMIT, + ): """Search zipcode information by first N numbers. :param prefix: first N zipcode number - :param standard_only: boolean, default True, only returns standard + :param standard_only: bool, default True, only returns standard type zipcode - :param sortby: string, default ``"Zipcode"`` - :param descending: boolean, default False + :param sortby: str or list of str, default ``"Zipcode"`` + :param ascending: bool or list of bool, default True :param returns: int, default 5 - """ - # exam input - if not isinstance(prefix, str): - raise TypeError("prefix has to be a string") - if not prefix.isdigit(): - raise ValueError("prefix has to be a 1-5 letter digits") - # execute query - select_sql = "SELECT * FROM zipcode WHERE Zipcode LIKE '%s%%' " % prefix - if standard_only: - select_sql = select_sql + self._standard_only_param - select_sql = select_sql + self.get_sortby_sql(sortby, descending) - select_sql = select_sql + self.get_limit_sql(returns) + **中文文档** - res = list() - for row in self.cursor.execute(select_sql): - res.append(Zipcode(self.all_column, list(row))) - - return res - - def 
by_pattern(self, pattern, standard_only=True, - sortby="ZipCode", descending=False, returns=_DEFAULT_LIMIT): - """Search zipcode information by first N numbers. + 根据Zipcode的前面几个字符模糊查询。 + """ + # exam input + return self.find(prefix=prefix, + standard_only=standard_only, + sort_by=sort_by, + ascending=ascending, + returns=returns) + + def by_pattern(self, pattern, + standard_only=True, + sort_by="ZipCode", ascending=True, + returns=DEFAULT_LIMIT, + ): + """Search zipcode by wildcard. :param prefix: first N zipcode number - :param standard_only: boolean, default True, only returns standard + :param standard_only: bool, default True, only returns standard type zipcode - :param sortby: string, default ``"Zipcode"`` - :param descending: boolean, default False + :param sortby: str or list of str, default ``"Zipcode"`` + :param ascending: bool or list of bool, default True :param returns: int, default 5 - """ - # exam input - if not isinstance(pattern, str): - raise TypeError("prefix has to be a string") - if not pattern.isdigit(): - raise ValueError("prefix has to be a 1-5 letter digits") - # execute query - select_sql = "SELECT * FROM zipcode WHERE Zipcode LIKE '%%%s%%' " % pattern - if standard_only: - select_sql = select_sql + self._standard_only_param - select_sql = select_sql + self.get_sortby_sql(sortby, descending) - select_sql = select_sql + self.get_limit_sql(returns) - - res = list() - for row in self.cursor.execute(select_sql): - res.append(Zipcode(self.all_column, list(row))) - - return res + **中文文档** - def by_population(self, lower=-1, upper=2**30, standard_only=True, - sortby="ZipCode", descending=False, returns=_DEFAULT_LIMIT): + 根据Zipcode的中间的字符模糊查询。 + """ + return self.find(pattern=pattern, + standard_only=standard_only, + sort_by=sort_by, + ascending=ascending, + returns=returns) + + def by_population(self, lower=-1, upper=2**30, + standard_only=True, + sort_by="ZipCode", ascending=True, + returns=DEFAULT_LIMIT, + ): """Search zipcode information by 
population range. :param lower: minimal population :param upper: maximum population - :param standard_only: boolean, default True, only returns standard + :param standard_only: bool, default True, only returns standard type zipcode - :param sortby: string, default ``"Zipcode"`` - :param descending: boolean, default False + :param sortby: str or list of str, default ``"Zipcode"`` + :param ascending: bool or list of bool, default True :param returns: int, default 5 - """ - select_sql = \ - """ - SELECT * FROM zipcode WHERE - Population >= %f AND Population <= %f - """ % (lower, upper) - if standard_only: - select_sql = select_sql + self._standard_only_param - select_sql = select_sql + self.get_sortby_sql(sortby, descending) - select_sql = select_sql + self.get_limit_sql(returns) - res = list() - for row in self.cursor.execute(select_sql): - res.append(Zipcode(self.all_column, list(row))) - return res + **中文文档** - def by_density(self, lower=-1, upper=2**30, standard_only=True, - sortby="ZipCode", descending=False, returns=_DEFAULT_LIMIT): + 根据人口的上下限查询。 + """ + return self.find(population_lower=lower, + population_upper=upper, + standard_only=standard_only, + sort_by=sort_by, + ascending=ascending, + returns=returns) + + def by_density(self, lower=-1, upper=2**30, + standard_only=True, + sort_by="ZipCode", ascending=True, + returns=DEFAULT_LIMIT, + ): """Search zipcode information by population density range. 
- population density = population / per square miles + population density = population per square miles :param lower: minimal population :param upper: maximum population - :param standard_only: boolean, default True, only returns standard + :param standard_only: bool, default True, only returns standard type zipcode - :param sortby: string, default ``"Zipcode"`` - :param descending: boolean, default False + :param sortby: str or list of str, default ``"Zipcode"`` + :param ascending: bool or list of bool, default True :param returns: int, default 5 - """ - select_sql = \ - """ - SELECT * FROM zipcode WHERE Density >= %f AND Density <= %f - """ % (lower, upper) - if standard_only: - select_sql = select_sql + self._standard_only_param - select_sql = select_sql + self.get_sortby_sql(sortby, descending) - select_sql = select_sql + self.get_limit_sql(returns) - res = list() - for row in self.cursor.execute(select_sql): - res.append(Zipcode(self.all_column, list(row))) - return res + **中文文档** - def by_landarea(self, lower=-1, upper=2**30, standard_only=True, - sortby="ZipCode", descending=False, returns=_DEFAULT_LIMIT): + 根据每平方Mile的人口密度模糊查询。 + """ + return self.find(density_lower=lower, + density_upper=upper, + standard_only=standard_only, + sort_by=sort_by, + ascending=ascending, + returns=returns) + + def by_landarea(self, lower=-1, upper=2**30, + standard_only=True, + sort_by="ZipCode", ascending=True, + returns=DEFAULT_LIMIT, + ): """Search zipcode information by landarea range. 
- :param lower: minimal landarea - :param upper: maximum landarea - :param standard_only: boolean, default True, only returns standard + :param lower: minimal landarea in sqrt miles + :param upper: maximum landarea in sqrt miles + :param standard_only: bool, default True, only returns standard type zipcode - :param sortby: string, default ``"Zipcode"`` - :param descending: boolean, default False + :param sortby: str or list of str, default ``"Zipcode"`` + :param ascending: bool or list of bool, default True :param returns: int, default 5 - """ - select_sql = \ - """ - SELECT * FROM zipcode WHERE LandArea >= %f AND LandArea <= %f - """ % (lower, upper) - if standard_only: - select_sql = select_sql + self._standard_only_param - select_sql = select_sql + self.get_sortby_sql(sortby, descending) - select_sql = select_sql + self.get_limit_sql(returns) - res = list() - for row in self.cursor.execute(select_sql): - res.append(Zipcode(self.all_column, list(row))) - return res + **中文文档** - def by_waterarea(self, lower=-1, upper=2**30, standard_only=True, - sortby="ZipCode", descending=False, returns=_DEFAULT_LIMIT): - """Search zipcode information by landarea range. - - :param lower: minimal waterarea - :param upper: maximum waterarea - :param standard_only: boolean, default True, only returns standard + 根据陆地面积模糊查询。 + """ + return self.find(landarea_lower=lower, + landarea_upper=upper, + standard_only=standard_only, + sort_by=sort_by, + ascending=ascending, + returns=returns) + + def by_waterarea(self, lower=-1, upper=2**30, + standard_only=True, + sort_by="ZipCode", ascending=True, + returns=DEFAULT_LIMIT, + ): + """Search zipcode information by waterarea range. 
+ + :param lower: minimal waterarea in sqrt miles + :param upper: maximum waterarea in sqrt miles + :param standard_only: bool, default True, only returns standard type zipcode - :param sortby: string, default ``"Zipcode"`` - :param descending: boolean, default False + :param sortby: str or list of str, default ``"Zipcode"`` + :param ascending: bool or list of bool, default True :param returns: int, default 5 - """ - select_sql = \ - """ - SELECT * FROM zipcode WHERE WaterArea >= %f AND WaterArea <= %f - """ % (lower, upper) - if standard_only: - select_sql = select_sql + self._standard_only_param - select_sql = select_sql + self.get_sortby_sql(sortby, descending) - select_sql = select_sql + self.get_limit_sql(returns) - res = list() - for row in self.cursor.execute(select_sql): - res.append(Zipcode(self.all_column, list(row))) - return res + **中文文档** - def by_totalwages(self, lower=-1, upper=2**30, standard_only=True, - sortby="ZipCode", descending=False, returns=_DEFAULT_LIMIT): + 根据水域面积模糊查询。 + """ + return self.find(waterarea_lower=lower, + waterarea_upper=upper, + standard_only=standard_only, + sort_by=sort_by, + ascending=ascending, + returns=returns) + + def by_totalwages(self, lower=-1, upper=2**30, + standard_only=True, + sort_by="ZipCode", ascending=True, + returns=DEFAULT_LIMIT, + ): """Search zipcode information by total annual wages. 
:param lower: minimal total annual wages :param upper: maximum total annual wages - :param standard_only: boolean, default True, only returns standard + :param standard_only: bool, default True, only returns standard type zipcode - :param sortby: string, default ``"Zipcode"`` - :param descending: boolean, default False + :param sortby: str or list of str, default ``"Zipcode"`` + :param ascending: bool or list of bool, default True :param returns: int, default 5 - """ - select_sql = \ - """ - SELECT * FROM zipcode WHERE TotalWages >= %f AND TotalWages <= %f - """ % (lower, upper) - if standard_only: - select_sql = select_sql + self._standard_only_param - select_sql = select_sql + self.get_sortby_sql(sortby, descending) - select_sql = select_sql + self.get_limit_sql(returns) - res = list() - for row in self.cursor.execute(select_sql): - res.append(Zipcode(self.all_column, list(row))) - return res + **中文文档** - def by_wealthy(self, lower=-1, upper=2**30, standard_only=True, - sortby="ZipCode", descending=False, returns=_DEFAULT_LIMIT): + 根据总年度工资收入模糊查询。 + """ + return self.find(totalwages_lower=lower, + totalwages_upper=upper, + standard_only=standard_only, + sort_by=sort_by, + ascending=ascending, + returns=returns) + + def by_wealthy(self, lower=-1, upper=2**30, + standard_only=True, + sort_by="ZipCode", ascending=True, + returns=DEFAULT_LIMIT, + ): """Search zipcode information by average annual wage (AAW). 
AAW = total wage / population :param lower: minimal AAW :param upper: maximum AAW - :param sortby: string, default ``"Zipcode"`` - :param descending: boolean, default False + :param standard_only: bool, default True, only returns standard + type zipcode + :param sortby: str or list of str, default ``"Zipcode"`` + :param ascending: bool or list of bool, default True :param returns: int, default 5 - """ - select_sql = \ - """ - SELECT * FROM zipcode WHERE - TotalWages / Population >= %f AND TotalWages / Population <= %f - """ % (lower, upper) - if standard_only: - select_sql = select_sql + self._standard_only_param - select_sql = select_sql + self.get_sortby_sql(sortby, descending) - select_sql = select_sql + self.get_limit_sql(returns) - res = list() - for row in self.cursor.execute(select_sql): - res.append(Zipcode(self.all_column, list(row))) - return res + **中文文档** - def by_house(self, lower=-1, upper=2**30, standard_only=True, - sortby="ZipCode", descending=False, returns=_DEFAULT_LIMIT): + 根据人均年收入模糊查询。 + """ + return self.find(wealthy_lower=lower, + wealthy_upper=upper, + standard_only=standard_only, + sort_by=sort_by, + ascending=ascending, + returns=returns) + + def by_house(self, lower=-1, upper=2**30, + standard_only=True, + sort_by="ZipCode", ascending=True, + returns=DEFAULT_LIMIT, + ): """Search zipcode information by house of units. 
:param lower: minimal house of units :param upper: maximum house of units - :param sortby: string, default ``"Zipcode"`` - :param descending: boolean, default False + :param standard_only: bool, default True, only returns standard + type zipcode + :param sortby: str or list of str, default ``"Zipcode"`` + :param ascending: bool or list of bool, default True :param returns: int, default 5 + + **中文文档** + + 根据房屋数量, 包括Townhouse, Single House模糊查询。 """ - select_sql = \ - """ - SELECT * FROM zipcode WHERE - HouseOfUnits >= %f AND HouseOfUnits <= %f - """ % (lower, upper) - if standard_only: - select_sql = select_sql + self._standard_only_param - select_sql = select_sql + self.get_sortby_sql(sortby, descending) - select_sql = select_sql + self.get_limit_sql(returns) + return self.find(house_lower=lower, + house_upper=upper, + standard_only=standard_only, + sort_by=sort_by, + ascending=ascending, + returns=returns) + + def find(self, + lat=None, lng=None, radius=None, + city=None, state=None, + prefix=None, + pattern=None, + population_lower=None, population_upper=None, + density_lower=None, density_upper=None, + landarea_lower=None, landarea_upper=None, + waterarea_lower=None, waterarea_upper=None, + totalwages_lower=None, totalwages_upper=None, + wealthy_lower=None, wealthy_upper=None, + house_lower=None, house_upper=None, + standard_only=True, + sort_by="ZipCode", ascending=True, + returns=DEFAULT_LIMIT, + ): + """ + :params sort_by: can be attribute name or 'Dist'. 
+ """ + where_chunks = list() + + #--- by_coordinate --- + if isinstance(lat, (integer_types, float)) and \ + isinstance(lat, (integer_types, float)) and \ + isinstance(radius, (integer_types, float)): + flag_by_coordinate = True + if radius <= 0: + return [] + + # define lat lng boundary + dist_btwn_lat_deg = 69.172 + dist_btwn_lon_deg = math.cos(lat) * 69.172 + lat_degr_rad = abs(radius * 1.0 / dist_btwn_lat_deg) + lon_degr_rad = abs(radius * 1.0 / dist_btwn_lon_deg) + + lat_lower = lat - lat_degr_rad + lat_upper = lat + lat_degr_rad + lng_lower = lng - lon_degr_rad + lng_upper = lng + lon_degr_rad + + where_chunks.append("Latitude >= %s" % lat_lower) + where_chunks.append("Latitude <= %s" % lat_upper) + where_chunks.append("Longitude >= %s" % lng_lower) + where_chunks.append("Longitude <= %s" % lng_upper) + + if (sort_by is None) or (sort_by == "Dist"): + flag_sort_by = False + else: + flag_sort_by = True + else: + flag_by_coordinate = False + + #--- by city or state --- + if (state is not None) and (city is not None): + state = self._find_state(state, best_match=True)[0] + city = self._find_city(city, state, best_match=True)[0] + where_chunks.append("State = '%s' AND City = '%s'" % (state, city)) + elif (state is not None) and (city is None): + state = self._find_state(state, best_match=True)[0] + where_chunks.append("State = '%s'" % state) + elif (state is None) and (city is not None): + city = self._find_city(city, None, best_match=True)[0] + where_chunks.append("City = '%s'" % city) + else: + pass + + #--- by prefix --- + if prefix is not None: + if not isinstance(prefix, string_types): + raise TypeError("prefix has to be a string") + if (not prefix.isdigit()) and (1 <= len(prefix) <= 5): + raise ValueError("prefix has to be a 1-5 letter digits") + where_chunks.append("Zipcode LIKE '%s%%'" % prefix) + + #--- by pattern --- + if pattern is not None: + if not isinstance(pattern, string_types): + raise TypeError("pattern has to be a string") + if (not 
pattern.isdigit()) and (1 <= len(pattern) <= 5): + raise ValueError("pattern has to be a 1-5 letter digits") + where_chunks.append("Zipcode LIKE '%%%s%%' " % pattern) + + #--- by population --- + try: + sql = self._sql_create_lower_upper( + "Population", population_lower, population_upper) + where_chunks.append(sql) + except ValueError: + pass + + #--- by density --- + try: + sql = self._sql_create_lower_upper( + "Density", density_lower, density_upper) + where_chunks.append(sql) + except ValueError: + pass + + #--- by land area --- + try: + sql = self._sql_create_lower_upper( + "LandArea", landarea_lower, landarea_upper) + where_chunks.append(sql) + except ValueError: + pass + + #--- by water area --- + try: + sql = self._sql_create_lower_upper( + "WaterArea", waterarea_lower, waterarea_upper) + where_chunks.append(sql) + except ValueError: + pass + + #--- by total wages --- + try: + sql = self._sql_create_lower_upper( + "TotalWages", totalwages_lower, totalwages_upper) + where_chunks.append(sql) + except ValueError: + pass + + #--- by wealthy --- + try: + sql = self._sql_create_lower_upper( + "Wealthy", wealthy_lower, wealthy_upper) + where_chunks.append(sql) + except ValueError: + pass + + #--- by house --- + try: + sql = self._sql_create_lower_upper( + "HouseOfUnits", house_lower, house_upper) + where_chunks.append(sql) + except ValueError: + pass + + select_sql = "SELECT * FROM zipcode \n\tWHERE %s" % " AND ".join( + where_chunks) + select_sql = self._sql_modify_standard_only(select_sql, standard_only) + select_sql = self._sql_modify_order_by(select_sql, sort_by, ascending) + + #--- solve coordinate and other search sort-by conflict --- + if flag_by_coordinate: + # has sort_by keyword, order by keyword + # 有sort_by关键字的情况下, 按关键字排序 + if flag_sort_by: + res = list() + for row in self.cursor.execute(select_sql): + dist = great_circle( + (row["Latitude"], row["Longitude"]), (lat, lng)) + if dist <= radius: + res.append(Zipcode(**row)) + if len(res) == returns: + 
return res + # no sort by keyword, then sort from cloest to farturest + # 没有sort_by关键字, 按距离远近排序 + else: + # use heap sort find top N closest zipcode + heap = list() + + for row in self.cursor.execute(select_sql): + dist = great_circle( + (row["Latitude"], row["Longitude"]), (lat, lng)) + if dist <= radius: + heappush(heap, (dist, row)) + + # generate results + res = list() + if returns >= 1: + try: + for i in range(returns): + res.append(Zipcode(**heappop(heap)[1])) + except IndexError: + pass + elif returns == 0: + while heap: + res.append(Zipcode(**heappop(heap)[1])) + + if ascending is False: # 按距离逆序输出 + res = res[::-1] + else: + select_sql = self._sql_modify_limit(select_sql, returns) + res = [Zipcode(**row) for row in self.cursor.execute(select_sql)] - res = list() - for row in self.cursor.execute(select_sql): - res.append(Zipcode(self.all_column, list(row))) + return res + + def all(self): + """Return all available zipcode data in this database. + + Warning! This may takes long. + + **中文文档** + + 返回所有Zipcode。 + """ + select_sql = "SELECT * FROM zipcode" + res = [Zipcode(**row) for row in self.cursor.execute(select_sql)] return res