-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
43 lines (36 loc) · 1.37 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib as plt
import re
data_url = "https://raw.githubusercontent.com/labrijisaad/exploratory-data-analysis-in-Python/main/airbnb.csv"
st.title("Airbnb Listings Dataset")
@st.cache
def clean_listings_data(url):
df = pd.read_csv(
url,
dtype= {
"listing_id": "string",
"name": "string",
"host_id": "string",
"host_id": "string",
"host_name": "string",
"neighborhood_full": "string",
"room_type": "category"
},
)
df = df.loc[:, "listing_id":"listing_added"]
df['last_review'] = pd.to_datetime(df['last_review'])
df['listing_added'] = pd.to_datetime(df['listing_added'])
df['room_type'] = df['room_type'].str.strip().str.lower().replace({"private":"private room"})
coords = df['coordinates'].str.split(",", expand=True).replace("[()]", "", regex=True)
df[["lat", "lon"]] = coords
df[["lat", "lon"]] = df[["lat", "lon"]].astype("float").fillna(0)
df['price'] = df['price'].str.replace("$", "", regex=False).fillna(0).astype('float')
return df
listings = clean_listings_data(data_url)
listings
st.write(listings["room_type"].value_counts())
st.subheader("Breakdown by Room Type")
st.bar_chart(listings["room_type"].value_counts())
st.write(np.histogram(listings['price']))