Mieten¶
Clean up rent data set
In [ ]:
import pandas as pd
# Load the raw listings and clean them in a single chain, so that re-running
# this cell always starts again from the CSV file.
rent_data = (
    pd.read_csv("immo_data.csv")
    # Keep only listings whose living space is greater than 5
    .loc[lambda df: df["livingSpace"] > 5]
    # Convert the postal code to a string and left-pad it with zeros to 5 characters
    .assign(geo_plz=lambda df: df["geo_plz"].astype(str).str.zfill(5))
    # Keep only postal codes that consist of exactly five digits
    .loc[lambda df: df["geo_plz"].str.match(r"^\d{5}$")]
    # Calculate price per sqm
    .assign(price_per_sqm=lambda df: df["totalRent"] / df["livingSpace"])
    # Remove outliers (too cheap or expensive). Bounds are inclusive; rows with
    # a missing price are dropped too, because NaN fails the range check.
    .loc[lambda df: df["price_per_sqm"].between(3, 50)]
)
# NOTE: a second, duplicate range filter written as
# `rent_data.query("price_per_sqm" >= 3 & "price_per_sqm" <= 50)` was removed.
# Its argument was evaluated by Python before reaching pandas
# (`3 & "price_per_sqm"`) and raised a TypeError; the range check above
# already applies the same bounds.

# Show summary stats
print(rent_data.head())
rent_data.info()  # info() prints its report itself and returns None
print(rent_data.describe())
Mean sqm rent per PLZ¶
In [112]:
def _most_common(values):
    """Return the first mode of ``values``, or None when ``mode()`` is empty."""
    modes = values.mode()
    return None if modes.empty else modes[0]


# One row per PLZ: mean price per sqm plus the most common city (regio2)
# and the most common district (regio3) among that PLZ's listings
plz_avg_rent = (
    rent_data.groupby("geo_plz")
    .agg(
        {
            "price_per_sqm": "mean",
            "regio2": _most_common,
            "regio3": _most_common,
        }
    )
    .reset_index()
)

# Replace underscores in the region labels with spaces
for label_col in ("regio2", "regio3"):
    plz_avg_rent[label_col] = plz_avg_rent[label_col].str.replace("_", " ")

# Show results
plz_avg_rent.head()
Out[112]:
geo_plz | price_per_sqm | regio2 | regio3 | |
---|---|---|---|---|
0 | 00852 | 6.044444 | Plauen | Dobenau |
1 | 00853 | 8.708040 | Plauen | Bahnhofsvorstadt |
2 | 01057 | 10.647677 | Dresden | Briesnitz |
3 | 01067 | 13.631661 | Dresden | Innere Altstadt |
4 | 01069 | 12.115053 | Dresden | Südvorstadt West |
Join with PLZ GeoJSON¶
In [113]:
import geopandas as gpd
# Read the PLZ geometries from the GeoJSON file
germany_plz_geo = gpd.read_file("plz.geojson")

# A bare `germany_plz_geo.head()` used to sit here; because it was not the
# last expression of the cell its result was discarded, so it was removed.

# Print the coordinate reference system of the loaded geometries
print(germany_plz_geo.crs)
EPSG:4326
In [114]:
# Attach the per-PLZ rent figures to the PLZ geometries. The left join keeps
# every row of germany_plz_geo, including those without any rent data.
germany_plz_map = germany_plz_geo.merge(
    plz_avg_rent,
    how="left",
    left_on="plz",
    right_on="geo_plz",
)

# Quick checks: resulting columns, range of the price column, and the CRS
separator = "============"
sqm_prices = germany_plz_map["price_per_sqm"]
print(germany_plz_map.columns)
print(separator)
print(separator)
print(sqm_prices.max())
print(sqm_prices.min())
print(germany_plz_map.crs)
Index(['plz', 'note', 'einwohner', 'qkm', 'geometry', 'geo_plz', 'price_per_sqm', 'regio2', 'regio3'], dtype='object') ============ ============ 34.406888069510245 3.791208791208791 EPSG:4326
Create Map¶
In [115]:
import folium
# Create the base map
m = folium.Map(location=[51, 10], zoom_start=6)

# Add Choropleth Layer
# The plotted value is the per-PLZ *mean* of price_per_sqm (see the groupby
# aggregation), so the layer name and legend say "Mean" rather than "Median".
folium.Choropleth(
    geo_data=germany_plz_map,
    name="Mean Rent per SQM",
    data=germany_plz_map,
    # Use "plz" as the key column: it is the column that key_on matches, and
    # unlike "geo_plz" it is not NaN for rows without rent data after the
    # left join.
    columns=["plz", "price_per_sqm"],
    key_on="feature.properties.plz",
    fill_color="YlGnBu",
    fill_opacity=0.8,
    line_opacity=0.3,
    legend_name="Mean Rent Price per sqm (€)",
).add_to(m)

# Show map
m
Out[115]:
Make this Notebook Trusted to load map: File -> Trust Notebook