Mieten¶

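Clean up the rent data set: drop implausibly small listings, normalise the postal codes (PLZ) to five digits, and compute the rent per square metre.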

In [ ]:
import pandas as pd

# Load the raw listings.
rent_data = pd.read_csv("immo_data.csv")

# Drop listings with an implausibly small living space. The explicit .copy()
# gives us an independent frame, so the column assignments below do not
# operate on a slice of the raw data (avoids SettingWithCopyWarning).
rent_data = rent_data[rent_data["livingSpace"] > 5].copy()

# Normalise the postal code to a zero-padded 5-character string and keep only
# rows whose code consists of exactly five digits.
rent_data["geo_plz"] = rent_data["geo_plz"].astype(str).str.zfill(5)
rent_data = rent_data[rent_data["geo_plz"].str.match(r'^\d{5}$')].copy()

# Calculate price per sqm
rent_data["price_per_sqm"] = rent_data["totalRent"] / rent_data["livingSpace"]

# Remove outliers (too cheap or expensive). Rows where price_per_sqm is NaN
# fail both comparisons and are dropped as well.
# NOTE: the former follow-up `rent_data.query("price_per_sqm" >= 3 & ...)` was
# removed: the condition was evaluated as a Python expression (int & str ->
# TypeError) instead of being passed to query() as one string, and it only
# repeated this filter anyway.
rent_data = rent_data[(rent_data["price_per_sqm"] >= 3) & (rent_data["price_per_sqm"] <= 50)]


# Show summary stats
print(rent_data.head())
rent_data.info()  # info() prints its report itself and returns None
print(rent_data.describe())

Mean sqm rent per PLZ¶

In [112]:
def _most_common(values):
    """Return the most frequent entry of a Series, or None if it has no mode."""
    modes = values.mode()
    return None if modes.empty else modes[0]


# One row per PLZ: mean rent per sqm plus the most frequent city / district.
plz_avg_rent = (
    rent_data
    .groupby("geo_plz")
    .agg(
        price_per_sqm=("price_per_sqm", "mean"),  # average rent price per sqm
        regio2=("regio2", _most_common),          # most common city
        regio3=("regio3", _most_common),          # most common district
    )
    .reset_index()
)

# Region names use underscores as word separators; show them with spaces.
for region_col in ("regio2", "regio3"):
    plz_avg_rent[region_col] = plz_avg_rent[region_col].str.replace('_', ' ')

# Preview the aggregated table
plz_avg_rent.head()
Out[112]:
geo_plz price_per_sqm regio2 regio3
0 00852 6.044444 Plauen Dobenau
1 00853 8.708040 Plauen Bahnhofsvorstadt
2 01057 10.647677 Dresden Briesnitz
3 01067 13.631661 Dresden Innere Altstadt
4 01069 12.115053 Dresden Südvorstadt West

Join with PLZ Geojson¶

In [113]:
import geopandas as gpd

# Load the postal-code (PLZ) geometries.
germany_plz_geo = gpd.read_file("plz.geojson")

# Report the coordinate reference system of the loaded geometries.
print(germany_plz_geo.crs)

# Preview the frame as the cell's last expression so the notebook renders it;
# a bare .head() in the middle of a cell is evaluated and silently discarded.
germany_plz_geo.head()
EPSG:4326
In [114]:
# Attach the per-PLZ rent figures to the geometries. A left join keeps every
# polygon, including those without any matching rent data.
germany_plz_map = germany_plz_geo.merge(
    plz_avg_rent,
    how="left",
    left_on="plz",
    right_on="geo_plz",
)

# Quick sanity checks: resulting columns, value range, and CRS after the merge.
separator = "============"
rent_per_sqm = germany_plz_map['price_per_sqm']

print(germany_plz_map.columns)
print(separator, separator, sep="\n")
print(rent_per_sqm.max())
print(rent_per_sqm.min())

print(germany_plz_map.crs)
Index(['plz', 'note', 'einwohner', 'qkm', 'geometry', 'geo_plz',
       'price_per_sqm', 'regio2', 'regio3'],
      dtype='object')
============
============
34.406888069510245
3.791208791208791
EPSG:4326

Create Map¶

In [115]:
import folium
# Base map centred on latitude 51, longitude 10.
m = folium.Map(location=[51, 10], zoom_start=6)

# Add Choropleth Layer
# price_per_sqm is aggregated with "mean" per PLZ, so the layer and legend are
# labelled "Mean" (they previously claimed "Median").
# The data is keyed on "plz", the same property that key_on reads from each
# feature; "geo_plz" is NaN for polygons without rent data after the left merge.
folium.Choropleth(
    geo_data=germany_plz_map,
    name="Mean Rent per SQM",
    data=germany_plz_map,
    columns=["plz", "price_per_sqm"],
    key_on="feature.properties.plz",
    fill_color="YlGnBu",
    fill_opacity=0.8,
    line_opacity=0.3,
    legend_name="Mean Rent Price per sqm (€)"
).add_to(m)

# Show map
m
Out[115]:
Make this Notebook Trusted to load map: File -> Trust Notebook