用Python分析北京市蛋殼公寓租房數據

導讀:近期,蛋殼公寓“爆雷”事件持續發酵,期間因拖欠房東房租與租客退款,蛋殼公寓陷入討債風波,全國多地蛋殼公寓辦公區域出現大規模解約事件,而作為蛋殼公寓總部所在地的北京,自然首當其沖。
長租公寓爆雷,不少年輕人不得不流離失所,構成疫情下的另一個經濟寫照,事態何去何從,值得關注。本文從數據角度出發,爬取了蛋殼公寓北京區域共6025條公寓數據,清洗數據,並進行可視化分析,為大家了解蛋殼公寓提供一個新的視角。
01 數據獲取
def?get_danke(href):
???time.sleep(random.uniform(0,?1))??#設(shè)置延時(shí),避免對服務(wù)器產(chǎn)生壓力
???response?=?requests.get(url=href,?headers=headers)
???if?response.status_code?==?200:??#部分網(wǎng)頁會(huì)跳轉(zhuǎn)404,需要做判斷
???????res?=?response.content.decode('utf-8')
???????div?=?etree.HTML(res)
???????items?=?div.xpath("/html/body/div[3]/div[1]/div[2]/div[2]")
???????for?item?in?items:
???????????house_price=item.xpath("./div[3]/div[2]/div/span/div/text()")[0]
???????????house_area=item.xpath("./div[4]/div[1]/div[1]/label/text()")[0].replace('建筑面積:約','').replace('㎡(以現(xiàn)場勘察為準(zhǔn))','')
???????????house_id=item.xpath("./div[4]/div[1]/div[2]/label/text()")[0].replace('編號:','')
???????????house_type=item.xpath("./div[4]/div[1]/div[3]/label/text()")[0].replace('\n','').replace('?','').replace('戶型:','')
???????????house_floor=item.xpath("./div[4]/div[2]/div[3]/label/text()")[0].replace('樓層:','')
???????????house_postion_1=item.xpath("./div[4]/div[2]/div[4]/label/div/a[1]/text()")[0]
???????????house_postion_2=item.xpath("./div[4]/div[2]/div[4]/label/div/a[2]/text()")[0]
???????????house_postion_3=item.xpath("./div[4]/div[2]/div[4]/label/div/a[3]/text()")[0]
???????????house_subway=item.xpath("./div[4]/div[2]/div[5]/label/text()")[0]
???else:
???????house_price?=?None
???????house_area?=?None
???????house_id?=?None
???????house_type?=?None
???????house_floor?=?None
???????house_postion_1?=?None
???????house_postion_2?=?None
???????house_postion_3?=?None
???????house_subway?=?None
......
02 數據處理
1. 導入數據分析包
import pandas as pd
import numpy as np
from pathlib import Path
import re
2. 導入數據並合併
# Collect every CSV file in the data folder and stack them into one frame.
files = Path(r"D:\菜J學(xué)Python\數(shù)據(jù)分析\蛋殼公寓").glob("*.csv")
dfs = [pd.read_csv(f) for f in files]
# Each file keeps its own row index, so index values repeat in the result.
df = pd.concat(dfs)
df.head()

3. 數據去重
# Remove rows that are exact duplicates across all columns.
df = df.drop_duplicates()
4. 查看數據
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 6026 entries, 0 to 710
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   價(jià)格      6025 non-null   object
 1   面積      6025 non-null   object
 2   編號      6025 non-null   object
 3   戶型      6025 non-null   object
 4   樓層      6025 non-null   object
 5   位置1     6025 non-null   object
 6   位置2     6025 non-null   object
 7   小區(qū)      6025 non-null   object
 8   地鐵      6025 non-null   object
dtypes: object(9)
memory usage: 470.8+ KB
5. 數據類型轉換
# Drop dirty rows: keep only rows whose price cell is not the literal
# column-header text. .copy() yields an independent frame so the column
# assignments below do not raise pandas' SettingWithCopyWarning.
jg = df['價(jià)格'] != "價(jià)格"
df = df.loc[jg, :].copy()
# Convert the price column to a numeric type.
df["價(jià)格"] = df["價(jià)格"].astype("float64")
# Convert the area column to a numeric type.
df["面積"] = df["面積"].astype("float64")
# Keep only rows that have a floor value, then split it on '/'.
df = df[df['樓層'].notnull()].copy()
floor_parts = df['樓層'].str.split('/')
# Part before the '/': the floor the unit is on.
df['所在樓層'] = floor_parts.str[0].astype("int32")
# Part after the '/': total floors, still text at this point (the statement
# that follows strips its suffix and casts it to an integer).
df['總樓層'] = floor_parts.str[1]
df['總樓層'] = df['總樓層'].str.replace("層","").astype("int32")
6. 地鐵字段清洗
def get_subway_num(row):
    """Return how many times the substring '號線' occurs in *row*.

    Args:
        row: Text of one subway-description cell.

    Returns:
        The occurrence count as an int; 0 when *row* is not a string
        (for example a missing value).
    """
    if not isinstance(row, str):
        return 0
    return row.count('號線')
def get_subway_distance(row):
    """Extract the first number in *row* that is immediately followed by '米'.

    Args:
        row: Text of one subway-description cell.

    Returns:
        The matched digits as an int, or -1 when *row* contains no such
        number or is not a string (for example a missing value). The return
        type is always int, so both outcomes can be cast the same way.
    """
    if not isinstance(row, str):
        return -1
    # Lookahead keeps '米' out of the match, leaving only the digits.
    distance = re.search(r'\d+(?=米)', row)
    if distance is None:
        return -1
    return int(distance.group())
# Derive two columns from the subway description text: the line count and
# the distance, each computed element by element.
subway_text = df['地鐵']
df['地鐵數(shù)'] = subway_text.map(get_subway_num)
df['距離地鐵距離'] = subway_text.map(get_subway_distance)
df['距離地鐵距離']=df['距離地鐵距離'].astype("int32")
7. 保存數據
# Write the cleaned frame to an Excel workbook, then preview the first rows.
# NOTE(review): this path has no drive letter, unlike the CSV input folder —
# confirm the intended output location.
output_path = r"\菜J學(xué)Python\數(shù)據(jù)分析\蛋殼公寓.xlsx"
df.to_excel(output_path)
df.head()

03 數據可視化
1. 導入可視化相關包
import?matplotlib.pyplot?as?plt
import?seaborn?as?sns
%matplotlib?inline
plt.rcParams['font.sans-serif']?=?['SimHei']??#?設(shè)置加載的字體名
plt.rcParams['axes.unicode_minus']?=?False#?解決保存圖像是負(fù)號'-'顯示為方塊的問題?
import?jieba
from?pyecharts.charts?import?*
from?pyecharts?import?options?as?opts?
from?pyecharts.globals?import?ThemeType??
import?stylecloud
from IPython.display import Image
2. 各行政區公寓數量

# Ten most frequent values of the "位置1" column, re-sorted ascending for plotting.
# (value_counts() already sorts descending, so head(10) is the top ten.)
df7 = df["位置1"].value_counts().head(10).sort_values(ascending=True)
print(df7.index.to_list())
print(df7.to_list())
c = (
    Bar(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    .add_xaxis(df7.index.to_list())
    .add_yaxis("", df7.to_list())
    .reversal_axis()  # swap the x and y axes
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="各行政區(qū)公寓數(shù)量",
            subtitle="數(shù)據(jù)來源:蛋殼公寓 \t制圖:菜J學(xué)Python",
            pos_left='left',
        ),
        # Font size of the x-axis labels.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(font_size=13)),
        # Font size of the y-axis labels.
        yaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(font_size=13)),
    )
    .set_series_opts(label_opts=opts.LabelOpts(font_size=16, position='right'))
)
c.render_notebook()

3. 小區公寓數量TOP10
# Ten most frequent values of the "小區(qū)" column, re-sorted ascending for plotting.
df7 = df["小區(qū)"].value_counts().head(10).sort_values(ascending=True)
print(df7.index.to_list())
print(df7.to_list())
c = (
    Bar(init_opts=opts.InitOpts(theme=ThemeType.DARK, width="1100px", height="600px"))
    .add_xaxis(df7.index.to_list())
    .add_yaxis("", df7.to_list())
    .reversal_axis()  # swap the x and y axes
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="小區(qū)公寓數(shù)量TOP10",
            subtitle="數(shù)據(jù)來源:蛋殼公寓 \t制圖:菜J學(xué)Python",
            pos_left='left',
        ),
        # Font size of the x-axis labels.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(font_size=11)),
        # Rotate the y-axis labels by 30 degrees.
        yaxis_opts=opts.AxisOpts(axislabel_opts={"rotate": 30}),
    )
    .set_series_opts(label_opts=opts.LabelOpts(font_size=16, position='right'))
)
c.render_notebook()

4. 蛋殼公寓租金分布
# Bucket the price column into rent brackets; right=False makes each bin
# closed on the left, e.g. [1000, 2000).
df['租金分段'] = pd.cut(
    df['價(jià)格'],
    [0, 1000, 2000, 3000, 4000, 1000000],
    labels=['1000元以下', '1000-2000元', '2000-3000元', '3000-4000元', '4000元以上'],
    right=False,
)
# Row count per bracket, largest first (counts are integers, so no rounding is needed).
df11 = df["租金分段"].value_counts()
df11 = df11.sort_values(ascending=False)
print(df11)
c = (
    Pie(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    .add(
        "",
        [list(z) for z in zip(df11.index.to_list(), df11.to_list())],
        radius=["20%", "80%"],  # inner and outer radius of the ring
        rosetype='area',
    )
    .set_global_opts(
        legend_opts=opts.LegendOpts(is_show=False),
        title_opts=opts.TitleOpts(
            title="蛋殼公寓租金分布",
            subtitle="數(shù)據(jù)來源:蛋殼公寓\n制圖:菜J學(xué)Python",
            pos_top="0.5%",
            pos_left='left',
        ),
    )
    # "{b}" is the item name and "{d}" its percentage; the source text had
    # the "{d}" placeholder garbled.
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{d}%", font_size=16))
)
c.render_notebook()

5. 各行政區租金分布
# Count rows per (rent bracket, "位置1" value); fill_value=0 keeps missing
# combinations from reaching the chart as NaN.
h = pd.pivot_table(df, index=['租金分段'], values=['價(jià)格'],
                   columns=['位置1'], aggfunc=['count'], fill_value=0)
k = h.droplevel([0, 1], axis=1)  # drop the aggfunc/value column levels, keeping only "位置1"
c = (
    Polar(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    .add_schema(angleaxis_opts=opts.AngleAxisOpts(data=k.columns.tolist(), type_="category"))
)
# One stacked bar series per rent bracket. Names come from the table's own
# index, so each series name always matches the row it plots, and rows that
# are absent from the table are simply not drawn.
for rent_band, counts in k.iterrows():
    c.add(str(rent_band), counts.tolist(), type_="bar", stack="stack0")
c.set_global_opts(title_opts=opts.TitleOpts(title="各行政區(qū)租金情況", subtitle="數(shù)據(jù)來源:蛋殼公寓\n制圖:菜J學(xué)Python"))
c.render_notebook()

6. 蛋殼公寓樓層分布
# Funnel chart
# Bucket the unit's floor into bands; right=False makes each bin closed on the left.
df['樓層分段'] = pd.cut(
    df['所在樓層'],
    [0, 10, 20, 30, 40, 1000000],
    labels=['10層以下', '10-20層', '20-30層', '30-40層', '40層以上'],
    right=False,
)
count = df['樓層分段'].value_counts()  # pd.Series of rows per band
print(count)
job = list(count.index)
job_count = count.values.tolist()
from pyecharts.charts import Funnel
c = (
    Funnel(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    .add("", [list(i) for i in zip(job, job_count)])
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="蛋殼公寓樓層分布",
            subtitle="數(shù)據(jù)來源:蛋殼公寓\n制圖:菜J學(xué)Python",
            pos_top="0.1%",
            pos_left='left',
        ),
        legend_opts=opts.LegendOpts(is_show=False),
    )
    # "{b}" is the item name and "{d}" its percentage; the source text had
    # the "{d}" placeholder garbled.
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{d}%", font_size=16))
)
c.render_notebook()

7. 蛋殼公寓戶型分布
# Non-null price count per house type, keeping the ten largest groups.
df2 = df.groupby('戶型')['價(jià)格'].count()
df2 = df2.sort_values(ascending=False)[:10]
bar = Bar(init_opts=opts.InitOpts(theme=ThemeType.DARK))
bar.add_xaxis(df2.index.to_list())
bar.add_yaxis("", df2.to_list())
bar.set_global_opts(
    title_opts=opts.TitleOpts(
        title="蛋殼公寓戶型分布",
        subtitle="數(shù)據(jù)來源:蛋殼公寓\t制圖:菜J學(xué)Python",
        pos_top="2%",
        pos_left='center',
    ),
    # Font size of the x-axis labels.
    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(font_size=16)),
    # Font size of the y-axis labels.
    yaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(font_size=16)),
)
bar.set_series_opts(label_opts=opts.LabelOpts(font_size=16, position='top'))
bar.render_notebook()

8. 蛋殼公寓面積分布
# Bucket the area column into bands; right=False makes each bin closed on the left.
df['面積分段'] = pd.cut(
    df['面積'],
    [0, 10, 20, 30, 40, 1000000],
    labels=['10㎡以下', '10-20㎡', '20-30㎡', '30-40㎡', '40㎡以上'],
    right=False,
)
# Cast to str before counting, so only bands that actually occur are counted.
df2 = df["面積分段"].astype("str").value_counts()
print(df2)
df2 = df2.sort_values(ascending=False)
regions = df2.index.to_list()
values = df2.to_list()
c = (
    Pie(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    .add("", list(zip(regions, values)))
    .set_global_opts(
        legend_opts=opts.LegendOpts(is_show=False),
        title_opts=opts.TitleOpts(
            title="蛋殼公寓面積分布",
            subtitle="數(shù)據(jù)來源:蛋殼公寓\n制圖:菜J學(xué)Python",
            pos_top="0.5%",
            pos_left='left',
        ),
    )
    # "{b}" is the item name and "{d}" its percentage; the source text had
    # the "{d}" placeholder garbled.
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{d}%", font_size=14))
)
c.render_notebook()

9. 蛋殼公寓商圈分布
# Draw the word cloud
# NOTE(review): neither `get_cut_words` nor `df1` is defined in the visible
# part of this file — confirm they exist (possibly `df` was intended).
text1 = get_cut_words(content_series=df1['位置2'])
stylecloud.gen_stylecloud(
    text=' '.join(text1),  # words joined by a single space
    max_words=100,
    collocations=False,
    font_path=r'C:\WINDOWS\FONTS\MSYH.TTC',
    icon_name='fas fa-home',  # Font Awesome class string: style prefix, a space, then the icon name
    size=653,
    palette='cartocolors.diverging.ArmyRose_2',
    output_name='./1.png',
)
Image(filename='./1.png')
10. 相關性分析
color_map = sns.light_palette('orange', as_cmap=True)  # light-to-orange colormap
# Correlate the numeric columns only: the frame also holds text and
# categorical columns, which DataFrame.corr() refuses to handle on
# pandas >= 2.0 unless they are excluded first.
df.select_dtypes(include='number').corr().style.background_gradient(color_map)


干貨直達

評論
圖片
表情
