데이콘에 수도권 대중교통자료로 한 게 있길래
고속도로 자료로 하면 어떻게 될까 궁금하기도 하고
그냥 심심해서 해봄
## 단순 교통량이 아니라 통행속도를 고려해서 산정해야겠다.
계산은 했으나 dropna를 하기도 하였고 흠..
1. interpolate(보간)?
2. 통행시간의 산정방식.. 하나의 월데이터를 median 한걸 모아서 또 median 했음..
3. 통행속도의 이상치 기준을 mean - 3*std 로 하니 30 km/h 가 나옴
- 무작정 3std 를 쓰는게 맞나?
붉은색이 교통량이 많은 TG-TG
파란색이 항공정보도
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 8 13:59:54 2023
@author: Administrator
"""
import os
os.chdir(r'C:/python/UAM')
import pandas as pd
# Print floats with five decimals whenever pandas renders a frame.
pd.set_option('display.float_format', '{:.5f}'.format)

# Toll-gate to toll-gate traffic volumes; the path is relative to the
# working directory chosen by the os.chdir call above.
df_tcs = pd.read_csv(r'tcs_tassportal.csv', encoding='UTF8')

# Toll-gate locations with region names attached. routeNo is forced to str
# (presumably to keep leading zeros -- confirm against the CSV).
df_location = pd.read_csv(
    r'df_location_region.csv',
    encoding='UTF8',
    dtype={'routeNo': str},
)
df_location = df_location.loc[
    :,
    ['routeNo', 'routeName', 'unitName', 'xValue', 'yValue',
     'region_1', 'region_2', 'region_3'],
]

# Space-joined three-level region label.
df_location['full_name'] = (
    df_location['region_1']
    + ' ' + df_location['region_2']
    + ' ' + df_location['region_3']
)
#%% data summary(1)
# Keep only origin, destination and volume, give them short names, then
# collapse to one row per (O, D) pair by summing the volume.
df_tcs = df_tcs[['출발구분', '도착구분', '교통량']].rename(
    columns={'출발구분': 'O', '도착구분': 'D', '교통량': 'volume'}
)
df_tcs = df_tcs.groupby(['O', 'D'], as_index=False).sum()

#%% data summary(2)
df_tcs.info()
df_location.info()  # shows that the coordinate dtypes need converting

# Make both coordinate columns float64 before any numeric work.
df_location = df_location.astype(
    {coord: 'float64' for coord in ('xValue', 'yValue')}
)

print(df_tcs.describe())
print(df_location.describe())  # the minimum shows coordinates equal to 0
#%% attach coordinates and top-level region to both ends of each OD pair
_tollgate_lookup = df_location[['unitName', 'xValue', 'yValue', 'region_1']]

# Same left join twice: first keyed on the origin name, then on the
# destination name. The joined columns are prefixed with the side they
# describe and the redundant join key is dropped.
# NOTE(review): if unitName is not unique in df_location, each join
# multiplies the matching OD rows -- confirm the lookup has no duplicates.
for _side in ('O', 'D'):
    df_tcs = (
        pd.merge(df_tcs, _tollgate_lookup,
                 left_on=_side, right_on='unitName', how='left')
        .rename(columns={'xValue': _side + '_x',
                         'yValue': _side + '_y',
                         'region_1': _side + '_region'})
        .drop('unitName', axis=1)
    )

# Discard OD pairs where either end found no match (or has a missing value).
df_tcs = df_tcs.dropna()
print(df_tcs.describe())
#%% straight-line separation between origin and destination
from haversine import haversine


def _od_distance_km(row):
    """Return the haversine distance in km between a row's two endpoints.

    Points are passed as (y, x) tuples, i.e. the y column comes first.
    """
    origin = (row['O_y'], row['O_x'])
    destination = (row['D_y'], row['D_x'])
    return haversine(origin, destination, unit='km')


df_tcs['distance'] = df_tcs.apply(_od_distance_km, axis=1)

# Keep only pairs that are more than 0.1 km apart.
df_tcs = df_tcs.loc[df_tcs['distance'] > 0.1]
#%% inter-regional flows (all OD pairs)
import matplotlib.pyplot as plt
import pandas as pd
# Import the sankey function from the sankey module within pySankey
from pysankey import sankey

# Total volume for every (origin region, destination region) combination.
df_sankey_all = df_tcs.groupby(
    ['O_region', 'D_region'], as_index=False
)['volume'].sum()

# Korean-capable font, and keep the minus sign renderable with it.
plt.rcParams.update({
    'font.family': 'Malgun Gothic',
    'axes.unicode_minus': False,
})

# Flow weight in units of 10,000, truncated to an integer.
df_sankey_all['value'] = (df_sankey_all['volume'] / 10000).astype(int)

_all_weights = df_sankey_all["value"]
sankey(
    left=df_sankey_all["O_region"],
    right=df_sankey_all["D_region"],
    leftWeight=_all_weights,
    rightWeight=_all_weights,
    aspect=5,
    fontsize=20,
)

# Resize the current figure, give it a white background and save it.
fig = plt.gcf()
fig.set_size_inches(15, 15)
fig.set_facecolor("w")
fig.savefig("지역간통행(all).png", bbox_inches="tight", dpi=150)
#%% inter-regional flows restricted to the UAM-feasible distance
# OD pairs closer than 240 km, ordered from the busiest pair downwards.
df_uam = (
    df_tcs.loc[df_tcs['distance'] < 240]
    .sort_values(by='volume', ascending=False)
)

# Total volume per (origin region, destination region) within that subset.
df_sankey_uam = df_uam.groupby(
    ['O_region', 'D_region'], as_index=False
)['volume'].sum()

plt.figure(figsize=(50, 50))
_uam_weights = df_sankey_uam["volume"]
sankey(
    left=df_sankey_uam["O_region"],
    right=df_sankey_uam["D_region"],
    leftWeight=_uam_weights,
    rightWeight=_uam_weights,
    aspect=5,
    fontsize=20,
)

# Resize the current figure, give it a white background and save it.
fig = plt.gcf()
fig.set_size_inches(15, 15)
fig.set_facecolor("w")
fig.savefig("지역간통행(UAM).png", bbox_inches="tight", dpi=150)
#%% choose routes by an outlier threshold - did not work out
import numpy as np

_uam_volume = df_uam['volume']

# Quartiles and interquartile range of the OD volumes.
q_1, q_3 = np.quantile(_uam_volume, [0.25, 0.75])
IQR = q_3 - q_1

# Tukey fences are measured from the quartiles. Measuring them from the
# max/min (as before) puts the upper fence above every observation, so
# nothing could ever be flagged as an outlier.
top_IQR = q_3 + 1.5 * IQR
bot_IQR = q_1 - 1.5 * IQR

# Three-sigma band around the mean.
top_STD = _uam_volume.mean() + 3 * _uam_volume.std()
bot_STD = _uam_volume.mean() - 3 * _uam_volume.std()

print('q_3', int(q_3))
print('max', _uam_volume.max())
print('IQR', int(top_IQR))
print('STD', int(top_STD))
#%% knee point - did not work out
from kneed import KneeLocator

# Rank 0 is the busiest OD pair because df_uam is sorted by descending
# volume. The rank is derived from the actual row count; a hard-coded
# length breaks as soon as the input data changes.
df_uam['rank'] = range(len(df_uam))
x = df_uam['rank'].values    # horizontal axis
y = df_uam['volume'].values  # vertical axis
kn = KneeLocator(x, y, curve='convex', direction='decreasing', interp_method='polynomial')

# Draw on a figure of our own and keep its handle, so the file written
# below contains this plot and not whichever figure `fig` pointed at before.
fig = plt.figure(figsize=(12, 6))
plt.xlabel('Rank', fontsize=16)
plt.ylabel('volume', fontsize=16)
plt.plot(x, y, color='#FAC342', linestyle='-', marker='x')

if kn.knee is None:
    # KneeLocator reports None when it cannot find a knee.
    print('Knee point: not found')
else:
    print(f'Knee point: 상위 {kn.knee+1}번째 volume')  # rank starts at 0, hence +1
    plt.vlines(kn.knee, plt.ylim()[0], plt.ylim()[1], linestyles='dashed', color='r')

plt.grid(True)
fig.savefig("knee point.png", bbox_inches="tight", dpi=150)
#%% both directions should be considered - matching them gives 5 routes
# Hand-picked row positions in the volume-sorted df_uam, written as pairs.
# NOTE(review): presumably each pair is the two directions of one route --
# confirm against the data; the positions are only valid for this input.
_paired_positions = [(0, 3), (1, 2), (4, 6), (5, 7), (10, 11)]
_target_positions = [pos for pair in _paired_positions for pos in pair]

df_uam_target = df_uam.iloc[_target_positions]
df_uam_target.to_csv('df_uam_target.csv', encoding='utf-8-sig')
#%% distribution of OD volumes
import matplotlib.pyplot as plt
import seaborn as sns

fig, ax = plt.subplots(figsize=(12, 6))

# Histogram of volume with a KDE curve on top.
sns.histplot(data=df_uam, x='volume', kde=True, color='#739EE0', ax=ax)

ax.set_xlabel('volume', fontsize=16)
ax.set_ylabel('Count', fontsize=16)

# The y-axis is limited to 300-1500, so anything outside that count range
# is not visible in the saved image.
ax.set_ylim(300, 1500)

# Dashed horizontal grid lines drawn underneath the bars.
ax.yaxis.grid(color='gray', linestyle='dashed')
ax.set_axisbelow(True)

fig.savefig("histplot.png", bbox_inches="tight", dpi=150)
#%% clustering - did not work out
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler

# One-hot encode the non-numeric columns of df_uam.
# NOTE(review): presumably O, D and the region columns are strings, so this
# creates one column per distinct name -- likely why the clusters were not
# useful; confirm which features were meant to be used.
df_uam_ml = pd.get_dummies(df_uam)

# Rescale every feature to the [0, 1] range.
df_uam_ml_scaled = MinMaxScaler().fit_transform(df_uam_ml)

# Two clusters with a fixed seed; fit() returns the estimator itself.
model = KMeans(n_clusters=2, random_state=0).fit(df_uam_ml_scaled)
clust = model.predict(df_uam_ml_scaled)
df_uam['clust'] = clust

# Mean volume of each cluster.
df_uam_gb = df_uam.groupby('clust', as_index=False)['volume'].mean()
#%% make shape file
from shapely.geometry import LineString
import geopandas as gpd

# One straight line per selected OD pair. shapely points are (x, y), so the
# x column goes first and the y column second.
geometry = [
    LineString([(o_x, o_y), (d_x, d_y)])
    for o_x, o_y, d_x, d_y in zip(
        df_uam_target['O_x'], df_uam_target['O_y'],
        df_uam_target['D_x'], df_uam_target['D_y'],
    )
]

# The coordinates are treated as longitude/latitude in degrees elsewhere in
# this script (they are fed to haversine as (y, x) points), so they are
# declared as EPSG:4326 and then reprojected to EPSG:5179. Passing
# crs="EPSG:5179" straight to the constructor only relabels the degree
# values as metres and puts the lines in the wrong place.
# NOTE(review): assumes xValue/yValue are WGS84 degrees -- confirm against
# the source of df_location_region.csv.
gdf = gpd.GeoDataFrame(df_uam_target, geometry=geometry, crs="EPSG:4326")
gdf = gdf.to_crs("EPSG:5179")

# Create the output folder on first use so the write cannot fail on a
# missing directory.
os.makedirs(r'gis파일/UAM', exist_ok=True)
gdf.to_file(r'gis파일/UAM/df_uam_target.shp')
붉은색이 교통량이 많은 TG-TG
파란색이 항공정보도
#%% 카카오 행정구역
'''
import requests
import json
url = "https://dapi.kakao.com/v2/local/geo/coord2address.json?x=127.712381&y=34.981608&input_coord=WGS84"
header = {'Authorization': 'KakaoAK key'}
response = requests.get(url, headers=header)
tokens = response.json()
#tokens = pd.json_normalize(tokens)
print(response)
print(tokens)
tokens['documents'][0]['road_address']['region_1depth_name']
tokens['documents'][0]['road_address']['region_2depth_name']
tokens['documents'][0]['road_address']['region_3depth_name']
def get_region_name(df_tcs,key='KakaoAK 26e1cb037f951f97b258c4d2e91efda6'):
list_1 = []
list_2 = []
list_3 = []
header = {'Authorization': key}
for i, row in df_tcs.iterrows():
print(i)
yValue = row['xValue']
print(str(yValue))
xValue = row['yValue']
url = "https://dapi.kakao.com/v2/local/geo/coord2address.json?x="+str(yValue)+"&y="+str(xValue)+"&input_coord=WGS84"
print(url)
response = requests.get(url, headers=header)
tokens = response.json()
print(tokens)
region_1 = tokens['documents'][0]['address']['region_1depth_name']
print(region_1)
region_2 = tokens['documents'][0]['address']['region_2depth_name']
print(region_2)
region_3 = tokens['documents'][0]['address']['region_3depth_name']
print(region_3)
list_1.append(region_1)
list_2.append(region_2)
list_3.append(region_3)
return list_1, list_2, list_3
list_1, list_2, list_3 = get_region_name(df_location)
df_location['region_1'] = list_1
df_location['region_2'] = list_2
df_location['region_3'] = list_3
'''
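#%% disabled legacy cell (before regions were attached): builds df_location from locationinfoUnit.json and patches zero coordinates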
''' 행정구역 전
import json
with open(r'C:/python/DSRC_ANALYSIS/tcs/locationinfoUnit.json', encoding='UTF8') as f:
data = json.load(f)
df = pd.json_normalize(data)
df.columns = df.columns.map(lambda x: x.split(".")[-1])
list_routeNo = []
list_unitName = []
list_yValue = []
list_routeName = []
list_label = []
list_xValue = []
for url, info in data.items():
# print(url)
# print(info)
routeNo = info["http://data.ex.co.kr:80/link/def/routeNo"][0]["value"]
unitName = info["http://data.ex.co.kr:80/link/def/unitName"][0]["value"]
routeName = info["http://data.ex.co.kr:80/link/def/routeName"][0]["value"]
yValue = info["http://data.ex.co.kr:80/link/def/yValue"][0]["value"]
xValue = info["http://data.ex.co.kr:80/link/def/xValue"][0]["value"]
label = info["http://www.w3.org/2000/01/rdf-schema#label"][0]["value"]
list_routeNo.append(routeNo)
list_unitName.append(unitName)
list_yValue.append(yValue)
list_routeName.append(routeName)
list_label.append(label)
list_xValue.append(xValue)
df_location = pd.DataFrame({'routeNo':list_routeNo,'unitName':list_unitName,'yValue':list_yValue,'routeName':list_routeName,'label':list_label,'xValue':list_xValue})
del data, df, f, info, label, list_label, list_routeName, list_routeNo, list_unitName, list_xValue, list_yValue, routeName, routeNo, unitName,url,xValue,yValue
#%% make table for na
y_0 = df_location[df_location['yValue']==0]
x_0 = df_location[df_location['xValue']==0]
df_location = df_location[df_location['unitName']!='가락(개)']
data=[['금왕꽃동네', 36.95467720377, 127.55095803565776],['덕평복합', 37.24128549288273, 127.38980700385493],['남풍세', 36.72291681541853, 127.14173272159204],
['가락2', 35.16518995835094, 128.8920326338494],['동충주', 37.06389810329857, 127.92092609728658]]
new_loc = pd.DataFrame(data,columns=['unitName','yValue','xValue'])
#%% fill na
for i, row in new_loc.iterrows():
unitName = row['unitName']
xValue = row['xValue']
yValue = row['yValue']
df_location.loc[df_location['unitName']==unitName,['xValue']] = xValue
df_location.loc[df_location['unitName']==unitName,['yValue']] = yValue
del unitName, xValue,yValue
del new_loc, data, i, row
y_0 = df_location[df_location['yValue']==0]
x_0 = df_location[df_location['xValue']==0]
'''
'발전의 의지 > 파이썬' 카테고리의 다른 글
교통사고 등급 클러스터링 (1) | 2023.12.21 |
---|---|
빅데이터분석기사 취득 (0) | 2023.12.18 |
그냥 저냥 하는 것 (0) | 2023.07.14 |
qgis 실무에 써보기 (0) | 2023.05.17 |
고속도로 DSRC 데이터 분석 (0) | 2023.04.05 |