init Extraction pivot de la bd status
This commit is contained in:
parent
b7ba10d860
commit
21149360e2
78
pivot_bdc_status.py
Normal file
78
pivot_bdc_status.py
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from pycen import con_gn
|
||||||
|
|
||||||
|
|
||||||
|
NAME_OUT = '/home/colas/Documents/tmp/v_bdc_status2.xlsx'
|
||||||
|
FileIn = '/media/colas/SRV/FICHIERS/TRANSFERTS-EQUIPE/CG/BDC_STATUTS_15.xls'
|
||||||
|
db = False
|
||||||
|
|
||||||
|
if db :
|
||||||
|
sql = "SELECT * FROM taxonomie.v_bdc_status"
|
||||||
|
df = pd.read_sql_query(sql,con_gn)
|
||||||
|
if FileIn is not None :
|
||||||
|
df = pd.read_excel(FileIn)
|
||||||
|
df.columns = df.columns.str.lower()
|
||||||
|
geo = ['Isère','Rhône-Alpes','France','France métropolitaine',np.nan]
|
||||||
|
df = df[df.lb_adm_tr.isin(geo)]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
df.drop(columns=['full_citation'],inplace=True)
|
||||||
|
df.replace({
|
||||||
|
r'[’]':"'",
|
||||||
|
r'[àáâãäå]':'a',
|
||||||
|
r'[èéêë]':'e',
|
||||||
|
r'[ìíîï]':'i',
|
||||||
|
r'[òóôõö]':'o',
|
||||||
|
r'[ùúûü]':'u',
|
||||||
|
r'[–]':"-"
|
||||||
|
},regex=True,inplace=True)
|
||||||
|
|
||||||
|
DF = df.copy()
|
||||||
|
|
||||||
|
# ['cd_nom', 'cd_ref', 'rq_statut', 'code_statut', 'label_statut',
|
||||||
|
# 'cd_type_statut', 'thematique', 'lb_type_statut', 'regroupement_type',
|
||||||
|
# 'cd_st_text', 'cd_sig', 'cd_doc', 'niveau_admin', 'cd_iso3166_1',
|
||||||
|
# 'cd_iso3166_2', 'doc_url', 'type_value']
|
||||||
|
pivot = pd.pivot_table(
|
||||||
|
DF,
|
||||||
|
values='code_statut',
|
||||||
|
index=['cd_nom', 'cd_ref'#,'niveau_admin','lb_adm_tr'
|
||||||
|
],
|
||||||
|
columns=['cd_type_statut'],
|
||||||
|
aggfunc=list,fill_value=None)
|
||||||
|
for c in pivot.columns:
|
||||||
|
pivot[c] = [x[0] if x is not np.NaN and len(x)==1 else x for x in pivot[c]]
|
||||||
|
pivot['DH'] = [','.join(x) if (x is not np.NaN) and (len(x)==2) else x for x in pivot['DH']]
|
||||||
|
pivot.DH.replace({'CDH':''},regex=True,inplace=True)
|
||||||
|
|
||||||
|
pivlib = pd.pivot_table(
|
||||||
|
DF,
|
||||||
|
values='label_statut',
|
||||||
|
index=['cd_nom', 'cd_ref'#,'niveau_admin','lb_adm_tr'
|
||||||
|
],
|
||||||
|
columns=['cd_type_statut'],
|
||||||
|
aggfunc=list,fill_value=None)
|
||||||
|
for c in pivlib.columns:
|
||||||
|
pivlib[c] = [x[0] if x is not np.NaN and len(x)==1 else x for x in pivlib[c]]
|
||||||
|
pivlib['DH'] = [','.join(x) if (x is not np.NaN) and (len(x)==2) else x for x in pivlib['DH']]
|
||||||
|
pivlib.DH.replace({'CDH':''},regex=True,inplace=True)
|
||||||
|
|
||||||
|
print('INIT writer')
|
||||||
|
with pd.ExcelWriter(NAME_OUT) as writer:
|
||||||
|
DF.to_excel(
|
||||||
|
writer,sheet_name='v_bdc_status'
|
||||||
|
)
|
||||||
|
# writer.save()
|
||||||
|
print('v_bdc_status OK !')
|
||||||
|
pivot.to_excel(
|
||||||
|
writer,sheet_name='pivot_table'
|
||||||
|
)
|
||||||
|
# writer.save()
|
||||||
|
print('pivot_table OK !')
|
||||||
|
pivlib.to_excel(
|
||||||
|
writer,sheet_name='pivot_libel'
|
||||||
|
)
|
||||||
|
writer.save()
|
||||||
|
print('pivot_libel OK !')
|
||||||
Loading…
x
Reference in New Issue
Block a user