diff --git a/pivot_bdc_status.py b/pivot_bdc_status.py new file mode 100644 index 0000000..850d5c0 --- /dev/null +++ b/pivot_bdc_status.py @@ -0,0 +1,78 @@ +import pandas as pd +import numpy as np +from pycen import con_gn + + +NAME_OUT = '/home/colas/Documents/tmp/v_bdc_status2.xlsx' +FileIn = '/media/colas/SRV/FICHIERS/TRANSFERTS-EQUIPE/CG/BDC_STATUTS_15.xls' +db = False + +if db : + sql = "SELECT * FROM taxonomie.v_bdc_status" + df = pd.read_sql_query(sql,con_gn) +if FileIn is not None : + df = pd.read_excel(FileIn) + df.columns = df.columns.str.lower() + geo = ['Isère','Rhône-Alpes','France','France métropolitaine',np.nan] + df = df[df.lb_adm_tr.isin(geo)] + + + +df.drop(columns=['full_citation'],inplace=True) +df.replace({ + r'[’]':"'", + r'[àáâãäå]':'a', + r'[èéêë]':'e', + r'[ìíîï]':'i', + r'[òóôõö]':'o', + r'[ùúûü]':'u', + r'[–]':"-" +},regex=True,inplace=True) + +DF = df.copy() + +# ['cd_nom', 'cd_ref', 'rq_statut', 'code_statut', 'label_statut', +# 'cd_type_statut', 'thematique', 'lb_type_statut', 'regroupement_type', +# 'cd_st_text', 'cd_sig', 'cd_doc', 'niveau_admin', 'cd_iso3166_1', +# 'cd_iso3166_2', 'doc_url', 'type_value'] +pivot = pd.pivot_table( + DF, + values='code_statut', + index=['cd_nom', 'cd_ref'#,'niveau_admin','lb_adm_tr' + ], + columns=['cd_type_statut'], + aggfunc=list,fill_value=None) +for c in pivot.columns: + pivot[c] = [x[0] if x is not np.NaN and len(x)==1 else x for x in pivot[c]] +pivot['DH'] = [','.join(x) if (x is not np.NaN) and (len(x)==2) else x for x in pivot['DH']] +pivot.DH.replace({'CDH':''},regex=True,inplace=True) + +pivlib = pd.pivot_table( + DF, + values='label_statut', + index=['cd_nom', 'cd_ref'#,'niveau_admin','lb_adm_tr' + ], + columns=['cd_type_statut'], + aggfunc=list,fill_value=None) +for c in pivlib.columns: + pivlib[c] = [x[0] if x is not np.NaN and len(x)==1 else x for x in pivlib[c]] +pivlib['DH'] = [','.join(x) if (x is not np.NaN) and (len(x)==2) else x for x in pivlib['DH']] +pivlib.DH.replace({'CDH':''},regex=True,inplace=True) + +print('INIT writer') +with pd.ExcelWriter(NAME_OUT) as writer: + DF.to_excel( + writer,sheet_name='v_bdc_status' + ) + # writer.save() + print('v_bdc_status OK !') + pivot.to_excel( + writer,sheet_name='pivot_table' + ) + # writer.save() + print('pivot_table OK !') + pivlib.to_excel( + writer,sheet_name='pivot_libel' + ) + writer.save() + print('pivot_libel OK !')