#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""Pivot the BDC statuses of a list of taxa into one column per status type.

Run as a script, this module reads a list of taxon identifiers from an Excel
sheet, fetches the matching rows of ``taxonomie.taxref`` joined with
``taxonomie.v_bdc_status`` from the GeoNature database, and writes an Excel
workbook with three sheets: the raw status rows, a pivot of the status codes
and a pivot of the status labels.

The module also keeps two helpers that query a TaxHub ``taxref`` HTTP API; the
script itself does not call them.
"""

import os

import numpy as np  # noqa: F401 -- no longer referenced; kept so the module namespace is unchanged
import pandas as pd

# TaxHub API endpoint accepted by get_api_status / get_taxon_status.
# Not used by the script for now.
API_TAXREF = 'https://geonature.cen-isere.fr/taxhub/api/taxref'

# Status types extracted by get_taxon_status, in output-column order.
_STATUS_TYPES = (
    'AL', 'BERN', 'BONN', 'DH', 'DO', 'LRE', 'LRM', 'LRN', 'LRR', 'PAPNAT',
    'PD', 'PNA', 'PR', 'REGL', 'REGLII', 'REGLLUTTE', 'REGLSO', 'SCAP NAT',
    'SCAP REG', 'SENSNAT', 'ZDET', 'exPNA',
)

# (output column, API key) pairs for the optional taxon attributes, in
# output-column order. Output names are kept as they were historically.
_PHYLO_FIELDS = (
    ('nom_valide', 'nom_valide'),
    ('nom_vernac', 'nom_vern'),
    ('regne', 'regne'),
    ('group1_inp', 'group1_inpn'),
    ('group2_inp', 'group2_inpn'),
    ('group3_inpn', 'group3_inpn'),
    ('classe', 'classe'),
    ('ordre', 'ordre'),
    ('famille', 'famille'),
)

# Index levels shared by both pivot tables built in main().
_PIVOT_INDEX = ['cd_nom', 'cd_ref', 'lb_nom']


def _sql_int_list(values):
    """Render *values* as a comma-separated list of quoted integer literals.

    Every value goes through ``int()``, so nothing but digits (and a sign) can
    reach the SQL text. An empty input yields ``NULL`` so that ``IN (NULL)``
    stays valid SQL and matches no row.
    """
    literals = ["'%d'" % int(value) for value in values]
    return ', '.join(literals) if literals else 'NULL'


def get_status(lst, con):
    """Return the taxref attributes and BDC statuses of the given ``cd_nom``.

    Parameters
    ----------
    lst : iterable
        ``cd_nom`` identifiers, as integers or integer-like strings. It may
        hold a single element or be empty.
    con :
        Database connection handed to ``pandas.read_sql_query``.

    Returns
    -------
    pandas.DataFrame
        One row per (taxon, status) pair; empty when ``lst`` is empty or
        nothing matches.

    Raises
    ------
    ValueError
        If an element of ``lst`` cannot be converted to an integer.
    """
    # The IN list is built explicitly: formatting ``tuple(lst)`` produced
    # ``('1',)`` for one element and ``()`` for none, both invalid SQL.
    sql = """
    SELECT
        t.cd_nom,
        t.cd_ref,
        t.regne,
        t.phylum,
        t.classe,
        t.ordre,
        t.famille,
        t.group1_inpn,
        t.group2_inpn,
        t.group3_inpn,
        t.nom_vern,
        t.nom_complet,
        t.nom_valide,
        t.lb_nom,
        --s.*
        s.rq_statut,
        s.code_statut,
        s.cd_type_statut,
        s.label_statut,
        s.niveau_admin,
        s.full_citation,
        s.doc_url
    FROM taxonomie.taxref t
    JOIN taxonomie.v_bdc_status s USING (cd_nom)
    WHERE t.cd_nom IN ({cd_nom})
    ;""".format(cd_nom=_sql_int_list(lst))
    return pd.read_sql_query(sql, con)


def get_api_status(api, cd_nom: int):
    """Fetch the JSON description of one taxon from the taxref API.

    Parameters
    ----------
    api : str
        Base URL of the endpoint; ``cd_nom`` is appended as a path segment.
    cd_nom : int
        Identifier of the taxon to fetch.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the response status code is not 200.
    """
    # Imported here, like the other function-scope imports of this file, so
    # the database-only workflow does not need ``requests`` to be installed.
    import requests

    res = requests.get('%s/%i' % (api, cd_nom))
    if res.status_code == 200:
        return res.json()
    # Raising a bare string is itself a TypeError in Python 3 and hid the
    # HTTP status; raise a real exception that carries the response.
    raise requests.HTTPError(
        'Error : %i\tcd_nom : %i' % (res.status_code, cd_nom), response=res)


def _status_codes(taxon, status_type):
    """Return every ``code_statut`` of *status_type* for one API taxon.

    Returns ``None`` when the taxon has no entry for that status type.
    """
    statuses = taxon['status']
    if status_type not in statuses:
        return None
    return [
        text['values'][key]['code_statut']
        for text in statuses[status_type]['text'].values()
        for key in text['values']
    ]


def get_taxon_status(lst, api):
    """Query the taxref API for each taxon and tabulate attributes and statuses.

    Parameters
    ----------
    lst : iterable of int
        ``cd_nom`` identifiers, fetched one by one with ``get_api_status``.
    api : str
        Base URL of the taxref API.

    Returns
    -------
    pandas.DataFrame
        One row per taxon: ``cd_ref``, the optional taxon attributes (``None``
        when the API omits them), then one column per status type holding the
        list of status codes, or ``None`` when the taxon has none.
    """
    from datetime import datetime as dt

    init = dt.now()
    st = [get_api_status(api, x) for x in lst]  # slow: one HTTP request per taxon
    print(dt.now() - init)

    phylo = {'cd_ref': [taxon['cd_ref'] for taxon in st]}
    # 'group2_inpn' used to be looked up under the misspelt key 'group2_inp',
    # so that column was always None. 'group3_inpn' was the only optional
    # field read without a guard; all of them now default to None.
    for column, key in _PHYLO_FIELDS:
        phylo[column] = [taxon.get(key) for taxon in st]

    cd_status = {
        status_type: [_status_codes(taxon, status_type) for taxon in st]
        for status_type in _STATUS_TYPES
    }
    return pd.DataFrame({**phylo, **cd_status})


# Department code -> department name. Not referenced elsewhere in this file.
dict_dep = {
    '38':'Isère',
    '42':'Loire',
    '07':'Ardèche',
    '26':'Drôme',
}


def _unwrap_single(cell):
    """Return the sole element of a one-element list, else *cell* unchanged."""
    if isinstance(cell, list) and len(cell) == 1:
        return cell[0]
    return cell


def _join_pair(cell):
    """Join a two-element list with a comma, else return *cell* unchanged."""
    if isinstance(cell, list) and len(cell) == 2:
        return ','.join(cell)
    return cell


def _pivot_statuses(df, values):
    """Pivot column *values* of *df* into one column per ``cd_type_statut``.

    Cells are aggregated as lists. One-element lists are unwrapped to their
    element. In the ``DH`` column, two-element lists are comma-joined and the
    ``CDH`` substring is removed from string cells.
    """
    table = pd.pivot_table(
        df,
        values=values,
        index=_PIVOT_INDEX,
        columns=['cd_type_statut'],
        aggfunc=list, fill_value=None)

    # Cells are tested with isinstance(list) rather than ``x is not np.NaN``:
    # that alias was removed in NumPy 2.0, and an identity test only works
    # when the missing value is the very same NaN object.
    for column in table.columns:
        table[column] = [_unwrap_single(cell) for cell in table[column]]

    if 'DH' in table.columns:
        table['DH'] = [_join_pair(cell) for cell in table['DH']]
        # Assigned back explicitly: an in-place replace on ``table.DH`` is
        # not guaranteed to modify the parent frame.
        table['DH'] = table['DH'].replace({'CDH': ''}, regex=True)
    return table


def main():
    """Read the taxon list, fetch the statuses and write the Excel report."""
    # Connection to the GeoNature database.
    from pycen import con_gn

    # Location of the taxon list.
    src_dir = '/home/colas/Documents/tmp/CHARVAS'
    file_name = 'liste_sp_CHAR.xlsx'
    sheet = 'liste_sp'

    # Column of ``sheet`` holding the taxon identifiers.
    cd_col = 'cd_ref'

    # Read the input data.
    src = os.path.join(src_dir, file_name)
    taxlist = pd.read_excel(src, sheet_name=sheet, usecols=[cd_col], header=0)
    tab_sp = pd.read_excel(src, sheet_name=sheet, index_col=cd_col)

    # Fetch the statuses. Blank cells are dropped here; get_status rejects
    # any remaining value that is not an integer.
    df = get_status(taxlist[cd_col].dropna(), con_gn)

    # Tell apart the LRR rows of the current region and of the former region.
    is_lrr = df.cd_type_statut == 'LRR'
    df.loc[is_lrr & (df.niveau_admin == 'Région'), 'cd_type_statut'] = 'LRR_AURA'
    df.loc[is_lrr & (df.niveau_admin == 'Ancienne région'), 'cd_type_statut'] = 'LRR_RA'
    del df['niveau_admin']

    # Drop the input columns that the pivot index brings back on merge.
    duplicated = [c for c in _PIVOT_INDEX if c in tab_sp.columns]
    tab_sp = tab_sp.drop(columns=duplicated)

    pivot = tab_sp.merge(
        _pivot_statuses(df, 'code_statut'), on=[cd_col], how='left')
    # The 'DH' post-processing of this second table used to be guarded by the
    # columns of ``pivot``; each table is now checked against its own columns.
    pivlib = tab_sp.merge(
        _pivot_statuses(df, 'label_statut'), on=[cd_col], how='left')

    print('INIT writer')
    name_out = os.path.join(src_dir, sheet + '_status.xlsx')
    with pd.ExcelWriter(name_out) as writer:
        df.to_excel(writer, sheet_name='v_bdc_status', index=False)
        print('v_bdc_status OK !')
        pivot.to_excel(writer, sheet_name='pivot_table')
        print('pivot_table OK !')
        pivlib.to_excel(writer, sheet_name='pivot_libel')
        print('pivot_libel OK !')


if __name__ == "__main__":
    main()