V2

2025-01-31 11:07:45 +01:00 · 2025-01-31 11:07:45 +01:00 · fdad4a0961
commit fdad4a0961
parent 21149360e2
1 changed files with 278 additions and 67 deletions
--- a/pivot_bdc_status.py
+++ b/pivot_bdc_status.py
@ -1,78 +1,289 @@
-import pandas as pd
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+import requests
 import numpy as np
-from pycen import con_gn
+import pandas as pd
+import os

+def get_status(lst,con):
+    """Fetch taxonomy and status rows for a list of taxa.
+
+    Runs a single query joining ``taxonomie.taxref`` with
+    ``taxonomie.v_bdc_status`` on ``cd_nom`` and returns the result.
+
+    Parameters
+    ----------
+    lst : iterable
+        ``cd_nom`` identifiers. Each one must be convertible with ``int()``
+        (integers or integer-like strings).
+    con : connection object handed as-is to ``pandas.read_sql_query``.
+
+    Returns
+    -------
+    pandas.DataFrame
+        The taxonomy columns of ``taxref`` and the status columns of
+        ``v_bdc_status`` listed in the SELECT below. No row is returned
+        when ``lst`` is empty.
+
+    Raises
+    ------
+    ValueError, TypeError
+        If an identifier cannot be converted to ``int``.
+    """
+    # Build the IN list explicitly instead of formatting a Python tuple:
+    # a one-element tuple renders as ``(x,)`` and an empty one as ``()``,
+    # both of which are invalid SQL. Casting to int also guarantees that
+    # nothing but digits is interpolated into the statement.
+    ids = ', '.join(str(int(x)) for x in lst)
+    if not ids:
+        # ``IN (NULL)`` is valid SQL and matches no row
+        ids = 'NULL'
+    sql = """
+        SELECT 
+            t.cd_nom,
+            t.cd_ref,
+            t.regne,
+            t.phylum,
+            t.classe,
+            t.ordre,
+            t.famille,
+            t.group1_inpn,
+            t.group2_inpn,
+            t.group3_inpn,
+            t.nom_vern,
+            t.nom_complet,
+            t.nom_valide,
+            t.lb_nom,
+            s.rq_statut,
+            s.code_statut,
+            s.cd_type_statut,
+            s.label_statut,
+            s.niveau_admin,
+            s.full_citation,
+            s.doc_url
+        FROM taxonomie.taxref t
+        JOIN taxonomie.v_bdc_status s USING (cd_nom)
+        WHERE t.cd_nom IN ({cd_nom})
+    ;""".format(cd_nom = ids)
+    return pd.read_sql_query(sql,con)

-NAME_OUT = '/home/colas/Documents/tmp/v_bdc_status2.xlsx'
-FileIn = '/media/colas/SRV/FICHIERS/TRANSFERTS-EQUIPE/CG/BDC_STATUTS_15.xls'
-db = False
+def get_api_status(api,cd_nom:int):
+    """Request one taxon from the API and return the decoded JSON body.
+
+    Parameters
+    ----------
+    api : str
+        Base URL; the request is sent to ``<api>/<cd_nom>``.
+    cd_nom : int
+        Taxon identifier appended to the URL.
+
+    Returns
+    -------
+    The object produced by ``Response.json()``.
+
+    Raises
+    ------
+    requests.HTTPError
+        When the response status code is not 200. The response is attached
+        to the exception as ``.response``.
+    """
+    res = requests.get('%s/%i'%(api,cd_nom))
+    if res.status_code == 200:
+        return res.json()
+    # Raising a bare string is itself a TypeError in Python 3 and hides the
+    # message, so a real exception type is raised instead.
+    raise requests.HTTPError(
+        'Error : %i\tcd_nom : %i'%(res.status_code,cd_nom),response=res)

-if db :
-    sql = "SELECT * FROM taxonomie.v_bdc_status"
-    df = pd.read_sql_query(sql,con_gn)
-if FileIn is not None :
-    df = pd.read_excel(FileIn)
-    df.columns = df.columns.str.lower()
-    geo = ['Isère','Rhône-Alpes','France','France métropolitaine',np.nan]
-    df = df[df.lb_adm_tr.isin(geo)]
+# Status types extracted for every taxon, in output column order.
+_STATUS_TYPES = (
+    'AL','BERN','BONN','DH','DO','LRE','LRM','LRN','LRR','PAPNAT','PD',
+    'PNA','PR','REGL','REGLII','REGLLUTTE','REGLSO','SCAP NAT','SCAP REG',
+    'SENSNAT','ZDET','exPNA',
+)
+
+def _status_codes(taxon,status_type):
+    """Return the ``code_statut`` values of one status type, or None if the taxon lacks it."""
+    status = taxon['status']
+    if status_type not in status:
+        return None
+    return [
+        val['values'][v]['code_statut']
+        for val in status[status_type]['text'].values()
+        for v in val['values']
+    ]
+
+def get_taxon_status(lst,api):
+    """Build a status table by querying the API once per taxon.
+
+    Parameters
+    ----------
+    lst : iterable
+        Identifiers passed one by one to ``get_api_status``.
+    api : str
+        Base URL forwarded to ``get_api_status``.
+
+    Returns
+    -------
+    pandas.DataFrame
+        One row per entry of ``lst``. The first columns hold taxonomy
+        fields (None when the API record lacks the field, except
+        ``cd_ref`` which is required). They are followed by one column per
+        status type, holding the list of ``code_statut`` values or None
+        when the taxon has no status of that type.
+
+    Raises
+    ------
+    KeyError
+        If a record has no ``cd_ref`` or no ``status`` entry.
+
+    The elapsed time of the API calls is printed to stdout.
+    """
+    from datetime import datetime as dt
+    init = dt.now()
+    # One request per taxon, issued sequentially: slow on long lists
+    st = [get_api_status(api,x) for x in lst]
+    print(dt.now()-init)
+    # Output column name -> key read from the API record. The column names
+    # are kept as they were; only ``group2_inp`` now reads ``group2_inpn``
+    # (it used to look up a ``group2_inp`` key and was therefore always
+    # None), and ``group3_inpn`` is optional like its siblings.
+    phylo_keys = {
+        'nom_valide':'nom_valide',
+        'nom_vernac':'nom_vern',
+        'regne':'regne',
+        'group1_inp':'group1_inpn',
+        'group2_inp':'group2_inpn',
+        'group3_inpn':'group3_inpn',
+        'classe':'classe',
+        'ordre':'ordre',
+        'famille':'famille',
+    }
+    phylo = {'cd_ref':[x['cd_ref'] for x in st]}
+    phylo.update({col:[x.get(key) for x in st] for col,key in phylo_keys.items()})
+    cd_status = {t:[_status_codes(x,t) for x in st] for t in _STATUS_TYPES}
+    return pd.DataFrame({**phylo,**cd_status})

+# Lookup table from two-character codes to department names. Keys are
+# strings so that the leading zero of '07' is preserved.
+# NOTE(review): presumably French department numbers; not referenced
+# anywhere else in the visible code.
+dict_dep = {'38': 'Isère', '42': 'Loire', '07': 'Ardèche', '26': 'Drôme'}
+    

+if __name__ == "__main__":
+    # Script entry point: read a taxon list from an Excel sheet, fetch the
+    # matching statuses from the database, pivot them to one column per
+    # status type (codes, then labels) and write three sheets to a new
+    # workbook next to the input file.
+
+    # Connection to the GeoNature database
+    from pycen import con_gn
+    # Taxref API endpoint - not used for now
+    api_taxref = 'https://geonature.cen-isere.fr/taxhub/api/taxref'

-df.drop(columns=['full_citation'],inplace=True)
-df.replace({
-   r'[’]':"'",
-   r'[àáâãäå]':'a',
-   r'[èéêë]':'e',
-   r'[ìíîï]':'i',
-   r'[òóôõö]':'o',
-   r'[ùúûü]':'u',
-   r'[–]':"-"
-},regex=True,inplace=True)
+    # Location of the workbook holding the taxon list
+    PATH = '/home/colas/Documents/tmp/CHARVAS'
+    file = 'liste_sp_CHAR.xlsx'
+    sheet = 'liste_sp'

-DF = df.copy()
+    # Input identifiers
+    cd_col = 'cd_ref'   # name of the identifier column in sheet ``sheet``

-# ['cd_nom', 'cd_ref', 'rq_statut', 'code_statut', 'label_statut',
-# 'cd_type_statut', 'thematique', 'lb_type_statut', 'regroupement_type',
-# 'cd_st_text', 'cd_sig', 'cd_doc', 'niveau_admin', 'cd_iso3166_1',
-# 'cd_iso3166_2', 'doc_url', 'type_value']
-pivot = pd.pivot_table(
-    DF,
-    values='code_statut',
-    index=['cd_nom', 'cd_ref'#,'niveau_admin','lb_adm_tr'
-    ],
-    columns=['cd_type_statut'],
-    aggfunc=list,fill_value=None)
-for c in pivot.columns:
-    pivot[c] = [x[0] if x is not np.NaN and len(x)==1 else x for x in pivot[c]]
-pivot['DH'] = [','.join(x) if (x is not np.NaN) and (len(x)==2) else x for x in pivot['DH']]
-pivot.DH.replace({'CDH':''},regex=True,inplace=True)
+    # Read the data: the identifier column alone, then the full sheet
+    # indexed by that column
+    taxlist = pd.read_excel(os.path.join(PATH,file),sheet,usecols=[cd_col],header=0)
+    tab_sp = pd.read_excel(os.path.join(PATH,file),sheet,index_col=cd_col)
+
+    # Fetch the statuses
+    df = get_status(taxlist[cd_col].astype(str),con_gn)
+
+    # Split LRR by administrative level: current region vs former region
+    is_lrr = df.cd_type_statut == 'LRR'
+    df.loc[is_lrr & (df.niveau_admin == 'Région'),'cd_type_statut'] = 'LRR_AURA'
+    df.loc[is_lrr & (df.niveau_admin == 'Ancienne région'),'cd_type_statut'] = 'LRR_RA'
+    del df['niveau_admin']

-pivlib = pd.pivot_table(
-    DF,
-    values='label_statut',
-    index=['cd_nom', 'cd_ref'#,'niveau_admin','lb_adm_tr'
-    ],
-    columns=['cd_type_statut'],
-    aggfunc=list,fill_value=None)
-for c in pivlib.columns:
-    pivlib[c] = [x[0] if x is not np.NaN and len(x)==1 else x for x in pivlib[c]]
-pivlib['DH'] = [','.join(x) if (x is not np.NaN) and (len(x)==2) else x for x in pivlib['DH']]
-pivlib.DH.replace({'CDH':''},regex=True,inplace=True)
+    # Drop the columns of the input sheet that the pivot tables bring back,
+    # so that the merges below do not produce duplicated columns
+    tab_sp = tab_sp.drop(columns=['cd_ref','cd_nom','lb_nom'],errors='ignore')

-print('INIT writer')
-with pd.ExcelWriter(NAME_OUT) as writer:
-    DF.to_excel(
-        writer,sheet_name='v_bdc_status'
-        )
-    # writer.save()
-    print('v_bdc_status OK !')
-    pivot.to_excel(
-        writer,sheet_name='pivot_table'
-        )
-    # writer.save()
-    print('pivot_table OK !')
-    pivlib.to_excel(
-        writer,sheet_name='pivot_libel'
-        )
-    writer.save()
-    print('pivot_libel OK !')
+    # Status codes: one column per status type, each cell a list of codes
+    pivot = pd.pivot_table(
+        df,
+        values='code_statut',
+        index=['cd_nom', 'cd_ref','lb_nom'],
+        columns=['cd_type_statut'],
+        aggfunc=list,fill_value=None)
+
+    # Unwrap single-element lists. ``isinstance`` replaces the former
+    # ``x is not np.NaN`` test: the ``np.NaN`` alias no longer exists in
+    # NumPy 2, and an identity check against NaN is fragile anyway.
+    for c in pivot.columns:
+        pivot[c] = [x[0] if isinstance(x,list) and len(x)==1 else x for x in pivot[c]]
+    if 'DH' in pivot.columns:
+        # Join pairs of codes; only real lists are joined, so a
+        # two-character string left by the unwrapping above is not split
+        pivot['DH'] = [','.join(x) if isinstance(x,list) and len(x)==2 else x for x in pivot['DH']]
+        # Assign the result back: an in-place replace on ``pivot.DH`` does
+        # not reliably update the parent frame
+        pivot['DH'] = pivot['DH'].replace({'CDH':''},regex=True)
+
+    pivot = tab_sp.merge(pivot,on=[cd_col],how='left')
+
+    # Status labels: same layout as above with ``label_statut`` values
+    pivlib = pd.pivot_table(
+        df,
+        values='label_statut',
+        index=['cd_nom', 'cd_ref','lb_nom'],
+        columns=['cd_type_statut'],
+        aggfunc=list,fill_value=None)
+    for c in pivlib.columns:
+        pivlib[c] = [x[0] if isinstance(x,list) and len(x)==1 else x for x in pivlib[c]]
+    # Test the frame that is actually modified (this used to look at
+    # ``pivot``, which has already been merged with ``tab_sp`` here)
+    if 'DH' in pivlib.columns:
+        pivlib['DH'] = [','.join(x) if isinstance(x,list) and len(x)==2 else x for x in pivlib['DH']]
+        pivlib['DH'] = pivlib['DH'].replace({'CDH':''},regex=True)
+
+    pivlib = tab_sp.merge(pivlib,on=[cd_col],how='left')
+
+    print('INIT writer')
+    NAME_OUT = os.path.join(PATH,sheet+'_status.xlsx')
+    # The context manager saves and closes the workbook on exit
+    with pd.ExcelWriter(NAME_OUT) as writer:
+        df.to_excel(
+            writer,sheet_name='v_bdc_status',index=False
+            )
+        print('v_bdc_status OK !')
+        pivot.to_excel(
+            writer,sheet_name='pivot_table'
+            )
+        print('pivot_table OK !')
+        pivlib.to_excel(
+            writer,sheet_name='pivot_libel'
+            )
+        print('pivot_libel OK !')