V2
This commit is contained in:
parent
21149360e2
commit
fdad4a0961
@ -1,78 +1,289 @@
|
||||
import pandas as pd
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: UTF-8 -*-
|
||||
|
||||
import requests
|
||||
import numpy as np
|
||||
from pycen import con_gn
|
||||
import pandas as pd
|
||||
import os
|
||||
|
||||
def get_status(lst,con):
    """Fetch taxonomy and status rows for a list of ``cd_nom`` identifiers.

    Joins ``taxonomie.taxref`` with ``taxonomie.v_bdc_status`` on ``cd_nom``
    and keeps the rows whose ``cd_nom`` is in ``lst``.

    Args:
        lst: Iterable of ``cd_nom`` values. Every item must be accepted by
            ``int()`` (ints, numpy integers or digit-only strings).
        con: Connection or engine accepted by ``pandas.read_sql_query``.

    Returns:
        pandas.DataFrame with the taxref and status columns selected below.
        It is empty when ``lst`` is empty or when nothing matches.

    Raises:
        ValueError, TypeError: If an item of ``lst`` is not an integer value.
    """
    # Casting to int validates the input and guarantees that nothing but
    # digits (and a sign) is ever interpolated into the SQL text.
    cd_noms = [int(x) for x in lst]

    # The IN list is built by hand: formatting a Python tuple yields "(1,)"
    # for a single id and "()" for none, both of which are invalid SQL.
    # "IN (NULL)" matches no row, so an empty input gives an empty result.
    in_list = ', '.join("'%i'" % cd_nom for cd_nom in cd_noms) or 'NULL'

    sql = """
    SELECT
        t.cd_nom,
        t.cd_ref,
        t.regne,
        t.phylum,
        t.classe,
        t.ordre,
        t.famille,
        t.group1_inpn,
        t.group2_inpn,
        t.group3_inpn,
        t.nom_vern,
        t.nom_complet,
        t.nom_valide,
        t.lb_nom,
        s.rq_statut,
        s.code_statut,
        s.cd_type_statut,
        s.label_statut,
        s.niveau_admin,
        s.full_citation,
        s.doc_url
    FROM taxonomie.taxref t
    JOIN taxonomie.v_bdc_status s USING (cd_nom)
    WHERE t.cd_nom IN ({cd_nom})
    ;""".format(cd_nom=in_list)
    return pd.read_sql_query(sql,con)
|
||||
|
||||
# Path of the Excel workbook opened by the first ``pd.ExcelWriter`` call below.
NAME_OUT = '/home/colas/Documents/tmp/v_bdc_status2.xlsx'
|
||||
# Excel file read with ``pd.read_excel`` into ``df``; ``None`` skips that read.
FileIn = '/media/colas/SRV/FICHIERS/TRANSFERTS-EQUIPE/CG/BDC_STATUTS_15.xls'
|
||||
# When true, ``taxonomie.v_bdc_status`` is read from the database into ``df``.
db = False
|
||||
def get_api_status(api,cd_nom:int):
    """Fetch the JSON record served for one ``cd_nom`` by a REST endpoint.

    Sends ``GET <api>/<cd_nom>`` and decodes the response body as JSON.

    Args:
        api: Base URL of the endpoint, without a trailing slash.
        cd_nom: Integer taxon identifier appended to ``api``.

    Returns:
        The decoded JSON payload of the response.

    Raises:
        requests.HTTPError: If the response status code is not 200. The
            offending response is attached as ``.response``.
    """
    res = requests.get('%s/%i'%(api,cd_nom))
    if res.status_code != 200:
        # Raising a bare string is itself a TypeError in Python 3; raise a
        # real exception so the status code and cd_nom reach the caller.
        raise requests.HTTPError(
            'Error : %i\tcd_nom : %i'%(res.status_code,cd_nom),
            response=res,
        )
    return res.json()
|
||||
|
||||
# Module-level load of the status table into ``df``.
# NOTE(review): indentation is lost in this text, so which statements sit
# inside each ``if`` cannot be told from here - confirm against the real file.
if db :
|
||||
sql = "SELECT * FROM taxonomie.v_bdc_status"
|
||||
df = pd.read_sql_query(sql,con_gn)
|
||||
# Runs after the database read, so the Excel content replaces it when both are enabled.
if FileIn is not None :
|
||||
df = pd.read_excel(FileIn)
|
||||
# Lower-case the column headers.
df.columns = df.columns.str.lower()
|
||||
# Territories kept by the filter below; np.nan is presumably listed to keep
# rows that have no ``lb_adm_tr`` value - confirm.
geo = ['Isère','Rhône-Alpes','France','France métropolitaine',np.nan]
|
||||
df = df[df.lb_adm_tr.isin(geo)]
|
||||
def get_taxon_status(lst,api):
    """Build a table of taxonomy fields and status codes fetched from an API.

    Calls ``get_api_status(api, x)`` once for every item of ``lst`` and
    flattens the returned records into one row per taxon.

    Args:
        lst: Iterable of ``cd_nom`` identifiers to query.
        api: Base URL handed to ``get_api_status``.

    Returns:
        pandas.DataFrame with the taxonomy columns (``cd_ref``,
        ``nom_valide``, ``nom_vernac``, ``regne``, ``group1_inp``,
        ``group2_inp``, ``group3_inpn``, ``classe``, ``ordre``, ``famille``)
        followed by one column per status type. A status cell holds the list
        of ``code_statut`` values of the taxon, or ``None`` when the record
        has no entry for that type.

    Raises:
        KeyError: If a record lacks ``cd_ref`` or ``status``.
    """
    from datetime import datetime as dt

    # Status types exported as columns, in output order.
    status_types = (
        'AL', 'BERN', 'BONN', 'DH', 'DO', 'LRE', 'LRM', 'LRN', 'LRR',
        'PAPNAT', 'PD', 'PNA', 'PR', 'REGL', 'REGLII', 'REGLLUTTE',
        'REGLSO', 'SCAP NAT', 'SCAP REG', 'SENSNAT', 'ZDET', 'exPNA',
    )

    init = dt.now()
    # One blocking HTTP request per taxon: slow on long lists.
    st = [get_api_status(api,x) for x in lst]
    print(dt.now()-init)

    phylo = {
        'cd_ref': [x['cd_ref'] for x in st],
        'nom_valide': [x.get('nom_valide') for x in st],
        'nom_vernac': [x.get('nom_vern') for x in st],
        'regne': [x.get('regne') for x in st],
        'group1_inp': [x.get('group1_inpn') for x in st],
        # The record key is 'group2_inpn' (like its group1/group3 siblings);
        # the truncated 'group2_inp' key is kept only as a fallback.
        'group2_inp': [x.get('group2_inpn', x.get('group2_inp')) for x in st],
        # Optional like the other descriptive fields instead of a KeyError.
        'group3_inpn': [x.get('group3_inpn') for x in st],
        'classe': [x.get('classe') for x in st],
        'ordre': [x.get('ordre') for x in st],
        'famille': [x.get('famille') for x in st],
    }
    cd_status = {
        code: [
            _status_codes(x['status'][code]) if code in x['status'] else None
            for x in st
        ]
        for code in status_types
    }
    return pd.DataFrame({**phylo,**cd_status})


def _status_codes(status_entry):
    """Return every ``code_statut`` nested under one status-type entry."""
    return [
        text['values'][key]['code_statut']
        for text in status_entry['text'].values()
        for key in text['values']
    ]
|
||||
|
||||
# Lookup from a two-character department code to the department name.
# NOTE(review): not referenced anywhere in the visible code - confirm it is used.
dict_dep = {
|
||||
'38':'Isère',
|
||||
'42':'Loire',
|
||||
'07':'Ardèche',
|
||||
'26':'Drôme',
|
||||
}
|
||||
|
||||
|
||||
# Script entry point.
# NOTE(review): the statements below mix two pipelines - one working on ``DF``
# (a copy of the module-level ``df``) and written to the first ``NAME_OUT``,
# and one working on the ``get_status`` result and written to
# ``<PATH>/<sheet>_status.xlsx``. This looks like two revisions interleaved;
# confirm which lines are live before changing any of them.
if __name__ == "__main__":
|
||||
# Set up the connection to the GeoNature database
|
||||
from pycen import con_gn
|
||||
# Not used for now - TaxRef API
|
||||
api_taxref = 'https://geonature.cen-isere.fr/taxhub/api/taxref'
|
||||
|
||||
df.drop(columns=['full_citation'],inplace=True)
|
||||
# Replace typographic apostrophes/dashes and accented lower-case vowels by
# their plain ASCII counterparts wherever the patterns match.
df.replace({
|
||||
r'[’]':"'",
|
||||
r'[àáâãäå]':'a',
|
||||
r'[èéêë]':'e',
|
||||
r'[ìíîï]':'i',
|
||||
r'[òóôõö]':'o',
|
||||
r'[ùúûü]':'u',
|
||||
r'[–]':"-"
|
||||
},regex=True,inplace=True)
|
||||
# Settings for loading the taxa file
|
||||
PATH = '/home/colas/Documents/tmp/CHARVAS'
|
||||
file = 'liste_sp_CHAR.xlsx'
|
||||
sheet = 'liste_sp'
|
||||
|
||||
DF = df.copy()
|
||||
# Input list of CD_NOM
|
||||
cd_col = 'cd_ref' # Name of the column to use in the ``sheet`` worksheet
|
||||
|
||||
# ['cd_nom', 'cd_ref', 'rq_statut', 'code_statut', 'label_statut',
|
||||
# 'cd_type_statut', 'thematique', 'lb_type_statut', 'regroupement_type',
|
||||
# 'cd_st_text', 'cd_sig', 'cd_doc', 'niveau_admin', 'cd_iso3166_1',
|
||||
# 'cd_iso3166_2', 'doc_url', 'type_value']
|
||||
# One row per (cd_nom, cd_ref) and one column per ``cd_type_statut``; each
# cell collects the matching ``code_statut`` values in a list.
pivot = pd.pivot_table(
|
||||
DF,
|
||||
values='code_statut',
|
||||
index=['cd_nom', 'cd_ref'#,'niveau_admin','lb_adm_tr'
|
||||
],
|
||||
columns=['cd_type_statut'],
|
||||
aggfunc=list,fill_value=None)
|
||||
# Unwrap one-element lists into their single value.
# NOTE(review): the ``np.NaN`` alias was removed in NumPy 2.0 (``np.nan``
# remains), and this is an identity test against that one object - confirm
# that empty pivot cells really are that object.
for c in pivot.columns:
|
||||
pivot[c] = [x[0] if x is not np.NaN and len(x)==1 else x for x in pivot[c]]
|
||||
# DH cells holding exactly two values are joined into one comma-separated string.
pivot['DH'] = [','.join(x) if (x is not np.NaN) and (len(x)==2) else x for x in pivot['DH']]
|
||||
# Drop every 'CDH' substring from the DH values.
# NOTE(review): an in-place replace through attribute access may not write
# back to ``pivot`` under pandas copy-on-write - confirm.
pivot.DH.replace({'CDH':''},regex=True,inplace=True)
|
||||
# Read the data: ``taxlist`` holds only the ``cd_col`` column, ``tab_sp`` the
# whole sheet indexed by ``cd_col``.
|
||||
taxlist = pd.read_excel(os.path.join(PATH,file),sheet,usecols=[cd_col],header=0)
|
||||
tab_sp = pd.read_excel(os.path.join(PATH,file),sheet,index_col=cd_col)
|
||||
# NOTE(review): ``lst`` is not read again in the visible code.
lst = taxlist[cd_col]
|
||||
|
||||
# Fetch the statuses
|
||||
df = get_status(taxlist[cd_col].astype(str),con_gn)
|
||||
|
||||
# Distinguish LRR rows by region level [old vs new]
|
||||
is_lrr = df.cd_type_statut == 'LRR'
|
||||
df.loc[is_lrr & (df.niveau_admin == 'Région'),'cd_type_statut'] = 'LRR_AURA'
|
||||
df.loc[is_lrr & (df.niveau_admin == 'Ancienne région'),'cd_type_statut'] = 'LRR_RA'
|
||||
del df['niveau_admin']
|
||||
|
||||
# Same pivot as above, collecting ``label_statut`` instead of ``code_statut``.
pivlib = pd.pivot_table(
|
||||
DF,
|
||||
values='label_statut',
|
||||
index=['cd_nom', 'cd_ref'#,'niveau_admin','lb_adm_tr'
|
||||
],
|
||||
columns=['cd_type_statut'],
|
||||
aggfunc=list,fill_value=None)
|
||||
for c in pivlib.columns:
|
||||
pivlib[c] = [x[0] if x is not np.NaN and len(x)==1 else x for x in pivlib[c]]
|
||||
pivlib['DH'] = [','.join(x) if (x is not np.NaN) and (len(x)==2) else x for x in pivlib['DH']]
|
||||
pivlib.DH.replace({'CDH':''},regex=True,inplace=True)
|
||||
# Remove these columns from the species sheet; presumably to avoid duplicates
# with the pivot index levels after the merges below - confirm.
for c in ['cd_ref','cd_nom','lb_nom']:
|
||||
if c in tab_sp.columns:
|
||||
# if 'cd_nom' not in df.columns and c == 'cd_ref': continue
|
||||
tab_sp.drop(c,axis=1,inplace=True)
|
||||
|
||||
print('INIT writer')
|
||||
# Write ``DF`` and both pivot tables to separate sheets of one workbook.
with pd.ExcelWriter(NAME_OUT) as writer:
|
||||
DF.to_excel(
|
||||
writer,sheet_name='v_bdc_status'
|
||||
)
|
||||
# writer.save()
|
||||
print('v_bdc_status OK !')
|
||||
pivot.to_excel(
|
||||
writer,sheet_name='pivot_table'
|
||||
)
|
||||
# writer.save()
|
||||
print('pivot_table OK !')
|
||||
pivlib.to_excel(
|
||||
writer,sheet_name='pivot_libel'
|
||||
)
|
||||
# NOTE(review): the ``with`` block already saves on exit, and
# ``ExcelWriter.save()`` was removed in pandas 2.0 - confirm the pandas version.
writer.save()
|
||||
print('pivot_libel OK !')
|
||||
# Pivot of the ``get_status`` result: status codes per (cd_nom, cd_ref, lb_nom).
pivot = pd.pivot_table(
|
||||
df,
|
||||
values='code_statut',
|
||||
index=['cd_nom', 'cd_ref','lb_nom'#,'niveau_admin','lb_adm_tr'
|
||||
],
|
||||
columns=['cd_type_statut'],
|
||||
aggfunc=list,fill_value=None)
|
||||
|
||||
for c in pivot.columns:
|
||||
pivot[c] = [x[0] if x is not np.NaN and len(x)==1 else x for x in pivot[c]]
|
||||
if 'DH' in pivot.columns:
|
||||
pivot['DH'] = [','.join(x) if (x is not np.NaN) and (len(x)==2) else x for x in pivot['DH']]
|
||||
pivot.DH.replace({'CDH':''},regex=True,inplace=True)
|
||||
|
||||
# Left-join the status codes onto the species sheet through ``cd_col``.
pivot = tab_sp.merge(pivot,on=[cd_col],how='left')
|
||||
|
||||
# Same pivot, collecting ``label_statut`` instead of ``code_statut``.
pivlib = pd.pivot_table(
|
||||
df,
|
||||
values='label_statut',
|
||||
index=[
|
||||
'cd_nom', 'cd_ref','lb_nom'#,'niveau_admin','lb_adm_tr'
|
||||
],
|
||||
columns=['cd_type_statut'],
|
||||
aggfunc=list,fill_value=None)
|
||||
for c in pivlib.columns:
|
||||
pivlib[c] = [x[0] if x is not np.NaN and len(x)==1 else x for x in pivlib[c]]
|
||||
# NOTE(review): this tests ``pivot.columns`` while the lines after it touch
# ``pivlib['DH']`` - confirm whether ``pivlib.columns`` was intended.
if 'DH' in pivot.columns:
|
||||
pivlib['DH'] = [','.join(x) if (x is not np.NaN) and (len(x)==2) else x for x in pivlib['DH']]
|
||||
pivlib.DH.replace({'CDH':''},regex=True,inplace=True)
|
||||
|
||||
pivlib = tab_sp.merge(pivlib,on=[cd_col],how='left')
|
||||
|
||||
print('INIT writer')
|
||||
# The second workbook is written next to the input file, named after the sheet.
NAME_OUT = os.path.join(PATH,sheet+'_status.xlsx')
|
||||
with pd.ExcelWriter(NAME_OUT) as writer:
|
||||
df.to_excel(
|
||||
writer,sheet_name='v_bdc_status',index=False
|
||||
)
|
||||
# writer.save()
|
||||
print('v_bdc_status OK !')
|
||||
pivot.to_excel(
|
||||
writer,sheet_name='pivot_table'
|
||||
)
|
||||
# writer.save()
|
||||
print('pivot_table OK !')
|
||||
pivlib.to_excel(
|
||||
writer,sheet_name='pivot_libel'
|
||||
)
|
||||
# writer.save()
|
||||
print('pivot_libel OK !')
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user