#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""Export the SICEN synthesis views as per-study CSV files for GeoNature.

The script reads the invertebrate, vertebrate and flora synthesis views,
normalises a handful of columns (precision, counts, taxon codes, dates,
life stage, behaviour) and writes one CSV per study (``etude``).
"""

import os

import geopandas as gpd
from pycen import update_to_sql, con_sicen, con_gn

DIR = '/home/colas/Documents/9_PROJETS/4_SICEN/GN_MIGRATION'

# cd_nom values replaced one-for-one by ``form_cdnom``.
_CDNOM_FIXES = {
    116744: 521658,  # Flora: Quercus petraea
}

# Local pseudo-taxa (groups / complexes / forms) -> cd_nom used instead.
_COMPLEX_GROUP_CDNOM = {
    9999005: 105817,  # Leucanthemum vulgare (group)
    9999014: 119097,  # Rubus fruticosus (group)
    9999017: 717630,  # Taraxacum officinale (group)
    9999019: 126573,  # Thymus serpyllum (group)
    9999020: 129298,  # Vicia sativa (group)
    9999023: 188772,  # Acrocephalus palustris / scirpaceus (complex)
    9999031: 441709,  # Cairina moschata f. domestica (form)
    9999033: 190350,  # Carduelis flammea flammea / cabaret / Carduelis hornemanni (complex)
    9999037: 191029,  # Colias alfacariensis / hyale (complex)
    9999041: 4503,    # Corvus corone corone / cornix (complex)
    9999042: 186239,  # Eptesicus / Nyctalus sp. (complex)
    9999046: 192539,  # Felis silvestris / catus (complex)
    9999050: 193993,  # Leptidea sinapis / reali (complex)
    9999054: 194481,  # Martes martes / foina (complex)
    9999057: 195005,  # Myotis myotis / blythii (complex)
    9999063: 444436,  # Pelophylax kl. esculentus / lessonae (complex)
    9999064: 4280,    # Phylloscopus collybita tristis / "abietinus" (complex)
    9999066: 196980,  # Pyrgus malvae / malvoides (complex)
    9999074: 197040,  # Rana dalmatina / temporaria (complex)
    9999075: 196296,  # Pipistrellus nathusii / kuhlii (complex)
    9999080: 194357,  # Lysandra coridon / hispana (complex)
    9999082: 195005,  # Myotis daubentonii / Myotis mystacinus (complex)
    9999083: 699094,  # Pipistrellus / Miniopterus (complex)
}


def form_cdnom(data):
    """Replace the codes listed in ``_CDNOM_FIXES`` in ``cd_nom`` and ``cd_ref``.

    The frame is modified in place; nothing is returned.
    """
    data.replace({'cd_nom': _CDNOM_FIXES}, inplace=True)
    data.replace({'cd_ref': _CDNOM_FIXES}, inplace=True)


def form_complx_grp(data):
    """Map pseudo-taxa (groups/complexes) to the codes of ``_COMPLEX_GROUP_CDNOM``.

    For the rows concerned, the original ``nom_complet`` is saved in a
    ``complexe_groupe`` column, ``cd_nom`` is replaced, and ``nom_latin`` is
    refreshed from ``taxonomie.taxref`` (read through ``con_gn``).

    Returns the resulting frame. The taxref lookup goes through a merge,
    which builds a NEW frame, so callers must use the return value (the
    original version dropped it and the ``nom_latin`` update was lost).
    When no row is concerned, ``data`` is returned untouched and no
    ``complexe_groupe`` column is created.
    """
    is_grp = data.cd_nom.isin(list(_COMPLEX_GROUP_CDNOM))
    if not is_grp.any():
        return data

    data.loc[is_grp, 'complexe_groupe'] = data.loc[is_grp, 'nom_complet']
    data.replace({'cd_nom': _COMPLEX_GROUP_CDNOM}, inplace=True)

    sql = 'SELECT cd_nom, nom_complet nom_new FROM taxonomie.taxref where cd_nom in {}'.format(
        tuple(_COMPLEX_GROUP_CDNOM.values()))
    tax = gpd.pd.read_sql(sql, con_gn)

    data = data.merge(tax, how='left', on='cd_nom')
    has_new = data.nom_new.notna()
    data.loc[has_new, 'nom_latin'] = data.loc[has_new, 'nom_new']
    return data.drop(columns='nom_new')


def form_precision(data):
    """Convert the textual ``precision`` classes to integers, in place.

    'commune' has no numeric equivalent: it becomes a missing value and
    the information is moved to ``rmq_localisation`` instead (prefixed to
    the existing remark when there is one).
    """
    dict_pre = {
        'GPS': 0,
        '0 à 10m': 10,
        '10 à 100m': 100,
        '100 à 500m': 500,
        'lieu-dit': 750,
        'commune': None,
    }
    is_com = data.precision == 'commune'
    rmq_null = data.rmq_localisation.isnull()
    com_with_rmq = is_com & ~rmq_null
    com_without_rmq = is_com & rmq_null

    # Both masks are computed up front and are disjoint, so the order of
    # the two assignments does not matter.
    if com_with_rmq.any():
        data.loc[com_with_rmq, 'rmq_localisation'] = (
            'Localisation : commune ;' + data.loc[com_with_rmq, 'rmq_localisation'])
    if com_without_rmq.any():
        data.loc[com_without_rmq, 'rmq_localisation'] = 'Localisation : commune'

    data.replace({'precision': dict_pre}, inplace=True)
    data['precision'] = data.precision.astype('Int64')


def form_effectif(data):
    """Fill missing ``effectif`` from ``effectif_min`` / ``effectif_max``, in place.

    ``effectif`` and ``effectif_max`` are then cast to nullable integers.
    ``effectif_min`` / ``effectif_max`` are optional: the flora frame has
    its all-null columns dropped before it reaches this function.
    """
    eff_isna = data.effectif.isna()

    if 'effectif_min' in data.columns:
        from_min = eff_isna & data.effectif_min.notna()
        data.loc[from_min, 'effectif'] = data.loc[from_min, 'effectif_min']

    if 'effectif_max' in data.columns:
        # NOTE(review): ``eff_isna`` is the mask taken BEFORE the min fill,
        # so when both bounds exist the max overrides the min (behaviour of
        # the original code, kept as is) - confirm this is intended.
        from_max = eff_isna & data.effectif_max.notna()
        data.loc[from_max, 'effectif'] = data.loc[from_max, 'effectif_max']
        data['effectif_max'] = data.effectif_max.astype('Int64')

    data['effectif'] = data.effectif.astype('Int64')


def form_date(data):
    """Set missing values to ``None`` in every column whose name contains 'date'."""
    cols = data.columns[data.columns.str.contains('date')]
    for col in cols:
        data.loc[data[col].isna(), col] = None


def recup_stadevie(data):
    """Fill and normalise the life stage column ``age_faune``.

    Does nothing when the frame has no ``age_faune`` column. Otherwise:

    1. missing stages are read from the 'Stade de vie : ... |' part of
       ``rmq_observation``;
    2. rows carrying a double stage ('Imago, adulte', 'Nymphe, immature')
       are duplicated, one row per stage;
    3. remaining gaps are filled from ``reprostatut_faune``;
    4. the generic stages 'Exuvie/émergence' and
       'Oeuf/ponte/larve/nymphe/chenille...' are refined using the remark
       text and the taxonomic columns ``ordre`` / ``group2_inpn``.

    Returns the resulting frame. Step 2 concatenates rows and therefore
    builds a new object: callers must use the return value.
    """
    if 'age_faune' not in data.columns:
        return data

    dict_repro = {
        'ODO_Exuvie/émergence': 'Exuvie/émergence',
        'ODO_Immature': 'Immature',
        **dict.fromkeys(['ODO_Mâles+Femelles', 'ODO_Tandem', 'ODO_Territorial', 'ODO_Ponte'], 'Adulte'),
        **dict.fromkeys(['CHIR_Indéterminé', 'ODO_Indéterminé'], 'Indéterminé'),
    }

    # 1. Stage written in the remark.
    from_rmq = data.age_faune.isna() & data.rmq_observation.str.contains('Stade de vie', na=False)
    data.loc[from_rmq, 'age_faune'] = (data.loc[from_rmq, 'rmq_observation']
        .str.split('Stade de vie :')
        .str[1]
        .str.split('|')
        .str[0]
        .str.strip())

    # 2. Double stages: keep the first one on the row, the second on a copy.
    double_info = data.age_faune.isin(['Imago, adulte', 'Nymphe, immature'])
    d2 = data[double_info].copy()

    data['age_faune'] = (data.age_faune
        .str.replace('Imago, adulte', 'Imago', regex=False)
        .str.replace('Nymphe, immature', 'Nymphe', regex=False))
    d2['age_faune'] = (d2.age_faune
        .str.replace('Imago, adulte', 'Adulte', regex=False)
        .str.replace('Nymphe, immature', 'Immature', regex=False))

    # ignore_index: the copies would otherwise share index labels with
    # their source rows, and the label-aligned assignments below need a
    # unique index.
    data = gpd.pd.concat([data, d2], ignore_index=True)

    # 3. Stage deduced from the reproduction status.
    t1 = data.age_faune.isna() & data.reprostatut_faune.isin([*dict_repro.keys()])
    data.loc[t1, 'age_faune'] = data.loc[t1, 'reprostatut_faune'].replace(dict_repro)

    # 4a. 'Exuvie/émergence': choose according to the remark. When both
    # words are present 'Exuvie' wins.
    t2 = data.age_faune == 'Exuvie/émergence'
    rmq_norm = data.rmq_observation.replace(['é', 'E', 'É'], 'e', regex=True)
    is_exuvie = rmq_norm.str.contains('exuvie', na=False)
    is_emerge = rmq_norm.str.contains('emerge', na=False)

    data.loc[t2 & is_exuvie, 'age_faune'] = 'Exuvie'
    data.loc[t2 & ~is_exuvie & is_emerge, 'age_faune'] = 'Emergent'
    # NOTE(review): the original had one more rule (both words present AND
    # remark missing -> 'Emergent') which could never match. Rows without
    # either word keep 'Exuvie/émergence' - confirm the wanted default.

    # 4b. 'Oeuf/ponte/larve/nymphe/chenille...'
    t3 = data.age_faune == 'Oeuf/ponte/larve/nymphe/chenille...'
    # Odonata
    is_odo = data.ordre == 'Odonata'
    data.loc[t3 & is_odo, 'age_faune'] = 'Exuvie'
    # Amphibians
    is_amphi = data.group2_inpn == 'Amphibiens'
    is_larve = data.rmq_observation.str.contains('larve', na=False)
    is_ponte = data.rmq_observation.str.contains('ponte', na=False)
    # The original searched for 'ponte' here too (copy-paste), which turned
    # every spawn record into 'Têtard' and never detected tadpoles.
    is_tetar = (data.rmq_observation
        .replace('ê', 'e', regex=True)
        .str.contains('tetard', case=False, na=False))
    is_urode = data.ordre == 'Urodela'
    is_anure = data.ordre == 'Anura'
    rmq_na = data.rmq_observation.isna()
    if 'obj_denombre' not in data.columns:
        data['obj_denombre'] = None
    data.loc[t3 & is_amphi & is_larve, 'age_faune'] = 'Larve'
    data.loc[t3 & is_amphi & is_ponte, 'age_faune'] = 'Oeufs'
    data.loc[t3 & is_amphi & is_ponte, 'obj_denombre'] = 'Ponte'
    data.loc[t3 & is_amphi & is_tetar, 'age_faune'] = 'Têtard'
    data.loc[t3 & is_amphi & is_urode & rmq_na, 'age_faune'] = 'Larve'
    # NOTE(review): 'Ponte' is used as a life stage here while spawn is
    # encoded as 'Oeufs' + obj_denombre='Ponte' above - confirm.
    data.loc[t3 & is_amphi & is_anure & rmq_na, 'age_faune'] = 'Ponte'
    return data


def recup_comptmt(data):
    """Fill the ``comportement`` column, in place.

    Does nothing when the frame has no ``reprostatut_faune`` column.
    The behaviour is read from the 'Comp. ind. : ... |' part of
    ``rmq_observation`` when the reproduction status is missing;
    otherwise a few reproduction statuses that describe a behaviour are
    copied over when both behaviour and sex are still missing.
    """
    if 'reprostatut_faune' not in data.columns:
        return
    if 'comportement' not in data.columns:
        data['comportement'] = None

    compt_isna = data.reprostatut_faune.isna()
    from_rmq = compt_isna & data.rmq_observation.str.contains('Comp. ind', na=False)
    if from_rmq.any():
        data.loc[from_rmq, 'comportement'] = (data.loc[from_rmq, 'rmq_observation']
            .str.split('Comp. ind. :')
            .str[1]
            .str.split('|')
            .str[0]
            .str.strip())

    sex_isna = data.sexe_faune.isna()
    comp_isna = data.comportement.isna()
    lst_comp = ['ODO_Tandem', 'ODO_Territorial', 'INV_Accouplement', 'INV_Chant (orthoptères)', 'ODO_Ponte']
    t3 = (~compt_isna) & comp_isna & sex_isna & (data.reprostatut_faune.isin(lst_comp))
    data.loc[t3, 'comportement'] = data.loc[t3, 'reprostatut_faune']


def isole_rnngl():
    """Placeholder, not implemented: returns ``None``."""
    return


def format_faune(data):
    """Apply the fauna-specific steps (behaviour, then life stage); return the frame."""
    recup_comptmt(data)
    return recup_stadevie(data)


def format_data(data):
    """Run every normalisation step on ``data`` and return the result.

    Several steps build a new frame, so the return value must be used:
    the input object is only partially updated.
    """
    form_precision(data)
    form_effectif(data)
    form_cdnom(data)
    form_date(data)
    data = form_complx_grp(data)
    return format_faune(data)


def export(path_name, data, format='csv'):
    """Write ``data`` to ``path_name`` + '.' + ``format``.

    Columns whose name starts with 'date' are written as strings. The
    conversion is done on a copy so the caller's frame is left untouched.

    Raises:
        ValueError: if ``format`` is not supported.
    """
    df = data.copy()
    detect_date = df.columns[df.columns.str.startswith('date')]
    if len(detect_date):
        df[detect_date] = df[detect_date].astype(str)

    if format == 'xlsx':
        # NOTE(review): the lines of the original function located before
        # the csv branch were outside the reviewed patch hunks; this Excel
        # branch is an assumption - confirm against the full file.
        df.to_excel(path_name+'.%s'%format)
    elif format == 'csv':
        df.to_csv(path_name+'.%s'%format)
    else:
        # The original did ``raise('format non prévu')``, which raises a
        # TypeError because a str is not an exception.
        raise ValueError('format non prévu')


def serena_rnngl_site():
    """Read the sites of the Serena database and return them as geometries.

    ``rnf_site.site_poly`` is parsed as a comma-separated list of
    'x y' pairs after dropping its first 9 characters; the polygons are
    declared in EPSG:4326 and reprojected to EPSG:2154. Rows without
    geometry are dropped.
    """
    from sqlalchemy import create_engine  # database access
    from sqlalchemy.engine import URL
    from shapely.geometry import Polygon

    # NOTE(review): credentials are hard-coded - consider moving them to
    # the environment or a configuration file.
    usr = 'postgres'
    pdw = 'postgres'
    bdd = 'serenadb'
    host = '172.17.0.2'
    eng = URL.create('postgresql+psycopg2', username=usr, password=pdw, host=host, database=bdd)
    conn = create_engine(eng)
    sit = gpd.pd.read_sql_table('rnf_site', con=conn, schema='serenabase')
    sit['poly'] = (sit
        .site_poly.str[9:]
        .str.split(','))

    def to_polygon(points):
        # Empty chunks are skipped.
        return Polygon([[float(v) for v in pt.split(' ')] for pt in points if pt])

    has_poly = sit.poly.notna()
    sit.loc[has_poly, 'geom'] = sit.loc[has_poly, 'poly'].apply(to_polygon)

    return (sit
        .set_geometry('geom', crs=4326)
        .to_crs(2154)
        .dropna(subset=['geom']))


def _sql_exclude_rnngl(use_geometry=False):
    """Return the SQL condition excluding the Grand-Lemps observations.

    ``COALESCE`` is required: ``NULL NOT ILIKE ...`` evaluates to NULL, so
    without it every row lacking a location remark would be filtered out.

    With ``use_geometry=True`` the condition also tests the intersection
    with the sites returned by ``serena_rnngl_site`` (needs the Serena
    database). The original script built that variant and immediately
    overwrote it with the text-only one, which stays the default.
    """
    if use_geometry:
        sit_rnngl = serena_rnngl_site()
        return (" (COALESCE(rmq_localisation,'') NOT ILIKE '%%lemps%%'"
                " OR NOT ST_INTERSECTS(geom,'SRID=2154;{}'))").format(sit_rnngl.unary_union)
    return " COALESCE(rmq_localisation,'') NOT ILIKE '%%grand%%lemps%%'"


def _export_by_etude(frame, folder):
    """Write one CSV per study of ``frame`` under ``DIR``/``folder``.

    When a study contains group/complex taxa (``complexe_groupe`` filled),
    those rows go to '<etude>_GRP.csv' and the others to '<etude>.csv'.
    Rows without a study name are not exported.
    """
    out_dir = os.path.join(DIR, folder)
    os.makedirs(out_dir, exist_ok=True)

    for etude in frame.etude.dropna().unique():
        subset = frame[frame.etude == etude].dropna(how='all', axis=1)
        if 'complexe_groupe' in subset.columns:
            is_grp = subset.complexe_groupe.notna()
            export(os.path.join(out_dir, etude+'_GRP'), subset[is_grp], format='csv')
            export(os.path.join(out_dir, etude),
                   subset[~is_grp].dropna(how='all', axis=1), format='csv')
        else:
            export(os.path.join(out_dir, etude), subset, format='csv')


def main():
    """Read the three synthesis views, normalise them and export them."""
    v_synthese_invertebre = 'v_synthese_invertebre'
    v_synthese_vertebre = 'v_synthese_vertebre'
    v_synthese_flore = 'v_synthese_flore'

    sql_exclude_rnngl = _sql_exclude_rnngl()

    sql_inv = "SELECT * FROM saisie.%s WHERE cd_nom <> '9999081'"%v_synthese_invertebre  # 9999081: Heterocera sp.
    v_inv = gpd.read_postgis(sql_inv+" AND"+sql_exclude_rnngl, con_sicen)
    sql_ver = "SELECT * FROM saisie.%s WHERE cd_nom <> '9999056'"%v_synthese_vertebre  # 9999056: Micromammalia sp.
    v_ver = gpd.read_postgis(sql_ver+" AND"+sql_exclude_rnngl, con_sicen)
    sql_flo = 'SELECT * FROM saisie.%s'%v_synthese_flore
    v_flo = gpd.read_postgis(sql_flo+" WHERE"+sql_exclude_rnngl, con_sicen).dropna(how='all', axis=1)

    # format_data returns a new frame: the result must be kept for every
    # view (the original only kept it for the invertebrates).
    v_inv = format_data(v_inv)
    v_ver = format_data(v_ver)
    v_flo = format_data(v_flo)

    _export_by_etude(v_inv, 'INVERTEBRE')
    _export_by_etude(v_ver, 'VERTEBRE')
    _export_by_etude(v_flo, 'FLORE')


if __name__ == "__main__":
    main()