Python_scripts/1_SICEN/MIGRATION GEONATURE/classification_group.py

322 lines
13 KiB
Python

#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
from pycen import con_sicen
import geopandas as gpd
DICT_TAXREF_OLDNAME = {
116744 : 521658, # Quercus petraea
# 9999056 : 0, # Micromammalia sp.
9999077 : 183820, # Arachnida sp.
9999078 : 187496, # Fungi sp.
# 9999081 : 0, # Heterocera sp.
9999058 : 195005, # Myotis sp.
}
DICT_TAXREF = {
# 116744:{'cd_nom':521658,'lb_nom':'Quercus petraea (Matt.) Liebl., 1784'}, # Quercus petraea
# 9999077:{'cd_nom':183820,'lb_nom':'Arachnida Cuvier, 1812'}, # Arachnida sp.
# Espèce domestique
9999031 : {'cd_nom':441709,'lb_nom':'Cairina moschata (Linnaeus, 1758)'}, # Cairina moschata f. domestica
# GROUPE
9999005: {'cd_nom':194069,'lb_nom':'Leucanthemum Mill., 1754'}, # Leucanthemum vulgare (#groupe) (#ref)
9999014: {'cd_nom':197281,'lb_nom':'Rubus L., 1753 [nom. et typ. cons.]'}, # Rubus fruticosus (#groupe) (#ref)
9999017: {'cd_nom':198226,'lb_nom':'Taraxacum F.H.Wigg., 1780'}, # Taraxacum officinale (#groupe) (#ref)
9999019: {'cd_nom':198449,'lb_nom':'Thymus L., 1753'}, # Thymus serpyllum (#groupe) (#ref)
9999020: {'cd_nom':198902,'lb_nom':'Vicia L., 1753'}, # Vicia sativa (#groupe) (#ref)
# COMPLEXE
9999075: {'cd_nom':196296,'lb_nom':'Pipistrellus Kaup, 1829'}, # Pipistrellus nathusii / kuhlii (#complexe) (#ref)
9999074: {'cd_nom':197040,'lb_nom':'Rana Linnaeus, 1758'}, # Rana dalmatina / temporaria (#complexe) (#ref)
9999057: {'cd_nom':195005,'lb_nom':'Myotis Kaup, 1829'}, # Myotis myotis / blythii (#complexe) (#ref)
9999050: {'cd_nom':193993,'lb_nom':'Leptidea Billberg, 1820'}, # Leptidea sinapis / reali (#complexe) (#ref)
9999066: {'cd_nom':196980,'lb_nom':'Pyrgus Hübner, 1819'}, # Pyrgus malvae / malvoides (#complexe) (#ref)
9999063: {'cd_nom':444436,'lb_nom':'Pelophylax Fitzinger, 1843'}, # Pelophylax kl. esculentus / lessonae (#complex...
9999054: {'cd_nom':194481,'lb_nom':'Martes Pinel, 1792'}, # Martes martes / foina (#complexe) (#ref)
9999037: {'cd_nom':191029,'lb_nom':'Colias Fabricius, 1807'}, # Colias alfacariensis / hyale (#complexe) (#ref)
9999023: {'cd_nom':188772,'lb_nom':'Acrocephalus J. A. Naumann & J. F. Naumann, 1811'}, # Acrocephalus palustris / scirpaceus (#complexe...
9999064: {'cd_nom':4280,'lb_nom':'Phylloscopus collybita (Vieillot, 1817)'}, # Phylloscopus collybita tristis / "abietinus" ...
9999046: {'cd_nom':192539,'lb_nom':'Felis Linnaeus, 1758'}, # Felis silvestris / catus (#complexe) (#ref)
9999080: {'cd_nom':194357,'lb_nom':'Lysandra Hemming, 1933'},# Lysandra coridon / hispana (#complexe)
9999033: {'cd_nom':886228,'lb_nom':'Acanthis Borkhausen, 1797'}, # Carduelis flammea flammea / cabaret / Carduelis hornemanni (#complexe)
9999041: {'cd_nom':4503,'lb_nom':'Corvus corone Linnaeus, 1758'}, # Corvus corone corone / cornix (#complexe)
9999042: {'cd_nom':186239,'lb_nom':'Vespertilionidae J. E. Gray, 1821'}, # Eptesicus / Nyctalus sp. (#complexe) (#ref)
9999082: {'cd_nom':195005,'lb_nom':'Myotis Kaup, 1829'}, # Myotis daubentonii/Myotis mystacinus (#complexe)
9999083: {'cd_nom':699094,'lb_nom':'Yangochiroptera Koopman, 1985'}, # Pipistrellus/Miniopterus (#complexe)
}
def get_pheno_genre(esp_fx, df):
nom = esp_fx
return
def _vm_synthese_observations_(where=''):
# Lecture des données
sql = 'SELECT * FROM saisie.vm_synthese_observations '
df = gpd.read_postgis(sql+where,con_sicen)
# drop Micromammalia sp.
drp = df.loc[df.cd_nom == 9999056].index
df.drop(drp,inplace=True)
is_group = df.nom_complet.str.contains('#group')
is_complex = df.nom_complet.str.contains('#complex')
is_domestic = df.nom_complet.str.contains('domestic')
lst_group = [*df[is_group].nom_complet.unique()]
lst_complex = [*df[is_complex].nom_complet.unique()]
lst_domestic = [*df[is_domestic].nom_complet.unique()]
# df['group'] = None
# df['complex'] = None
# df.loc[is_group,'group'] = df[is_group].nom_complet.copy()
# df.loc[is_complex,'complex'] = df[is_complex].nom_complet.copy()
df['rmq_taxonomie'] = None
df.loc[is_complex|is_group|is_domestic,'rmq_taxonomie'] = df[is_complex|is_group|is_domestic].nom_complet.copy()
for cd in df[is_complex|is_group|is_domestic].cd_nom.unique():
lb = df[df.cd_nom==cd].nom_complet.unique()[0]
cdn = {cd:DICT_TAXREF[cd]['cd_nom']}
lbn = {lb:DICT_TAXREF[cd]['lb_nom']}
df.nom_complet.replace(lbn,inplace=True)
df.nom_latin.replace(lbn,inplace=True)
df.cd_nom.replace(cdn,inplace=True)
# Transform geom TO WKT format
df['geom_wkt'] = df.geometry.to_wkt()
# Récup code insee département
df['insee_dep'] = df.insee_commune.str[:2]
# Mise en forme de la précision de la données
df.precision = format_precision(df.precision)
# Actualisation des cd_nom
df.cd_nom.replace(DICT_TAXREF_OLDNAME,inplace=True)
# Mise en forme des effectifs
df.effectif = format_effectif(df.effectif.copy())
df.effectif_min = format_effectif(df.effectif_min.copy())
df.effectif_max = format_effectif(df.effectif_max.copy())
df.loc[df.effectif.isna(),'effectif'] = df[df.effectif.isna()].effectif_min
df.effectif.fillna('1',inplace=True)
df.loc[df.effectif_max.isna(),'effectif_max'] = df[df.effectif_max.isna()].effectif
df.effectif = df.effectif.astype(int)
df.effectif_max = df.effectif_max.astype(int)
check_effectif = df['effectif'].astype(float) > df['effectif_min'].astype(float)
if not df[check_effectif].empty:
print('WARNING : "effectif" > "effectif_min"')
print(' nrows : %s'%df[check_effectif].shape[0])
df.loc[check_effectif,'effectif_max'] = df[check_effectif].effectif.copy()
df.loc[check_effectif,'effectif'] = df[check_effectif].effectif_min.copy()
print( 'effectif TO effectif_max : OK !')
df.effectif_min = df.effectif.copy()
return df
def format_precision(lst):
return lst.copy().replace({
'GPS': '5',
'0 à 10m': '5',
'10 à 100m': '50',
'100 à 500m': '250',
'500 à 1000m': '750',
'> 1000m': '2000',
'lieu-dit': '2500',
'commune': '5000',
})
def format_effectif(lst):
lst.loc[lst.notna()] = lst[lst.notna()].astype(int).astype(str)
return lst
def additional_data(df,columns):
df['additional_data'] = df[columns].to_json(orient="records",force_ascii=False)
df.drop(columns=columns,inplace=True)
return df
class flore:
def all():
w = "WHERE regne = 'Plantae'"
res = _vm_synthese_observations_(w)
# return additional_data(res,['strate_flore','effectif_textuel'])
return res
def vasculaire():
w = "WHERE group1_inpn = 'Plantes vasculaires'"
res = _vm_synthese_observations_(w)
# return additional_data(res,['strate_flore','effectif_textuel'])
return res
def bryophyte():
w = "WHERE group1_inpn = 'Bryophytes'"
res = _vm_synthese_observations_(w)
# return additional_data(res,['strate_flore','effectif_textuel'])
return res
def characee():
w = "WHERE famille = 'Characeae'"
res = _vm_synthese_observations_(w)
# return additional_data(res,['strate_flore','effectif_textuel'])
return res
class faune:
def all():
w = "WHERE regne = 'Animalia'"
return _vm_synthese_observations_(w)
class invertebre:
def all():
w = "WHERE regne = 'Animalia' AND phylum <> 'Chordata'"
return _vm_synthese_observations_(w)
def odonate():
w = "WHERE ordre = 'Odonata'"
return _vm_synthese_observations_(w)
def orthoptere():
w = "WHERE ordre = 'Orthoptera'"
return _vm_synthese_observations_(w)
def lepidoptere():
w = "WHERE ordre = 'Lepidoptera'"
return _vm_synthese_observations_(w)
def mollusque():
w = "WHERE group1_inpn = 'Mollusques'"
return _vm_synthese_observations_(w)
def crustacees():
w = "WHERE group2_inpn = 'Crustacés'"
return _vm_synthese_observations_(w)
def ascomycete():
w = "WHERE ordre = 'Ascomycètes'"
return _vm_synthese_observations_(w)
def autre():
w = """
WHERE regne = 'Animalia'
AND phylum <> 'Chordata'
AND ordre <> 'Odonata'
AND ordre <> 'Orthoptera'
AND ordre <> 'Lepidoptera'
AND group1_inpn <> 'Mollusques'
AND group2_inpn <> 'Crustacés'
AND ordre <> 'Ascomycètes'
"""
return _vm_synthese_observations_(w)
class vertebre:
def all():
w = "WHERE regne = 'Animalia' AND phylum = 'Chordata'"
return _vm_synthese_observations_(w)
def mammifere():
w = "WHERE group2_inpn = 'Mammifères' AND ordre <> 'Chiroptera'"
return _vm_synthese_observations_(w)
def chiroptere():
w = "WHERE ordre = 'Chiroptera'"
return _vm_synthese_observations_(w)
def oiseau():
w = "WHERE group2_inpn = 'Oiseaux'"
return _vm_synthese_observations_(w)
def reptile():
w = "WHERE group2_inpn = 'Reptiles'"
return _vm_synthese_observations_(w)
def amphibien():
w = "WHERE group2_inpn = 'Amphibiens'"
return _vm_synthese_observations_(w)
def poisson():
w = "WHERE group2_inpn = 'Poissons'"
return _vm_synthese_observations_(w)
def champignon():
w = "WHERE regne IN ('Fungi','Chromista')"
return _vm_synthese_observations_(w)
def export_csv(df,path_name):
df.dropna(how='all',axis=1,inplace=True)
drop_cols = [
'regne','phylum','classe','ordre','famille','group1_inpn','group2_inpn','cd_ref',
'altitude_z','longitude_x','latitude_y','geom','nom_commune',
'etude','lot_donnee','observateurs','structures'
]
d = df.columns[df.columns.isin(drop_cols)]
df.drop(columns=d,inplace=True)
if df.shape[0] > 50000:
from math import ceil
loop = ceil(df.shape[0]/50000)
i = 0
for j in range(1,loop+1):
p = path_name.replace('.csv','_%s.csv'%j)
jj = j*50000
df.iloc[i:jj].to_csv(p,index=False)
i = jj
else:
df.to_csv(path_name,index=False)
if __name__ == "__main__":
test = False
PATH = '/home/colas/Documents/9_PROJETS/4_SICEN/GN_MIGRATION/'
al = flore.all()
va = flore.vasculaire()
br = flore.bryophyte()
ch = flore.characee()
export_csv(al,PATH+'FLORE/all.csv')
# export_csv(va[va.effectif< va.effectif_max].copy(),PATH+'FLORE/test_eff_vasculaire.csv')
export_csv(va,PATH+'FLORE/vasculaire.csv')
export_csv(br,PATH+'FLORE/bryophyte.csv')
export_csv(ch,PATH+'FLORE/characee.csv')
al.shape[0] - (va.shape[0]+br.shape[0]+ch.shape[0])
fa = faune.all()
fia = faune.invertebre.all()
fva = faune.vertebre.all()
fa.shape[0] - (fia.shape[0] + fva.shape[0])
if test :
fa[~fa.id_obs.isin([*fia.id_obs,*fva.id_obs])]
ca = champignon()
vm = _vm_synthese_observations_()
date_cols = vm.columns[vm.columns.str.contains('date')]
mtd = vm[['lot_donnee','protocole','etude']].drop_duplicates()
vm.shape[0] - (al.shape[0]+fa.shape[0]+ca.shape[0])
if test :
vm[~vm.id_obs.isin([*al.id_obs,*fa.id_obs,*ca.id_obs])]
# Sub verification
fiod = faune.invertebre.odonate()
fior = faune.invertebre.orthoptere()
file = faune.invertebre.lepidoptere()
fimo = faune.invertebre.mollusque()
ficr = faune.invertebre.crustacees()
fias = faune.invertebre.ascomycete()
fiau = faune.invertebre.autre()
fia.shape[0] - (
fiod.shape[0] + fior.shape[0] + file.shape[0] + fimo.shape[0] + ficr.shape[0] + fias.shape[0] + fiau.shape[0]
)
if test :
fia[~fia.id_obs.isin([*fiod.id_obs,*fior.id_obs,*file.id_obs,*fimo.id_obs,*ficr.id_obs,*fias.id_obs,*fiau.id_obs])]
fvma = faune.vertebre.mammifere()
fvch = faune.vertebre.chiroptere()
fvoi = faune.vertebre.oiseau()
fvre = faune.vertebre.reptile()
fvam = faune.vertebre.amphibien()
fvpo = faune.vertebre.poisson()
fva.shape[0] - (
fvma.shape[0] + fvch.shape[0] + fvoi.shape[0] + fvre.shape[0] + fvam.shape[0] + fvpo.shape[0]
)
if test :
fva[~fva.id_obs.isin([*fvma.id_obs,*fvch.id_obs,*fvoi.id_obs,*fvre.id_obs,*fvam.id_obs,*fvpo.id_obs])]