322 lines
13 KiB
Python
322 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: UTF-8 -*-
|
|
|
|
from pycen import con_sicen
|
|
import geopandas as gpd
|
|
|
|
DICT_TAXREF_OLDNAME = {
|
|
116744 : 521658, # Quercus petraea
|
|
# 9999056 : 0, # Micromammalia sp.
|
|
9999077 : 183820, # Arachnida sp.
|
|
9999078 : 187496, # Fungi sp.
|
|
# 9999081 : 0, # Heterocera sp.
|
|
9999058 : 195005, # Myotis sp.
|
|
}
|
|
|
|
DICT_TAXREF = {
|
|
# 116744:{'cd_nom':521658,'lb_nom':'Quercus petraea (Matt.) Liebl., 1784'}, # Quercus petraea
|
|
# 9999077:{'cd_nom':183820,'lb_nom':'Arachnida Cuvier, 1812'}, # Arachnida sp.
|
|
|
|
# Espèce domestique
|
|
9999031 : {'cd_nom':441709,'lb_nom':'Cairina moschata (Linnaeus, 1758)'}, # Cairina moschata f. domestica
|
|
|
|
# GROUPE
|
|
9999005: {'cd_nom':194069,'lb_nom':'Leucanthemum Mill., 1754'}, # Leucanthemum vulgare (#groupe) (#ref)
|
|
9999014: {'cd_nom':197281,'lb_nom':'Rubus L., 1753 [nom. et typ. cons.]'}, # Rubus fruticosus (#groupe) (#ref)
|
|
9999017: {'cd_nom':198226,'lb_nom':'Taraxacum F.H.Wigg., 1780'}, # Taraxacum officinale (#groupe) (#ref)
|
|
9999019: {'cd_nom':198449,'lb_nom':'Thymus L., 1753'}, # Thymus serpyllum (#groupe) (#ref)
|
|
9999020: {'cd_nom':198902,'lb_nom':'Vicia L., 1753'}, # Vicia sativa (#groupe) (#ref)
|
|
|
|
# COMPLEXE
|
|
9999075: {'cd_nom':196296,'lb_nom':'Pipistrellus Kaup, 1829'}, # Pipistrellus nathusii / kuhlii (#complexe) (#ref)
|
|
9999074: {'cd_nom':197040,'lb_nom':'Rana Linnaeus, 1758'}, # Rana dalmatina / temporaria (#complexe) (#ref)
|
|
9999057: {'cd_nom':195005,'lb_nom':'Myotis Kaup, 1829'}, # Myotis myotis / blythii (#complexe) (#ref)
|
|
9999050: {'cd_nom':193993,'lb_nom':'Leptidea Billberg, 1820'}, # Leptidea sinapis / reali (#complexe) (#ref)
|
|
9999066: {'cd_nom':196980,'lb_nom':'Pyrgus Hübner, 1819'}, # Pyrgus malvae / malvoides (#complexe) (#ref)
|
|
9999063: {'cd_nom':444436,'lb_nom':'Pelophylax Fitzinger, 1843'}, # Pelophylax kl. esculentus / lessonae (#complex...
|
|
9999054: {'cd_nom':194481,'lb_nom':'Martes Pinel, 1792'}, # Martes martes / foina (#complexe) (#ref)
|
|
9999037: {'cd_nom':191029,'lb_nom':'Colias Fabricius, 1807'}, # Colias alfacariensis / hyale (#complexe) (#ref)
|
|
9999023: {'cd_nom':188772,'lb_nom':'Acrocephalus J. A. Naumann & J. F. Naumann, 1811'}, # Acrocephalus palustris / scirpaceus (#complexe...
|
|
9999064: {'cd_nom':4280,'lb_nom':'Phylloscopus collybita (Vieillot, 1817)'}, # Phylloscopus collybita tristis / "abietinus" ...
|
|
9999046: {'cd_nom':192539,'lb_nom':'Felis Linnaeus, 1758'}, # Felis silvestris / catus (#complexe) (#ref)
|
|
9999080: {'cd_nom':194357,'lb_nom':'Lysandra Hemming, 1933'},# Lysandra coridon / hispana (#complexe)
|
|
9999033: {'cd_nom':886228,'lb_nom':'Acanthis Borkhausen, 1797'}, # Carduelis flammea flammea / cabaret / Carduelis hornemanni (#complexe)
|
|
9999041: {'cd_nom':4503,'lb_nom':'Corvus corone Linnaeus, 1758'}, # Corvus corone corone / cornix (#complexe)
|
|
9999042: {'cd_nom':186239,'lb_nom':'Vespertilionidae J. E. Gray, 1821'}, # Eptesicus / Nyctalus sp. (#complexe) (#ref)
|
|
9999082: {'cd_nom':195005,'lb_nom':'Myotis Kaup, 1829'}, # Myotis daubentonii/Myotis mystacinus (#complexe)
|
|
9999083: {'cd_nom':699094,'lb_nom':'Yangochiroptera Koopman, 1985'}, # Pipistrellus/Miniopterus (#complexe)
|
|
}
|
|
|
|
def get_pheno_genre(esp_fx, df):
|
|
nom = esp_fx
|
|
return
|
|
|
|
|
|
|
|
def _vm_synthese_observations_(where=''):
|
|
# Lecture des données
|
|
sql = 'SELECT * FROM saisie.vm_synthese_observations '
|
|
df = gpd.read_postgis(sql+where,con_sicen)
|
|
|
|
# drop Micromammalia sp.
|
|
drp = df.loc[df.cd_nom == 9999056].index
|
|
df.drop(drp,inplace=True)
|
|
|
|
is_group = df.nom_complet.str.contains('#group')
|
|
is_complex = df.nom_complet.str.contains('#complex')
|
|
is_domestic = df.nom_complet.str.contains('domestic')
|
|
lst_group = [*df[is_group].nom_complet.unique()]
|
|
lst_complex = [*df[is_complex].nom_complet.unique()]
|
|
lst_domestic = [*df[is_domestic].nom_complet.unique()]
|
|
|
|
# df['group'] = None
|
|
# df['complex'] = None
|
|
# df.loc[is_group,'group'] = df[is_group].nom_complet.copy()
|
|
# df.loc[is_complex,'complex'] = df[is_complex].nom_complet.copy()
|
|
df['rmq_taxonomie'] = None
|
|
df.loc[is_complex|is_group|is_domestic,'rmq_taxonomie'] = df[is_complex|is_group|is_domestic].nom_complet.copy()
|
|
|
|
for cd in df[is_complex|is_group|is_domestic].cd_nom.unique():
|
|
lb = df[df.cd_nom==cd].nom_complet.unique()[0]
|
|
cdn = {cd:DICT_TAXREF[cd]['cd_nom']}
|
|
lbn = {lb:DICT_TAXREF[cd]['lb_nom']}
|
|
|
|
df.nom_complet.replace(lbn,inplace=True)
|
|
df.nom_latin.replace(lbn,inplace=True)
|
|
df.cd_nom.replace(cdn,inplace=True)
|
|
|
|
# Transform geom TO WKT format
|
|
df['geom_wkt'] = df.geometry.to_wkt()
|
|
# Récup code insee département
|
|
df['insee_dep'] = df.insee_commune.str[:2]
|
|
# Mise en forme de la précision de la données
|
|
df.precision = format_precision(df.precision)
|
|
# Actualisation des cd_nom
|
|
df.cd_nom.replace(DICT_TAXREF_OLDNAME,inplace=True)
|
|
|
|
# Mise en forme des effectifs
|
|
df.effectif = format_effectif(df.effectif.copy())
|
|
df.effectif_min = format_effectif(df.effectif_min.copy())
|
|
df.effectif_max = format_effectif(df.effectif_max.copy())
|
|
df.loc[df.effectif.isna(),'effectif'] = df[df.effectif.isna()].effectif_min
|
|
df.effectif.fillna('1',inplace=True)
|
|
df.loc[df.effectif_max.isna(),'effectif_max'] = df[df.effectif_max.isna()].effectif
|
|
df.effectif = df.effectif.astype(int)
|
|
df.effectif_max = df.effectif_max.astype(int)
|
|
|
|
check_effectif = df['effectif'].astype(float) > df['effectif_min'].astype(float)
|
|
if not df[check_effectif].empty:
|
|
print('WARNING : "effectif" > "effectif_min"')
|
|
print(' nrows : %s'%df[check_effectif].shape[0])
|
|
|
|
df.loc[check_effectif,'effectif_max'] = df[check_effectif].effectif.copy()
|
|
df.loc[check_effectif,'effectif'] = df[check_effectif].effectif_min.copy()
|
|
print( 'effectif TO effectif_max : OK !')
|
|
|
|
df.effectif_min = df.effectif.copy()
|
|
|
|
return df
|
|
|
|
|
|
def format_precision(lst):
|
|
return lst.copy().replace({
|
|
'GPS': '5',
|
|
'0 à 10m': '5',
|
|
'10 à 100m': '50',
|
|
'100 à 500m': '250',
|
|
'500 à 1000m': '750',
|
|
'> 1000m': '2000',
|
|
'lieu-dit': '2500',
|
|
'commune': '5000',
|
|
})
|
|
|
|
def format_effectif(lst):
|
|
lst.loc[lst.notna()] = lst[lst.notna()].astype(int).astype(str)
|
|
return lst
|
|
|
|
|
|
def additional_data(df,columns):
|
|
df['additional_data'] = df[columns].to_json(orient="records",force_ascii=False)
|
|
df.drop(columns=columns,inplace=True)
|
|
return df
|
|
|
|
|
|
class flore:
|
|
|
|
def all():
|
|
w = "WHERE regne = 'Plantae'"
|
|
res = _vm_synthese_observations_(w)
|
|
# return additional_data(res,['strate_flore','effectif_textuel'])
|
|
return res
|
|
|
|
def vasculaire():
|
|
w = "WHERE group1_inpn = 'Plantes vasculaires'"
|
|
res = _vm_synthese_observations_(w)
|
|
# return additional_data(res,['strate_flore','effectif_textuel'])
|
|
return res
|
|
|
|
def bryophyte():
|
|
w = "WHERE group1_inpn = 'Bryophytes'"
|
|
res = _vm_synthese_observations_(w)
|
|
# return additional_data(res,['strate_flore','effectif_textuel'])
|
|
return res
|
|
|
|
def characee():
|
|
w = "WHERE famille = 'Characeae'"
|
|
res = _vm_synthese_observations_(w)
|
|
# return additional_data(res,['strate_flore','effectif_textuel'])
|
|
return res
|
|
|
|
|
|
class faune:
|
|
def all():
|
|
w = "WHERE regne = 'Animalia'"
|
|
return _vm_synthese_observations_(w)
|
|
|
|
class invertebre:
|
|
def all():
|
|
w = "WHERE regne = 'Animalia' AND phylum <> 'Chordata'"
|
|
return _vm_synthese_observations_(w)
|
|
|
|
def odonate():
|
|
w = "WHERE ordre = 'Odonata'"
|
|
return _vm_synthese_observations_(w)
|
|
def orthoptere():
|
|
w = "WHERE ordre = 'Orthoptera'"
|
|
return _vm_synthese_observations_(w)
|
|
def lepidoptere():
|
|
w = "WHERE ordre = 'Lepidoptera'"
|
|
return _vm_synthese_observations_(w)
|
|
def mollusque():
|
|
w = "WHERE group1_inpn = 'Mollusques'"
|
|
return _vm_synthese_observations_(w)
|
|
def crustacees():
|
|
w = "WHERE group2_inpn = 'Crustacés'"
|
|
return _vm_synthese_observations_(w)
|
|
def ascomycete():
|
|
w = "WHERE ordre = 'Ascomycètes'"
|
|
return _vm_synthese_observations_(w)
|
|
def autre():
|
|
w = """
|
|
WHERE regne = 'Animalia'
|
|
AND phylum <> 'Chordata'
|
|
AND ordre <> 'Odonata'
|
|
AND ordre <> 'Orthoptera'
|
|
AND ordre <> 'Lepidoptera'
|
|
AND group1_inpn <> 'Mollusques'
|
|
AND group2_inpn <> 'Crustacés'
|
|
AND ordre <> 'Ascomycètes'
|
|
"""
|
|
return _vm_synthese_observations_(w)
|
|
|
|
class vertebre:
|
|
def all():
|
|
w = "WHERE regne = 'Animalia' AND phylum = 'Chordata'"
|
|
return _vm_synthese_observations_(w)
|
|
|
|
def mammifere():
|
|
w = "WHERE group2_inpn = 'Mammifères' AND ordre <> 'Chiroptera'"
|
|
return _vm_synthese_observations_(w)
|
|
def chiroptere():
|
|
w = "WHERE ordre = 'Chiroptera'"
|
|
return _vm_synthese_observations_(w)
|
|
def oiseau():
|
|
w = "WHERE group2_inpn = 'Oiseaux'"
|
|
return _vm_synthese_observations_(w)
|
|
def reptile():
|
|
w = "WHERE group2_inpn = 'Reptiles'"
|
|
return _vm_synthese_observations_(w)
|
|
def amphibien():
|
|
w = "WHERE group2_inpn = 'Amphibiens'"
|
|
return _vm_synthese_observations_(w)
|
|
def poisson():
|
|
w = "WHERE group2_inpn = 'Poissons'"
|
|
return _vm_synthese_observations_(w)
|
|
|
|
|
|
def champignon():
|
|
w = "WHERE regne IN ('Fungi','Chromista')"
|
|
return _vm_synthese_observations_(w)
|
|
|
|
|
|
def export_csv(df,path_name):
|
|
df.dropna(how='all',axis=1,inplace=True)
|
|
drop_cols = [
|
|
'regne','phylum','classe','ordre','famille','group1_inpn','group2_inpn','cd_ref',
|
|
'altitude_z','longitude_x','latitude_y','geom','nom_commune',
|
|
'etude','lot_donnee','observateurs','structures'
|
|
]
|
|
d = df.columns[df.columns.isin(drop_cols)]
|
|
df.drop(columns=d,inplace=True)
|
|
if df.shape[0] > 50000:
|
|
from math import ceil
|
|
loop = ceil(df.shape[0]/50000)
|
|
i = 0
|
|
for j in range(1,loop+1):
|
|
p = path_name.replace('.csv','_%s.csv'%j)
|
|
jj = j*50000
|
|
df.iloc[i:jj].to_csv(p,index=False)
|
|
i = jj
|
|
else:
|
|
df.to_csv(path_name,index=False)
|
|
|
|
if __name__ == "__main__":
|
|
|
|
test = False
|
|
PATH = '/home/colas/Documents/9_PROJETS/4_SICEN/GN_MIGRATION/'
|
|
al = flore.all()
|
|
va = flore.vasculaire()
|
|
br = flore.bryophyte()
|
|
ch = flore.characee()
|
|
|
|
export_csv(al,PATH+'FLORE/all.csv')
|
|
# export_csv(va[va.effectif< va.effectif_max].copy(),PATH+'FLORE/test_eff_vasculaire.csv')
|
|
export_csv(va,PATH+'FLORE/vasculaire.csv')
|
|
export_csv(br,PATH+'FLORE/bryophyte.csv')
|
|
export_csv(ch,PATH+'FLORE/characee.csv')
|
|
al.shape[0] - (va.shape[0]+br.shape[0]+ch.shape[0])
|
|
|
|
fa = faune.all()
|
|
fia = faune.invertebre.all()
|
|
fva = faune.vertebre.all()
|
|
fa.shape[0] - (fia.shape[0] + fva.shape[0])
|
|
if test :
|
|
fa[~fa.id_obs.isin([*fia.id_obs,*fva.id_obs])]
|
|
|
|
ca = champignon()
|
|
|
|
vm = _vm_synthese_observations_()
|
|
date_cols = vm.columns[vm.columns.str.contains('date')]
|
|
mtd = vm[['lot_donnee','protocole','etude']].drop_duplicates()
|
|
|
|
vm.shape[0] - (al.shape[0]+fa.shape[0]+ca.shape[0])
|
|
if test :
|
|
vm[~vm.id_obs.isin([*al.id_obs,*fa.id_obs,*ca.id_obs])]
|
|
|
|
|
|
# Sub verification
|
|
fiod = faune.invertebre.odonate()
|
|
fior = faune.invertebre.orthoptere()
|
|
file = faune.invertebre.lepidoptere()
|
|
fimo = faune.invertebre.mollusque()
|
|
ficr = faune.invertebre.crustacees()
|
|
fias = faune.invertebre.ascomycete()
|
|
fiau = faune.invertebre.autre()
|
|
fia.shape[0] - (
|
|
fiod.shape[0] + fior.shape[0] + file.shape[0] + fimo.shape[0] + ficr.shape[0] + fias.shape[0] + fiau.shape[0]
|
|
)
|
|
if test :
|
|
fia[~fia.id_obs.isin([*fiod.id_obs,*fior.id_obs,*file.id_obs,*fimo.id_obs,*ficr.id_obs,*fias.id_obs,*fiau.id_obs])]
|
|
|
|
fvma = faune.vertebre.mammifere()
|
|
fvch = faune.vertebre.chiroptere()
|
|
fvoi = faune.vertebre.oiseau()
|
|
fvre = faune.vertebre.reptile()
|
|
fvam = faune.vertebre.amphibien()
|
|
fvpo = faune.vertebre.poisson()
|
|
fva.shape[0] - (
|
|
fvma.shape[0] + fvch.shape[0] + fvoi.shape[0] + fvre.shape[0] + fvam.shape[0] + fvpo.shape[0]
|
|
)
|
|
if test :
|
|
fva[~fva.id_obs.isin([*fvma.id_obs,*fvch.id_obs,*fvoi.id_obs,*fvre.id_obs,*fvam.id_obs,*fvpo.id_obs])]
|
|
|