Python_scripts/tmp/AVALON - aggreg data.py

import pandas as pd
import geopandas as gpd
from pycen import con_gn
from os import path

dict_sicen = {
    'nom latin/complet':'nom_complet',
    'date obs.':'date_debut',
    'règne':'regne',
    'observateurs':'observateu',
    'nom usuel/simplifié':'nom_vernac',
}
dict_biodiv = {
    'Salix alba L., 1753 [nom. et typ. cons.]':'Salix alba L., 1753',
    'Euphorbia verrucosa L., 1753':'Euphorbia flavicoma subsp. verrucosa (Fiori) Pignatti, 1973',
    'Ononis spinosa L., 1753 [nom. et typ. cons.]':'Ononis spinosa L., 1753',
    'Schedonorus arundinaceus (Schreb.) Dumort., 1824 [nom. cons.]':'Schedonorus arundinaceus (Schreb.) Dumort., 1824',
}
dict_gn = {
    'group1_inp':'group1_inpn',
    'group2_inp':'group2_inpn'
}

PATH = "/media/colas/SRV/FICHIERS/SITES/SITES GERES/AVAL_AVALON/Scientifique et technique/Données historiques/Marais d'Avalon"

biodiv1 = gpd.read_file(path.join(PATH,'Biodi_Aura_Mare Ratier','POINT_2023_12_13_08h35m51.shp'))
# biodiv2 = gpd.read_file(path.join(PATH,'Biodiv_Aura_Marais','POINT_2023_12_13_08h41m24.shp'))
biodiv3 = gpd.read_file(path.join(PATH,'Biodiv_Aura_Muraillat','POINT_2023_12_13_08h39m22.shp'))
biodiv = pd.concat([biodiv1,biodiv3])
# biodiv.nom_valide.replace(dict_biodiv,inplace=True)


gn = gpd.read_file(path.join(PATH,'Geonature_ENS Avalon','POINT_2023_12_13_08h41m24.shp'),encoding='Windows-1252')
gn.rename(columns=dict_gn,inplace=True)

sicen = pd.read_excel(path.join(PATH,'SICEN','vtOv_WyE.xlsx'),'undefined')
sicen.columns = sicen.columns.str.lower()
sicen.rename(columns=dict_sicen,inplace=True)

sql = """
SELECT cd_nom,cd_ref, group1_inpn,group2_inpn FROM taxonomie.taxref
WHERE cd_nom IN {}
"""
tax = pd.read_sql(sql.format(tuple(sicen.cd_nom.unique())),con_gn)
sicen = sicen.merge(tax,on='cd_nom',how='left',copy=True)

sql = """
SELECT cd_nom,cd_ref,regne, group1_inpn,group2_inpn FROM taxonomie.taxref
WHERE cd_nom IN {}
"""
tax = pd.read_sql(sql.format(tuple(biodiv.cd_ref.unique())),con_gn)
biodiv = biodiv.merge(tax,right_on='cd_nom',left_on='cd_ref',how='left',copy=False,suffixes=('_x', ''))


keep_cols = ['date_debut','cd_nom','cd_ref','nom_valide','nom_vernac','regne','group1_inpn','group2_inpn','classe','ordre','famille','observateu']

df = pd.concat([
    gn[keep_cols],
    sicen[keep_cols],
    biodiv[keep_cols],
])

df.date_debut = pd.to_datetime(df.date_debut)
before = df[df.date_debut<'2016-01-01']
after = df[df.date_debut>='2016-01-01']
before.date_debut = before.date_debut.astype(str)
after.date_debut = after.date_debut.astype(str)
before.to_excel(path.join(PATH,'avant 2016.xlsx'),encoding='Windows-1252')
after.to_excel(path.join(PATH,'2016 et +.xlsx'),encoding='Windows-1252')