191 lines
6.5 KiB
Python

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import pandas_access as mdb # pour lecture de la bd medwet
from os import path
from pycen import con_sicen, update_to_sql
import pandas as pd
# access / postgis
# Which data source to process: 'access' (raw .mdb backup) or 'postgis'
# (the SERENA dump loaded into the 'serenabase' schema).
SOURCE = 'postgis'
# PostGIS schema holding the SERENA dump.
postgis_sch = 'serenabase'

# SERENA -> SICEN2 column mapping for surveys ("relevés").
# NOTE: insertion order matters — it drives the column order produced by
# .filter(DICT_RLV_COLS.values()) further down.
DICT_RLV_COLS = {
    'relv_id': 'id_etude',
    'relv_nom': 'nom_etude',
    'relv_prop_libel': 'proprietaire',
    'relv_1date_c': 'date_debut',
    'relv_2date_c': 'date_fin',
    'relv_comment': 'description',
}
# SERENA -> SICEN2 column mapping for observations.
DICT_OBS_COLS = {
    'obse_id': 'id_origine',
    'obse_relv_id': 'id_etude',
}
def list_access_datatable(
        path_in='/media/colas/SRV/FICHIERS/OUTILS/BASES DE DONNEES/ESPECES/backup',
        bdd_in='AVENIR.serena_V2.mdb'):
    """List the non-empty data tables of a SERENA Access (.mdb) backup.

    Parameters
    ----------
    path_in : str
        Directory containing the .mdb file. Defaults to the historical
        backup location so existing no-argument calls keep working.
    bdd_in : str
        File name of the Access database.

    Returns
    -------
    list of str
        Names of the tables holding at least one row. Empty tables are
        reported on stdout and excluded from the result.
    """
    db_file = path.join(path_in, bdd_in)
    schema = mdb.read_schema(db_file)
    # Bug fix: the original built a table list from the schema keys and then
    # immediately overwrote it with mdb.list_tables() — only the latter is kept.
    tables = mdb.list_tables(db_file)
    data_tables = []
    for table in tables:
        # Force 'Long Integer' columns to float — presumably so missing
        # values (NaN) do not break the integer parsing; TODO confirm.
        # schema.get(...) guards against a table listed but absent from the
        # parsed schema (the original would raise KeyError).
        dtype = {col: float
                 for col, typ in schema.get(table, {}).items()
                 if typ == 'Long Integer'}
        df = mdb.read_table(db_file, table, dtype=dtype)
        if df.shape[0] == 0:
            print('%s IS EMPTY !'%table)
        else:
            data_tables.append(table)
    return data_tables
def list_releve(source='postgis'):
    """Load the SERENA surveys ("relevés") joined to their category.

    Parameters
    ----------
    source : str
        Data source; only 'postgis' is implemented.

    Returns
    -------
    pandas.DataFrame
        One row per survey, with the category name (``choi_nom``) joined in
        and every ``*date*`` column parsed to datetime.

    Raises
    ------
    ValueError
        For any unsupported *source*. (The original fell through and hit an
        UnboundLocalError on ``return df`` instead.)
    """
    if source != 'postgis':
        raise ValueError("unsupported source: %r (only 'postgis' is implemented)" % (source,))
    choi = pd.read_sql_table('rnf_choi',con_sicen,postgis_sch)\
             .dropna(axis=1,how='all')
    relv = pd.read_sql_table('rnf_relv',con_sicen,postgis_sch)\
             .dropna(axis=1,how='all')
    # Fix a data-entry typo in the category id (100800 -> 100008).
    relv.loc[relv.relv_categ_choi_id==100800,'relv_categ_choi_id'] = 100008
    # Join survey / category; inner join drops surveys with no known category.
    df = relv.merge(choi[['choi_id','choi_nom']],how='inner',
                    left_on='relv_categ_choi_id',right_on='choi_id')\
             .drop(columns=['relv_categ_choi_id'])
    # Normalise every date-like column.
    lstcol_dat = df.columns[df.columns.str.contains('date')]
    for lcd in lstcol_dat:
        # '2000 à 2001' spans two years: keep the start year for the
        # "*1date*" (begin) column, the end year otherwise.
        df.loc[df[lcd]=='2000 à 2001',lcd] = '2000' if '1date' in lcd else '2001'
        # 'VIDE' (= empty) placeholder -> missing value. Assign back instead
        # of the original chained `inplace=True`, which pandas flags as
        # unreliable on a column view (SettingWithCopy-style pattern).
        df[lcd] = df[lcd].replace({'VIDE':None})
        df[lcd] = pd.to_datetime(df[lcd])
    return df
def get_serena_obs(source='postgis'):
    """Return the raw SERENA observation table (``rnf_obse``).

    Parameters
    ----------
    source : str
        Data source; only 'postgis' is implemented.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        For any unsupported *source*. (The original fell through and hit an
        UnboundLocalError on ``return obs`` instead.)
    """
    if source != 'postgis':
        raise ValueError("unsupported source: %r (only 'postgis' is implemented)" % (source,))
    return pd.read_sql_table('rnf_obse',con_sicen,postgis_sch)
def get_sicen_obs():
    """Return the full SICEN2 ``saisie.saisie_observation`` table."""
    observations = pd.read_sql_table('saisie_observation', con_sicen, 'saisie')
    return observations
if __name__ == "__main__":
    if SOURCE == 'access':
        # Access source: only list the non-empty tables of the .mdb backup.
        lst_tab = list_access_datatable()
    elif SOURCE == 'postgis':
        # ---- Load SERENA data from the PostGIS dump ------------------------
        lstrelv_serena = list_releve()
        obs_serena = get_serena_obs()
        # Attach the survey name (relv_nom) to every observation.
        obs_serena = obs_serena.merge(lstrelv_serena[['relv_id','relv_nom']],left_on='obse_relv_id',right_on='relv_id')\
            .drop(columns='relv_id')
        # Isolate the surveys OUTSIDE RhoMéo — keeps surveys whose first 3
        # name characters are NOT numeric (presumably RhoMéo surveys carry a
        # numeric prefix; TODO confirm with the naming convention).
        is_relv = [x for x in lstrelv_serena.relv_nom.str[:3].unique() if not x.isnumeric()]
        relv_serena = lstrelv_serena[lstrelv_serena.relv_nom.str[:3].isin(is_relv)]
        # Format the non-RhoMéo surveys to the SICEN2 'etude' layout.
        relv_2sicen = relv_serena.rename(columns=DICT_RLV_COLS).filter(DICT_RLV_COLS.values(),axis=1)
        relv_2sicen.sort_values('id_etude',inplace=True)
        relv_2sicen.reset_index(inplace=True,drop=True)
        # Negated index: SERENA-imported studies get a negative id in SICEN2
        # (consistent with the hard-coded -152 further down — TODO confirm).
        relv_2sicen.set_index(-relv_2sicen.index,inplace=True)
        # # One-shot insertion of the SERENA surveys into the 'etude' table
        # # (already executed; kept commented for reference).
        # relv_2sicen.drop(columns=['id_etude','proprietaire'])\
        # .rename_axis('id_etude')\
        # .to_sql('etude',con_sicen,'md',if_exists='append')
        # ---- Fetch SICEN2 data ---------------------------------------------
        # id_lot = 'SERENA'
        sicen_obs = get_sicen_obs()
        # NOTE(review): id_lot 3 looks like the SERENA batch and id_etude 2
        # like "hors étude" (no study) — inferred from usage, TODO confirm
        # against the SICEN2 lookup tables.
        is_serena = sicen_obs.id_lot==3
        is_horsetude = sicen_obs.id_etude==2
        lst_idorigine = sicen_obs[is_serena].id_origine.astype(int).tolist()
        # Isolate the observations "supposedly" not yet present in SICEN2.
        obs_notinsicen = obs_serena[~obs_serena.obse_id.isin(lst_idorigine)].copy()
        # rhomeo_notinsicen = obs_notinsicen.obse_nom.str.startswith('RhoMéO',na=False)
        # obs_notinsicen[rhomeo_notinsicen]
        # test = obs_notinsicen.obse_site_id == 101335
        # obs_notinsicen[test]
        # Recover coordinates: per-observation (tmp_ogll) then per-site
        # (tmp_sgll). Latitude '999' is used as a "no coordinate" sentinel.
        sql1 = "SELECT * FROM {sch}.tmp_ogll WHERE ogll_obse_id IN {lst} AND ogll_lat <> '999'"
        res1 = pd.read_sql_query(sql1.format(sch=postgis_sch,lst=tuple(obs_notinsicen.obse_id)),con_sicen)
        sql2 = "SELECT * FROM {sch}.tmp_sgll WHERE sgll_site_id IN {lst} AND sgll_lat <> '999'"
        res2 = pd.read_sql_query(sql2.format(sch=postgis_sch,lst=tuple(obs_notinsicen.obse_site_id.unique())),con_sicen)
        # Missing observations with an observation-level coordinate...
        miss_serena1 = obs_notinsicen.merge(
            res1.rename(columns={
                'ogll_obse_id':'obse_id',
                'ogll_lat':'lat',
                'ogll_lon':'lon'
            }),
            on='obse_id')
        # ...and those with only a site-level coordinate.
        miss_serena2 = obs_notinsicen.merge(
            res2.rename(columns={
                'sgll_site_id':'obse_site_id',
                'sgll_lat':'lat',
                'sgll_lon':'lon'
            }),
            on='obse_site_id')
        miss_serena = pd.concat([miss_serena1,miss_serena2])
        rhomeo_missserena = miss_serena.obse_nom.str.startswith('RhoMéO',na=False)
        # NOTE(review): the selection below is computed but its result is
        # discarded — looks like an interactive-inspection left-over.
        miss_serena[rhomeo_missserena]
        # ---- Format the observation data for SICEN2 ------------------------
        obs_2sicen = obs_serena[obs_serena.obse_relv_id.isin(relv_serena.relv_id)].rename(columns=DICT_OBS_COLS).filter(DICT_OBS_COLS.values(),axis=1)
        obs_2sicen['id_lot'] = 3
        obs_2sicen.id_origine = obs_2sicen.id_origine.astype(str)
        # Map relv_id (SERENA) -> id_etude (SICEN2): the negated index of
        # relv_2sicen built above.
        DICT_ID_ETUDE = dict(zip(relv_2sicen.id_etude,relv_2sicen.index))
        obs_2sicen.id_etude.replace(DICT_ID_ETUDE,inplace=True)
        # Update the id_etude field of saisie_observation, matching rows on
        # (id_lot, id_origine); restricted to ids already known in SICEN2.
        update_to_sql(
            obs_2sicen[obs_2sicen.id_origine.astype(int).isin(lst_idorigine)],
            con_sicen,
            'saisie_observation',
            'saisie',
            ['id_lot','id_origine']
        )
        # Identify the remaining SERENA rows tagged "hors etude" (no study).
        id_origine_he = sicen_obs[is_serena&is_horsetude].id_origine.astype(int)
        is_horsetude2 = obs_serena.obse_id.isin(id_origine_he)
        res_he = obs_serena[is_horsetude2].copy()
        # Same mapping, plus the survey name carried into id_waypoint.
        DICT2_OBS_COLS = {**DICT_OBS_COLS,'relv_nom':'id_waypoint'}
        obs2_2sicen = res_he.rename(columns=DICT2_OBS_COLS).filter(DICT2_OBS_COLS.values(),axis=1)
        obs2_2sicen['id_lot'] = 3
        # -152: fixed SICEN2 study id for these rows (magic constant —
        # TODO confirm which study it designates).
        obs2_2sicen['id_etude'] = -152
        obs2_2sicen.id_origine = obs2_2sicen.id_origine.astype(str)
        # Update the id_etude field of saisie_observation for those rows.
        update_to_sql(
            obs2_2sicen,
            con_sicen,
            'saisie_observation',
            'saisie',
            ['id_lot','id_origine']
        )