#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import pandas_access as mdb # pour lecture de la bd medwet
|
|
from os import path
|
|
from pycen import con_sicen, update_to_sql
|
|
import pandas as pd
|
|
|
|
# Data source handled by the script entry point: 'access' or 'postgis'.
SOURCE = 'postgis'

# Database schema passed to the reads of the SERENA tables (rnf_*, tmp_*).
postgis_sch = 'serenabase'

# Renaming of the SERENA survey ("releve") columns to the target column names.
DICT_RLV_COLS = {
    'relv_id': 'id_etude',
    'relv_nom': 'nom_etude',
    'relv_prop_libel': 'proprietaire',
    'relv_1date_c': 'date_debut',
    'relv_2date_c': 'date_fin',
    'relv_comment': 'description',
}

# Renaming of the SERENA observation columns to the target column names.
DICT_OBS_COLS = {
    'obse_id': 'id_origine',
    'obse_relv_id': 'id_etude',
}
|
|
|
|
|
|
def list_access_datatable():
    """Return the names of the non-empty tables of the SERENA Access backup.

    Every table listed in the ``.mdb`` file is read with ``pandas_access``.
    Tables without any row are reported on stdout and left out of the result.

    Returns:
        list: names of the tables that contain at least one row.
    """
    PATH_IN = '/media/colas/SRV/FICHIERS/OUTILS/BASES DE DONNEES/ESPECES/backup'
    BDD_IN = 'AVENIR.serena_V2.mdb'
    # Build the database path once instead of re-joining it for every call.
    db_path = path.join(PATH_IN, BDD_IN)

    schema = mdb.read_schema(db_path)
    table_data = []

    for table in mdb.list_tables(db_path):
        # 'Long Integer' columns are read as float -- presumably so that
        # missing values can be represented; confirm against the data.
        dtype = {
            col: float
            for col, col_type in schema[table].items()
            if col_type == 'Long Integer'
        }
        df = mdb.read_table(db_path, table, dtype=dtype)
        if df.shape[0] == 0:
            print('%s IS EMPTY !' % table)
        else:
            table_data.append(table)

    return table_data
|
|
|
|
|
|
def list_releve(source='postgis'):
    """Load the SERENA surveys ("releves") joined with their category label.

    Reads ``rnf_relv`` and ``rnf_choi`` from the ``postgis_sch`` schema,
    drops the all-empty columns, attaches ``choi_id`` / ``choi_nom`` to each
    survey and converts every column whose name contains ``date`` to
    datetime.

    Args:
        source: data backend to read from. Only ``'postgis'`` is implemented.

    Returns:
        pandas.DataFrame: one row per survey that has a matching category
        (inner join), without the ``relv_categ_choi_id`` column.

    Raises:
        ValueError: if ``source`` is not ``'postgis'``.
    """
    if source != 'postgis':
        raise ValueError('Unsupported source: %r' % (source,))

    choi = pd.read_sql_table('rnf_choi', con_sicen, postgis_sch)\
        .dropna(axis=1, how='all')
    relv = pd.read_sql_table('rnf_relv', con_sicen, postgis_sch)\
        .dropna(axis=1, how='all')

    # Typo fix in the source data: category id 100800 is rewritten to 100008.
    relv.loc[relv.relv_categ_choi_id == 100800, 'relv_categ_choi_id'] = 100008

    # Join survey / category.
    df = relv.merge(
        choi[['choi_id', 'choi_nom']],
        how='inner',
        left_on='relv_categ_choi_id',
        right_on='choi_id',
    ).drop(columns=['relv_categ_choi_id'])

    # Date formatting.
    lstcol_dat = df.columns[df.columns.str.contains('date')]
    for lcd in lstcol_dat:
        # The range '2000 à 2001' becomes its first year in the start-date
        # column ('1date') and its last year in the other date columns.
        df.loc[df[lcd] == '2000 à 2001', lcd] = '2000' if '1date' in lcd else '2001'
        # Assign the result back: an in-place replace on ``df[lcd]`` is a
        # chained operation that does not reach ``df`` under Copy-on-Write.
        df[lcd] = pd.to_datetime(df[lcd].replace({'VIDE': None}))

    return df
|
|
|
|
|
|
def get_serena_obs(source='postgis'):
    """Load the SERENA observation table ``rnf_obse``.

    Args:
        source: data backend to read from. Only ``'postgis'`` is implemented.

    Returns:
        pandas.DataFrame: content of ``rnf_obse`` in the ``postgis_sch``
        schema, read through ``con_sicen``.

    Raises:
        ValueError: if ``source`` is not ``'postgis'``.
    """
    if source != 'postgis':
        raise ValueError('Unsupported source: %r' % (source,))

    return pd.read_sql_table('rnf_obse', con_sicen, postgis_sch)
|
|
|
|
|
|
|
|
def get_sicen_obs():
    """Return the ``saisie_observation`` table of the ``saisie`` schema.

    The table is read in full through the ``con_sicen`` connection.
    """
    return pd.read_sql_table(
        table_name='saisie_observation',
        con=con_sicen,
        schema='saisie',
    )
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point.
    #   'access'  : list the non-empty tables of the Access backup.
    #   'postgis' : reconcile the SERENA observations with the SICEN2 table
    #               saisie.saisie_observation and update its id_etude field.

    if SOURCE == 'access':
        lst_tab = list_access_datatable()

    elif SOURCE == 'postgis':
        lstrelv_serena = list_releve()
        obs_serena = get_serena_obs()
        # Attach the survey name to every observation.
        obs_serena = obs_serena.merge(
            lstrelv_serena[['relv_id', 'relv_nom']],
            left_on='obse_relv_id',
            right_on='relv_id',
        ).drop(columns='relv_id')

        # Keep the surveys outside RhoMeO: those whose 3-character name
        # prefix is not numeric.
        is_relv = [
            x for x in lstrelv_serena.relv_nom.str[:3].unique()
            if not x.isnumeric()
        ]
        relv_serena = lstrelv_serena[lstrelv_serena.relv_nom.str[:3].isin(is_relv)]

        # Format the non-RhoMeO surveys. The negated positional index
        # (0, -1, -2, ...) is used as the SICEN2-side id of each survey.
        relv_2sicen = (
            relv_serena.rename(columns=DICT_RLV_COLS)
            .filter(DICT_RLV_COLS.values(), axis=1)
            .sort_values('id_etude')
            .reset_index(drop=True)
        )
        relv_2sicen = relv_2sicen.set_index(-relv_2sicen.index)

        # One-off step kept for reference (disabled): insertion of the
        # SERENA surveys into the 'etude' table.
        # relv_2sicen.drop(columns=['id_etude','proprietaire'])\
        #     .rename_axis('id_etude')\
        #     .to_sql('etude',con_sicen,'md',if_exists='append')

        # Fetch the SICEN2 data.
        sicen_obs = get_sicen_obs()
        # id_lot 3 -- presumably the 'SERENA' lot; confirm against SICEN2.
        is_serena = sicen_obs.id_lot == 3
        # id_etude 2 -- presumably the "hors etude" study; confirm.
        is_horsetude = sicen_obs.id_etude == 2
        lst_idorigine = sicen_obs[is_serena].id_origine.astype(int).tolist()

        # Isolate the SERENA observations presumed missing from SICEN2.
        obs_notinsicen = obs_serena[~obs_serena.obse_id.isin(lst_idorigine)].copy()

        # Build the IN lists explicitly from integers. Formatting a tuple
        # straight into the query yields invalid SQL for a single element
        # ("(1,)"), for an empty selection ("()") and for NumPy scalars
        # (their repr). An empty selection becomes "IN (NULL)", which
        # matches no row. Ids are assumed to be integers, as they are
        # compared with integer lists elsewhere in this script.
        lst_obse_id = ', '.join(
            str(int(i)) for i in obs_notinsicen.obse_id.dropna().unique()
        ) or 'NULL'
        lst_site_id = ', '.join(
            str(int(i)) for i in obs_notinsicen.obse_site_id.dropna().unique()
        ) or 'NULL'

        sql1 = "SELECT * FROM {sch}.tmp_ogll WHERE ogll_obse_id IN ({lst}) AND ogll_lat <> '999'"
        res1 = pd.read_sql_query(sql1.format(sch=postgis_sch, lst=lst_obse_id), con_sicen)
        sql2 = "SELECT * FROM {sch}.tmp_sgll WHERE sgll_site_id IN ({lst}) AND sgll_lat <> '999'"
        res2 = pd.read_sql_query(sql2.format(sch=postgis_sch, lst=lst_site_id), con_sicen)

        # Coordinates found per observation (tmp_ogll) and per site (tmp_sgll).
        miss_serena1 = obs_notinsicen.merge(
            res1.rename(columns={
                'ogll_obse_id': 'obse_id',
                'ogll_lat': 'lat',
                'ogll_lon': 'lon',
            }),
            on='obse_id')
        miss_serena2 = obs_notinsicen.merge(
            res2.rename(columns={
                'sgll_site_id': 'obse_site_id',
                'sgll_lat': 'lat',
                'sgll_lon': 'lon',
            }),
            on='obse_site_id')
        miss_serena = pd.concat([miss_serena1, miss_serena2])
        # Missing observations whose name starts with 'RhoMéO'; kept in a
        # variable for inspection (the bare expression had no effect).
        rhomeo_missserena = miss_serena.obse_nom.str.startswith('RhoMéO', na=False)
        miss_serena_rhomeo = miss_serena[rhomeo_missserena]

        # Format the observations that belong to the non-RhoMeO surveys.
        obs_2sicen = obs_serena[obs_serena.obse_relv_id.isin(relv_serena.relv_id)]\
            .rename(columns=DICT_OBS_COLS)\
            .filter(DICT_OBS_COLS.values(), axis=1)
        obs_2sicen['id_lot'] = 3
        obs_2sicen['id_origine'] = obs_2sicen['id_origine'].astype(str)

        # Mapping survey id (SERENA) -> id_etude (SICEN2).
        DICT_ID_ETUDE = dict(zip(relv_2sicen.id_etude, relv_2sicen.index))
        # Assign the result back: an in-place replace on the column selection
        # is a chained operation that may leave obs_2sicen unchanged.
        obs_2sicen['id_etude'] = obs_2sicen['id_etude'].replace(DICT_ID_ETUDE)

        # Update the id_etude field of saisie_observation for the
        # observations already present in SICEN2.
        update_to_sql(
            obs_2sicen[obs_2sicen.id_origine.astype(int).isin(lst_idorigine)],
            con_sicen,
            'saisie_observation',
            'saisie',
            ['id_lot', 'id_origine']
        )

        # Identify the remaining records tagged "hors etude" in SICEN2.
        id_origine_he = sicen_obs[is_serena & is_horsetude].id_origine.astype(int)
        is_horsetude2 = obs_serena.obse_id.isin(id_origine_he)
        res_he = obs_serena[is_horsetude2].copy()
        # The survey name is carried over into the id_waypoint column.
        DICT2_OBS_COLS = {**DICT_OBS_COLS, 'relv_nom': 'id_waypoint'}
        obs2_2sicen = res_he.rename(columns=DICT2_OBS_COLS)\
            .filter(DICT2_OBS_COLS.values(), axis=1)
        obs2_2sicen['id_lot'] = 3
        # Hard-coded target study id -- NOTE(review): confirm what study
        # -152 designates in SICEN2.
        obs2_2sicen['id_etude'] = -152
        obs2_2sicen['id_origine'] = obs2_2sicen['id_origine'].astype(str)

        # Update the id_etude field of saisie_observation.
        update_to_sql(
            obs2_2sicen,
            con_sicen,
            'saisie_observation',
            'saisie',
            ['id_lot', 'id_origine']
        )
|
|
|