#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""Migrate SERENA relevés and observations into SICEN2.

Reads the SERENA data either from an Access backup (.mdb) or from the
'serenabase' PostGIS schema, reshapes the relevés/observations, and updates
the 'saisie.saisie_observation' table through pycen.update_to_sql.
"""
import pandas_access as mdb  # reads Access (.mdb) databases
from os import path
from pycen import con_sicen, update_to_sql
import pandas as pd

# 'access' or 'postgis'
SOURCE = 'postgis'
postgis_sch = 'serenabase'

# SERENA 'relv' columns -> SICEN2 'etude' columns
DICT_RLV_COLS = {
    'relv_id': 'id_etude',
    'relv_nom': 'nom_etude',
    'relv_prop_libel': 'proprietaire',
    'relv_1date_c': 'date_debut',
    'relv_2date_c': 'date_fin',
    'relv_comment': 'description',
}

# SERENA 'obse' columns -> SICEN2 'saisie_observation' columns
DICT_OBS_COLS = {
    'obse_id': 'id_origine',
    'obse_relv_id': 'id_etude',
}


def list_access_datatable():
    """Return the names of the non-empty tables of the Access backup."""
    PATH_IN = '/media/colas/SRV/FICHIERS/OUTILS/BASES DE DONNEES/ESPECES/backup'
    BDD_IN = 'AVENIR.serena_V2.mdb'
    LST_SCHEMA = mdb.read_schema(path.join(PATH_IN, BDD_IN))
    LST_TABLE = mdb.list_tables(path.join(PATH_IN, BDD_IN))
    TABLE_DATA = []
    for TABLE in LST_TABLE:
        # read 'Long Integer' columns as float (NULL values prevent an int cast)
        DTYPE = {x: float for x in LST_SCHEMA[TABLE].keys()
                 if LST_SCHEMA[TABLE][x] == 'Long Integer'}
        df = mdb.read_table(path.join(PATH_IN, BDD_IN), TABLE, dtype=DTYPE)
        if df.shape[0] == 0:
            print('%s IS EMPTY !' % TABLE)
        else:
            TABLE_DATA += [TABLE]
    return TABLE_DATA


def list_releve(source='postgis'):
    """Return the SERENA relevés joined with their category."""
    if source == 'postgis':
        choi = pd.read_sql_table('rnf_choi', con_sicen, postgis_sch)\
            .dropna(axis=1, how='all')
        relv = pd.read_sql_table('rnf_relv', con_sicen, postgis_sch)\
            .dropna(axis=1, how='all')
    else:
        raise NotImplementedError('source %r is not handled' % source)

    # Fix a typo in the category id
    relv.loc[relv.relv_categ_choi_id == 100800, 'relv_categ_choi_id'] = 100008

    # Join relevés with their category
    df = relv.merge(choi[['choi_id', 'choi_nom']], how='inner',
                    left_on='relv_categ_choi_id', right_on='choi_id')\
        .drop(columns=['relv_categ_choi_id'])

    # Clean up and parse the date columns
    lstcol_dat = df.columns[df.columns.str.contains('date')]
    for lcd in lstcol_dat:
        # '2000 à 2001' -> start year for the '1date' column, end year otherwise
        df.loc[df[lcd] == '2000 à 2001', lcd] = '2000' if '1date' in lcd else '2001'
        df[lcd] = df[lcd].replace({'VIDE': None})
        df[lcd] = pd.to_datetime(df[lcd])
    return df


def get_serena_obs(source='postgis'):
    """Return the SERENA observations ('rnf_obse')."""
    if source == 'postgis':
        return pd.read_sql_table('rnf_obse', con_sicen, postgis_sch)
    raise NotImplementedError('source %r is not handled' % source)


def get_sicen_obs():
    """Return the SICEN2 'saisie_observation' table."""
    return pd.read_sql_table('saisie_observation', con_sicen, 'saisie')


if __name__ == "__main__":

    if SOURCE == 'access':
        lst_tab = list_access_datatable()

    elif SOURCE == 'postgis':
        lstrelv_serena = list_releve()
        obs_serena = get_serena_obs()
        obs_serena = obs_serena.merge(lstrelv_serena[['relv_id', 'relv_nom']],
                                      left_on='obse_relv_id', right_on='relv_id')\
            .drop(columns='relv_id')

        # Isolate the relevés OUTSIDE RhoMéo (names whose first 3 characters are not numeric)
        is_relv = [x for x in lstrelv_serena.relv_nom.str[:3].unique() if not x.isnumeric()]
        relv_serena = lstrelv_serena[lstrelv_serena.relv_nom.str[:3].isin(is_relv)]

        # Format the non-RhoMéo relevés for the SICEN2 'etude' table
        relv_2sicen = relv_serena.rename(columns=DICT_RLV_COLS)\
            .filter(DICT_RLV_COLS.values(), axis=1)
        relv_2sicen.sort_values('id_etude', inplace=True)
        relv_2sicen.reset_index(inplace=True, drop=True)
        # negative ids, later used as id_etude in SICEN2
        relv_2sicen.set_index(-relv_2sicen.index, inplace=True)

        # # Insert the SERENA relevés into the 'etude' table
        # relv_2sicen.drop(columns=['id_etude','proprietaire'])\
        #     .rename_axis('id_etude')\
        #     .to_sql('etude',con_sicen,'md',if_exists='append')

        # Fetch the SICEN2 data
        # id_lot = 'SERENA'
        sicen_obs = get_sicen_obs()
        is_serena = sicen_obs.id_lot == 3
        is_horsetude = sicen_obs.id_etude == 2
        lst_idorigine = sicen_obs[is_serena].id_origine.astype(int).tolist()

        # Isolate the SERENA observations assumed to be missing from SICEN2
        obs_notinsicen = obs_serena[~obs_serena.obse_id.isin(lst_idorigine)].copy()
        # rhomeo_notinsicen = obs_notinsicen.obse_nom.str.startswith('RhoMéO',na=False)
        # obs_notinsicen[rhomeo_notinsicen]
        # test = obs_notinsicen.obse_site_id == 101335
        # obs_notinsicen[test]

        # Retrieve the coordinates (lat <> '999') of the missing observations and of their sites
        sql1 = "SELECT * FROM {sch}.tmp_ogll WHERE ogll_obse_id IN {lst} AND ogll_lat <> '999'"
        res1 = pd.read_sql_query(sql1.format(sch=postgis_sch,
                                             lst=tuple(obs_notinsicen.obse_id)), con_sicen)
        sql2 = "SELECT * FROM {sch}.tmp_sgll WHERE sgll_site_id IN {lst} AND sgll_lat <> '999'"
        res2 = pd.read_sql_query(sql2.format(sch=postgis_sch,
                                             lst=tuple(obs_notinsicen.obse_site_id.unique())), con_sicen)

        miss_serena1 = obs_notinsicen.merge(
            res1.rename(columns={
                'ogll_obse_id': 'obse_id',
                'ogll_lat': 'lat',
                'ogll_lon': 'lon'
            }),
            on='obse_id')
        miss_serena2 = obs_notinsicen.merge(
            res2.rename(columns={
                'sgll_site_id': 'obse_site_id',
                'sgll_lat': 'lat',
                'sgll_lon': 'lon'
            }),
            on='obse_site_id')
        miss_serena = pd.concat([miss_serena1, miss_serena2])
        # interactive check: RhoMéO records among the missing observations
        rhomeo_missserena = miss_serena.obse_nom.str.startswith('RhoMéO', na=False)
        miss_serena[rhomeo_missserena]

        # Format the observation data
        obs_2sicen = obs_serena[obs_serena.obse_relv_id.isin(relv_serena.relv_id)]\
            .rename(columns=DICT_OBS_COLS)\
            .filter(DICT_OBS_COLS.values(), axis=1)
        obs_2sicen['id_lot'] = 3
        obs_2sicen.id_origine = obs_2sicen.id_origine.astype(str)

        # Map relv_id (SERENA) to id_etude (SICEN2)
        DICT_ID_ETUDE = dict(zip(relv_2sicen.id_etude, relv_2sicen.index))
        obs_2sicen['id_etude'] = obs_2sicen['id_etude'].replace(DICT_ID_ETUDE)

        # Update the id_etude field of the saisie_observation table
        update_to_sql(
            obs_2sicen[obs_2sicen.id_origine.astype(int).isin(lst_idorigine)],
            con_sicen,
            'saisie_observation',
            'saisie',
            ['id_lot', 'id_origine']
        )

        # Identify the remaining records tagged "hors etude"
        id_origine_he = sicen_obs[is_serena & is_horsetude].id_origine.astype(int)
        is_horsetude2 = obs_serena.obse_id.isin(id_origine_he)
        res_he = obs_serena[is_horsetude2].copy()

        DICT2_OBS_COLS = {**DICT_OBS_COLS, 'relv_nom': 'id_waypoint'}
        obs2_2sicen = res_he.rename(columns=DICT2_OBS_COLS)\
            .filter(DICT2_OBS_COLS.values(), axis=1)
        obs2_2sicen['id_lot'] = 3
        obs2_2sicen['id_etude'] = -152
        obs2_2sicen.id_origine = obs2_2sicen.id_origine.astype(str)

        # Update the id_etude field of the saisie_observation table
        update_to_sql(
            obs2_2sicen,
            con_sicen,
            'saisie_observation',
            'saisie',
            ['id_lot', 'id_origine']
        )