2025-02-25 16:52:00 +01:00

1485 lines
50 KiB
Python

#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
#Nom : recup_ps.py
#Description :
#Copyright : 2021, CEN38
#Auteur : Colas Geier
#Version : 1.0
# import re
from contextlib import AsyncExitStack
from os import register_at_fork
import types
from numpy.lib.shape_base import expand_dims
import pandas as pd
import geopandas as gpd
from pandas.io.pytables import incompatibility_doc
from shapely import wkb
import numpy as np
from sqlalchemy.sql.expression import column
# from pycen import bdd
from sys import exit
import pycen
# from sqlalchemy import create_engine
# from sqlalchemy.engine import URL
# from geoalchemy2 import Geometry
# Parametres bdd IN
# user = 'cen_admin'
# pwd = '********'  # NOTE(review): hard-coded password redacted — load credentials from config/env, never commit them
# adr = '192.168.0.189'
# base = 'bd-cen-38'
# schema = 'pelouse_seche'
# # schema = 'habitat'
# url = URL.create('postgresql+psycopg2',
# username=user,
# password=pwd,
# host=adr,
# database=base,
# )
# con = create_engine(url)
con = pycen.con_bdcen
con_ps = pycen.con
def get_pers():
    """Return the contacts table (authors) from the database.

    Delegates to ``pycen.pers.get_auteur()``; callers in this module
    rely on the returned DataFrame exposing at least ``nom_prenom``
    and ``id_organisme``.
    """
    # NOTE: the previous implementation also built a local ``df_pers``
    # DataFrame (personne JOIN organisme) and then discarded it; that
    # dead computation has been removed — the return value is unchanged.
    return pycen.pers.get_auteur()
# def get_idgeomsite(lst_site):
# '''
# :lst_site: tuple.
# '''
# sql = ('SELECT id, id_site, date FROM {sch}.{tab} t0 '
# 'JOIN {sch}.type_milieu t1 WITH t0.id_type_milieu = t1.id '
# 'WHERE t1.nom = "Pelouses sèches" AND t0.id_site IN {lst} ORDER BY id').format(sch='sites', tab='r_sites_geom', lst=lst_site)
# df = pd.read_sql(
# sql = sql,
# con = pycen.con)
# return df
def format_legende(leg):
    """Normalise free-text legend values (separators, accents, typos).

    :param leg: iterable/Series of legend strings.
    :return: single-column DataFrame ``leg`` holding the cleaned values.
    """
    df = pd.DataFrame({'leg': leg})
    # Each pattern (regex) is replaced by the value at the same position:
    # the various separators become '|', common accent/spelling mistakes
    # are fixed, and double spaces collapse to one.
    # NOTE: an unused empty ``dic`` local was removed, and the previous
    # no-op "' ' -> ' '" pair now collapses double spaces as intended.
    df.replace(
        ['/', ' x ', 'X', ' x', 'fasciès', 'faciés', '  ', 'erigé', 'érigéé',
         'Troêne', 'Troëne', 'brachypode', 'dominée', 'dominé',
         'érigé_et_brachypode', 'cyperacées'],
        ['|', '|', '|', '|', 'faciès', 'faciès', ' ', 'érigé', 'érigé',
         'Troène', 'Troène', 'Brachypode', 'dominé', 'dominée',
         'érigé et brachypode', 'Cyperacées'],
        regex=True, inplace=True)
    return df
def ident_format_date(col):
return
# "%d/%m/%Y"
def format_date(df):
df = df.copy()
check_date = [x.startswith('date') or x.endswith('date') for x in df.columns]
cols_date = df.columns[check_date]
if 'date' not in df.columns and not cols_date.empty:
# df[cols_date].replace({'/':None},inplace=True)
for col in cols_date:
df[col].replace({'/':None},inplace=True)
df[col] = pd.to_datetime(df[col])
# df.loc[df[col].notna(),col] = pd.to_datetime(df[df[col].notna()][col])
df['date'] = df[cols_date].max(axis=1)
if table in ['"PS_4MONTAGNESNE_CEN38_2014"','"PS_BIEVRE_CEN38_2014"',
'"PS_CHAMBARAN_CEN38_2013"']:
df.date = pd.to_datetime(df.date,format='%d/%m/%Y')
if table == '"PS_BELLEDONNE_CEN38_2014"':
df.date = pd.to_datetime(df.date,format='%d%b%Y')
if table == '"PS_BIEVRE_CEN38_2016"':
df.date = df.date.astype(str).replace('\.','',regex=True)
df.date = df.date.str.rsplit(' ',1).str[0]\
.replace(['avr','mai','juin','juil'],['apr','may','jun','jul'],regex=True)
df.date = pd.to_datetime(df.date,format='%d %b %Y')
if table == '"PS_SUD-GRENOBLOIS_CEN38_2009"':
# df.loc[df.date.str.len() < 6,'date'] = df.loc[df.date.str.len() < 6,'date'].astype(int)
df.loc[df.date.str.len() < 6,'date'] = '01/06/' + df.loc[df.date.str.len() < 6,'date'].astype(int).max().astype(str)
df.loc[df.date.str.len() > 6,'date'] = pd.to_datetime(df.loc[df.date.str.len() > 6,'date'])
if 'annee' in df.columns and 'date' not in df.columns:
# Si pas de date mais année précisée
df['annee'] = df['annee'].astype(int)
df['date'] = df['annee'].astype(str) + '-06-01'
df.date = pd.to_datetime(df.date,format='%Y-%m-%d')
elif any(df.date.astype(str).str.len() <= 4):
# Si dates non-homogènes, cohexistance date&année
d = df.loc[df.date.astype(str).str.len() <= 4,'date'].unique()
if None in d:
df.loc[df.date.astype(str).str.len() <= 4,'date'] = df.loc[df.date.astype(str).str.len() <= 4,'annee'].astype(str) + '-06-01'
else:
df.loc[df.date.astype(str).str.len() <= 4,'date'] = df.loc[df.date.astype(str).str.len() <= 4,'date'].astype(str) + '-06-01'
df.date = pd.to_datetime(df.date,format='%Y-%m-%d')
else:
df.date = pd.to_datetime(df.date)
return df
def normalize_auteur(lst_auteur):
'''
lst_auteur : pd.Series
'''
dic = {
'Lise Duconte':'DUCONTE Lise',
'COSQUER Mélanie (AVENIR)':'COSQUER Mélanie',
'CHABERT Chloé (AVENIR)':'CHABERT Chloé',
'CHABERT Chloé (AVNEIR)':'CHABERT Chloé',
'PACHE Gilles (CBNA)':'PACHE Gilles',
'Gilles PACHE':'PACHE Gilles',
'JAMEAU Laura (AVENIR)':'JAMEAU Laura',
'MARCIAU Roger (AVENIR)':'MARCIAU Roger',
'Roger MARCIAU':'MARCIAU Roger',
'Aude Massa':'MASSA Aude',
'Aude MASSA':'MASSA Aude',
'Bilkisse ABOUDOU AVENIR':'ABOUDOU Bilkisse',
'Romain BARTHELD':'BARTHELD Romain',
'VILLARET J.C.':'VILLARET Jean-charles',
'Jean-Charles VILLARET':'VILLARET Jean-charles',
'Fabien ANTHELME':'ANTHELME Fabien',
'Jean-Christophe GATTUS':'GATTUS Jean-christophe',
'Alexis MIKOLAJCZAK':'MIKOLAJCZAK Alexis',
'Jonathan MALINEAU':'MALINEAU Jonathan',
'Aurélien DAUTREY':'DAUTREY Aurélien',
'Etienne MARY':'MARY Etienne',
'Alix GUEDOU': 'GUEDOU Alix',
'BEGUIN Lucile': 'BÉGUIN Lucile',
}
lst_aut = lst_auteur.copy()
lst_aut.replace(' & | - ',',',regex=True,inplace=True)
if any(lst_aut.str.contains(',')):
lst_aut = lst_aut.str.split(',',expand=True).stack().str.strip().droplevel(-1)
lst_aut.replace(dic,inplace=True)
df_aut = lst_aut.str.rsplit(' ',n=1,expand=True)
if df_aut.shape[1]==2:
df_aut[1] = df_aut[1].str[0].str.upper() + df_aut[1].str[1:].str.lower()
df_aut[0] = df_aut[0].str.upper()
df_aut[2] = df_aut[0]
df_aut.loc[~df_aut[1].isna(),2] = df_aut.loc[~df_aut[1].isna(),0] + ' ' + df_aut.loc[~df_aut[1].isna(),1]
lst_aut = df_aut[2].copy()
else : lst_aut = df_aut[0].copy()
aut = lst_aut.unique()
res = [x for x in aut if x not in [*get_pers().nom_prenom]]
if res:
print('Les auteurs suivant ne figurent pas dans la BDD contact : %s'%res)
new = new_auteur(res)
lst_aut.replace([*new['aut'].keys()],[*new['aut'].values()], inplace=True)
lst_aut = lst_aut.groupby(axis=0,level=0).apply(lambda x: "%s" % ','.join(x))
return lst_aut
def new_auteur(new):
'''
new : list
'''
df = pd.DataFrame({'auteur': new})
dic = {'aut':{}, 'org':[]}
# Pour chaque auteur
for pers in df.auteur:
print(pers)
while True:
newaut = input("Insérer un nouvel auteur ? (Y/N)\n")
if not newaut[0].upper() in ['Y','N']:
continue
else: break
# Si : pas d'insertion d'un nouvel auteur
if newaut[0].upper() == 'N':
while True:
rep = input("Remplacer l'auteur par un auteur existant ? (Y/N)\n")
if not rep[0].upper() in ['Y','N']:
continue
else: break
if rep[0].upper() == 'Y':
while True:
org = pycen.pers.get_organisme().sort_index().to_dict()['nom']
reporg = int(input("Sélectionner le numéro de l'organisme :\n %s \n"%org))
if not reporg in [*org.keys()]:
continue
else: break
while True:
aut = get_pers().sort_values('id')
aut = aut[aut.id_organisme == reporg]
aut = aut.nom_prenom.to_dict()
repaut = int(input("Sélectionner le numéro de l'auteur :\n %s \n"%aut))
if not repaut in [*aut.keys()]:
continue
else: break
dic['aut'] = {**dic['aut'],pers:aut[repaut]}
dic['org'] = [*dic['org'],org[reporg]]
# Autrement : insertion d'un nouvel auteur
else:
while True:
print("Auteur : %s"%pers)
rep = input("L'auteur doit être sous le format NOM Prénom.\nRenommer l'auteur ? (Y/N)\n")
if not rep[0].upper() in ['Y','N']:
continue
elif rep[0].upper() == 'Y':
repaut = input("Saisisser un nouveau nom :\n")
dic['aut'] = {**dic['aut'],pers:repaut}
pers = repaut
break
else:
dic['aut'] = {**dic['aut'],pers:pers}
break
while True:
org = pycen.pers.get_organisme().sort_index().to_dict()['nom']
org = {-1: 'Autre', **org}
reporg = int(input("Sélectionner le numéro de l'organisme :\n %s \n"%org))
if not reporg in [*org.keys()]:
continue
elif reporg == -1:
reporg = input("Saisisser un nouvel organisme :\n")
dic['org'] = [*dic['org'],reporg]
insert_newOrganisme(reporg)
org = pycen.pers.get_organisme()
idorg = org[org.nom==reporg].index[0]
insert_newAuteur(pers,idorg)
break
else:
dic['org'] = [*dic['org'],org[reporg]]
insert_newAuteur(pers,reporg)
break
return dic
def insert_newAuteur(aut,idorg):
    """Insert a new author into the personnes.personne table.

    :param aut: author name, expected as 'NOM Prénom' (or 'NOM' alone).
    :param idorg: id of the author's organisation.
    :raises ValueError: when the name splits into more than two parts.
    """
    aut = pd.Series(aut)
    idorg = pd.Series(idorg)
    df = pd.DataFrame({'aut':aut,'id_organisme':idorg})
    # Split 'NOM Prénom' on the last space: one part -> surname only,
    # two parts -> surname + first name.  (``n`` is now passed by
    # keyword: the positional form is removed in pandas >= 2.0.)
    tmp = df.aut.str.rsplit(' ',n=1,expand=True)
    if tmp.shape[1] == 1:
        df[['nom']] = tmp
    elif tmp.shape[1] == 2:
        df[['nom', 'prenom']] = tmp
    else:
        # Bug fix: the original message contained the literal text
        # 'tmp' instead of interpolating the offending value.
        raise ValueError('NOM Prénom est de longueur inconnu :\n%s' % tmp)
    del df['aut']
    try:
        df.to_sql(name='personne',con=pycen.con,schema=pycen.pers.schema,
                  if_exists='append', index=False)
        print("Nouvel auteur inséré : '%s'"%aut)
    except Exception as e:
        # Narrowed from a bare ``except`` (which also swallowed
        # KeyboardInterrupt/SystemExit); the DB error is now reported.
        print("ERROR : impossible d'insérer le nouvel auteur :\n '%s'"%aut)
        print(e)
def insert_newOrganisme(nom):
    """Insert a new organisation into the personnes.organisme table.

    :param nom: organisation name (scalar; wrapped into a one-row frame).
    """
    nom = pd.Series(nom)
    df = pd.DataFrame({'nom':nom})
    try:
        df.to_sql(name='organisme',con=pycen.con,schema=pycen.pers.schema,
                  if_exists='append', index=False)
        print("Nouvel organisme inséré : '%s'"%nom)
    except Exception as e:
        # Narrowed from a bare ``except``; the DB error is now reported.
        print("ERROR : impossible d'insérer le nouvel organisme :\n '%s'"%nom)
        print(e)
def normalize_colname(df):
df.columns = df.columns.str.lower()
df.columns = df.columns.str.replace('__','_',regex=True)
if df.columns.str.contains('cb_hab').any():
df.columns = df.columns.str.replace('cb_hab','code_hab',regex=True)
if df.columns.str.contains('cod_hab').any():
df.columns = df.columns.str.replace('cod_hab','code_hab',regex=True)
return df.rename(columns={
'n_polygone':'ident',
# 'id_site':'ident',
'code-ug':'ident',
'ident_':'ident',
'id':'ident',
'idfinal':'id_origine',
'site_code':'id_site',
'date_':'date',
'obs':'auteur',
'structur':'structure',
'pratiq_' :'pratiques',
'pratique' :'pratiques',
's_p_brous' :'%_embrous',
's_p_brouss':'%_embrous',
'taux_embrou':'%_embrous',
'tx_embrous':'%_embrous',
'niv__embro':'niv_embrous',
'niv_embro' :'niv_embrous',
'niv_embrou' :'niv_embrous',
'niv_emb' :'niv_embrous',
'embroussaillement' :'niv_embrous',
'embrouss' :'niv_embrous',
'taux_recvmt':'%_recouvmnt',
'recouvrement':'recouvmnt',
'recouvreme':'recouvmnt',
'recouvr':'recouvmnt',
'recouv' :'recouvmnt',
'recouvr_' :'recouvmnt',
'remarque' :'remarques',
'remarq_' :'remarques',
'rmq_intere' :'remarques',
'legendes' :'legende',
'legend' :'legende',
'sources' :'source',
'surf' :'surface',
'geometry' :'geom',
})
def get_id_auteur(lst_author):
    """Resolve (author, structure) pairs against the contacts DB.

    :param lst_author: DataFrame with columns ['auteur', 'structure'];
        the structure may be the organisation's full name or its
        abbreviation.
    :return: DataFrame [id, auteur, id_organisme, organisme,
        orga_abbrev] for every matching pair.
    """
    schema = 'personnes'
    query = """
    WITH tmp AS (
        SELECT
            pers.id,
            TRIM(CONCAT(pers.nom,' ',pers.prenom)) AS auteur,
            pers.id_organisme,
            orga.nom organisme,
            orga.abbrev orga_abbrev
        FROM {sch}.{tab_auth} pers JOIN {sch}.{tab_orga} orga ON pers.id_organisme = orga.id
    )
    SELECT * FROM tmp
    WHERE (auteur,organisme) IN %(lst_author)s
    OR (auteur,orga_abbrev) IN %(lst_author)s
    """.format(sch=schema, tab_auth='personne', tab_orga='organisme')
    # One (auteur, structure) tuple per row, bound as a SQL parameter.
    pairs = tuple(lst_author.itertuples(index=False, name=None))
    return pd.read_sql(
        sql=query,
        con=pycen.con,
        params={'lst_author': pairs})
def existing_ps(df):
gdf = pycen.ps.get_sitesGeom()
gdf['area'] = gdf.area
# ovl = gpd.overlay(df,gdf)
ovl = gpd.overlay(df,gdf,how='intersection') #'intersection', 'union', 'identity', 'symmetric_difference' or 'difference'
ovl['ovl_area'] = ovl.area
ovl['ovl%'] = ovl['ovl_area'] * 100 / ovl['area']
return df
def define_siteOrg(df):
gdf = pycen.get_districtNat()[['abrev','geom']]
tmp = df[['geom']].copy().set_geometry('geom')
tmp.reset_index(0, inplace=True)
tmp = gpd.overlay(tmp,gdf, how='intersection')
tmp['area'] = tmp.area
tmp.set_index('area',inplace=True)
tmp.sort_index(inplace=True)
tmp.drop_duplicates(subset=['index'],keep='last',inplace=True)
tmp.reset_index(drop=True,inplace=True)
del tmp['geometry']
df = df.merge(tmp,how='left',left_index=True,right_on='index')
del df['index']
df.rename(columns={'abrev':'org'}, inplace=True)
return df
# tmp[tmp.id_site==334]
# 'touches', None, 'contains', 'overlaps', 'contains_properly', 'within', 'intersects', 'crosses', 'covers', 'covered_by'
def ident_newsite(df,rcvmt=10):
    '''
    Identify new geometries versus updates of existing ones, by
    overlap with the sites already in the database:
    MAJ (1:1) / Replacement (1:n) / New (1:0).

    :param df: GeoDataFrame of candidate sites ('id_origine', 'geom',
        optionally 'id_site' for rows already matched).
    :param rcvmt: minimal overlap percentage (of the old site's area)
        for a candidate to count as an update.
    :return: (df_new, df_cor, df_maj) — new sites, the
        old/new correspondence table, and the updates.
    '''
    maj = []
    if 'id_site' in df.columns:
        # Rows that already carry an id_site are updates by definition.
        maj = df[~df.id_site.isna()]
        df = df[df.id_site.isna()]
    if df.empty:
        return df,df,maj
    # Current "pelouses sèches" layer from the database.
    sql = 'SELECT site_code,geom FROM ps.v_pelouseseches'
    v_ps = gpd.read_postgis(sql, pycen.con)
    # Which existing sites do the new geometries touch?
    df_inters = gpd.sjoin(df,v_ps, how='left')
    del df_inters['index_right']
    lst_old_site = df_inters.site_code.unique()
    v_ps = v_ps[v_ps.site_code.isin(lst_old_site)].copy()
    v_ps.loc[:,'surf'] = v_ps.area
    # Overlap percentage relative to the existing site's area.
    tmp = gpd.overlay(v_ps,df[['id_origine','geom']],how='intersection',make_valid=True,keep_geom_type=False)
    if isinstance(tmp, gpd.GeoDataFrame) and tmp.geometry.name !='geom':
        tmp.rename_geometry('geom',inplace=True)
    tmp['perc_rcvmt'] = 100*tmp.area/tmp['surf']
    # Pairs above the threshold; a 1:1 pair (neither side duplicated)
    # is a plain update (MAJ).
    tmpp = tmp[tmp.perc_rcvmt > rcvmt]
    site_maj = tmpp[
        (~tmpp.id_origine.duplicated(keep=False)) &
        (~tmpp.site_code.duplicated(keep=False))
    ].id_origine
    maj2 = df[df.id_origine.isin(site_maj)].merge(
        tmpp[['site_code','id_origine']], on=['id_origine'])
    maj2['id_site'] = maj2['site_code']
    del maj2['site_code']
    # BUG FIX: ``maj`` may be a DataFrame here, and ``if maj:`` raised
    # "ValueError: The truth value of a DataFrame is ambiguous".
    # Test its length instead (works for both the list and the frame).
    if len(maj):
        df_maj = pd.concat([maj,maj2])
    else:
        df_maj = maj2
    # Whatever was not matched as an update is a new site.
    orig_maj_all = df_maj.id_origine
    id_maj_all = df_maj.id_site
    df_new = df[~df.id_origine.isin(orig_maj_all)]
    # Correspondence new_site / old_site to be closed (replacements).
    df_cor = tmpp.loc[
        (~tmpp.site_code.isin(id_maj_all))&(~tmpp.id_origine.isin(orig_maj_all)),
        ['site_code','id_origine']
    ]
    df_cor.rename(columns={'site_code':'id_site_old'}, inplace=True)
    return df_new, df_cor, df_maj
def define_siteName(df,rcvmt=10):
'''
Return df_new, df_replace, df_maj
'''
dfnew, dfcor, df_maj = ident_newsite(df,rcvmt)
df2 = df[~df.index.isin(df_maj.index)].copy()
if not df2.empty:
df2 = define_siteOrg(dfnew.copy())
df2['dept'] = '38'
# df2['num'] = None
if 'org' in df2.columns:
for org in df2.org.unique():
tmp = pd.concat([
pycen.sites.get_sitesInfos(milieu=pycen.ps.milieu),
pycen.sites.get_sitesInfos(milieu='Trame thermophiles')
])
# tmp = pycen.sites.get_sitesInfos(milieu=pycen.zh().typ_milieux.nom[0])
tmp = tmp[tmp.org == org]
if not tmp.empty:
num = tmp.num.astype(int).max()
seq = range(num+1, num + df2.loc[df2.org==org].shape[0]+1)
df2.loc[df2.org==org,['num']] = list(seq)
else:
seq = range(1, df2.loc[df2.org==org].shape[0]+1)
df2.loc[df2.org==org,['num']] = list(seq)
else:
tmp = pycen.sites.get_sitesInfos(milieu=pycen.ps.milieu)
df2.loc[~df2.num.isna(),'num'] = df2[~df2.num.isna()].num.astype(int).astype(str)
df2.loc[df2.num.str.len() == 1,'num'] = '000'+ df2.loc[df2.num.str.len() == 1,'num']
df2.loc[df2.num.str.len() == 2,'num'] = '00' + df2.loc[df2.num.str.len() == 2,'num']
df2.loc[df2.num.str.len() == 3,'num'] = '0' + df2.loc[df2.num.str.len() == 3,'num']
df2['id_site'] = df2['dept'] + df2['org'] + df2['num']
# Jointure id_site_new / id_site_old
df_new = df2[~df2.id_origine.isin(dfcor.id_origine)]
df_rep = df2[df2.id_origine.isin(dfcor.id_origine)]
df_cor = pd.merge(dfcor,df2[['id_origine','id_site']], on='id_origine')
df_cor.drop(columns=['id_origine'], inplace=True)
df_cor.rename(columns={'id_site':'id_site_new'}, inplace=True)
df_replace = {
'df' : df_rep,
'cor': df_cor
}
else:
df_new = dfnew
df_replace = {'df' : dfcor}
return df_new, df_replace, df_maj
def normalize_formatIdsite(id_site):
    """Normalise site identifiers to the ``<dept><ORG><NNNN>`` format.

    Each raw id is split into its alphabetic organisation prefix and
    numeric part; the number is zero-padded to 4 digits and the
    departement '38' is prefixed.

    :param id_site: iterable/Series of raw site ids (e.g. 'AV12').
    :return: pd.Series of normalised ids (e.g. '38AV0012').
    """
    import re
    df = pd.DataFrame({'id_site':id_site})
    df['dept'] = '38'
    # Raw string for the regex; the capture group keeps the number in
    # the split result: 'AV12' -> ['AV', '12', ''].
    df[['org','num','other']] = [re.split(r'(\d+)',s) for s in [*df.id_site]]
    del df['other']
    # zfill replaces the previous three hand-written padding branches
    # (identical behaviour: numbers of 4+ digits are left untouched).
    df['num'] = df['num'].str.zfill(4)
    df['id_site'] = df['dept'] + df['org'] + df['num']
    return df.id_site
def normalize_paturage(pat):
    """Lower-case grazing descriptions and map animal names onto the
    matching grazing type (asin/bovin/ovin/équin/caprin); 'sangliers'
    (wild boars) is blanked out."""
    patterns = ['paturage','ânes','vaches','moutons','cheval','chevaux','poneys','sangliers','chèvres']
    targets = ['pâturage','asin', 'bovin','ovin','équin','équin','équin',None,'caprin']
    lowered = pat.str.lower()
    return lowered.replace(patterns, targets, regex=True)
def normalize_pratiques(prat):
    """Normalise the free-text land-use "pratiques" values.

    Separators ('et', '/') become commas, the text is lower-cased, the
    grazing vocabulary is normalised through ``normalize_paturage``,
    then the alias table below maps codes/typos onto canonical labels.
    Finally each comma-separated practice goes to its own column.

    :param prat: pd.Series of raw practice strings.
    :return: DataFrame with one practice per column ('p0', 'p1', ...).
    """
    # Exact-match normalisation table (typos, abbreviations, codes).
    # NOTE: the original dict listed the key '0' twice with the same
    # value 'N.D'; the duplicate was removed (behaviour unchanged).
    dic = {
        'pâturage equin':'pâturage équin',
        'pâturage bovin':'pâturage bovin',
        'paturâge cervidé':'pâturage cervidé',
        'paturâge anin':'pâturage asin',
        'broyer': 'broyage',
        'truffier': 'broyage',
        'plantation': 'broyage',
        'culture' : 'labourage',
        'abandon recent': 'abandon',
        'abandon verger': 'abandon',
        'remblais et feu' : 'incertain',
        'remblais, feu' : 'incertain',
        'caprin': 'pâturage caprin',
        'ovin': 'pâturage ovin',
        'équin': 'pâturage équin',
        'equin': 'pâturage équin',
        'bovin': 'pâturage bovin',
        'paturage': 'pâturage',
        'paturâge': 'pâturage',
        'pâturage difficile': 'pâturage',
        'f' : 'fauche',
        'p' : 'pâturage',
        '0' : 'N.D',
        'i' : 'incertain',
        'a' : 'abandon',
        'pv': 'pâturage',
        'b' : 'broyage',
        'pc': 'pâturage caprin',
        'po': 'pâturage ovin',
        'pe': 'pâturage équin',
        'pb': 'pâturage bovin',
        '-' : 'N.D',
        'ND' : 'N.D',
        'n.d' : 'N.D',
        'chemin carrossable non goudronné' : 'N.D',
        'plantation de châtaigniers' : 'broyage',
    }
    # '.et' (any char + 'et') and '/' both act as separators -> commas.
    p = prat.replace(['.et','/'],[',',', '],regex=True).copy()
    p = p.str.lower()
    p = normalize_paturage(p)
    p = p.replace(dic)
    p.fillna('N.D', inplace=True)
    # Anything containing '?' or an ' ou ' (or) alternative is uncertain.
    p[p.str.contains(r'\?| ou ',na=False)] = 'incertain'
    # One column per practice: p0, p1, ...
    pp = p.str.split(',',expand=True)
    pp.columns = 'p' + pp.columns.astype(str)
    for c in pp.columns:
        pp[c] = pp[c].str.strip()
        pp[c] = pp[c].replace(dic)
    return pp
def select_ID(df,sch,tab):
df = df.copy().dropna(axis=1,how='any')
pk = pycen.tools.__get_pkey__(pycen.con,tab,sch)
if pk['constrained_columns']:
pkey = pk['constrained_columns']
else : pkey = []
sql = 'SELECT %s FROM %s.%s'%(','.join([*pkey,*df.columns]),sch,tab)
df.replace("'","''",regex=True,inplace=True)
if 'geom' in df.columns:
del df['geom']
if 'date' in df.columns:
df['date'] = df['date'].astype(str)
elif 'date_deb' in df.columns:
df['date_deb'] = df['date_deb'].astype(str)
if 'id_cb' in df.columns:
df['id_cb'] = df['id_cb'].astype(str)
column = '(%s)'%','.join(df.columns)
df_tuple = tuple(df.itertuples(index=False, name=None))
sql = sql + ' WHERE %s IN %s'%(column,df_tuple)
if 'nan' in sql: sql = sql.replace('nan','NULL')
if ',)' in sql: sql = sql.replace(',)',')')
if '"' in sql: sql = sql.replace('"',"'")
# print(sql)
return pd.read_sql(
sql=sql,
con=pycen.con)
def insertAutorAttrs(df,colname_rsite, sch, tab):
df = df.copy()
df = df[['id','auteur','structure']].copy()
# structure = [df.structure.unique()]
# lst_author = df[['auteur','structure']].drop_duplicates()
if any(df.auteur.str.contains(',')):
tmp = df.auteur.str.split(',',expand=True)\
.stack().droplevel(-1)\
.str.strip()
tmp = pd.DataFrame({'auteur':tmp})
df = pd.merge(df[['id','structure']],tmp,how='left',left_index=True,right_index=True)
lst_author = df[['auteur','structure']]\
.drop_duplicates()\
.reset_index(drop=True)
df_autor = get_id_auteur(lst_author=lst_author)
for r,row in df_autor.iterrows():
df.loc[
(df.auteur.str.contains(row.auteur)) &
((df.structure==row.organisme) | (df.structure==row.orga_abbrev)),
'auteur'] = row.id
del df['structure']
# df.replace([*df_autor.auteur],[*df_autor.id], inplace=True)
df.columns = [colname_rsite, 'id_auteur']
df.drop_duplicates(inplace=True)
try :
df.to_sql(
name=tab,
con=pycen.con,
schema=sch,
index=False,
if_exists='append',
method='multi'
)
print('Insert author sites data OK !')
return 'Insert author sites data OK !'
except Exception as e:
print('ERROR : News author insert impossible !')
print(e)
def _altertype(df1,df2,con=None,table_name=None, schema=None):
if con and table_name and schema:
tp = pycen.tools.__get_dtype__(con,table_name, schema)
same_col = df1.columns[df1.columns.isin(df2.columns)]
if 'date' in df2.columns:
df2['date'] = df2['date'].astype(str)
if 'geom' in same_col:
same_col = same_col.drop('geom')
for col in same_col:
if df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == int:
if df1.shape[0] == df2.shape[0]:
df1[col] = df1[col].astype(int)
else :
df2[col] = df2[col].astype(float)
elif df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == float:
df1[col] = df1[col].astype(float)
elif df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == str:
df1[col] = df1[col].astype(str)
elif df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == object:
df1[col] = df1[col].astype(str)
return [df1,df2]
def insert_authordata(df_ins,tab_data,id_data,colname_rsite, tab_auteur):
df_ins = df_ins.copy()
col_auth = ['auteur','structure']
if 'date' in df_ins.columns:
df_ins['date'] = df_ins['date'].astype(str)
elif 'date_deb' in df_ins.columns:
df_ins['date_deb'] = df_ins['date_deb'].astype(str)
tab_sit = pycen.con.dialect.get_table_names(pycen.con,pycen.sites.schema)
tab_ps = pycen.con.dialect.get_table_names(pycen.con,pycen.ps.schema)
if tab_data in tab_sit:
sch = pycen.sites.schema
elif tab_data in tab_ps:
sch = pycen.ps.schema
# ids = select_ID(df_ins[df_ins.columns.drop('auteur')],sch,tab_data)
ids = select_ID(df_ins[[id_data]],sch,tab_data)
if 'id_site' in ids.columns:
if tab_data == 'r_sites_geom': col = 'id'
else: col = 'id_geom_site'
tp = ids.groupby(['id_site'])[col].max()
ids = ids[ids[col].isin(tp)]
# join_on = [id_data]
same_col = df_ins.columns[df_ins.columns.isin(ids.columns)]
if len(same_col) > 1 and ids[same_col].duplicated().any():
raise ValueError('Doublon détecté dans la jointure [df_ins VS ids] !')
if 'geom' in same_col:
same_col = same_col.drop('geom')
join_on = pd.Series([*same_col,id_data]).unique()
# if df_ins.shape[0] != ids.shape[0]:
# join_on = same_col
if 'date' in ids.columns:
ids['date'] = ids['date'].astype(str)
df_ins,ids = _altertype(df_ins,ids)
if tab_data != 'sites':
# df_ins = df_ins[[*same_col,*col_auth]].merge(ids, on=[*same_col], how='left') # A REVOIR
df_ins = df_ins[[*same_col,*col_auth]].merge(ids, on=[*join_on], how='left') # A REVOIR
# df_ins = df_ins[[*same_col,'auteur']].merge(ids, on=['id_site'], how='left') # A REVOIR
insertAutorAttrs(df=df_ins,colname_rsite=colname_rsite, sch=sch, tab=tab_auteur)
def insert_site(df):
df = df.copy()
if 'nom' not in df.columns:
df['nom'] = 'Inconnu'
if 'type_site' not in df.columns:
df['type_site'] = 'N.D.'
if 'type_milieu' not in df.columns:
df['type_milieu'] = 'Pelouses sèches'
# if 'remarques' not in df.columns:
# df['remarques'] = None
df = df[['id_site','nom','date','auteur','type_site','type_milieu',
'structure'
# 'remarques'
]]
drop_col = ['auteur','structure']
typs = pycen.sites._get_typ_site()
typm = pycen.sites._get_typ_milieux()
for t in df.type_milieu.unique():
df.loc[df.type_milieu == t, 'id_type_milieu'] = typm.loc[typm.nom == t, 'id'].values[0]
for s in df.type_site.unique():
df.loc[df.type_site == s, 'id_type_site'] = typs.loc[typs.nom == s, 'id'].values[0]
df['id_type_milieu'] = df['id_type_milieu'].astype(int)
# df['id_type_site'] = df['id_type_site'].astype(str)
df['dept'] = df.id_site.str[:2]
df['org'] = df.id_site.str[2:-4]
df['num'] = df.id_site.str[-4:]
df.rename(columns={'date':'date_deb', 'id_site':'id'}, inplace=True)
columns = [col['name'] for col in pycen.sites.columns_sitetab] + drop_col
c = df.columns[df.columns.isin(columns)]
df = df[c]
print(df)
try:
df[df.columns.drop(drop_col)].to_sql(
name = 'sites',
con = pycen.con,
schema = pycen.sites.schema,
if_exists = 'append',
index = False,
method = 'multi'
)
print('Insert news sites OK !')
return [df,True]
except Exception as e:
print(e)
print('PS : Insert news sites impossible !')
return [df,False]
def check_org_lot(lot):
    """
    :lot: list. Labels of the import lots.
    return
    -------
    DataFrame [id_lot, libelle] restricted to *lot*; any label missing
    from sites.lots is inserted first.
    """
    known = pycen.sites._get_lots()
    missing = [label for label in lot if label not in [*known.libelle]]
    if missing:
        # Register the unseen lot labels, then reload the table so the
        # generated ids are available.
        pd.DataFrame({'libelle': missing}).to_sql(
            name='lots',
            con=pycen.con,
            schema='sites',
            index=False,
            if_exists='append',
            method='multi',
        )
        known = pycen.sites._get_lots()
    subset = known[known.libelle.isin(lot)]
    return subset[['id_lot','libelle']]
def insert_geom(df):
drop_col = ['auteur','structure']
dic = {'remarques':'rmq_interet_patri','legende':'rmq_usage_process'}
c = ['id_site','geom','date','remarques','legende','table_org','id_origine',*drop_col]
cc = df.columns[df.columns.isin(c)]
df = df[cc].copy()
df.set_geometry('geom', inplace=True)
colindict = [ *df.columns[df.columns.isin(dic.keys())] ]
if colindict:
df.rename(
columns={'remarques':'rmq_interet_patri','legende':'rmq_usage_process'},
inplace=True,
)
if 'table_org' in df.columns:
tab = [*df.table_org.unique()]
lots = check_org_lot(tab)
df = pd.merge(df,lots,left_on='table_org',right_on='libelle',how='left')
del df['table_org']
del df['libelle']
try:
df[df.columns.drop(drop_col)].to_postgis(
name = 'r_sites_geom',
con = pycen.con,
schema = pycen.sites.schema,
if_exists = 'append',
index = False,
# geom_col = 'geom'
)
print('Insert news geom OK !')
except Exception as e:
print(e)
print('PS : Insert news geom impossible !')
return [df,False]
return [df,True]
def get_idGeomSite(lst_site,ps_statut='actif'):
'''
:lst_site: list or pd.Series .
:ps_statut: str. ['actif', 'history', 'all'].
Statut des sites à récupérer,
'actif'; Date_fin IS NULL
'history'; A une Date_fin
'all'; Tous les Sites
'''
id_geom = pycen.ps.get_sitesGeom(
id_site=[*lst_site], statut=ps_statut)
tp = id_geom.groupby('id_site').id.max()
id_geom = id_geom[id_geom.id.isin(tp)]
if not id_geom.empty:
id_geom = id_geom[['id','id_site']]
id_geom.columns = ['id_geom_site','id_site']
return id_geom
def insert_attrsPS(df,ps_statut='actif'):
params = []
drop_col = ['auteur','structure']
for p in ['%_embrous','niv_embrous','%_recouvmnt','recouvmnt']:
if p in df.columns:
params = [*params, p]
c = ['id_site','date','pratiques',*params,*drop_col]
for p in ['statut','pratiques']:
if p in df.columns:
c = [*c, p]
cc = df.columns[df.columns.isin(c)]
df = df[cc].copy()
if 'pratiques' in df.columns and not all(df.pratiques.isna()):
df = pd.concat([df,normalize_pratiques(df['pratiques'])],axis='columns')
del df['pratiques']
for col in params:
df[col] = df[col].astype(str).replace(['\.0'],[''],regex=True)
if 'niv_embrous' in df.columns:
df.niv_embrous.replace(['nan'],['N.D'],inplace=True)
if 'recouvmnt' in df.columns:
df.recouvmnt.replace(['nan','None','0'],['N.D','N.D','N.D'],inplace=True)
if 'statut' in df.columns:
df.statut.replace(['ND'],['N.D'],inplace=True)
# for t in df.dtypes[df.dtypes == int].index:
# df.loc[~df[t].isna(),t] = df.loc[~df[t].isna(),t].astype(int).astype(str)
taux = False
if '%_embrous' in df.columns:
taux = True
df.niv_embrous = df.niv_embrous + ';' + df['%_embrous']
del df['%_embrous']
if '%_recouvmnt' in df.columns:
taux = True
df.recouvmnt = df.recouvmnt + ';' + df['%_recouvmnt']
del df['%_recouvmnt']
df.set_index(['id_site','date',*drop_col], inplace=True)
df = df.stack().reset_index(-1)
df.columns = ['type','param']
df.loc[df.type.str.len() < 4,'type'] = 'pratique'
if taux:
df[['param','taux']] = df.param.str.split(';',expand=True)
df.taux.replace(['nan','None'],[None,None],inplace=True)
df.type.replace({'niv_embrous':'embroussaillement','recouvmnt':'recouvrement'}, inplace=True)
df.param.replace({'ND':'N.D'}, inplace=True)
df.reset_index(inplace=True,drop=False)
param = pycen.ps.get_param()
typ_p = pycen.ps._get_table(con=pycen.con,schema=pycen.ps.schema,table='type_param')
for t in df.type.unique():
tmp = param[param.type == t]
df.loc[df.type==t,'param'] = df.loc[df.type==t,'param'].replace([*tmp.nom], [*tmp.id])
df.loc[df.type==t,'param'] = df.loc[df.type==t,'param'].replace([*tmp.description], [*tmp.id])
df.loc[df.type==t,'param'] = df.loc[df.type==t,'param'].replace([*tmp.description.str.lower()], [*tmp.id])
df['type'].replace([*typ_p.nom], [*typ_p.id], inplace=True)
df = df.merge(get_idGeomSite(df.id_site,ps_statut=ps_statut), on=['id_site'], how='left')
# del df['id_site'] # id_site inséré dans la table depuis 19-07-2022
del df['type']
df.rename(columns={'param':'id_param'}, inplace=True)
# df.loc[df.id_param=='None','id_param'] = None
df.replace({'None':None}, inplace=True)
df.dropna(subset=['id_param'], inplace=True)
try:
df[df.columns.drop(drop_col)].to_sql(
name = 'r_site_param',
con = pycen.con,
schema = pycen.ps.schema,
if_exists = 'append',
index = False,
method = 'multi'
)
print('Insert news attrs OK !')
return [df,True]
except Exception as e:
print(e)
print('PS : Insert news attrs impossible !')
return [df,False]
def split_codehab(lst_codehab):
    """Split composite habitat codes on their separators (x, X, ',',
    '&', '/', '(' and 'et'), one row per individual code.

    :param lst_codehab: pd.Series of raw habitat-code strings.
    :return: DataFrame with the original index in column 'index' and
        the stripped codes in column 0.
    """
    parts = lst_codehab.copy().str.split('x|X|,|&|/|\(|et', expand=True)
    parts = parts.stack().str.strip()
    parts = parts.str.replace('\)', '', regex=True)
    return parts.droplevel(-1).reset_index(drop=False)
def format_codehab2insert(lst_codehab):
    """Expand composite habitat codes into one row per code.

    Records the rank ('ordre', 1-based), the separator that preceded
    the split ('sep': '&' or '/') and, when a code was written in
    parentheses, its 'suspect' qualifier.  The input index must be
    named 'id'; the result is re-indexed on it.
    """
    codes = lst_codehab.copy()
    # Normalise every separator to '&', then suffix ';' to both '&'
    # and '/' so the split below keeps the separator with each part.
    codes.replace('x|X|,|et', '&', regex=True, inplace=True)
    codes.replace('&', '&;', regex=True, inplace=True)
    codes.replace('/', '/;', regex=True, inplace=True)
    exploded = codes.str.split(';', expand=True).stack().str.strip().reset_index(-1)
    exploded.columns = ['ordre', 'code_hab']
    exploded.ordre = exploded.ordre + 1
    exploded.loc[exploded.code_hab.str.contains('&'), 'sep'] = '&'
    exploded.loc[exploded.code_hab.str.contains('/'), 'sep'] = '/'
    exploded.code_hab = exploded.code_hab.replace('&|/', '', regex=True).str.strip()
    exploded.reset_index(drop=False, inplace=True)
    # A '(' in a code marks a suspected habitat: keep the part in
    # parentheses in a dedicated 'suspect' column.
    maybe = exploded.code_hab.str.split('(', expand=True)
    if maybe.shape[1] > 1:
        maybe[0] = maybe[0].str.strip()
        maybe[1] = maybe[1].str.replace('\)', '', regex=True)
        maybe.columns = ['code_hab', 'suspect']
        del exploded['code_hab']
    else:
        maybe.columns = ['code_hab']
        del maybe['code_hab']
    exploded = pd.merge(exploded, maybe, right_index=True, left_index=True)
    exploded.set_index('id', inplace=True)
    return exploded
def insert_cbPS(df):
drop_col = ['auteur','structure']
df = df.copy()
df.date = df.date.astype(str)
df.code_hab = df.code_hab.replace(['\*'],[''],regex=True).str.strip()
df.code_hab = df.code_hab.replace(['None','nan','0'],[None,None,None])
df.dropna(subset=['code_hab'],inplace=True)
ids = select_ID(df[df.columns.drop(['code_hab',*drop_col])],sch=pycen.ps.schema,tab='r_site_habitat')
same_col = df.columns[df.columns.isin(ids.columns)]
if ids[same_col].duplicated().any():
raise ValueError('Doublon détecté dans la jointure [df VS ids] !')
df,ids = _altertype(df,ids)
df = df.merge(ids, on=[*same_col], how='left')
df = df[['id', 'code_hab']].copy()
df.set_index('id',inplace=True)
# df = df.code_hab.str.split('x|,|&',expand=True) \
# .stack().str.strip() \
# .droplevel(-1).reset_index(drop=False)
# df = split_codehab(df.code_hab)
# df.columns = ['id_sitehab', 'code_hab']
df = format_codehab2insert(df.code_hab)
df.index.name = 'id_sitehab'
df.reset_index(drop=False,inplace=True)
try:
df.to_sql(
name = 'r_hab_cb',
con = pycen.con,
schema = pycen.ps.schema,
if_exists = 'append',
index = False,
method = 'multi'
)
print('Insert news codes habitats OK !')
except Exception as e:
print(e)
print('PS : Insert news codes habitats impossible !')
def insert_habPS(df,ps_statut='actif'):
    """Insert the habitat attributes (rank, proportion) of a PS campaign.

    Pivots the wide ``hab_N``/``code_habN``/``n_habN`` columns into one row
    per habitat, re-ranks habitats per geometry by decreasing proportion,
    joins the geometry ids and appends to ``r_site_habitat``.

    :param df: campaign DataFrame (wide habitat columns + id_site, date, ...).
    :param ps_statut: PS status forwarded to ``get_idGeomSite``.
    :return: ``[prepared_df, True]`` on success, ``[prepared_df, False]``
        otherwise (the DB error is printed, not raised).
    """
    import re
    drop_col = ['auteur','structure']
    c = ['id_site','date',*drop_col]
    cc = df.columns[(df.columns.isin(c)) | (df.columns.str.contains('code_hab|n_hab')) ]
    df = df[cc].copy()
    c_nhab = df.columns[df.columns.str.contains('n_hab')]
    df[c_nhab] = df[c_nhab].astype(float)
    cc = df.columns[(~df.columns.str.contains('hab')) ]
    df.set_index([*cc], inplace=True)
    # Long format: one row per (site, habitat column); split each column
    # name into its parameter ('code_hab'/'n_hab') and its rank digits.
    # Raw string: '(\d+)' is an invalid escape sequence on Python 3.12+.
    df = df.stack().reset_index(-1)
    df.columns = ['param','value']
    df[['param','index','other']] = [re.split(r'(\d+)',s) for s in [*df.param]]
    del df['other']
    df['index'] = df['index'].astype(int)
    df['value'] = df['value'].replace(['-'],[None])
    df.dropna(subset=['value'], inplace=True)
    df = df.set_index(['index','param'], append=True).unstack()
    df = df.droplevel(0,axis=1)
    df.code_hab.replace(['0','None','nan'],[None,None,None], inplace=True)
    df.dropna(subset=['code_hab'],inplace=True)
    # df['n_hab'] = df['n_hab'].astype(int) # disabled: allow inserts without n_hab
    df.reset_index(inplace=True,drop=False)
    df = df.merge(get_idGeomSite(df.id_site,ps_statut=ps_statut), on=['id_site'], how='left')
    # del df['id_site'] # id_site kept in the table since 19-07-2022
    # Re-rank habitats per geometry: by decreasing proportion when available,
    # otherwise by original column order.
    if 'n_hab' in df.columns:
        df.sort_values(['id_geom_site','n_hab','index'],ascending=[True,False,True], inplace=True)
    else:
        df.sort_values(['id_geom_site','index'],ascending=[True,True], inplace=True)
    lst = [ list(range(1, df[df.id_geom_site==d].shape[0]+1 )) for d in df.id_geom_site.unique()]
    flat_list = [item for sublist in lst for item in sublist]
    df['index'] = flat_list
    try:
        df[df.columns.drop([*drop_col,'code_hab'])].to_sql(
            name = 'r_site_habitat',
            con = pycen.con,
            schema = pycen.ps.schema,
            if_exists = 'append',
            index = False,
            method = 'multi'
        )
        print('Insert news attrs habitats OK !')
        return [df,True]
    except Exception as e:
        print(e)
        print('PS : Insert news attrs habitats impossible !')
        return [df,False]
def insert_legendCartoPS(df,ps_statut='actif'):
    """Append the cartographic-legend rows of a PS campaign to
    ``r_site_legcarto``.

    :param df: campaign DataFrame (``id_site``, ``auteur``, ``date``,
        ``leg_carto``).
    :param ps_statut: PS status forwarded to ``get_idGeomSite``.
    :return: ``[prepared_df, True]`` on success, ``[prepared_df, False]``
        otherwise (the DB error is printed, not raised).
    """
    wanted = ['id_site','auteur','date','leg_carto'] #,'legende']
    sub = df[df.columns[df.columns.isin(wanted)]].copy()
    # Map the short legend labels onto their parameter ids.
    param = pycen.ps.get_listLegendePS()
    sub['id_param_leg'] = sub['leg_carto'].replace([*param.nom_court],[*param.id])
    # Attach the geometry ids, then drop the columns not stored in the table.
    sub = sub.merge(get_idGeomSite(sub.id_site,ps_statut=ps_statut), on=['id_site'], how='left')
    sub.drop(columns=['id_site','leg_carto'], inplace=True)
    try:
        sub[sub.columns.drop('auteur')].to_sql(
            name = 'r_site_legcarto',
            con = pycen.con,
            schema = pycen.ps.schema,
            if_exists = 'append',
            index = False,
            method = 'multi'
        )
        print('Insert news legendes OK !')
        return [sub,True]
    except Exception as e:
        print(e)
        print('PS : Insert news legendes impossible !')
        return [sub,False]
def filter_saisierror(df):
    """Filter out survey polygons with data-entry errors.

    Two checks are run; offending polygons are exported to PostGIS review
    tables (module-level ``con``/``schema``, set in ``__main__``) and
    removed from the returned frame:

    1. coherence between the habitat label (``hab_N``) and its Corine
       Biotope code — a code representing less than 60% of a label's rows
       is flagged (the Mesobromion 34.322/34.323 pair is tolerated);
    2. codes absent from the Corine Biotope referential, and missing
       ``n_hab`` proportions.

    :param df: GeoDataFrame of PS polygons (wide ``hab_*``/``code_hab*``
        columns, ``geom``, ``id_site``).
    :return: the cleaned frame, sorted by ``id_site``.
    """
    import re
    tmp = df.copy()
    lst_iderror = []
    del tmp['geom']
    # Pivot the wide hab_N/code_habN/n_habN columns: one row per habitat rank.
    cc = tmp.columns[(~tmp.columns.str.contains('hab')) ]
    tmp.set_index([*cc], inplace=True)
    tmp = tmp.stack().reset_index(-1)
    tmp.columns = ['param','value']
    tmp[['param','index','other']] = [re.split(r'(\d+)',s) for s in [*tmp.param]]
    del tmp['other']
    tmp['index'] = tmp['index'].astype(int)
    tmp['value'] = tmp['value'].replace(['-'],[None])
    tmp.dropna(subset=['value'], inplace=True)
    tmp = tmp.set_index(['index','param'], append=True).unstack()
    tmp = tmp.droplevel(0,axis=1)
    tmp.reset_index(inplace=True,drop=False)
    lst_error1 = []
    # BUGFIX: the guard used to test '_hab', which can never match the
    # 'hab_' column produced by re.split above — check 1 was silently
    # skipped. The export is also kept inside the guard so the review
    # table is not clobbered when no hab_ column exists.
    if 'hab_' in tmp.columns:
        # Harmonise frequent habitat-label typos before comparing.
        tmp.hab_.replace(
            ['Xerobromenion erecti',
            'Mesobromion',
            'Carpino betuli prunion spinosae','Carpino-betuli prunion spinosae',],
            ['Xerobromion erecti',
            'Mesobromion erecti',
            'Carpino betuli-prunion spinosae','Carpino betuli-prunion spinosae',],
            inplace=True)
        for hab in tmp.hab_.sort_values().unique():
            print(hab)
            print(tmp[tmp.hab_ == hab].code_hab.unique())
            for cod in tmp[tmp.hab_ == hab].code_hab.unique():
                perc = tmp[(tmp.hab_ == hab)&(tmp.code_hab==cod)].code_hab.count() / tmp[tmp.hab_ == hab].code_hab.count()
                if perc < 0.6:
                    if hab == 'Mesobromion erecti' and cod in ['34.322','34.323']:
                        continue
                    print('\t'+cod + ' : '+ perc.astype(str))
                    lst_error1 = [*lst_error1,*tmp[(tmp.hab_ == hab)&(tmp.code_hab==cod)].sort_values('id_site').id_site.unique()]
        df[df.id_site.isin(lst_error1)].to_postgis(
            name='incoherence_habVScodehab',
            con=con,
            schema=schema,
            if_exists='replace',
            index=False,
            # geom_col='geom',
            index_label='id_site',
        )
        if lst_error1:
            print('%i'%len(lst_error1))
    # Check 2: codes unknown to the Corine Biotope referential.
    cb = pycen.ref_hab().get_CB()
    tmp.code_hab = tmp.code_hab.replace([r'\*'],[''],regex=True).str.strip()
    tmp.code_hab = tmp.code_hab.replace(['0'],[None])
    tmp.dropna(subset=['code_hab'], inplace=True)
    tmpp = tmp[['id_site','index','code_hab']].copy()
    tmpp.set_index(['id_site','index'], inplace=True)
    tmpp = split_codehab(tmpp.code_hab)
    tmpp.columns = ['id_site','index','code_hab']
    tmpp = tmpp[~tmpp.code_hab.isin(cb.id)]
    lst_error2 = [*tmpp.sort_values('id_site').id_site.unique()]
    if 'n_hab' in tmp.columns:
        # Missing proportions are also treated as entry errors.
        lst_error2 = [*lst_error2,*tmp[tmp.n_hab.isna()].sort_values('id_site').id_site.unique()]
    if lst_error2:
        print('ERROR code_hab : %i'%len(lst_error2))
        df[df.id_site.isin(lst_error2)].to_postgis(
            name='PB_codehab_nonPresent_dans_corineBiotope',
            con=con,
            schema=schema,
            if_exists='append',
            index=False,
            # geom_col='geom',
            index_label='id_site',
        )
    # Drop every flagged polygon from the working frame.
    lst_iderror = [*lst_error1,*lst_error2]
    lst_iderrorindex = df[df.id_site.isin(lst_iderror)].index
    df.drop(lst_iderrorindex,inplace=True)
    return df.sort_values('id_site')
def _insert_site_(df,ps_statut='actif'):
    """Run the geometry / attribute / habitat insertion chain for *df*,
    registering the authors after each successful step."""
    inserted, ok = insert_geom(df)
    if ok:
        insert_authordata(inserted, id_data='id_site', tab_data='r_sites_geom',
                          colname_rsite='id_geom_site', tab_auteur='r_geomsites_auteur')
    inserted, ok = insert_attrsPS(df, ps_statut)
    if ok:
        insert_authordata(inserted, id_data='id_geom_site', tab_data='r_site_param',
                          colname_rsite='id_siteparam', tab_auteur='r_siteparam_auteur')
    inserted, ok = insert_habPS(df, ps_statut)
    if ok:
        # Large batches are split in two halves to keep each multi-insert small.
        if inserted.shape[0] > 4000:
            half = round(inserted.shape[0]/2)
            insert_cbPS(inserted.iloc[:half].copy())
            insert_cbPS(inserted.iloc[half:].copy())
        else:
            insert_cbPS(inserted)
        insert_authordata(
            inserted[inserted.columns.drop('code_hab')], id_data='id_geom_site',
            tab_data='r_site_habitat',
            colname_rsite='id_sitehab', tab_auteur='r_sitehab_auteur')
def maj_site(df):
    """Update already-known sites: delegates to the standard
    geometry/attribute/habitat insertion chain (default 'actif' status)."""
    _insert_site_(df)
def new_site(df):
    """Create brand-new site codes, register their authors, then run the
    standard geometry/attribute/habitat insertion chain."""
    inserted, ok = insert_site(df)
    if ok:
        insert_authordata(inserted, id_data='id', tab_data='sites',
                          colname_rsite='id_site', tab_auteur='r_sites_auteur')
    _insert_site_(df)
def replace_site(df, cor):
    """Replace old sites by new ones.

    Inserts the new site codes, records the old/new correspondence in
    ``sites.r_site_maj``, closes the replaced sites (``date_fin`` set to
    the date of the new survey) and finally runs the standard insertion
    chain.

    :param df: new sites (must hold ``id_site`` and ``date``).
    :param cor: correspondence table with ``id_site_old``/``id_site_new``.
    """
    # New site codes
    inserted, ok = insert_site(df)
    if ok:
        insert_authordata(inserted, id_data='id', tab_data='sites',
                          colname_rsite='id_site', tab_auteur='r_sites_auteur')
    # Old/new correspondence
    cor.to_sql(
        name = 'r_site_maj',
        con = pycen.con,
        schema = 'sites',
        if_exists = 'append',
        index = False,
        method = 'multi'
    )
    # Close the replaced sites at the date of their replacement survey
    closing = pd.merge(
        cor, df[['id_site','date']],
        how='left',
        left_on='id_site_new',
        right_on='id_site')
    closing = closing[['id_site_old','date']]
    closing.columns = ['id','date_fin']
    pycen.update_to_sql(
        df = closing,
        con = pycen.con,
        table_name='sites',
        schema_name='sites',
        key_name='id'
    )
    # Standard insertion chain
    _insert_site_(df)
def check_overlaps():
    """Return pairs of PS polygons that overlap, oldest geometry first.

    Self-joins the ``ps.v_pelouseseches`` view and keeps only genuine
    overlaps between distinct site codes.

    :return: GeoDataFrame with old/new site codes and geometries
        (``geom_old`` as active geometry).
    """
    sql = '''
    SELECT v1.site_code site_code_old, v1.geom geom_old, v2.site_code site_code_new, v2.geom geom_new
    FROM ps.v_pelouseseches v1, ps.v_pelouseseches v2
    WHERE ST_OVERLAPS(v1.geom,v2.geom) = TRUE
    AND v1.date_geom < v2.date_geom
    AND v1.site_code <> v2.site_code;
    '''
    return gpd.read_postgis(sql, con_ps, geom_col='geom_old')
if __name__ == "__main__":
isin_bdd = True
schema = "habitat"
lst_tab = ['"PS_4MONTAGNESNE_CEN38_2014"','"PS_CHAMBARAN_CEN38_2013"','"PS_CHARTREUSE_CEN38_2010"','"PS_DRAC_CEN38_2014"',
'"PS_BELLEDONNE_CEN38_2014"','"PS_BIEVRE_CEN38_2014"','"PS_BIEVRE_CEN38_2016"','"PS_SUD-GRENOBLOIS_CEN38_2009"',
'"PS_VERCORS_CEN38_2011"'
]
# from_table = '"cr_ECRIN_habitats_CBNA_2014"'
from_table = None
from_file = 'PS38_modifs_AG_2024.gpkg'
path0 = '/home/colas/Documents/9_PROJETS/2_PS/TO IMPORT/2024/'
# org = from_file.split('/')[1]
tutu = pd.DataFrame()
if from_table is not None:
for table in lst_tab:
sql = "SELECT * FROM {sch}.{tab}".format(sch=schema,tab=table)
df = gpd.read_postgis(
sql = sql,
con = con
)
if 'id' in df.columns and 'n_polygone' in df.columns:
df['id'] = df['n_polygone']
del df['n_polygone']
elif 'id' in df.columns and 'polygon' in df.columns:
df['id'] = df['polygon']
del df['polygon']
elif 'id' in df.columns and 'ident' in df.columns:
df['id'] = df['ident']
del df['ident']
df = normalize_colname(df)
if 'remarques' in df.columns and 'legende' not in df.columns:
df['legende'] = df.remarques
# df.rename(columns={'id':'id_site'}, inplace=True)
if table == '"PS_VERCORS_CEN38_2011"':
df = df[df.date != '?'].copy()
# df.legende = format_legende(df.legende)
df = format_date(df)
if table == '"PS_CHAMBARAN_CEN38_2013"':
df.type_patur = normalize_paturage(df.type_patur)
df[['p1','p2']] = df.pratiques.str.split('/|.et.',expand=True)
df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'p1'] = \
df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'p1'] + ' ' + \
df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'type_patur']
df.loc[(~df.type_patur.isna()) & df.p2.str.lower()=='pâturage', 'p2'] = \
df.loc[(~df.type_patur.isna()) & df.p2.str.lower()=='pâturage', 'p2'] + ' ' + \
df.loc[(~df.type_patur.isna()) & df.p2.str.lower()=='pâturage', 'type_patur']
df['pratiques'] = df.p1
df.loc[~df.p2.isna(),'pratiques'] = df.p1 + '/' + df.p2
df.drop(columns=['type_patur','p1','p2'], inplace=True)
df['table_org'] = table[1:-1]
tutu = pd.concat([tutu,df])
if from_file is not None:
table = from_file.split('/')[-1]#.split('.')[0]
df = gpd.read_file(path0+from_file)
df = normalize_colname(df)
df = format_date(df)
df['table_org'] = table
df['structure'] = 'CEN Isère'
# df['type_pat'].replace(['Indéterminé','/'],None,inplace=True)
# df.loc[df.type_pat.notna(),'pratiques'] = \
# df[df.type_pat.notna()].pratiques + ' ' + df[df.type_pat.notna()].type_pat.str.lower()
tutu = pd.concat([tutu,df])
c = ['ident','id_origine','id_site','geom','auteur','hab_1','code_hab1','n_hab1',
'hab_2','code_hab2','n_hab2','hab_3','code_hab3','n_hab3',
'hab_5','code_hab5','n_hab5','hab_6','code_hab6','n_hab6',
'hab_7','code_hab7','n_hab7','hab_8','code_hab8','n_hab8',
'hab_9','code_hab9','n_hab9','hab_10','code_hab10','n_hab10',
'hab_11','code_hab11','n_hab11','hab_12','code_hab12','n_hab12',
'hab_13','code_hab13','n_hab13','hab_14','code_hab14','n_hab14',
'hab_4','code_hab4','n_hab4','pratiques','niv_embrous',
'recouvmnt','remarques','phyto','statut','h_remarq','legende',
'leg_carto','date','annee','%_embrous','%_recouvmnt','surf_emb',
'source','structure',
# 'org','dept','num',
'table_org'
]
if 'ident' in df.columns and 'id_origine' not in df.columns:
tutu.rename(columns={'ident':'id_origine'}, inplace=True)
cols = tutu.columns[tutu.columns.isin(c)]
df = tutu[cols].copy()
df = df.set_geometry('geom',crs=2154)
# df.code_hab3.replace({
# '43.171':'41.171',
# '31.4B':'31.4'
# }, inplace=True)
df = pycen.tools.Polygons_to_MultiPolygon(df)
df_new, df_replace, df_maj = define_siteName(df,rcvmt=10)
df_new_tmp = df_new.copy()
df_new = df_new_tmp[df_new_tmp.id_site.notna()].copy()
if not df_new.empty:
df_new = df_new[~df_new.id_site.isna()].copy()
df_new.sort_index(inplace=True)
df_new['auteur'] = normalize_auteur(df_new.auteur)
df_new['id_origine'] = df_new['id_origine'].astype(str)
df_new = filter_saisierror(df_new)
if not df_maj.empty:
df_maj['auteur'] = normalize_auteur(df_maj.auteur)
id_col = 'id_origine' if 'id_origine' in df_maj.columns else 'id_site'
df_maj['id_origine'] = df_maj[id_col].astype(str)
df_maj = filter_saisierror(df_maj)
if not df_replace['df'].empty:
df_replace['df']['auteur'] = normalize_auteur(df_replace['df']['auteur'])
df_replace['df']['id_origine'] = df_replace['df']['id_origine'].astype(str)
df_replace['df'] = filter_saisierror(df_replace['df'])
# for d in df.dtypes[df.dtypes == object].index:
# # df[d].replace([' '], [' '],regex=True,inplace=True)
# df[d] = df[d].str.normalize('NFKC')
DF = df_maj.copy()
Cnhab = DF.columns[DF.columns.str.startswith('n_hab')]
if not all(DF[[*Cnhab]].astype(float).sum(axis=1) == 100):
print( DF[ ~(DF[[*Cnhab]].astype(float).sum(axis=1) == 100) ] )
# DF.loc[ (~(DF[[*Cnhab]].sum(axis=1) == 100)),'n_hab1' ] = 20
if not df_new.empty:
new_site(df_new)
if not df_maj.empty:
maj_site(df_maj)
if not df_replace['df'].empty:
replace_site(
df = df_replace['df'],
cor = df_replace['cor']
)
exit()
################
## END SCRIPT ##
################
print(df)
while True:
res = input('Voulez-vous insérer le tableau de donnée ? (Y/N)\n')
if not res[0].upper() in ['Y','N']:
continue
else: break
if res[0].upper() == 'Y':
df_ins, ins = insert_site(df)
if ins:
insert_authordata(df_ins,id_data='id',tab_data='sites',colname_rsite='id_site', tab_auteur='r_sites_auteur')
df_ins, ins = insert_geom(df)
if ins:
insert_authordata(df_ins,id_data='id_site',tab_data='r_sites_geom',colname_rsite='id_geom_site', tab_auteur='r_geomsites_auteur')
df_ins, ins = insert_attrsPS(df)
if ins:
insert_authordata(df_ins,id_data='id_geom_site',tab_data='r_site_param',colname_rsite='id_siteparam', tab_auteur='r_siteparam_auteur')
df_ins, ins = insert_habPS(df)
if ins:
if df_ins.shape[0] > 4000:
df_ins1 = df_ins.iloc[:round(df_ins.shape[0]/2)].copy()
df_ins2 = df_ins.iloc[round(df_ins.shape[0]/2):].copy()
insert_cbPS(df_ins1)
insert_cbPS(df_ins2)
else:
insert_cbPS(df_ins)
insert_authordata(
df_ins[df_ins.columns.drop('code_hab')],id_data='id_geom_site',tab_data='r_site_habitat',
colname_rsite='id_sitehab', tab_auteur='r_sitehab_auteur')
# df_ins, ins = insert_legendCartoPS(df)%_embrous
# if ins:
# insert_authordata(df_ins,tab_data='r_site_legcarto',colname_rsite='id_sitecarto', tab_auteur='r_sitecarto_auteur')
sql = """
DELETE FROM sites.sites
WHERE id in (SELECT id_site FROM sites.r_sites_geom where id_lot = 20);
"""
with pycen.con.begin() as cnx:
cnx.execute(sql)