#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
# Name        : recup_ps.py
# Description :
# Copyright   : 2021, CEN38
# Author      : Colas Geier
# Version     : 1.0

# import re
import pandas as pd
import geopandas as gpd
from shapely import wkb
import numpy as np
from sys import exit
import pycen
# from pycen import bdd
# from sqlalchemy import create_engine
# from sqlalchemy.engine import URL
# from geoalchemy2 import Geometry


# Input database parameters
# user = 'cen_admin'
# pwd = '#CEN38@venir'
# adr = '192.168.0.189'
# base = 'bd-cen-38'
# schema = 'pelouse_seche'
# # schema = 'habitat'
# url = URL.create('postgresql+psycopg2',
#                  username=user,
#                  password=pwd,
#                  host=adr,
#                  database=base,
#                  )
# con = create_engine(url)
con = pycen.con_bdcen
con_ps = pycen.con

def get_pers():
    sql = ('SELECT t1.id id, t1.nom nom, t1.prenom prenom, t2.nom organisme FROM {sch}.{tab1} t1'
           ' JOIN {sch}.{tab2} t2 ON t1.id_organisme = t2.id ORDER BY id').format(sch='personnes', tab1='personne', tab2='organisme')
    df_pers = pd.read_sql(
        sql=sql,
        con=pycen.con)
    df_pers.replace([None], '', inplace=True)
    df_pers['auteur'] = df_pers.prenom + ' ' + df_pers.nom
    df_pers['auteur'] = df_pers['auteur'].str.strip()
    df_pers['organisme'] = df_pers['organisme'].str.strip()
    # NB: the locally built df_pers is discarded; the pycen helper result is
    # returned instead (kept as-is to preserve behaviour).
    return pycen.pers.get_auteur()

# def get_idgeomsite(lst_site):
#     '''
#     :lst_site: tuple.
#     '''
#     sql = ('SELECT id, id_site, date FROM {sch}.{tab} t0 '
#            'JOIN {sch}.type_milieu t1 WITH t0.id_type_milieu = t1.id '
#            'WHERE t1.nom = "Pelouses sèches" AND t0.id_site IN {lst} ORDER BY id').format(sch='sites', tab='r_sites_geom', lst=lst_site)
#     df = pd.read_sql(
#         sql=sql,
#         con=pycen.con)
#     return df


def format_legende(leg):
    dic = {
        # (currently unused)
    }
    df = pd.DataFrame({'leg': leg})
    df.replace(
        ['/', ' x ', 'X', ' x', 'fasciès', 'faciés', '  ', 'erigé', 'érigéé', 'Troêne', 'Troëne', 'brachypode', 'dominée', 'dominé', 'érigé_et_brachypode', 'cyperacées'],
        ['|', '|', '|', '|', 'faciès', 'faciès', ' ', 'érigé', 'érigé', 'Troène', 'Troène', 'Brachypode', 'dominé', 'dominée', 'érigé et brachypode', 'Cyperacées'],
        regex=True, inplace=True)
    return df

def ident_format_date(col):
    # Stub: date-format detection was never implemented.
    return

# "%d/%m/%Y"
|
|
def format_date(df):
|
|
df = df.copy()
|
|
|
|
check_date = [x.startswith('date') or x.endswith('date') for x in df.columns]
|
|
cols_date = df.columns[check_date]
|
|
if 'date' not in df.columns and not cols_date.empty:
|
|
# df[cols_date].replace({'/':None},inplace=True)
|
|
for col in cols_date:
|
|
df[col].replace({'/':None},inplace=True)
|
|
df[col] = pd.to_datetime(df[col])
|
|
# df.loc[df[col].notna(),col] = pd.to_datetime(df[df[col].notna()][col])
|
|
|
|
df['date'] = df[cols_date].max(axis=1)
|
|
|
|
if table in ['"PS_4MONTAGNESNE_CEN38_2014"','"PS_BIEVRE_CEN38_2014"',
|
|
'"PS_CHAMBARAN_CEN38_2013"']:
|
|
df.date = pd.to_datetime(df.date,format='%d/%m/%Y')
|
|
|
|
if table == '"PS_BELLEDONNE_CEN38_2014"':
|
|
df.date = pd.to_datetime(df.date,format='%d%b%Y')
|
|
|
|
if table == '"PS_BIEVRE_CEN38_2016"':
|
|
df.date = df.date.astype(str).replace('\.','',regex=True)
|
|
df.date = df.date.str.rsplit(' ',1).str[0]\
|
|
.replace(['avr','mai','juin','juil'],['apr','may','jun','jul'],regex=True)
|
|
df.date = pd.to_datetime(df.date,format='%d %b %Y')
|
|
|
|
if table == '"PS_SUD-GRENOBLOIS_CEN38_2009"':
|
|
# df.loc[df.date.str.len() < 6,'date'] = df.loc[df.date.str.len() < 6,'date'].astype(int)
|
|
df.loc[df.date.str.len() < 6,'date'] = '01/06/' + df.loc[df.date.str.len() < 6,'date'].astype(int).max().astype(str)
|
|
df.loc[df.date.str.len() > 6,'date'] = pd.to_datetime(df.loc[df.date.str.len() > 6,'date'])
|
|
|
|
if 'annee' in df.columns and 'date' not in df.columns:
|
|
# Si pas de date mais année précisée
|
|
df['annee'] = df['annee'].astype(int)
|
|
df['date'] = df['annee'].astype(str) + '-06-01'
|
|
df.date = pd.to_datetime(df.date,format='%Y-%m-%d')
|
|
elif any(df.date.astype(str).str.len() <= 4):
|
|
# Si dates non-homogènes, cohexistance date&année
|
|
d = df.loc[df.date.astype(str).str.len() <= 4,'date'].unique()
|
|
if None in d:
|
|
df.loc[df.date.astype(str).str.len() <= 4,'date'] = df.loc[df.date.astype(str).str.len() <= 4,'annee'].astype(str) + '-06-01'
|
|
else:
|
|
df.loc[df.date.astype(str).str.len() <= 4,'date'] = df.loc[df.date.astype(str).str.len() <= 4,'date'].astype(str) + '-06-01'
|
|
df.date = pd.to_datetime(df.date,format='%Y-%m-%d')
|
|
else:
|
|
df.date = pd.to_datetime(df.date)
|
|
|
|
return df
|
|
|
|
|
|
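# Illustrative reading of format_date() (fabricated values, assuming the
# module-level `table` is not one of the special-cased layers): a frame with
# only a year gets a fabricated mid-year date, e.g.
#   format_date(pd.DataFrame({'annee': [2014]})).date[0] == pd.Timestamp('2014-06-01')
# Rows whose 'date' is a bare 4-character year are completed to 'YYYY-06-01'
# the same way before parsing.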
def normalize_auteur(lst_auteur):
    '''
    lst_auteur : pd.Series
    '''
    dic = {
        'Lise Duconte': 'DUCONTE Lise',
        'COSQUER Mélanie (AVENIR)': 'COSQUER Mélanie',
        'CHABERT Chloé (AVENIR)': 'CHABERT Chloé',
        'CHABERT Chloé (AVNEIR)': 'CHABERT Chloé',
        'PACHE Gilles (CBNA)': 'PACHE Gilles',
        'Gilles PACHE': 'PACHE Gilles',
        'JAMEAU Laura (AVENIR)': 'JAMEAU Laura',
        'MARCIAU Roger (AVENIR)': 'MARCIAU Roger',
        'Roger MARCIAU': 'MARCIAU Roger',
        'Aude Massa': 'MASSA Aude',
        'Aude MASSA': 'MASSA Aude',
        'Bilkisse ABOUDOU AVENIR': 'ABOUDOU Bilkisse',
        'Romain BARTHELD': 'BARTHELD Romain',
        'VILLARET J.C.': 'VILLARET Jean-charles',
        'Jean-Charles VILLARET': 'VILLARET Jean-charles',
        'Fabien ANTHELME': 'ANTHELME Fabien',
        'Jean-Christophe GATTUS': 'GATTUS Jean-christophe',
        'Alexis MIKOLAJCZAK': 'MIKOLAJCZAK Alexis',
        'Jonathan MALINEAU': 'MALINEAU Jonathan',
        'Aurélien DAUTREY': 'DAUTREY Aurélien',
        'Etienne MARY': 'MARY Etienne',
        'Alix GUEDOU': 'GUEDOU Alix',
        'BEGUIN Lucile': 'BÉGUIN Lucile',
    }
    lst_aut = lst_auteur.copy()
    lst_aut.replace(' & | - ', ',', regex=True, inplace=True)

    if any(lst_aut.str.contains(',')):
        lst_aut = lst_aut.str.split(',', expand=True).stack().str.strip().droplevel(-1)
    lst_aut.replace(dic, inplace=True)

    df_aut = lst_aut.str.rsplit(' ', n=1, expand=True)
    if df_aut.shape[1] == 2:
        df_aut[1] = df_aut[1].str[0].str.upper() + df_aut[1].str[1:].str.lower()
        df_aut[0] = df_aut[0].str.upper()
        df_aut[2] = df_aut[0]
        df_aut.loc[~df_aut[1].isna(), 2] = df_aut.loc[~df_aut[1].isna(), 0] + ' ' + df_aut.loc[~df_aut[1].isna(), 1]
        lst_aut = df_aut[2].copy()
    else:
        lst_aut = df_aut[0].copy()
    aut = lst_aut.unique()
    res = [x for x in aut if x not in [*get_pers().nom_prenom]]
    if res:
        print('The following authors are missing from the contacts database: %s' % res)
        new = new_auteur(res)
        lst_aut.replace([*new['aut'].keys()], [*new['aut'].values()], inplace=True)
    lst_aut = lst_aut.groupby(axis=0, level=0).apply(lambda x: "%s" % ','.join(x))
    return lst_aut

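# Illustrative example (fabricated values): multi-author strings are split on
# ',', ' & ' or ' - ', passed through the alias dictionary, re-cased as
# 'LASTNAME Firstname', then re-joined per input row, e.g.
#   'Gilles PACHE & Aude Massa'  ->  'PACHE Gilles,MASSA Aude'
# Any name still missing from the contacts database goes through new_auteur().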
def new_auteur(new):
    '''
    new : list
    '''
    df = pd.DataFrame({'auteur': new})
    dic = {'aut': {}, 'org': []}

    # For each author
    for pers in df.auteur:
        print(pers)

        while True:
            newaut = input("Insert a new author? (Y/N)\n")
            # Guard against empty input before indexing the first character.
            if not newaut or newaut[0].upper() not in ['Y', 'N']:
                continue
            else:
                break
        # Case: no new author inserted
        if newaut[0].upper() == 'N':
            while True:
                rep = input("Replace the author with an existing one? (Y/N)\n")
                if not rep or rep[0].upper() not in ['Y', 'N']:
                    continue
                else:
                    break
            if rep[0].upper() == 'Y':
                while True:
                    org = pycen.pers.get_organisme().sort_index().to_dict()['nom']
                    reporg = int(input("Select the organisation number:\n %s \n" % org))
                    if reporg not in [*org.keys()]:
                        continue
                    else:
                        break
                while True:
                    aut = get_pers().sort_values('id')
                    aut = aut[aut.id_organisme == reporg]
                    aut = aut.nom_prenom.to_dict()
                    repaut = int(input("Select the author number:\n %s \n" % aut))
                    if repaut not in [*aut.keys()]:
                        continue
                    else:
                        break
                dic['aut'] = {**dic['aut'], pers: aut[repaut]}
                dic['org'] = [*dic['org'], org[reporg]]
        # Otherwise: insert a new author
        else:
            while True:
                print("Author: %s" % pers)
                rep = input("Authors must use the format LASTNAME Firstname.\nRename the author? (Y/N)\n")
                if not rep or rep[0].upper() not in ['Y', 'N']:
                    continue
                elif rep[0].upper() == 'Y':
                    repaut = input("Enter a new name:\n")
                    dic['aut'] = {**dic['aut'], pers: repaut}
                    pers = repaut
                    break
                else:
                    dic['aut'] = {**dic['aut'], pers: pers}
                    break

            while True:
                org = pycen.pers.get_organisme().sort_index().to_dict()['nom']
                org = {-1: 'Other', **org}
                reporg = int(input("Select the organisation number:\n %s \n" % org))
                if reporg not in [*org.keys()]:
                    continue
                elif reporg == -1:
                    reporg = input("Enter a new organisation:\n")
                    dic['org'] = [*dic['org'], reporg]
                    insert_newOrganisme(reporg)
                    org = pycen.pers.get_organisme()
                    idorg = org[org.nom == reporg].index[0]
                    insert_newAuteur(pers, idorg)
                    break
                else:
                    dic['org'] = [*dic['org'], org[reporg]]
                    insert_newAuteur(pers, reporg)
                    break

    return dic

def insert_newAuteur(aut, idorg):
    aut = pd.Series(aut)
    idorg = pd.Series(idorg)
    df = pd.DataFrame({'aut': aut, 'id_organisme': idorg})
    # print(df)
    tmp = df.aut.str.rsplit(' ', n=1, expand=True)
    if tmp.shape[1] == 1:
        df[['nom']] = tmp
    elif tmp.shape[1] == 2:
        df[['nom', 'prenom']] = tmp
    else:
        raise ValueError('LASTNAME Firstname has an unexpected length:\n%s' % tmp)
    del df['aut']
    try:
        df.to_sql(name='personne', con=pycen.con, schema=pycen.pers.schema,
                  if_exists='append', index=False)
        print("New author inserted: '%s'" % aut)
    except Exception as e:
        print("ERROR: could not insert the new author:\n '%s'" % aut)
        print(e)

def insert_newOrganisme(nom):
    nom = pd.Series(nom)
    df = pd.DataFrame({'nom': nom})
    try:
        df.to_sql(name='organisme', con=pycen.con, schema=pycen.pers.schema,
                  if_exists='append', index=False)
        print("New organisation inserted: '%s'" % nom)
    except Exception as e:
        print("ERROR: could not insert the new organisation:\n '%s'" % nom)
        print(e)

def normalize_colname(df):
    df.columns = df.columns.str.lower()
    df.columns = df.columns.str.replace('__', '_', regex=True)
    if df.columns.str.contains('cb_hab').any():
        df.columns = df.columns.str.replace('cb_hab', 'code_hab', regex=True)
    if df.columns.str.contains('cod_hab').any():
        df.columns = df.columns.str.replace('cod_hab', 'code_hab', regex=True)
    return df.rename(columns={
        'n_polygone': 'ident',
        # 'id_site':'ident',
        'code-ug': 'ident',
        'ident_': 'ident',
        'id': 'ident',
        'idfinal': 'id_origine',
        'site_code': 'id_site',
        'date_': 'date',
        'obs': 'auteur',
        'structur': 'structure',
        'pratiq_': 'pratiques',
        'pratique': 'pratiques',
        's_p_brous': '%_embrous',
        's_p_brouss': '%_embrous',
        'taux_embrou': '%_embrous',
        'tx_embrous': '%_embrous',
        'niv__embro': 'niv_embrous',
        'niv_embro': 'niv_embrous',
        'niv_embrou': 'niv_embrous',
        'niv_emb': 'niv_embrous',
        'embroussaillement': 'niv_embrous',
        'embrouss': 'niv_embrous',
        'taux_recvmt': '%_recouvmnt',
        'recouvrement': 'recouvmnt',
        'recouvreme': 'recouvmnt',
        'recouvr': 'recouvmnt',
        'recouv': 'recouvmnt',
        'recouvr_': 'recouvmnt',
        'remarque': 'remarques',
        'remarq_': 'remarques',
        'rmq_intere': 'remarques',
        'legendes': 'legende',
        'legend': 'legende',
        'sources': 'source',
        'surf': 'surface',
        'geometry': 'geom',
    })

def get_id_auteur(lst_author):
    sch = 'personnes'
    tabP = 'personne'
    tabO = 'organisme'
    # sql = """
    # WITH str as
    #     (SELECT id FROM {sch}.{tabO}
    #      WHERE lower(nom) = lower(%(structure)s) OR lower(abbrev) = lower(%(structure)s)),
    # tmp as
    #     (SELECT id, TRIM(CONCAT(nom,' ',prenom)) as auteur, id_organisme FROM {sch}.{tabP})
    # SELECT tmp.* FROM tmp,str WHERE auteur IN %(lst_author)s
    # AND id_organisme IN (str.id) """.format(sch=sch,tabP=tabP,tabO=tabO)

    sql = """
    WITH tmp AS (
        SELECT
            pers.id,
            TRIM(CONCAT(pers.nom,' ',pers.prenom)) AS auteur,
            pers.id_organisme,
            orga.nom organisme,
            orga.abbrev orga_abbrev
        FROM {sch}.{tab_auth} pers JOIN {sch}.{tab_orga} orga ON pers.id_organisme = orga.id
    )
    SELECT * FROM tmp
    WHERE (auteur,organisme) IN %(lst_author)s
    OR (auteur,orga_abbrev) IN %(lst_author)s
    """.format(sch=sch, tab_auth=tabP, tab_orga=tabO)

    X = lst_author.to_dict('index')
    XX = [tuple(X[x].values()) for x in X]

    df = pd.read_sql(
        sql=sql,
        con=pycen.con,
        params={
            'lst_author': tuple(XX)
        })
    return df

def existing_ps(df):
    gdf = pycen.ps.get_sitesGeom()
    gdf['area'] = gdf.area
    # ovl = gpd.overlay(df,gdf)
    ovl = gpd.overlay(df, gdf, how='intersection')  # 'intersection', 'union', 'identity', 'symmetric_difference' or 'difference'
    ovl['ovl_area'] = ovl.area
    ovl['ovl%'] = ovl['ovl_area'] * 100 / ovl['area']

    # NB: the overlay result is computed but never returned; df comes back unchanged.
    return df

def define_siteOrg(df):
    gdf = pycen.get_districtNat()[['abrev', 'geom']]
    tmp = df[['geom']].copy().set_geometry('geom')
    tmp.reset_index(0, inplace=True)
    tmp = gpd.overlay(tmp, gdf, how='intersection')
    tmp['area'] = tmp.area
    tmp.set_index('area', inplace=True)
    tmp.sort_index(inplace=True)
    tmp.drop_duplicates(subset=['index'], keep='last', inplace=True)
    tmp.reset_index(drop=True, inplace=True)
    del tmp['geometry']
    df = df.merge(tmp, how='left', left_index=True, right_on='index')
    del df['index']
    df.rename(columns={'abrev': 'org'}, inplace=True)
    return df

# tmp[tmp.id_site==334]


# 'touches', None, 'contains', 'overlaps', 'contains_properly', 'within', 'intersects', 'crosses', 'covers', 'covered_by'
def ident_newsite(df, rcvmt=10):
    '''
    Flags new geometries, or geometry updates, based on overlap:
    update (1:1) / replacement (1:n) / new (1:0).
    '''
    maj = []
    if 'id_site' in df.columns:
        maj = df[~df.id_site.isna()]
        df = df[df.id_site.isna()]
    if df.empty:
        return df, df, maj
    # Fetch the pelouses_seches layer from the database.
    sql = 'SELECT site_code,geom FROM ps.v_pelouseseches'
    v_ps = gpd.read_postgis(sql, pycen.con)
    # Flag overlaps between new and existing sites
    df_inters = gpd.sjoin(df, v_ps, how='left')
    del df_inters['index_right']
    # news1 = df_inters[df_inters.site_code.isna()].id_origine
    lst_old_site = df_inters.site_code.unique()
    v_ps = v_ps[v_ps.site_code.isin(lst_old_site)].copy()
    v_ps.loc[:, 'surf'] = v_ps.area
    # Re-define the geometry columns
    # if not isinstance(df,gpd.GeoDataFrame):
    #     df.set_geometry('geom', inplace=True, crs=2154)
    # if not isinstance(v_ps,gpd.GeoDataFrame):
    #     v_ps.set_geometry('geom', inplace=True, crs=2154)
    # Compute the overlap ratio
    tmp = gpd.overlay(v_ps, df[['id_origine', 'geom']], how='intersection', make_valid=True, keep_geom_type=False)
    if isinstance(tmp, gpd.GeoDataFrame) and tmp.geometry.name != 'geom':
        tmp.rename_geometry('geom', inplace=True)
    tmp['perc_rcvmt'] = 100 * tmp.area / tmp['surf']
    # Site classification: update (1:1) / replacement (1:n) / new (1:0)
    # Overlap threshold = 10%
    tmpp = tmp[tmp.perc_rcvmt > rcvmt]
    site_maj = tmpp[
        (~tmpp.id_origine.duplicated(keep=False)) &
        (~tmpp.site_code.duplicated(keep=False))
    ].id_origine
    # site_repl = tmp[(tmp.perc_rcvmt > 10)&(tmp.id_origine.duplicated(keep=False))].id_origine.unique()
    # news2 = tmp[(tmp.perc_rcvmt <= 10)&(~tmp.id_origine.isin([*site_maj,*site_repl]))].id_origine

    # Isolate the sites to update
    # maj2 = gpd.sjoin(
    #     df[df.id_origine.isin(site_maj)],
    #     v_ps.loc[v_ps.site_code.isin(tmpp.site_code),['site_code','geom']]) \
    #     .drop(columns=['index_right'])
    maj2 = df[df.id_origine.isin(site_maj)].merge(
        tmpp[['site_code', 'id_origine']], on=['id_origine'])
    maj2['id_site'] = maj2['site_code']
    del maj2['site_code']
    # `maj` may be a list or a DataFrame here; len() covers both cases
    # (a bare `if maj:` raises ValueError on a DataFrame).
    if len(maj):
        df_maj = pd.concat([maj, maj2])
    else:
        df_maj = maj2
    # Isolate the new sites
    orig_maj_all = df_maj.id_origine
    id_maj_all = df_maj.id_site
    df_new = df[~df.id_origine.isin(orig_maj_all)]
    # Isolate the new_site / old_site_toClose correspondences
    df_cor = tmpp.loc[
        (~tmpp.site_code.isin(id_maj_all)) & (~tmpp.id_origine.isin(orig_maj_all)),
        ['site_code', 'id_origine']
    ]
    df_cor.rename(columns={'site_code': 'id_site_old'}, inplace=True)

    return df_new, df_cor, df_maj

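# Rough reading of ident_newsite() with the default 10% threshold:
#   - a new polygon covering >10% of exactly one old site (and vice versa)
#     -> update, routed to df_maj;
#   - overlaps >10% that are not one-to-one -> old/new correspondence rows in
#     df_cor (the old site is meant to be closed and replaced);
#   - everything else -> candidate new site in df_new.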
def define_siteName(df, rcvmt=10):
    '''
    Return df_new, df_replace, df_maj
    '''
    dfnew, dfcor, df_maj = ident_newsite(df, rcvmt)
    df2 = df[~df.index.isin(df_maj.index)].copy()
    if not df2.empty:
        df2 = define_siteOrg(dfnew.copy())
        df2['dept'] = '38'
        # df2['num'] = None
        if 'org' in df2.columns:
            for org in df2.org.unique():
                tmp = pd.concat([
                    pycen.sites.get_sitesInfos(milieu=pycen.ps.milieu),
                    pycen.sites.get_sitesInfos(milieu='Trame thermophiles')
                ])
                # tmp = pycen.sites.get_sitesInfos(milieu=pycen.zh().typ_milieux.nom[0])
                tmp = tmp[tmp.org == org]
                if not tmp.empty:
                    num = tmp.num.astype(int).max()
                    seq = range(num + 1, num + df2.loc[df2.org == org].shape[0] + 1)
                    df2.loc[df2.org == org, ['num']] = list(seq)
                else:
                    seq = range(1, df2.loc[df2.org == org].shape[0] + 1)
                    df2.loc[df2.org == org, ['num']] = list(seq)
        else:
            tmp = pycen.sites.get_sitesInfos(milieu=pycen.ps.milieu)

        # Zero-pad the site number to four digits.
        df2.loc[~df2.num.isna(), 'num'] = df2[~df2.num.isna()].num.astype(int).astype(str)
        df2.loc[df2.num.str.len() == 1, 'num'] = '000' + df2.loc[df2.num.str.len() == 1, 'num']
        df2.loc[df2.num.str.len() == 2, 'num'] = '00' + df2.loc[df2.num.str.len() == 2, 'num']
        df2.loc[df2.num.str.len() == 3, 'num'] = '0' + df2.loc[df2.num.str.len() == 3, 'num']
        df2['id_site'] = df2['dept'] + df2['org'] + df2['num']

        # Join id_site_new / id_site_old
        df_new = df2[~df2.id_origine.isin(dfcor.id_origine)]
        df_rep = df2[df2.id_origine.isin(dfcor.id_origine)]
        df_cor = pd.merge(dfcor, df2[['id_origine', 'id_site']], on='id_origine')

        df_cor.drop(columns=['id_origine'], inplace=True)
        df_cor.rename(columns={'id_site': 'id_site_new'}, inplace=True)
        df_replace = {
            'df': df_rep,
            'cor': df_cor
        }
    else:
        df_new = dfnew
        df_replace = {'df': dfcor}
    return df_new, df_replace, df_maj

def normalize_formatIdsite(id_site):
    import re
    df = pd.DataFrame({'id_site': id_site})
    df['dept'] = '38'
    df[['org', 'num', 'other']] = [re.split(r'(\d+)', s) for s in [*df.id_site]]
    del df['other']
    df.loc[df.num.str.len() == 1, 'num'] = '000' + df.loc[df.num.str.len() == 1, 'num']
    df.loc[df.num.str.len() == 2, 'num'] = '00' + df.loc[df.num.str.len() == 2, 'num']
    df.loc[df.num.str.len() == 3, 'num'] = '0' + df.loc[df.num.str.len() == 3, 'num']
    df['id_site'] = df['dept'] + df['org'] + df['num']
    return df.id_site

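# Illustrative example (fabricated id): 'AV12' splits on the digit group into
# org='AV' and num='12'; the number is zero-padded to four digits and the
# department prefix is prepended, yielding '38AV0012'.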
def normalize_paturage(pat):
    return pat.str.lower().replace(
        ['paturage', 'ânes', 'vaches', 'moutons', 'cheval', 'chevaux', 'poneys', 'sangliers', 'chèvres'],
        ['pâturage', 'asin', 'bovin', 'ovin', 'équin', 'équin', 'équin', None, 'caprin'], regex=True)

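# Illustrative example (fabricated value): 'Paturage moutons' is lower-cased
# and regex-mapped to 'pâturage ovin'.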
def normalize_pratiques(prat):
    dic = {
        'pâturage equin': 'pâturage équin',
        'pâturage bovin': 'pâturage bovin',
        'paturâge cervidé': 'pâturage cervidé',
        'paturâge anin': 'pâturage asin',
        'broyer': 'broyage',
        'truffier': 'broyage',
        'plantation': 'broyage',
        'culture': 'labourage',
        'abandon recent': 'abandon',
        'abandon verger': 'abandon',
        'remblais et feu': 'incertain',
        'remblais, feu': 'incertain',
        'caprin': 'pâturage caprin',
        'ovin': 'pâturage ovin',
        'équin': 'pâturage équin',
        'equin': 'pâturage équin',
        'bovin': 'pâturage bovin',
        'paturage': 'pâturage',
        'paturâge': 'pâturage',
        'pâturage difficile': 'pâturage',
        'f': 'fauche',
        'p': 'pâturage',
        'i': 'incertain',
        'a': 'abandon',
        'pv': 'pâturage',
        'b': 'broyage',
        'pc': 'pâturage caprin',
        'po': 'pâturage ovin',
        'pe': 'pâturage équin',
        'pb': 'pâturage bovin',
        '-': 'N.D',
        '0': 'N.D',  # deduplicated: '0' appeared twice in the original mapping
        'ND': 'N.D',
        'n.d': 'N.D',
        'chemin carrossable non goudronné': 'N.D',
        'plantation de châtaigniers': 'broyage',
    }
    p = prat.replace(['.et', '/'], [',', ', '], regex=True).copy()
    p = p.str.lower()
    p = normalize_paturage(p)
    p = p.replace(dic)
    p.fillna('N.D', inplace=True)
    p[p.str.contains(r'\?| ou ', na=False)] = 'incertain'
    pp = p.str.split(',', expand=True)
    pp.columns = 'p' + pp.columns.astype(str)
    for c in pp.columns:
        pp[c] = pp[c].str.strip()
        pp[c] = pp[c].replace(dic)
    return pp

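# Illustrative example (fabricated value): 'F/P' becomes 'f, p', is split on
# ',' and mapped through dic, giving columns p0='fauche', p1='pâturage'.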
def select_ID(df, sch, tab):
    df = df.copy().dropna(axis=1, how='any')
    pk = pycen.tools.__get_pkey__(pycen.con, tab, sch)
    if pk['constrained_columns']:
        pkey = pk['constrained_columns']
    else:
        pkey = []
    sql = 'SELECT %s FROM %s.%s' % (','.join([*pkey, *df.columns]), sch, tab)
    df.replace("'", "''", regex=True, inplace=True)
    if 'geom' in df.columns:
        del df['geom']
    if 'date' in df.columns:
        df['date'] = df['date'].astype(str)
    elif 'date_deb' in df.columns:
        df['date_deb'] = df['date_deb'].astype(str)
    if 'id_cb' in df.columns:
        df['id_cb'] = df['id_cb'].astype(str)
    column = '(%s)' % ','.join(df.columns)
    df_tuple = tuple(df.itertuples(index=False, name=None))
    # The WHERE clause is built by string interpolation (not bind parameters),
    # hence the clean-ups below.
    sql = sql + ' WHERE %s IN %s' % (column, df_tuple)
    if 'nan' in sql: sql = sql.replace('nan', 'NULL')
    if ',)' in sql: sql = sql.replace(',)', ')')
    if '"' in sql: sql = sql.replace('"', "'")

    # print(sql)
    return pd.read_sql(
        sql=sql,
        con=pycen.con)

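# Shape of the generated statement (fabricated values, assuming pkey=['id']):
#   SELECT id,id_site,date FROM sites.r_sites_geom
#   WHERE (id_site,date) IN (('38AV0012', '2014-06-01'), ...)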
def insertAutorAttrs(df, colname_rsite, sch, tab):
    df = df.copy()
    df = df[['id', 'auteur', 'structure']].copy()
    # structure = [df.structure.unique()]
    # lst_author = df[['auteur','structure']].drop_duplicates()
    if any(df.auteur.str.contains(',')):
        tmp = df.auteur.str.split(',', expand=True)\
            .stack().droplevel(-1)\
            .str.strip()
        tmp = pd.DataFrame({'auteur': tmp})
        df = pd.merge(df[['id', 'structure']], tmp, how='left', left_index=True, right_index=True)
    lst_author = df[['auteur', 'structure']]\
        .drop_duplicates()\
        .reset_index(drop=True)
    df_autor = get_id_auteur(lst_author=lst_author)
    for r, row in df_autor.iterrows():
        df.loc[
            (df.auteur.str.contains(row.auteur)) &
            ((df.structure == row.organisme) | (df.structure == row.orga_abbrev)),
            'auteur'] = row.id
    del df['structure']
    # df.replace([*df_autor.auteur],[*df_autor.id], inplace=True)
    df.columns = [colname_rsite, 'id_auteur']
    df.drop_duplicates(inplace=True)
    try:
        df.to_sql(
            name=tab,
            con=pycen.con,
            schema=sch,
            index=False,
            if_exists='append',
            method='multi'
        )
        print('Insert author sites data OK!')
        return 'Insert author sites data OK!'
    except Exception as e:
        print('ERROR: could not insert new author links!')
        print(e)

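# Illustrative flow (fabricated values): a row with
# auteur='PACHE Gilles,MASSA Aude' is exploded into one row per author, each
# author/structure pair is resolved to its id via get_id_auteur(), and the
# resulting (colname_rsite, id_auteur) pairs are appended to `tab`.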
def _altertype(df1, df2, con=None, table_name=None, schema=None):
    if con and table_name and schema:
        tp = pycen.tools.__get_dtype__(con, table_name, schema)  # fetched but currently unused
    same_col = df1.columns[df1.columns.isin(df2.columns)]
    if 'date' in df2.columns:
        df2['date'] = df2['date'].astype(str)
    if 'geom' in same_col:
        same_col = same_col.drop('geom')
    for col in same_col:
        if df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == int:
            if df1.shape[0] == df2.shape[0]:
                df1[col] = df1[col].astype(int)
            else:
                df2[col] = df2[col].astype(float)
        elif df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == float:
            df1[col] = df1[col].astype(float)
        elif df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == str:
            df1[col] = df1[col].astype(str)
        elif df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == object:
            df1[col] = df1[col].astype(str)
    return [df1, df2]

def insert_authordata(df_ins, tab_data, id_data, colname_rsite, tab_auteur):
    df_ins = df_ins.copy()
    col_auth = ['auteur', 'structure']
    if 'date' in df_ins.columns:
        df_ins['date'] = df_ins['date'].astype(str)
    elif 'date_deb' in df_ins.columns:
        df_ins['date_deb'] = df_ins['date_deb'].astype(str)

    tab_sit = pycen.con.dialect.get_table_names(pycen.con, pycen.sites.schema)
    tab_ps = pycen.con.dialect.get_table_names(pycen.con, pycen.ps.schema)
    if tab_data in tab_sit:
        sch = pycen.sites.schema
    elif tab_data in tab_ps:
        sch = pycen.ps.schema

    # ids = select_ID(df_ins[df_ins.columns.drop('auteur')],sch,tab_data)
    ids = select_ID(df_ins[[id_data]], sch, tab_data)
    if 'id_site' in ids.columns:
        if tab_data == 'r_sites_geom':
            col = 'id'
        else:
            col = 'id_geom_site'
        tp = ids.groupby(['id_site'])[col].max()
        ids = ids[ids[col].isin(tp)]
    # join_on = [id_data]
    same_col = df_ins.columns[df_ins.columns.isin(ids.columns)]
    if len(same_col) > 1 and ids[same_col].duplicated().any():
        raise ValueError('Duplicate detected in the [df_ins VS ids] join!')
    if 'geom' in same_col:
        same_col = same_col.drop('geom')
    join_on = pd.Series([*same_col, id_data]).unique()
    # if df_ins.shape[0] != ids.shape[0]:
    #     join_on = same_col
    if 'date' in ids.columns:
        ids['date'] = ids['date'].astype(str)
    df_ins, ids = _altertype(df_ins, ids)

    if tab_data != 'sites':
        # df_ins = df_ins[[*same_col,*col_auth]].merge(ids, on=[*same_col], how='left')  # TO REVIEW
        df_ins = df_ins[[*same_col, *col_auth]].merge(ids, on=[*join_on], how='left')  # TO REVIEW
        # df_ins = df_ins[[*same_col,'auteur']].merge(ids, on=['id_site'], how='left')  # TO REVIEW
    insertAutorAttrs(df=df_ins, colname_rsite=colname_rsite, sch=sch, tab=tab_auteur)

def insert_site(df):
    df = df.copy()
    if 'nom' not in df.columns:
        df['nom'] = 'Inconnu'
    if 'type_site' not in df.columns:
        df['type_site'] = 'N.D.'
    if 'type_milieu' not in df.columns:
        df['type_milieu'] = 'Pelouses sèches'
    # if 'remarques' not in df.columns:
    #     df['remarques'] = None

    df = df[['id_site', 'nom', 'date', 'auteur', 'type_site', 'type_milieu',
             'structure'
             # 'remarques'
             ]]
    drop_col = ['auteur', 'structure']
    typs = pycen.sites._get_typ_site()
    typm = pycen.sites._get_typ_milieux()

    for t in df.type_milieu.unique():
        df.loc[df.type_milieu == t, 'id_type_milieu'] = typm.loc[typm.nom == t, 'id'].values[0]
    for s in df.type_site.unique():
        df.loc[df.type_site == s, 'id_type_site'] = typs.loc[typs.nom == s, 'id'].values[0]

    df['id_type_milieu'] = df['id_type_milieu'].astype(int)
    # df['id_type_site'] = df['id_type_site'].astype(str)

    df['dept'] = df.id_site.str[:2]
    df['org'] = df.id_site.str[2:-4]
    df['num'] = df.id_site.str[-4:]
    df.rename(columns={'date': 'date_deb', 'id_site': 'id'}, inplace=True)

    columns = [col['name'] for col in pycen.sites.columns_sitetab] + drop_col
    c = df.columns[df.columns.isin(columns)]
    df = df[c]
    print(df)

    try:
        df[df.columns.drop(drop_col)].to_sql(
            name='sites',
            con=pycen.con,
            schema=pycen.sites.schema,
            if_exists='append',
            index=False,
            method='multi'
        )
        print('Insert new sites OK!')
        return [df, True]
    except Exception as e:
        print(e)
        print('PS: could not insert new sites!')
        return [df, False]

def check_org_lot(lot):
    """
    :lot: list. Batch labels ("lots") to check.

    Returns
    -------
    DataFrame of the id_lot values matching `lot`.
    """
    tab_lot = pycen.sites._get_lots()
    not_intab = [l for l in lot if l not in [*tab_lot.libelle]]
    if not_intab:
        df = pd.DataFrame({'libelle': not_intab})
        df.to_sql(
            name='lots',
            con=pycen.con,
            schema='sites',
            index=False,
            if_exists='append',
            method='multi',
        )
        tab_lot = pycen.sites._get_lots()
    tab_lot = tab_lot[tab_lot.libelle.isin(lot)]
    return tab_lot[['id_lot', 'libelle']]

def insert_geom(df):
    drop_col = ['auteur', 'structure']
    dic = {'remarques': 'rmq_interet_patri', 'legende': 'rmq_usage_process'}
    c = ['id_site', 'geom', 'date', 'remarques', 'legende', 'table_org', 'id_origine', *drop_col]
    cc = df.columns[df.columns.isin(c)]
    df = df[cc].copy()
    df.set_geometry('geom', inplace=True)

    colindict = [*df.columns[df.columns.isin(dic.keys())]]
    if colindict:
        df.rename(
            columns={'remarques': 'rmq_interet_patri', 'legende': 'rmq_usage_process'},
            inplace=True,
        )
    if 'table_org' in df.columns:
        tab = [*df.table_org.unique()]
        lots = check_org_lot(tab)
        df = pd.merge(df, lots, left_on='table_org', right_on='libelle', how='left')
        del df['table_org']
        del df['libelle']

    try:
        df[df.columns.drop(drop_col)].to_postgis(
            name='r_sites_geom',
            con=pycen.con,
            schema=pycen.sites.schema,
            if_exists='append',
            index=False,
            # geom_col='geom'
        )
        print('Insert new geometries OK!')
    except Exception as e:
        print(e)
        print('PS: could not insert new geometries!')
        return [df, False]

    return [df, True]

def get_idGeomSite(lst_site, ps_statut='actif'):
    '''
    :lst_site: list or pd.Series.
    :ps_statut: str. ['actif', 'history', 'all'].
        Status of the sites to fetch:
        'actif'   : date_fin IS NULL
        'history' : has a date_fin
        'all'     : every site
    '''
    id_geom = pycen.ps.get_sitesGeom(
        id_site=[*lst_site], statut=ps_statut)
    # Keep only the most recent geometry per site.
    tp = id_geom.groupby('id_site').id.max()
    id_geom = id_geom[id_geom.id.isin(tp)]
    if not id_geom.empty:
        id_geom = id_geom[['id', 'id_site']]
        id_geom.columns = ['id_geom_site', 'id_site']
    return id_geom

def insert_attrsPS(df, ps_statut='actif'):
    params = []
    drop_col = ['auteur', 'structure']

    for p in ['%_embrous', 'niv_embrous', '%_recouvmnt', 'recouvmnt']:
        if p in df.columns:
            params = [*params, p]
    c = ['id_site', 'date', 'pratiques', *params, *drop_col]

    for p in ['statut', 'pratiques']:
        if p in df.columns:
            c = [*c, p]
    cc = df.columns[df.columns.isin(c)]
    df = df[cc].copy()

    if 'pratiques' in df.columns and not all(df.pratiques.isna()):
        df = pd.concat([df, normalize_pratiques(df['pratiques'])], axis='columns')
        del df['pratiques']

    for col in params:
        df[col] = df[col].astype(str).replace([r'\.0'], [''], regex=True)

    if 'niv_embrous' in df.columns:
        df.niv_embrous.replace(['nan'], ['N.D'], inplace=True)

    if 'recouvmnt' in df.columns:
        df.recouvmnt.replace(['nan', 'None', '0'], ['N.D', 'N.D', 'N.D'], inplace=True)

    if 'statut' in df.columns:
        df.statut.replace(['ND'], ['N.D'], inplace=True)
    # for t in df.dtypes[df.dtypes == int].index:
    #     df.loc[~df[t].isna(),t] = df.loc[~df[t].isna(),t].astype(int).astype(str)
    taux = False
    if '%_embrous' in df.columns:
        taux = True
        df.niv_embrous = df.niv_embrous + ';' + df['%_embrous']
        del df['%_embrous']
    if '%_recouvmnt' in df.columns:
        taux = True
        df.recouvmnt = df.recouvmnt + ';' + df['%_recouvmnt']
        del df['%_recouvmnt']
    df.set_index(['id_site', 'date', *drop_col], inplace=True)
    df = df.stack().reset_index(-1)
    df.columns = ['type', 'param']
    df.loc[df.type.str.len() < 4, 'type'] = 'pratique'
    if taux:
        df[['param', 'taux']] = df.param.str.split(';', expand=True)
        df.taux.replace(['nan', 'None'], [None, None], inplace=True)

    df.type.replace({'niv_embrous': 'embroussaillement', 'recouvmnt': 'recouvrement'}, inplace=True)
    df.param.replace({'ND': 'N.D'}, inplace=True)
    df.reset_index(inplace=True, drop=False)

    param = pycen.ps.get_param()
    typ_p = pycen.ps._get_table(con=pycen.con, schema=pycen.ps.schema, table='type_param')
    for t in df.type.unique():
        tmp = param[param.type == t]
        df.loc[df.type == t, 'param'] = df.loc[df.type == t, 'param'].replace([*tmp.nom], [*tmp.id])
        df.loc[df.type == t, 'param'] = df.loc[df.type == t, 'param'].replace([*tmp.description], [*tmp.id])
        df.loc[df.type == t, 'param'] = df.loc[df.type == t, 'param'].replace([*tmp.description.str.lower()], [*tmp.id])
    df['type'].replace([*typ_p.nom], [*typ_p.id], inplace=True)
    df = df.merge(get_idGeomSite(df.id_site, ps_statut=ps_statut), on=['id_site'], how='left')
    # del df['id_site']  # id_site is stored in the table since 2022-07-19
    del df['type']
    df.rename(columns={'param': 'id_param'}, inplace=True)

    # df.loc[df.id_param=='None','id_param'] = None
    df.replace({'None': None}, inplace=True)
    df.dropna(subset=['id_param'], inplace=True)
    try:
        df[df.columns.drop(drop_col)].to_sql(
            name='r_site_param',
            con=pycen.con,
            schema=pycen.ps.schema,
            if_exists='append',
            index=False,
            method='multi'
        )
        print('Insert new attributes OK!')
        return [df, True]
    except Exception as e:
        print(e)
        print('PS: could not insert new attributes!')
        return [df, False]

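# Illustrative example (fabricated values): with niv_embrous='moyen' and
# '%_embrous'='40', the two fields are glued to 'moyen;40', stacked into an
# ('embroussaillement', param='moyen', taux='40') record, the param label is
# mapped to its id, and the row is keyed on the site's latest geometry id.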
def split_codehab(lst_codehab):
    lst = lst_codehab.copy()
    lst = lst.str.split(r'x|X|,|&|/|\(|et', expand=True) \
        .stack().str.strip() \
        .str.replace(r'\)', '', regex=True)\
        .droplevel(-1).reset_index(drop=False)
    return lst

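# Illustrative example (fabricated value): '34.32 x 34.41' yields two rows,
# '34.32' and '34.41', sharing the original index; recognised separators are
# x, X, ',', '&', '/', '(' and 'et'.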
def format_codehab2insert(lst_codehab):
    lst = lst_codehab.copy()
    lst.replace('x|X|,|et', '&', regex=True, inplace=True)
    lst.replace('&', '&;', regex=True, inplace=True)
    lst.replace('/', '/;', regex=True, inplace=True)
    lst = lst.str.split(';', expand=True).stack().str.strip().reset_index(-1)
    lst.columns = ['ordre', 'code_hab']
    lst.ordre = lst.ordre + 1
    lst.loc[lst.code_hab.str.contains('&'), 'sep'] = '&'
    lst.loc[lst.code_hab.str.contains('/'), 'sep'] = '/'
    lst.code_hab = lst.code_hab.replace('&|/', '', regex=True).str.strip()
    lst.reset_index(drop=False, inplace=True)

    # A code followed by '(...)' flags a suspected habitat.
    suspect = lst.code_hab.str.split('(', expand=True)
    if suspect.shape[1] > 1:
        suspect[0] = suspect[0].str.strip()
        suspect[1] = suspect[1].str.replace(r'\)', '', regex=True)
        suspect.columns = ['code_hab', 'suspect']
        del lst['code_hab']
        # raise Exception('suspected habitats: work in progress')
    else:
        suspect.columns = ['code_hab']
        del suspect['code_hab']
    lst = pd.merge(lst, suspect, right_index=True, left_index=True)
    lst.set_index('id', inplace=True)

    return lst

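# Illustrative example (fabricated value): '34.32 & 34.41' becomes an ordered
# pair of rows:
#   ordre=1  code_hab='34.32'  sep='&'
#   ordre=2  code_hab='34.41'  sep=NaN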
def insert_cbPS(df):
    drop_col = ['auteur', 'structure']
    df = df.copy()
    df.date = df.date.astype(str)
    df.code_hab = df.code_hab.replace([r'\*'], [''], regex=True).str.strip()
    df.code_hab = df.code_hab.replace(['None', 'nan', '0'], [None, None, None])
    df.dropna(subset=['code_hab'], inplace=True)
    ids = select_ID(df[df.columns.drop(['code_hab', *drop_col])], sch=pycen.ps.schema, tab='r_site_habitat')
    same_col = df.columns[df.columns.isin(ids.columns)]
    if ids[same_col].duplicated().any():
        raise ValueError('Duplicate detected in the [df VS ids] join!')
    df, ids = _altertype(df, ids)
    df = df.merge(ids, on=[*same_col], how='left')
    df = df[['id', 'code_hab']].copy()
    df.set_index('id', inplace=True)
    # df = df.code_hab.str.split('x|,|&',expand=True) \
    #     .stack().str.strip() \
    #     .droplevel(-1).reset_index(drop=False)
    # df = split_codehab(df.code_hab)
    # df.columns = ['id_sitehab', 'code_hab']
    df = format_codehab2insert(df.code_hab)
    df.index.name = 'id_sitehab'
    df.reset_index(drop=False, inplace=True)

    try:
        df.to_sql(
            name='r_hab_cb',
            con=pycen.con,
            schema=pycen.ps.schema,
            if_exists='append',
            index=False,
            method='multi'
        )
        print('Insert new habitat codes OK!')
    except Exception as e:
        print(e)
        print('PS: could not insert new habitat codes!')

def insert_habPS(df, ps_statut='actif'):
    import re
    drop_col = ['auteur', 'structure']
    c = ['id_site', 'date', *drop_col]
    cc = df.columns[(df.columns.isin(c)) | (df.columns.str.contains('code_hab|n_hab'))]
    df = df[cc].copy()
    c_nhab = df.columns[df.columns.str.contains('n_hab')]
    df[c_nhab] = df[c_nhab].astype(float)
    cc = df.columns[(~df.columns.str.contains('hab'))]
    df.set_index([*cc], inplace=True)
    # df.set_index(['id_site','auteur','date'], inplace=True)
    # for t in df.dtypes[df.dtypes == int].index:
    #     df[t] = df[t].astype(str)
    df = df.stack().reset_index(-1)
    df.columns = ['param', 'value']
    df[['param', 'index', 'other']] = [re.split(r'(\d+)', s) for s in [*df.param]]
    del df['other']
    df['index'] = df['index'].astype(int)
    df['value'] = df['value'].replace(['-'], [None])
    df.dropna(subset=['value'], inplace=True)
    df = df.set_index(['index', 'param'], append=True).unstack()
    df = df.droplevel(0, axis=1)
    df.code_hab.replace(['0', 'None', 'nan'], [None, None, None], inplace=True)
    df.dropna(subset=['code_hab'], inplace=True)
    # df['n_hab'] = df['n_hab'].astype(int)  # disabled to allow inserting data without n_hab
    df.reset_index(inplace=True, drop=False)
    df = df.merge(get_idGeomSite(df.id_site, ps_statut=ps_statut), on=['id_site'], how='left')
    # del df['id_site']  # id_site is stored in the table since 2022-07-19

    # Re-rank habitat codes by habitat proportion
    if 'n_hab' in df.columns:
        df.sort_values(['id_geom_site', 'n_hab', 'index'], ascending=[True, False, True], inplace=True)
    else:
        df.sort_values(['id_geom_site', 'index'], ascending=[True, True], inplace=True)
    lst = [list(range(1, df[df.id_geom_site == d].shape[0] + 1)) for d in df.id_geom_site.unique()]
    flat_list = [item for sublist in lst for item in sublist]
    df['index'] = flat_list

    try:
        df[df.columns.drop([*drop_col, 'code_hab'])].to_sql(
            name='r_site_habitat',
            con=pycen.con,
            schema=pycen.ps.schema,
            if_exists='append',
            index=False,
            method='multi'
        )
        print('Insert new habitat attributes OK!')
        return [df, True]
    except Exception as e:
        print(e)
        print('PS: could not insert new habitat attributes!')
        return [df, False]

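# Illustrative reshaping (fabricated columns): wide columns
# code_hab1..code_habN / n_hab1..n_habN are stacked into one row per
# (polygon, habitat), re-ranked by descending n_hab per geometry, then
# appended to ps.r_site_habitat.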
def insert_legendCartoPS(df, ps_statut='actif'):
    c = ['id_site', 'auteur', 'date', 'leg_carto']  # ,'legende']
    cc = df.columns[df.columns.isin(c)]
    df = df[cc].copy()
    param = pycen.ps.get_listLegendePS()
    df['id_param_leg'] = df['leg_carto'].replace([*param.nom_court], [*param.id])
    df = df.merge(get_idGeomSite(df.id_site, ps_statut=ps_statut), on=['id_site'], how='left')
    del df['id_site']
    del df['leg_carto']

    try:
        df[df.columns.drop('auteur')].to_sql(
            name='r_site_legcarto',
            con=pycen.con,
            schema=pycen.ps.schema,
            if_exists='append',
            index=False,
            method='multi'
        )
        print('Insert new legends OK!')
        return [df, True]
    except Exception as e:
        print(e)
        print('PS: could not insert new legends!')
        return [df, False]

def filter_saisierror(df):
    import re
    tmp = df.copy()
    lst_iderror = []
    del tmp['geom']
    cc = tmp.columns[(~tmp.columns.str.contains('hab'))]
    tmp.set_index([*cc], inplace=True)
    tmp = tmp.stack().reset_index(-1)
    tmp.columns = ['param', 'value']
    tmp[['param', 'index', 'other']] = [re.split(r'(\d+)', s) for s in [*tmp.param]]
    del tmp['other']
    tmp['index'] = tmp['index'].astype(int)
    tmp['value'] = tmp['value'].replace(['-'], [None])
    tmp.dropna(subset=['value'], inplace=True)
    tmp = tmp.set_index(['index', 'param'], append=True).unstack()
    tmp = tmp.droplevel(0, axis=1)
    tmp.reset_index(inplace=True, drop=False)
    lst_error1 = []
    # NB: the unstacked habitat-name column is 'hab_' (the original test used
    # '_hab', which never matched); the error layers written below rely on the
    # module-level `con` and `schema` set in __main__.
    if 'hab_' in tmp.columns:
        tmp.hab_.replace(
            ['Xerobromenion erecti',
             'Mesobromion',
             'Carpino betuli prunion spinosae', 'Carpino-betuli prunion spinosae'],
            ['Xerobromion erecti',
             'Mesobromion erecti',
             'Carpino betuli-prunion spinosae', 'Carpino betuli-prunion spinosae'],
            inplace=True)

        for hab in tmp.hab_.sort_values().unique():
            print(hab)
            print(tmp[tmp.hab_ == hab].code_hab.unique())
            for cod in tmp[tmp.hab_ == hab].code_hab.unique():
                perc = tmp[(tmp.hab_ == hab) & (tmp.code_hab == cod)].code_hab.count() / tmp[tmp.hab_ == hab].code_hab.count()
                if perc < 0.6:
                    if hab == 'Mesobromion erecti' and cod in ['34.322', '34.323']:
                        continue
                    print('\t' + cod + ' : ' + perc.astype(str))
                    lst_error1 = [*lst_error1, *tmp[(tmp.hab_ == hab) & (tmp.code_hab == cod)].sort_values('id_site').id_site.unique()]
        df[df.id_site.isin(lst_error1)].to_postgis(
            name='incoherence_habVScodehab',
            con=con,
            schema=schema,
            if_exists='replace',
            index=False,
            # geom_col='geom',
            index_label='id_site',
        )
        if lst_error1:
            print('%i' % len(lst_error1))

    cb = pycen.ref_hab().get_CB()
    tmp.code_hab = tmp.code_hab.replace([r'\*'], [''], regex=True).str.strip()
    tmp.code_hab = tmp.code_hab.replace(['0'], [None])
    tmp.dropna(subset=['code_hab'], inplace=True)
    tmpp = tmp[['id_site', 'index', 'code_hab']].copy()
    tmpp.set_index(['id_site', 'index'], inplace=True)
    # if any(tmpp.code_hab.str.contains('x|,|&')):
    #     tmpp = tmpp.code_hab.str.split('x|,|&',expand=True).stack().str.strip().droplevel(-1)
    tmpp = split_codehab(tmpp.code_hab)
    tmpp.columns = ['id_site', 'index', 'code_hab']
    tmpp = tmpp[~tmpp.code_hab.isin(cb.id)]
    lst_error2 = [*tmpp.sort_values('id_site').id_site.unique()]
    if 'n_hab' in tmp.columns:
        lst_error2 = [*lst_error2, *tmp[tmp.n_hab.isna()].sort_values('id_site').id_site.unique()]

    if lst_error2:
        print('ERROR code_hab: %i' % len(lst_error2))
        df[df.id_site.isin(lst_error2)].to_postgis(
            name='PB_codehab_nonPresent_dans_corineBiotope',
            con=con,
            schema=schema,
            if_exists='append',
            index=False,
            # geom_col='geom',
            index_label='id_site',
        )

    lst_iderror = [*lst_error1, *lst_error2]
    lst_iderrorindex = df[df.id_site.isin(lst_iderror)].index
    df.drop(lst_iderrorindex, inplace=True)
    return df.sort_values('id_site')

def _insert_site_(df, ps_statut='actif'):
    df_ins, ins = insert_geom(df)
    if ins:
        insert_authordata(df_ins, id_data='id_site', tab_data='r_sites_geom', colname_rsite='id_geom_site', tab_auteur='r_geomsites_auteur')

    df_ins, ins = insert_attrsPS(df, ps_statut)
    if ins:
        insert_authordata(df_ins, id_data='id_geom_site', tab_data='r_site_param', colname_rsite='id_siteparam', tab_auteur='r_siteparam_auteur')

    df_ins, ins = insert_habPS(df, ps_statut)
    if ins:
        # Split large batches in two to keep the multi-insert manageable.
        if df_ins.shape[0] > 4000:
            df_ins1 = df_ins.iloc[:round(df_ins.shape[0] / 2)].copy()
            df_ins2 = df_ins.iloc[round(df_ins.shape[0] / 2):].copy()
            insert_cbPS(df_ins1)
            insert_cbPS(df_ins2)
        else:
            insert_cbPS(df_ins)
        insert_authordata(
            df_ins[df_ins.columns.drop('code_hab')], id_data='id_geom_site', tab_data='r_site_habitat',
            colname_rsite='id_sitehab', tab_auteur='r_sitehab_auteur')

def maj_site(df):
    _insert_site_(df)


def new_site(df):
    df_ins, ins = insert_site(df)
    if ins:
        insert_authordata(df_ins, id_data='id', tab_data='sites', colname_rsite='id_site', tab_auteur='r_sites_auteur')

    _insert_site_(df)

def replace_site(df, cor):
    # Insert the new site codes
    df_ins, ins = insert_site(df)
    if ins:
        insert_authordata(df_ins, id_data='id', tab_data='sites', colname_rsite='id_site', tab_auteur='r_sites_auteur')
    # Fill the old/new site correspondence table
    cor.to_sql(
        name='r_site_maj',
        con=pycen.con,
        schema='sites',
        if_exists='append',
        index=False,
        method='multi'
    )
    # Set the closing date of the old sites
    df_updt = pd.merge(
        cor, df[['id_site', 'date']],
        how='left',
        left_on='id_site_new',
        right_on='id_site')
    df_updt = df_updt[['id_site_old', 'date']]
    df_updt.columns = ['id', 'date_fin']
    pycen.update_to_sql(
        df=df_updt,
        con=pycen.con,
        table_name='sites',
        schema_name='sites',
        key_name='id'
    )
    # Continue with the site insertion
    _insert_site_(df)

def check_overlaps():
    '''
    Check the polygons for mutual overlaps.
    '''
    sql = '''
    SELECT v1.site_code site_code_old, v1.geom geom_old, v2.site_code site_code_new, v2.geom geom_new
    FROM ps.v_pelouseseches v1, ps.v_pelouseseches v2
    WHERE ST_OVERLAPS(v1.geom,v2.geom) = TRUE
    AND v1.date_geom < v2.date_geom
    AND v1.site_code <> v2.site_code;
    '''
    res = gpd.read_postgis(sql, con_ps, geom_col='geom_old')
    return res

if __name__ == "__main__":
|
|
|
|
isin_bdd = True
|
|
schema = "habitat"
|
|
lst_tab = ['"PS_4MONTAGNESNE_CEN38_2014"','"PS_CHAMBARAN_CEN38_2013"','"PS_CHARTREUSE_CEN38_2010"','"PS_DRAC_CEN38_2014"',
|
|
'"PS_BELLEDONNE_CEN38_2014"','"PS_BIEVRE_CEN38_2014"','"PS_BIEVRE_CEN38_2016"','"PS_SUD-GRENOBLOIS_CEN38_2009"',
|
|
'"PS_VERCORS_CEN38_2011"'
|
|
]
|
|
# from_table = '"cr_ECRIN_habitats_CBNA_2014"'
|
|
from_table = None
|
|
from_file = 'PS38_modifs_AG_2024.gpkg'
|
|
path0 = '/home/colas/Documents/9_PROJETS/2_PS/TO IMPORT/2024/'
|
|
|
|
# org = from_file.split('/')[1]
|
|
tutu = pd.DataFrame()
|
|
if from_table is not None:
|
|
for table in lst_tab:
|
|
sql = "SELECT * FROM {sch}.{tab}".format(sch=schema,tab=table)
|
|
df = gpd.read_postgis(
|
|
sql = sql,
|
|
con = con
|
|
)
|
|
if 'id' in df.columns and 'n_polygone' in df.columns:
|
|
df['id'] = df['n_polygone']
|
|
del df['n_polygone']
|
|
elif 'id' in df.columns and 'polygon' in df.columns:
|
|
df['id'] = df['polygon']
|
|
del df['polygon']
|
|
elif 'id' in df.columns and 'ident' in df.columns:
|
|
df['id'] = df['ident']
|
|
del df['ident']
|
|
df = normalize_colname(df)
|
|
if 'remarques' in df.columns and 'legende' not in df.columns:
|
|
df['legende'] = df.remarques
|
|
# df.rename(columns={'id':'id_site'}, inplace=True)
|
|
|
|
if table == '"PS_VERCORS_CEN38_2011"':
|
|
df = df[df.date != '?'].copy()
|
|
|
|
# df.legende = format_legende(df.legende)
|
|
df = format_date(df)
|
|
|
|
if table == '"PS_CHAMBARAN_CEN38_2013"':
|
|
df.type_patur = normalize_paturage(df.type_patur)
|
|
df[['p1','p2']] = df.pratiques.str.split('/|.et.',expand=True)
|
|
df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'p1'] = \
|
|
df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'p1'] + ' ' + \
|
|
df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'type_patur']
|
|
df.loc[(~df.type_patur.isna()) & df.p2.str.lower()=='pâturage', 'p2'] = \
|
|
df.loc[(~df.type_patur.isna()) & df.p2.str.lower()=='pâturage', 'p2'] + ' ' + \
|
|
df.loc[(~df.type_patur.isna()) & df.p2.str.lower()=='pâturage', 'type_patur']
|
|
df['pratiques'] = df.p1
|
|
df.loc[~df.p2.isna(),'pratiques'] = df.p1 + '/' + df.p2
|
|
df.drop(columns=['type_patur','p1','p2'], inplace=True)
|
|
|
|
df['table_org'] = table[1:-1]
|
|
tutu = pd.concat([tutu,df])
|
|
|
|
if from_file is not None:
|
|
table = from_file.split('/')[-1]#.split('.')[0]
|
|
df = gpd.read_file(path0+from_file)
|
|
df = normalize_colname(df)
|
|
df = format_date(df)
|
|
df['table_org'] = table
|
|
df['structure'] = 'CEN Isère'
|
|
# df['type_pat'].replace(['Indéterminé','/'],None,inplace=True)
|
|
# df.loc[df.type_pat.notna(),'pratiques'] = \
|
|
# df[df.type_pat.notna()].pratiques + ' ' + df[df.type_pat.notna()].type_pat.str.lower()
|
|
tutu = pd.concat([tutu,df])
|
|
|
|
|
|
|
|
    c = ['ident', 'id_origine', 'id_site', 'geom', 'auteur', 'hab_1', 'code_hab1', 'n_hab1',
         'hab_2', 'code_hab2', 'n_hab2', 'hab_3', 'code_hab3', 'n_hab3',
         'hab_5', 'code_hab5', 'n_hab5', 'hab_6', 'code_hab6', 'n_hab6',
         'hab_7', 'code_hab7', 'n_hab7', 'hab_8', 'code_hab8', 'n_hab8',
         'hab_9', 'code_hab9', 'n_hab9', 'hab_10', 'code_hab10', 'n_hab10',
         'hab_11', 'code_hab11', 'n_hab11', 'hab_12', 'code_hab12', 'n_hab12',
         'hab_13', 'code_hab13', 'n_hab13', 'hab_14', 'code_hab14', 'n_hab14',
         'hab_4', 'code_hab4', 'n_hab4', 'pratiques', 'niv_embrous',
         'recouvmnt', 'remarques', 'phyto', 'statut', 'h_remarq', 'legende',
         'leg_carto', 'date', 'annee', '%_embrous', '%_recouvmnt', 'surf_emb',
         'source', 'structure',
         # 'org','dept','num',
         'table_org'
         ]
    # Check tutu, which is what gets renamed (the original tested df).
    if 'ident' in tutu.columns and 'id_origine' not in tutu.columns:
        tutu.rename(columns={'ident': 'id_origine'}, inplace=True)
    cols = tutu.columns[tutu.columns.isin(c)]
    df = tutu[cols].copy()
    df = df.set_geometry('geom', crs=2154)
    # df.code_hab3.replace({
    #     '43.171':'41.171',
    #     '31.4B':'31.4'
    # }, inplace=True)

    df = pycen.tools.Polygons_to_MultiPolygon(df)
    df_new, df_replace, df_maj = define_siteName(df, rcvmt=10)

    df_new_tmp = df_new.copy()
    df_new = df_new_tmp[df_new_tmp.id_site.notna()].copy()

    if not df_new.empty:
        df_new = df_new[~df_new.id_site.isna()].copy()  # redundant with the filter above, kept as a safeguard
        df_new.sort_index(inplace=True)
        df_new['auteur'] = normalize_auteur(df_new.auteur)
        df_new['id_origine'] = df_new['id_origine'].astype(str)
        df_new = filter_saisierror(df_new)
    if not df_maj.empty:
        df_maj['auteur'] = normalize_auteur(df_maj.auteur)
        id_col = 'id_origine' if 'id_origine' in df_maj.columns else 'id_site'
        df_maj['id_origine'] = df_maj[id_col].astype(str)
        df_maj = filter_saisierror(df_maj)
    if not df_replace['df'].empty:
        df_replace['df']['auteur'] = normalize_auteur(df_replace['df']['auteur'])
        df_replace['df']['id_origine'] = df_replace['df']['id_origine'].astype(str)
        df_replace['df'] = filter_saisierror(df_replace['df'])

    # for d in df.dtypes[df.dtypes == object].index:
    #     # df[d].replace(['  '], [' '],regex=True,inplace=True)
    #     df[d] = df[d].str.normalize('NFKC')

    # Sanity check: habitat proportions should sum to 100 per polygon.
    DF = df_maj.copy()
    Cnhab = DF.columns[DF.columns.str.startswith('n_hab')]
    if not all(DF[[*Cnhab]].astype(float).sum(axis=1) == 100):
        print(DF[~(DF[[*Cnhab]].astype(float).sum(axis=1) == 100)])
        # DF.loc[ (~(DF[[*Cnhab]].sum(axis=1) == 100)),'n_hab1' ] = 20

    if not df_new.empty:
        new_site(df_new)
    if not df_maj.empty:
        maj_site(df_maj)
    if not df_replace['df'].empty:
        replace_site(
            df=df_replace['df'],
            cor=df_replace['cor']
        )

    exit()

    ################
    ## END SCRIPT ##
    ################

    # Everything below is unreachable after exit(); kept as a manual,
    # interactive variant of the insertion workflow.
    print(df)
    while True:
        res = input('Insert the data table? (Y/N)\n')
        if not res or res[0].upper() not in ['Y', 'N']:
            continue
        else:
            break
    if res[0].upper() == 'Y':
        df_ins, ins = insert_site(df)
        if ins:
            insert_authordata(df_ins, id_data='id', tab_data='sites', colname_rsite='id_site', tab_auteur='r_sites_auteur')

        df_ins, ins = insert_geom(df)
        if ins:
            insert_authordata(df_ins, id_data='id_site', tab_data='r_sites_geom', colname_rsite='id_geom_site', tab_auteur='r_geomsites_auteur')

        df_ins, ins = insert_attrsPS(df)
        if ins:
            insert_authordata(df_ins, id_data='id_geom_site', tab_data='r_site_param', colname_rsite='id_siteparam', tab_auteur='r_siteparam_auteur')

        df_ins, ins = insert_habPS(df)
        if ins:
            if df_ins.shape[0] > 4000:
                df_ins1 = df_ins.iloc[:round(df_ins.shape[0] / 2)].copy()
                df_ins2 = df_ins.iloc[round(df_ins.shape[0] / 2):].copy()
                insert_cbPS(df_ins1)
                insert_cbPS(df_ins2)
            else:
                insert_cbPS(df_ins)
            insert_authordata(
                df_ins[df_ins.columns.drop('code_hab')], id_data='id_geom_site', tab_data='r_site_habitat',
                colname_rsite='id_sitehab', tab_auteur='r_sitehab_auteur')

        # df_ins, ins = insert_legendCartoPS(df)
        # if ins:
        #     insert_authordata(df_ins,tab_data='r_site_legcarto',colname_rsite='id_sitecarto', tab_auteur='r_sitecarto_auteur')

    sql = """
    DELETE FROM sites.sites
    WHERE id in (SELECT id_site FROM sites.r_sites_geom where id_lot = 20);
    """
    with pycen.con.begin() as cnx:
        cnx.execute(sql)