
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
# Name        : recup_ps.py
# Description : Retrieves the "pelouses sèches" (dry grassland) survey tables
#               and inserts them into the CEN database (sites, geometries,
#               habitats, parameters and authors).
# Copyright   : 2021, CEN38
# Author      : Colas Geier
# Version     : 1.0
import re
import pandas as pd
import geopandas as gpd
# from pycen import bdd
import pycen
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
from geoalchemy2 import Geometry  # noqa: F401 (PostGIS geometry type support)

isin_bdd = True
lst_tab = ['"PS_4MONTAGNESNE_CEN38_2014"','"PS_CHAMBARAN_CEN38_2013"','"PS_CHARTREUSE_CEN38_2010"','"PS_DRAC_CEN38_2014"',
'"PS_BELLEDONNE_CEN38_2014"','"PS_BIEVRE_CEN38_2014"','"PS_BIEVRE_CEN38_2016"','"PS_SUD-GRENOBLOIS_CEN38_2009"',
'"PS_VERCORS_CEN38_2011"'
]
# Source database parameters
user = 'cen_admin'
pwd = '#CEN38@venir'
adr = '192.168.0.189'
base = 'bd-cen-38'
schema = 'pelouse_seche'
# schema = 'habitat'  # leftover override: uncomment to read the CBNA habitat layers
url = URL.create('postgresql+psycopg2',
                 username=user,
                 password=pwd,
                 host=adr,
                 database=base,
                 )
con = create_engine(url)
table = '"cr_VERCORS_habitats_CBNA_1999-2007"'
# table = '"PS_CHAMBARAN_CEN38_2013"'
# bd = bdd.CEN(
#     user=user,
#     pwd=pwd,
#     adr=adr,
#     base=base,
#     # schema=schema
# )
# Target database parameters
user_ps = 'colas_g'
pwd_ps = 'adm1n*38*'
adr_ps = '192.168.60.10'
base_ps = 'bd_cen'
url_ps = URL.create('postgresql+psycopg2',
                    username=user_ps,
                    password=pwd_ps,
                    host=adr_ps,
                    database=base_ps,
                    )
con_ps = create_engine(url_ps)
con_ps = pycen.con  # overrides the engine above: pycen already exposes the target connection
def get_pers():
    # Former direct query, superseded by pycen.pers.get_auteur():
    # sql = ('SELECT t1.id id, t1.nom nom, t1.prenom prenom, t2.nom organisme'
    #        ' FROM {sch}.{tab1} t1 JOIN {sch}.{tab2} t2'
    #        ' ON t1.id_organisme = t2.id ORDER BY id'
    #        ).format(sch='personnes', tab1='personne', tab2='organisme')
    # df_pers = pd.read_sql(sql=sql, con=con_ps)
    # df_pers.replace([None], '', inplace=True)
    # df_pers['auteur'] = (df_pers.prenom + ' ' + df_pers.nom).str.strip()
    # df_pers['organisme'] = df_pers['organisme'].str.strip()
    return pycen.pers.get_auteur()
# def get_idgeomsite(lst_site):
#     '''
#     :lst_site: tuple.
#     '''
#     sql = ('SELECT id, id_site, date FROM {sch}.{tab} t0 '
#            'JOIN {sch}.type_milieu t1 WITH t0.id_type_milieu = t1.id '
#            'WHERE t1.nom = "Pelouses sèches" AND t0.id_site IN {lst} ORDER BY id'
#            ).format(sch='sites', tab='r_sites_geom', lst=lst_site)
#     df = pd.read_sql(sql=sql, con=con_ps)
#     return df
def format_legende(leg):
    df = pd.DataFrame({'leg': leg})
    df.replace(
        ['/', ' x ', 'X', ' x', 'fasciès', 'faciés', '  ', 'erigé', 'érigéé',
         'Troêne', 'Troëne', 'brachypode', 'dominée', 'dominé',
         'érigé_et_brachypode', 'cyperacées'],
        ['|', '|', '|', '|', 'faciès', 'faciès', ' ', 'érigé', 'érigé',
         'Troène', 'Troène', 'Brachypode', 'dominé', 'dominée',
         'érigé et brachypode', 'Cyperacées'],
        regex=True, inplace=True)
    return df
def format_date(df):
    df = df.copy()
    if table in ['"PS_4MONTAGNESNE_CEN38_2014"', '"PS_BIEVRE_CEN38_2014"',
                 '"PS_CHAMBARAN_CEN38_2013"']:
        df.date = pd.to_datetime(df.date, format='%d/%m/%Y')
    if table == '"PS_BELLEDONNE_CEN38_2014"':
        df.date = pd.to_datetime(df.date, format='%d%b%Y')
    if table == '"PS_BIEVRE_CEN38_2016"':
        df.date = df.date.astype(str).replace(r'\.', '', regex=True)
        df.date = df.date.str.rsplit(' ', n=1).str[0]\
            .replace(['avr', 'mai', 'juin', 'juil'], ['apr', 'may', 'jun', 'jul'], regex=True)
        df.date = pd.to_datetime(df.date, format='%d %b %Y')
    if table == '"PS_SUD-GRENOBLOIS_CEN38_2009"':
        # df.loc[df.date.str.len() < 6, 'date'] = df.loc[df.date.str.len() < 6, 'date'].astype(int)
        df.loc[df.date.str.len() < 6, 'date'] = \
            '01/06/' + df.loc[df.date.str.len() < 6, 'date'].astype(int).max().astype(str)
        df.loc[df.date.str.len() > 6, 'date'] = \
            pd.to_datetime(df.loc[df.date.str.len() > 6, 'date'])
    if 'annee' in df.columns and 'date' not in df.columns:
        # No date column but the year is given: default to June 1st of that year
        df['annee'] = df['annee'].astype(int)
        df['date'] = df['annee'].astype(str) + '-06-01'
        df.date = pd.to_datetime(df.date, format='%Y-%m-%d')
    elif any(df.date.astype(str).str.len() <= 4):
        # Heterogeneous dates: full dates and bare years coexist
        d = df.loc[df.date.astype(str).str.len() <= 4, 'date'].unique()
        if None in d:
            df.loc[df.date.astype(str).str.len() <= 4, 'date'] = \
                df.loc[df.date.astype(str).str.len() <= 4, 'annee'].astype(str) + '-06-01'
        else:
            df.loc[df.date.astype(str).str.len() <= 4, 'date'] = \
                df.loc[df.date.astype(str).str.len() <= 4, 'date'].astype(str) + '-06-01'
        df.date = pd.to_datetime(df.date, format='%Y-%m-%d')
    else:
        df.date = pd.to_datetime(df.date)
    return df
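
# Illustrative call (hypothetical frame, assumes the global `table` matches none
# of the special cases above): format_date(pd.DataFrame({'annee': [2014]}))
# returns a frame whose date column holds Timestamp('2014-06-01').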
def normalize_auteur(lst_auteur):
    '''
    :lst_auteur: pd.Series
    '''
    dic = {
        'Lise Duconte': 'DUCONTE Lise',
        'COSQUER Mélanie (AVENIR)': 'COSQUER Mélanie',
        'CHABERT Chloé (AVENIR)': 'CHABERT Chloé',
        'CHABERT Chloé (AVNEIR)': 'CHABERT Chloé',
        'PACHE Gilles (CBNA)': 'PACHE Gilles',
        'Gilles PACHE': 'PACHE Gilles',
        'JAMEAU Laura (AVENIR)': 'JAMEAU Laura',
        'MARCIAU Roger (AVENIR)': 'MARCIAU Roger',
        'Roger MARCIAU': 'MARCIAU Roger',
        'Aude Massa': 'MASSA Aude',
        'Aude MASSA': 'MASSA Aude',
        'Bilkisse ABOUDOU AVENIR': 'ABOUDOU Bilkisse',
        'Romain BARTHELD': 'BARTHELD Romain',
        'VILLARET J.C.': 'VILLARET Jean-charles',
        'Jean-Charles VILLARET': 'VILLARET Jean-charles',
        'Fabien ANTHELME': 'ANTHELME Fabien',
        'Jean-Christophe GATTUS': 'GATTUS Jean-christophe',
        'Alexis MIKOLAJCZAK': 'MIKOLAJCZAK Alexis',
        'Jonathan MALINEAU': 'MALINEAU Jonathan',
        'Aurélien DAUTREY': 'DAUTREY Aurélien',
        'Etienne MARY': 'MARY Etienne',
        'Alix GUEDOU': 'GUEDOU Alix',
    }
    lst_aut = lst_auteur.copy()
    lst_aut.replace(' & ', ',', regex=True, inplace=True)
    if any(lst_aut.str.contains(',')):
        lst_aut = lst_aut.str.split(',', expand=True).stack().str.strip().droplevel(-1)
    lst_aut.replace(dic, inplace=True)
    # Normalize to "NOM Prénom" (surname upper-cased, given name capitalised)
    df_aut = lst_aut.str.rsplit(' ', n=1, expand=True)
    df_aut[1] = df_aut[1].str[0].str.upper() + df_aut[1].str[1:].str.lower()
    df_aut[0] = df_aut[0].str.upper()
    df_aut[2] = df_aut[0]
    df_aut.loc[~df_aut[1].isna(), 2] = \
        df_aut.loc[~df_aut[1].isna(), 0] + ' ' + df_aut.loc[~df_aut[1].isna(), 1]
    lst_aut = df_aut[2].copy()
    aut = lst_aut.unique()
    res = [x for x in aut if x not in [*get_pers().nom_prenom]]
    if res:
        print('Les auteurs suivants ne figurent pas dans la BDD contact : %s' % res)
        new = new_auteur(res)
        lst_aut.replace([*new['aut'].keys()], [*new['aut'].values()], inplace=True)
    lst_aut = lst_aut.groupby(axis=0, level=0).apply(lambda x: "%s" % ','.join(x))
    return lst_aut
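
# Illustrative call (assumes both names already exist in the contacts DB,
# otherwise the interactive new_auteur() dialogue starts):
#   normalize_auteur(pd.Series(['Gilles PACHE & Aude Massa']))
#   -> 'PACHE Gilles,MASSA Aude'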
def new_auteur(new):
    '''
    :new: list
    '''
    df = pd.DataFrame({'auteur': new})
    dic = {'aut': {}, 'org': []}
    # For each author
    for pers in df.auteur:
        print(pers)
        while True:
            newaut = input("Insérer un nouvel auteur ? (Y/N)\n")
            if newaut[0].upper() not in ['Y', 'N']:
                continue
            else:
                break
        # Case 1: no new author is inserted
        if newaut[0].upper() == 'N':
            while True:
                rep = input("Remplacer l'auteur par un auteur existant ? (Y/N)\n")
                if rep[0].upper() not in ['Y', 'N']:
                    continue
                else:
                    break
            if rep[0].upper() == 'Y':
                while True:
                    org = pycen.pers.get_organisme().sort_index().to_dict()['nom']
                    reporg = int(input("Sélectionner le numéro de l'organisme :\n %s \n" % org))
                    if reporg not in [*org.keys()]:
                        continue
                    else:
                        break
                while True:
                    aut = get_pers().sort_values('id')
                    aut = aut[aut.id_organisme == reporg]
                    aut = aut.nom_prenom.to_dict()
                    repaut = int(input("Sélectionner le numéro de l'auteur :\n %s \n" % aut))
                    if repaut not in [*aut.keys()]:
                        continue
                    else:
                        break
                dic['aut'] = {**dic['aut'], pers: aut[repaut]}
                dic['org'] = [*dic['org'], org[reporg]]
        # Case 2: insert a new author
        else:
            while True:
                print("Auteur : %s" % pers)
                rep = input("L'auteur doit être sous le format NOM Prénom.\nRenommer l'auteur ? (Y/N)\n")
                if rep[0].upper() not in ['Y', 'N']:
                    continue
                elif rep[0].upper() == 'Y':
                    repaut = input("Saisissez un nouveau nom :\n")
                    dic['aut'] = {**dic['aut'], pers: repaut}
                    pers = repaut
                    break
                else:
                    dic['aut'] = {**dic['aut'], pers: pers}
                    break
            while True:
                org = pycen.pers.get_organisme().sort_index().to_dict()['nom']
                org = {-1: 'Autre', **org}
                reporg = int(input("Sélectionner le numéro de l'organisme :\n %s \n" % org))
                if reporg not in [*org.keys()]:
                    continue
                elif reporg == -1:
                    reporg = input("Saisissez un nouvel organisme :\n")
                    dic['org'] = [*dic['org'], reporg]
                    insert_newOrganisme(reporg)
                    org = pycen.pers.get_organisme()
                    idorg = org[org.nom == reporg].index[0]
                    insert_newAuteur(pers, idorg)
                    break
                else:
                    dic['org'] = [*dic['org'], org[reporg]]
                    insert_newAuteur(pers, reporg)
                    break
    return dic
def insert_newAuteur(aut, idorg):
    aut = pd.Series(aut)
    idorg = pd.Series(idorg)
    df = pd.DataFrame({'aut': aut, 'id_organisme': idorg})
    tmp = df.aut.str.rsplit(' ', n=1, expand=True)
    if tmp.shape[1] == 1:
        df[['nom']] = tmp
    elif tmp.shape[1] == 2:
        df[['nom', 'prenom']] = tmp
    else:
        raise ValueError('NOM Prénom est de longueur inconnue :\n%s' % tmp)
    del df['aut']
    try:
        df.to_sql(name='personne', con=pycen.con, schema=pycen.pers.schema,
                  if_exists='append', index=False)
        print("Nouvel auteur inséré : '%s'" % aut)
    except Exception:
        print("ERROR : impossible d'insérer le nouvel auteur :\n '%s'" % aut)

def insert_newOrganisme(nom):
    nom = pd.Series(nom)
    df = pd.DataFrame({'nom': nom})
    try:
        df.to_sql(name='organisme', con=pycen.con, schema=pycen.pers.schema,
                  if_exists='append', index=False)
        print("Nouvel organisme inséré : '%s'" % nom)
    except Exception:
        print("ERROR : impossible d'insérer le nouvel organisme :\n '%s'" % nom)
def normalize_colname(df):
    df.columns = df.columns.str.lower()
    df.columns = df.columns.str.replace('__', '_', regex=True)
    return df.rename(columns={
        'n_polygone': 'ident',
        'id_site': 'ident',
        'ident_': 'ident',
        'id': 'ident',
        'idfinal': 'id_origine',
        'date_': 'date',
        'obs': 'auteur',
        's_p_brous': '%_embrous',
        's_p_brouss': '%_embrous',
        'niv__embro': 'niv_embrous',
        'niv_embro': 'niv_embrous',
        'niv_emb': 'niv_embrous',
        'recouvreme': 'recouvmnt',
        'recouvr': 'recouvmnt',
        'recouv': 'recouvmnt',
        'recouvr_': 'recouvmnt',
        'remarque': 'remarques',
        'remarq_': 'remarques',
        'legendes': 'legende',
        'legend': 'legende',
        'sources': 'source',
        'surf': 'surface',
        'geometry': 'geom',
    })
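
# Illustrative mapping (hypothetical source column): 'Niv__Embro' is lowered to
# 'niv__embro', de-doubled to 'niv_embro', then renamed to the canonical
# 'niv_embrous' by the dictionary above.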
def get_id_auteur(lst_autor):
    sch = 'personnes'
    tab = 'personne'
    sql = """
        WITH tmp as
        (SELECT id, TRIM(CONCAT(nom,' ',prenom)) as auteur FROM {sch}.{tab})
        SELECT * FROM tmp WHERE auteur IN %(lst_autor)s""".format(sch=sch, tab=tab)
    df = pd.read_sql(sql=sql, con=pycen.con,
                     params={'lst_autor': tuple(lst_autor)})
    return df
def existing_ps(df):
    gdf = pycen.ps.get_sitesGeom()
    gdf['area'] = gdf.area
    # how: 'intersection', 'union', 'identity', 'symmetric_difference' or 'difference'
    ovl = gpd.overlay(df, gdf, how='intersection')
    ovl['ovl_area'] = ovl.area
    ovl['ovl%'] = ovl['ovl_area'] * 100 / ovl['area']
    return df
def define_siteOrg(df):
    gdf = pycen.get_districtNat()[['abrev', 'geom']]
    tmp = df[['geom']].copy().set_geometry('geom')
    tmp = gpd.overlay(tmp, gdf, how='intersection')
    tmp['area'] = tmp.area
    tmp.reset_index(0, inplace=True)
    tmp.set_index('area', inplace=True)
    tmp.sort_index(inplace=True)
    # Keep, for each input polygon, the natural district with the largest overlap
    tmp.drop_duplicates(subset=['index'], keep='last', inplace=True)
    tmp.reset_index(drop=True, inplace=True)
    del tmp['geometry']
    df = df.merge(tmp, how='left', left_index=True, right_on='index')
    del df['index']
    df.rename(columns={'abrev': 'org'}, inplace=True)
    return df
    # tmp[tmp.id_site==334]
    # predicates: 'touches', None, 'contains', 'overlaps', 'contains_properly',
    #             'within', 'intersects', 'crosses', 'covers', 'covered_by'
def define_siteName(df):
    df = define_siteOrg(df.copy())
    df['dept'] = '38'
    # df['num'] = None
    if 'org' in df.columns:
        for org in df.org.unique():
            tmp = pycen.sites.get_sitesInfos(milieu=pycen.ps.milieu)
            # tmp = pycen.sites.get_sitesInfos(milieu=pycen.zh().typ_milieux.nom[0])
            tmp = tmp[tmp.org == org]
            if not tmp.empty:
                num = tmp.num.astype(int).max()
                seq = range(num + 1, num + df.loc[df.org == org].shape[0] + 1)
            else:
                seq = range(1, df.loc[df.org == org].shape[0] + 1)
            # Store as strings so the zero-padding and concatenation below work
            df.loc[df.org == org, ['num']] = [str(n) for n in seq]
    else:
        tmp = pycen.sites.get_sitesInfos(milieu=pycen.ps.milieu)
        df.loc[~df.num.isna(), 'num'] = df[~df.num.isna()].num.astype(int).astype(str)
    # Zero-pad the site number to four digits
    df.loc[df.num.str.len() == 1, 'num'] = '000' + df.loc[df.num.str.len() == 1, 'num']
    df.loc[df.num.str.len() == 2, 'num'] = '00' + df.loc[df.num.str.len() == 2, 'num']
    df.loc[df.num.str.len() == 3, 'num'] = '0' + df.loc[df.num.str.len() == 3, 'num']
    df['id_site'] = df['dept'] + df['org'] + df['num']
    return df
def normalize_formatIdsite(id_site):
    df = pd.DataFrame({'id_site': id_site})
    df['dept'] = '38'
    df[['org', 'num', 'other']] = [re.split(r'(\d+)', s) for s in [*df.id_site]]
    del df['other']
    df.loc[df.num.str.len() == 1, 'num'] = '000' + df.loc[df.num.str.len() == 1, 'num']
    df.loc[df.num.str.len() == 2, 'num'] = '00' + df.loc[df.num.str.len() == 2, 'num']
    df.loc[df.num.str.len() == 3, 'num'] = '0' + df.loc[df.num.str.len() == 3, 'num']
    df['id_site'] = df['dept'] + df['org'] + df['num']
    return df.id_site
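
# Illustrative call (hypothetical value):
#   normalize_formatIdsite(pd.Series(['AV12']))  ->  '38AV0012'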
def normalize_paturage(pat):
    return pat.str.lower().replace(
        ['paturage', 'ânes', 'vaches', 'moutons', 'cheval', 'chevaux', 'poneys',
         'sangliers', 'chèvres'],
        ['pâturage', 'asin', 'bovin', 'ovin', 'équin', 'équin', 'équin',
         None, 'caprin'], regex=True)
def normalize_pratiques(prat):
    dic = {
        'pâturage equin': 'pâturage équin',
        'pâturage bovin': 'pâturage bovin',
        'paturâge cervidé': 'pâturage cervidé',
        'broyer': 'broyage',
        'truffier': 'broyage',
        'plantation': 'broyage',
        'culture': 'labourage',
        'abandon recent': 'abandon',
        'abandon verger': 'abandon',
        'remblais et feu': 'incertain',
        'remblais, feu': 'incertain',
        'caprin': 'pâturage caprin',
        'ovin': 'pâturage ovin',
        'équin': 'pâturage équin',
        'equin': 'pâturage équin',
        'bovin': 'pâturage bovin',
        'paturage': 'pâturage',
        'paturâge': 'pâturage',
        'f': 'fauche',
        'p': 'pâturage',
        'i': 'incertain',
        'a': 'abandon',
        'pv': 'pâturage',
        'b': 'broyage',
        'pc': 'pâturage caprin',
        'po': 'pâturage ovin',
        'pe': 'pâturage équin',
        'pb': 'pâturage bovin',
        '-': 'N.D',
        '0': 'N.D',
        'ND': 'N.D',
        'n.d': 'N.D',
        'chemin carrossable non goudronné': 'N.D',
        'plantation de châtaigniers': 'broyage',
    }
    # '.et' is a regex: any character followed by 'et' (e.g. ' et')
    p = prat.replace(['.et', '/'], [',', ', '], regex=True).copy()
    p = p.str.lower()
    p = normalize_paturage(p)
    p = p.replace(dic)
    p.fillna('N.D', inplace=True)
    p[p.str.contains(r'\?| ou ', na=False)] = 'incertain'
    pp = p.str.split(',', expand=True)
    pp.columns = 'p' + pp.columns.astype(str)
    for c in pp.columns:
        pp[c] = pp[c].str.strip()
        pp[c] = pp[c].replace(dic)
    return pp
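
# Illustrative call (hypothetical value): shorthand codes are expanded and the
# multi-practice string is split into one column per practice:
#   normalize_pratiques(pd.Series(['f,p']))  ->  p0 = 'fauche', p1 = 'pâturage'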
def select_ID(df, sch, tab):
    df = df.copy()
    sql = 'SELECT * FROM %s.%s' % (sch, tab)
    # df = df.copy().dropna(axis=1, how='all')
    df.replace("'", "''", regex=True, inplace=True)
    if 'geom' in df.columns:
        del df['geom']
    if 'date' in df.columns:
        df['date'] = df['date'].astype(str)
    elif 'date_deb' in df.columns:
        df['date_deb'] = df['date_deb'].astype(str)
    if 'id_cb' in df.columns:
        df['id_cb'] = df['id_cb'].astype(str)
    column = '(%s)' % ','.join(df.columns)
    df_tuple = tuple(df.itertuples(index=False, name=None))
    sql = sql + ' WHERE %s IN %s' % (column, df_tuple)
    # Crude textual clean-up of the generated SQL
    if 'nan' in sql:
        sql = sql.replace('nan', 'NULL')
    if ',)' in sql:
        sql = sql.replace(',)', ')')
    if '"' in sql:
        sql = sql.replace('"', "'")
    # print(sql)
    return pd.read_sql(sql=sql, con=pycen.con)
def insertAutorAttrs(df, colname_rsite, sch, tab):
    df = df[['id', 'auteur']].copy()
    if any(df.auteur.str.contains(',')):
        tmp = df.auteur.str.split(',', expand=True).stack().droplevel(-1)
        tmp = pd.DataFrame({'auteur': tmp})
        df = pd.merge(df[['id']], tmp, how='left', left_index=True, right_index=True)
    df_autor = get_id_auteur([*df.auteur.unique()])
    df.replace([*df_autor.auteur], [*df_autor.id], inplace=True)
    df.columns = [colname_rsite, 'id_auteur']
    try:
        df.to_sql(
            name=tab,
            con=pycen.con,
            schema=sch,
            index=False,
            if_exists='append',
            method='multi'
        )
        print('Insert author sites data OK !')
        return 'Insert author sites data OK !'
    except Exception as e:
        print('ERROR : new author insert impossible !')
        print(e)
def _altertype(df1, df2):
    same_col = df1.columns[df1.columns.isin(df2.columns)]
    if 'date' in df2.columns:
        df2['date'] = df2['date'].astype(str)
    if 'geom' in same_col:
        same_col = same_col.drop('geom')
    # Align the dtypes of the shared columns on df2's types
    for col in same_col:
        if df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == int:
            df1[col] = df1[col].astype(int)
        elif df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == float:
            df1[col] = df1[col].astype(float)
        elif df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == str:
            df1[col] = df1[col].astype(str)
    return [df1, df2]
def insert_authordata(df_ins, tab_data, id_data, colname_rsite, tab_auteur):
    df_ins = df_ins.copy()
    if 'date' in df_ins.columns:
        df_ins['date'] = df_ins['date'].astype(str)
    elif 'date_deb' in df_ins.columns:
        df_ins['date_deb'] = df_ins['date_deb'].astype(str)
    tab_sit = pycen.con.dialect.get_table_names(pycen.con, pycen.sites.schema)
    tab_ps = pycen.con.dialect.get_table_names(pycen.con, pycen.ps.schema)
    if tab_data in tab_sit:
        sch = pycen.sites.schema
    elif tab_data in tab_ps:
        sch = pycen.ps.schema
    else:
        raise ValueError('table inconnue : %s' % tab_data)
    # ids = select_ID(df_ins[df_ins.columns.drop('auteur')], sch, tab_data)
    ids = select_ID(df_ins[[id_data]], sch, tab_data)
    if 'date' in ids.columns:
        ids['date'] = ids['date'].astype(str)
    same_col = df_ins.columns[df_ins.columns.isin(ids.columns)]
    if 'geom' in same_col:
        same_col = same_col.drop('geom')
    df_ins, ids = _altertype(df_ins, ids)
    df_ins = df_ins[[*same_col, 'auteur']].merge(ids, on=[*same_col], how='left')
    insertAutorAttrs(df=df_ins, colname_rsite=colname_rsite, sch=sch, tab=tab_auteur)
def insert_site(df):
    df = df.copy()
    if 'nom' not in df.columns:
        df['nom'] = 'Inconnu'
    if 'type_site' not in df.columns:
        df['type_site'] = 'N.D.'
    if 'type_milieu' not in df.columns:
        df['type_milieu'] = 'Pelouses sèches'
    # if 'remarques' not in df.columns:
    #     df['remarques'] = None
    df = df[['id_site', 'nom', 'date', 'auteur', 'type_site', 'type_milieu',
             # 'remarques'
             ]]
    typs = pycen.sites._get_typ_site()
    typm = pycen.sites._get_typ_milieux()
    for t in df.type_milieu.unique():
        df.loc[df.type_milieu == t, 'id_type_milieu'] = typm.loc[typm.nom == t, 'id'].values[0]
    for s in df.type_site.unique():
        df.loc[df.type_site == s, 'id_type_site'] = typs.loc[typs.nom == s, 'id'].values[0]
    df['id_type_milieu'] = df['id_type_milieu'].astype(int)
    # df['id_type_site'] = df['id_type_site'].astype(str)
    df['dept'] = df.id_site.str[:2]
    df['org'] = df.id_site.str[2:-4]
    df['num'] = df.id_site.str[-4:]
    df.rename(columns={'date': 'date_deb', 'id_site': 'id'}, inplace=True)
    columns = [col['name'] for col in pycen.sites.columns_sitetab] + ['auteur']
    c = df.columns[df.columns.isin(columns)]
    df = df[c]
    print(df)
    try:
        df[df.columns.drop('auteur')].to_sql(
            name='sites',
            con=pycen.con,
            schema=pycen.sites.schema,
            if_exists='append',
            index=False,
            method='multi'
        )
        print('Insert new sites OK !')
        return [df, True]
    except Exception as e:
        print(e)
        print('PS : insert of new sites failed !')
        return [df, False]
def check_org_lot(lot):
    """
    :lot: list. List of data-lot labels.

    Returns
    -------
    DataFrame of the id_lot values matching `lot`, inserting any missing labels first.
    """
    tab_lot = pycen.sites._get_lots()
    not_intab = [l for l in lot if l not in [*tab_lot.libelle]]
    if not_intab:
        df = pd.DataFrame({'libelle': not_intab})
        df.to_sql(
            name='lots',
            con=con_ps,
            schema='sites',
            index=False,
            if_exists='append',
            method='multi',
        )
        tab_lot = pycen.sites._get_lots()
    tab_lot = tab_lot[tab_lot.libelle.isin(lot)]
    return tab_lot[['id_lot', 'libelle']]
def insert_geom(df):
    dic = {'remarques': 'rmq_interet_patri', 'legende': 'rmq_usage_process'}
    c = ['id_site', 'geom', 'auteur', 'date', 'remarques', 'legende', 'table_org', 'id_origine']
    cc = df.columns[df.columns.isin(c)]
    df = df[cc].copy()
    df.set_geometry('geom', inplace=True)
    colindict = [*df.columns[df.columns.isin(dic.keys())]]
    if colindict:
        df.rename(columns=dic, inplace=True)
    if 'table_org' in df.columns:
        tab = [*df.table_org.unique()]
        lots = check_org_lot(tab)
        df = pd.merge(df, lots, left_on='table_org', right_on='libelle', how='left')
        del df['table_org']
        del df['libelle']
    try:
        # to_postgis writes the active geometry column set above
        df[df.columns.drop('auteur')].to_postgis(
            name='r_sites_geom',
            con=pycen.con,
            schema=pycen.sites.schema,
            if_exists='append',
            index=False,
        )
        print('Insert new geom OK !')
        return [df, True]
    except Exception as e:
        print(e)
        print('PS : insert of new geom failed !')
        return [df, False]
def get_idGeomSite(lst_site):
    '''
    :lst_site: list or pd.Series
    '''
    id_geom = pycen.ps.get_sitesGeom(id_site=[*lst_site])
    if not id_geom.empty:
        id_geom = id_geom[['id', 'id_site']]
        id_geom.columns = ['id_geom_site', 'id_site']
    return id_geom
def insert_attrsPS(df):
    c = ['id_site', 'auteur', 'date', '%_embrous', 'niv_embrous', 'recouvmnt',
         'pratiques', 'statut']
    cc = df.columns[df.columns.isin(c)]
    df = df[cc].copy()
    df = pd.concat([df, normalize_pratiques(df['pratiques'])], axis='columns')
    del df['pratiques']
    for col in ['%_embrous', 'niv_embrous', 'recouvmnt']:
        df[col] = df[col].astype(str).replace([r'\.0'], [''], regex=True)
    df.niv_embrous.replace(['nan'], ['N.D'], inplace=True)
    df.recouvmnt.replace(['nan', 'None', '0'], ['N.D', 'N.D', 'N.D'], inplace=True)
    df.statut.replace(['ND'], ['N.D'], inplace=True)
    # for t in df.dtypes[df.dtypes == int].index:
    #     df.loc[~df[t].isna(), t] = df.loc[~df[t].isna(), t].astype(int).astype(str)
    if '%_embrous' in df.columns:
        taux = True
        df.niv_embrous = df.niv_embrous + ';' + df['%_embrous']
        del df['%_embrous']
    else:
        taux = False
    # Pivot to one row per (site, author, date, parameter)
    df.set_index(['id_site', 'auteur', 'date'], inplace=True)
    df = df.stack().reset_index(-1)
    df.columns = ['type', 'param']
    df.loc[df.type.str.len() < 4, 'type'] = 'pratique'
    if taux:
        df[['param', 'taux']] = df.param.str.split(';', expand=True)
        df.taux.replace(['nan'], [None], inplace=True)
    df.type.replace({'niv_embrous': 'embroussaillement', 'recouvmnt': 'recouvrement'},
                    inplace=True)
    df.param.replace({'ND': 'N.D'}, inplace=True)
    df.reset_index(inplace=True, drop=False)
    param = pycen.ps.get_param()
    typ_p = pycen.ps._get_table(con=pycen.con, schema=pycen.ps.schema, table='type_param')
    for t in df.type.unique():
        tmp = param[param.type == t]
        df.loc[df.type == t, 'param'] = df.loc[df.type == t, 'param'].replace([*tmp.nom], [*tmp.id])
    df['type'].replace([*typ_p.nom], [*typ_p.id], inplace=True)
    df = df.merge(get_idGeomSite(df.id_site), on=['id_site'], how='left')
    del df['id_site']
    del df['type']
    df.rename(columns={'param': 'id_param'}, inplace=True)
    try:
        df[df.columns.drop('auteur')].to_sql(
            name='r_site_param',
            con=pycen.con,
            schema=pycen.ps.schema,
            if_exists='append',
            index=False,
            method='multi'
        )
        print('Insert new attrs OK !')
        return [df, True]
    except Exception as e:
        print(e)
        print('PS : insert of new attrs failed !')
        return [df, False]
def split_codehab(lst_codehab):
    lst = lst_codehab.copy()
    lst = lst.str.split('x|,|&|/', expand=True) \
        .stack().str.strip() \
        .droplevel(-1).reset_index(drop=False)
    return lst

def format_codehab2insert(lst_codehab):
    lst = lst_codehab.copy()
    # Mark the separators, then split while keeping rank and separator
    lst.replace('x|,', '&', regex=True, inplace=True)
    lst.replace('&', '&;', regex=True, inplace=True)
    lst.replace('/', '/;', regex=True, inplace=True)
    lst = lst.str.split(';', expand=True).stack().str.strip().reset_index(-1)
    lst.columns = ['ordre', 'code_hab']
    lst.ordre = lst.ordre + 1
    lst.loc[lst.code_hab.str.contains('&'), 'sep'] = '&'
    lst.loc[lst.code_hab.str.contains('/'), 'sep'] = '/'
    lst.code_hab = lst.code_hab.replace('&|/', '', regex=True).str.strip()
    suspect = lst.code_hab.str.split('(', expand=True)
    if suspect.shape[1] > 1:
        raise Exception('habitat suspecté en développement')
    else:
        suspect.columns = ['code_hab']
        del suspect['code_hab']
    lst = pd.merge(lst, suspect, right_index=True, left_index=True)
    return lst
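
# Illustrative call (hypothetical value):
#   format_codehab2insert(pd.Series(['34.32 & 38.1']))
# -> two rows: (ordre=1, code_hab='34.32', sep='&') and (ordre=2, code_hab='38.1')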
def insert_cbPS(df):
    df = df.copy()
    df.date = df.date.astype(str)
    df.code_hab = df.code_hab.replace([r'\*'], [''], regex=True).str.strip()
    ids = select_ID(df[df.columns.drop(['code_hab', 'auteur'])],
                    sch=pycen.ps.schema, tab='r_site_habitat')
    same_col = df.columns[df.columns.isin(ids.columns)]
    df, ids = _altertype(df, ids)
    df = df.merge(ids, on=[*same_col], how='left')
    df = df[['id', 'code_hab']].copy()
    df.set_index('id', inplace=True)
    # df = df.code_hab.str.split('x|,|&', expand=True) \
    #     .stack().str.strip() \
    #     .droplevel(-1).reset_index(drop=False)
    # df = split_codehab(df.code_hab)
    # df.columns = ['id_sitehab', 'code_hab']
    df = format_codehab2insert(df.code_hab)
    try:
        df.to_sql(
            name='r_hab_cb',
            con=pycen.con,
            schema=pycen.ps.schema,
            if_exists='append',
            index=False,
            method='multi'
        )
        print('Insert new habitat codes OK !')
    except Exception as e:
        print(e)
        print('PS : insert of new habitat codes failed !')
def insert_habPS(df):
    c = ['id_site', 'auteur', 'date']
    cc = df.columns[(df.columns.isin(c)) | (df.columns.str.contains('code_hab|n_hab'))]
    df = df[cc].copy()
    cc = df.columns[~df.columns.str.contains('hab')]
    df.set_index([*cc], inplace=True)
    # df.set_index(['id_site','auteur','date'], inplace=True)
    # for t in df.dtypes[df.dtypes == int].index:
    #     df[t] = df[t].astype(str)
    # Unpivot the hab_N / code_habN / n_habN columns into one row per habitat
    df = df.stack().reset_index(-1)
    df.columns = ['param', 'value']
    df[['param', 'index', 'other']] = [re.split(r'(\d+)', s) for s in [*df.param]]
    del df['other']
    df['index'] = df['index'].astype(int)
    df['value'] = df['value'].replace(['-'], [None])
    df.dropna(subset=['value'], inplace=True)
    df = df.set_index(['index', 'param'], append=True).unstack()
    df = df.droplevel(0, axis=1)
    df.code_hab.replace(['0'], [None], inplace=True)
    df.dropna(subset=['code_hab'], inplace=True)
    df['n_hab'] = df['n_hab'].astype(int)
    df.reset_index(inplace=True, drop=False)
    df = df.merge(get_idGeomSite(df.id_site), on=['id_site'], how='left')
    del df['id_site']
    try:
        df[df.columns.drop(['auteur', 'code_hab'])].to_sql(
            name='r_site_habitat',
            con=pycen.con,
            schema=pycen.ps.schema,
            if_exists='append',
            index=False,
            method='multi'
        )
        print('Insert new habitats OK !')
        return [df, True]
    except Exception as e:
        print(e)
        print('PS : insert of new habitats failed !')
        return [df, False]
def insert_legendCartoPS(df):
    c = ['id_site', 'auteur', 'date', 'leg_carto']  # ,'legende']
    cc = df.columns[df.columns.isin(c)]
    df = df[cc].copy()
    param = pycen.ps.get_listLegendePS()
    df['id_param_leg'] = df['leg_carto'].replace([*param.nom_court], [*param.id])
    df = df.merge(get_idGeomSite(df.id_site), on=['id_site'], how='left')
    del df['id_site']
    del df['leg_carto']
    try:
        df[df.columns.drop('auteur')].to_sql(
            name='r_site_legcarto',
            con=pycen.con,
            schema=pycen.ps.schema,
            if_exists='append',
            index=False,
            method='multi'
        )
        print('Insert new legend entries OK !')
        return [df, True]
    except Exception as e:
        print(e)
        print('PS : insert of new legend entries failed !')
        return [df, False]
def filter_saisierror(df):
    tmp = df.copy()
    lst_iderror = []
    del tmp['geom']
    cc = tmp.columns[~tmp.columns.str.contains('hab')]
    tmp.set_index([*cc], inplace=True)
    tmp = tmp.stack().reset_index(-1)
    tmp.columns = ['param', 'value']
    tmp[['param', 'index', 'other']] = [re.split(r'(\d+)', s) for s in [*tmp.param]]
    del tmp['other']
    tmp['index'] = tmp['index'].astype(int)
    tmp['value'] = tmp['value'].replace(['-'], [None])
    tmp.dropna(subset=['value'], inplace=True)
    tmp = tmp.set_index(['index', 'param'], append=True).unstack()
    tmp = tmp.droplevel(0, axis=1)
    tmp.reset_index(inplace=True, drop=False)
    lst_error1 = []
    if 'hab_' in tmp.columns:
        tmp.hab_.replace(
            ['Xerobromenion erecti',
             'Mesobromion',
             'Carpino betuli prunion spinosae', 'Carpino-betuli prunion spinosae'],
            ['Xerobromion erecti',
             'Mesobromion erecti',
             'Carpino betuli-prunion spinosae', 'Carpino betuli-prunion spinosae'],
            inplace=True)
        # Flag sites whose habitat label and CORINE code disagree too often
        for hab in tmp.hab_.sort_values().unique():
            print(hab)
            print(tmp[tmp.hab_ == hab].code_hab.unique())
            for cod in tmp[tmp.hab_ == hab].code_hab.unique():
                perc = tmp[(tmp.hab_ == hab) & (tmp.code_hab == cod)].code_hab.count() \
                    / tmp[tmp.hab_ == hab].code_hab.count()
                if perc < 0.6:
                    if hab == 'Mesobromion erecti' and cod in ['34.322', '34.323']:
                        continue
                    print('\t' + cod + ' : ' + perc.astype(str))
                    lst_error1 = [*lst_error1,
                                  *tmp[(tmp.hab_ == hab) & (tmp.code_hab == cod)]
                                  .sort_values('id_site').id_site.unique()]
        df[df.id_site.isin(lst_error1)].to_postgis(
            name='incoherence_habVScodehab',
            con=con,
            schema=schema,
            if_exists='replace',
            index=False,
            index_label='id_site',
        )
    cb = pycen.ref_hab().get_CB()
    tmp.code_hab = tmp.code_hab.replace([r'\*'], [''], regex=True).str.strip()
    tmp.code_hab = tmp.code_hab.replace(['0'], [None])
    tmp.dropna(subset=['code_hab'], inplace=True)
    tmpp = tmp[['id_site', 'code_hab']].copy()
    tmpp.set_index('id_site', inplace=True)
    # if any(tmpp.code_hab.str.contains('x|,|&')):
    #     tmpp = tmpp.code_hab.str.split('x|,|&', expand=True).stack().str.strip().droplevel(-1)
    tmpp = split_codehab(tmpp.code_hab)
    tmpp.columns = ['id_site', 'code_hab']
    # Sites whose codes are absent from the CORINE Biotope reference table
    tmpp = tmpp[~tmpp.code_hab.isin(cb.id)]
    lst_error2 = [*tmpp.sort_values('id_site').id_site.unique()]
    lst_error2 = [*lst_error2, *tmp[tmp.n_hab.isna()].sort_values('id_site').id_site.unique()]
    df[df.id_site.isin(lst_error2)].to_postgis(
        name='PB_codehabCBNA_nonPresent_dans_corineBiotope',
        con=con,
        schema=schema,
        if_exists='append',
        index=False,
        index_label='id_site',
    )
    lst_iderror = [*lst_error1, *lst_error2]
    lst_iderrorindex = df[df.id_site.isin(lst_iderror)].index
    df.drop(lst_iderrorindex, inplace=True)
    return df.sort_values('id_site')
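
# Illustrative behaviour (hypothetical data): a polygon labelled 'Mesobromion
# erecti' whose CORINE code appears on fewer than 60 % of that habitat's rows is
# exported to the 'incoherence_habVScodehab' layer and dropped from the result.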
if __name__ == "__main__":
tutu = pd.DataFrame()
for table in lst_tab:
sql = "SELECT * FROM {sch}.{tab}".format(sch=schema,tab=table)
df = gpd.read_postgis(
sql = sql,
con = con
)
if 'id' in df.columns and 'n_polygone' in df.columns:
df['id'] = df['n_polygone']
del df['n_polygone']
elif 'id' in df.columns and 'polygon' in df.columns:
df['id'] = df['polygon']
del df['polygon']
elif 'id' in df.columns and 'ident' in df.columns:
df['id'] = df['ident']
del df['ident']
df = normalize_colname(df)
if 'remarques' in df.columns and 'legende' not in df.columns:
df['legende'] = df.remarques
# df.rename(columns={'id':'id_site'}, inplace=True)
if table == '"PS_VERCORS_CEN38_2011"':
df = df[df.date != '?'].copy()
df = format_date(df)
# df.legende = format_legende(df.legende)
if table == '"PS_CHAMBARAN_CEN38_2013"':
df.type_patur = normalize_paturage(df.type_patur)
df[['p1','p2']] = df.pratiques.str.split('/|.et.',expand=True)
df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'p1'] = \
df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'p1'] + ' ' + \
df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'type_patur']
df.loc[(~df.type_patur.isna()) & df.p2.str.lower()=='pâturage', 'p2'] = \
df.loc[(~df.type_patur.isna()) & df.p2.str.lower()=='pâturage', 'p2'] + ' ' + \
df.loc[(~df.type_patur.isna()) & df.p2.str.lower()=='pâturage', 'type_patur']
df['pratiques'] = df.p1
df.loc[~df.p2.isna(),'pratiques'] = df.p1 + '/' + df.p2
df.drop(columns=['type_patur','p1','p2'], inplace=True)
df['table_org'] = table[1:-1]
# df['table_org'] = table
tutu = pd.concat([tutu,df])
c = ['ident','id_origine','geom','auteur','hab_1','code_hab1','n_hab1',
'hab_2','code_hab2','n_hab2','hab_3','code_hab3','n_hab3',
'hab_5','code_hab5','n_hab5','hab_6','code_hab6','n_hab6',
'hab_7','code_hab7','n_hab7','hab_8','code_hab8','n_hab8',
'hab_9','code_hab9','n_hab9','hab_10','code_hab10','n_hab10',
'hab_11','code_hab11','n_hab11','hab_12','code_hab12','n_hab12',
'hab_13','code_hab13','n_hab13','hab_14','code_hab14','n_hab14',
'hab_4','code_hab4','n_hab4','pratiques','niv_embrous',
'recouvmnt','remarques','phyto','statut','h_remarq','legende',
'leg_carto','date','annee','source','%_embrous','surf_emb',
# 'org','dept','num',
'table_org'
]
cols = tutu.columns[tutu.columns.isin(c)]
df = tutu[cols].copy()
df = define_siteName(df)
df = df[~df.id_site.isna()].copy()
df.sort_index(inplace=True)
df['auteur'] = normalize_auteur(df.auteur) # ERROR : Longueur de sortie !=
df = filter_saisierror(df)
# for d in df.dtypes[df.dtypes == object].index:
# # df[d].replace([' '], [' '],regex=True,inplace=True)
# df[d] = df[d].str.normalize('NFKC')
DF = df.copy()
# df = define_siteName(df)
# tmp = df.copy()
print(df)
    while True:
        res = input('Voulez-vous insérer le tableau de données ? (Y/N)\n')
        if res[0].upper() not in ['Y', 'N']:
            continue
        else:
            break
    if res[0].upper() == 'Y':
        df_ins, ins = insert_site(df)
        if ins:
            insert_authordata(df_ins, id_data='id', tab_data='sites',
                              colname_rsite='id_site', tab_auteur='r_sites_auteur')
        df_ins, ins = insert_geom(df)
        if ins:
            insert_authordata(df_ins, id_data='id_site', tab_data='r_sites_geom',
                              colname_rsite='id_geom_site', tab_auteur='r_geomsites_auteur')
        df_ins, ins = insert_attrsPS(df)
        if ins:
            insert_authordata(df_ins, id_data='id_geom_site', tab_data='r_site_param',
                              colname_rsite='id_siteparam', tab_auteur='r_siteparam_auteur')
        df_ins, ins = insert_habPS(df)
        if ins:
            # Split large frames in two so the generated "WHERE ... IN" query stays manageable
            if df_ins.shape[0] > 4000:
                df_ins1 = df_ins.iloc[:round(df_ins.shape[0] / 2)].copy()
                df_ins2 = df_ins.iloc[round(df_ins.shape[0] / 2):].copy()
                insert_cbPS(df_ins1)
                insert_cbPS(df_ins2)
            else:
                insert_cbPS(df_ins)
            insert_authordata(
                df_ins[df_ins.columns.drop('code_hab')], id_data='id_geom_site',
                tab_data='r_site_habitat',
                colname_rsite='id_sitehab', tab_auteur='r_sitehab_auteur')
        # df_ins, ins = insert_legendCartoPS(df)
        # if ins:
        #     insert_authordata(df_ins, tab_data='r_site_legcarto',
        #                       colname_rsite='id_sitecarto', tab_auteur='r_sitecarto_auteur')
    table = ''
    # NOTE: df2 is expected to hold a manually loaded copy of the CBNA table;
    # it is not defined anywhere in this script.
    df = df2.copy()
    col_hab = df.columns[df.columns.str.contains('hab')]
    c = ['ident', 'id_origine', 'geom', 'auteur', *col_hab,
         # 'hab_1', 'code_hab1', 'n_hab1',
         # 'hab_2', 'code_hab2', 'n_hab2', 'hab_3', 'code_hab3', 'n_hab3',
         # 'hab_4', 'code_hab4', 'n_hab4',
         'pratiques', 'niv_embrous',
         'recouvmnt', 'remarques', 'phyto', 'statut', 'h_remarq', 'legende',
         'leg_carto', 'date', 'annee', 'source', '%_embrous', 'surf_emb',
         'table_org'
         ]
    df.reset_index(drop=False, inplace=True)
    df = normalize_colname(df)
    df = format_date(df)
    # df.legende = format_legende(df.legende)
    cols = df.columns[df.columns.isin(c)]
    df = df[cols].copy()
    df = define_siteName(df)
    df['auteur'] = normalize_auteur(df.auteur)
    df.set_index('id_site', inplace=True)
    df.dropna(axis=1, how='all', inplace=True)
    # Export then drop the rows whose habitat codes look mistyped ('/' or '(')
    col_hab = df.columns[df.columns.str.startswith('code_hab')]
    rm = pd.DataFrame()
    for col in col_hab:
        tmp = df[df[col].str.contains(r'/|\(', na=False)]
        if not tmp.empty:
            rm = pd.concat([rm, tmp])
    rm.to_postgis(
        'CBNA_codehab_error',
        con,
        schema='pelouse_seche',
        if_exists='replace', index=True,
    )
    df.drop(rm.index, inplace=True)
# filter_saisierror(df)
# if 'id_site' not in df.columns:
# df.rename(columns={
# 'id':'id_site',},
# inplace=True)
# df.sort_values('id_site', inplace=True)
# df_site = df[['id_site', 'auteur', 'commune','lieu_dit', 'date']].copy()
# if 'sources' in df.columns:
# df_site = df[['id_site', 'auteur', 'sources', 'commune','lieu_dit', 'date']].copy()
# df_autre = df[['id_site', 'h_remarq', 'remarques']].copy()
# # sql = ('SELECT t1.nom nom, t1.prenom prenom, t2.nom organisme FROM {sch}.{tab1} t1'
# # ' JOIN {sch}.{tab2} t2 ON t1.id_organisme = t2.id').format(sch='personnes', tab1='personne', tab2='organisme')
# # df_pers = pd.read_sql(
# # sql = sql,
# # con = con_ps
# # )
# df_pers = get_pers()
# tmp = df_site[['auteur']].copy()
# if 'sources' in df_site.columns:
# tmp = df_site[['auteur', 'sources']].copy()
# tmp[['prenom', 'nom']] = df_site['auteur'].str.split(' ', expand=True)
# tmp.drop_duplicates(inplace=True)
# pers_add = tmp.loc[~tmp.nom.isin(df_pers.nom),['prenom', 'nom']]
# if 'sources' in df_site.columns:
# pers_add = tmp.loc[~tmp.nom.isin(df_pers.nom),['prenom', 'nom', 'sources']]
# if not pers_add.empty:
# pers_add.replace(
# ['CEN Isere AVENIR'],
# [1],
# inplace=True
# )
# pers_add.rename(columns={'sources':'id_organisme'}, inplace=True)
# pers_add['id'] = max(df_pers.index)+1
# pers_add.set_index('id', inplace=True)
# pers_add.to_sql(
# name='personne',
# con=con_ps,
# schema='personnes',
# if_exists='append',
# index=True,
# index_label='id'
# )
# df_pers = get_pers()
# # SITE
# df_site['nom'] = 'Inconnu'
# df_site['date_deb'] = pd.to_datetime(df_site.date)
# df_site['id_type_milieu'] = 2
# df_site['id_type_site'] = 0
# df_site['dept'] = 38
# df_site['org'] = df_site.id_site.str[:2]
# df_site['num'] = df_site.id_site.str[2:]
# df_site['id_auteur'] = df_site['auteur'].copy()
# df_site['id'] = df_site['id_site'].copy()
# df_site.id_auteur.replace(
# df_pers.auteur.tolist(),
# df_pers.index.tolist(),
# inplace=True)
# df_site.drop(
# columns=['id_site','auteur','sources','date','commune','lieu_dit'],
# inplace=True)
# if not isin_bdd:
# df_site.to_sql(
# name='sites',
# con=con_ps,
# schema='sites',
# if_exists='append',
# index=False,
# # index_label='id'
# )
# # df_geom
# df_geom = df[['id_site', 'auteur', 'geom', 'date']].copy()
# df_geom['date'] = pd.to_datetime(df_geom.date)
# df_geom['id_auteur'] = df_geom['auteur'].copy()
# df_geom.id_auteur.replace(
# df_pers.auteur.tolist(),
# df_pers.index.tolist(),
# inplace=True)
# df_geom.drop(
# columns=['auteur'],
# inplace=True)
# df_geom.set_geometry('geom', crs='EPSG:2154', inplace=True)
# df_geom.reset_index(drop=True, inplace=True)
# if not isin_bdd:
# df_geom.to_postgis(
# name='r_sites_geom',
# con=con_ps,
# schema='sites',
# if_exists='append',
# index=False,
# geom_col='geom'
# # index_label='id'
# )
# # HABITAT
# df_hab = df[['id_site', 'code_hab1','n_hab1','code_hab2','n_hab2','code_hab3','n_hab3','code_hab4','n_hab4']].copy()
# df_hab1 = df[['id_site', 'code_hab1','n_hab1']].copy()
# df_hab2 = df[['id_site', 'code_hab2','n_hab2']].copy()
# df_hab3 = df[['id_site', 'code_hab3','n_hab3']].copy()
# df_hab4 = df[['id_site', 'code_hab4','n_hab4']].copy()
# df_hab1.columns = ['id_site', 'code_hab', 'n_hab']
# df_hab2.columns = ['id_site', 'code_hab', 'n_hab']
# df_hab3.columns = ['id_site', 'code_hab', 'n_hab']
# df_hab4.columns = ['id_site', 'code_hab', 'n_hab']
# df_hab1['index'] = 1
# df_hab2['index'] = 2
# df_hab3['index'] = 3
# df_hab4['index'] = 4
# df_habF = pd.concat([df_hab1,df_hab2,df_hab3,df_hab4])
# df_habF.reset_index(drop=True, inplace=True)
# idgeom = get_idgeomsite(tuple(df_hab.id_site)) \
# .rename(columns={'id':'id_geom_site'})
# idgeom.drop_duplicates(['id','id_site'],keep='last',inplace=True)
# df_habF['id_geom_site'] = df_habF['id_site'].copy()
# df_habF.id_geom_site.replace(
# idgeom.id_site.tolist(),
# idgeom.id.tolist(),
# inplace=True)
# df_habF.replace(['-'],[None], inplace=True)
# df_habF.dropna(subset=['code_hab'],inplace=True)
# df_cb = pd.read_sql_table(
# table_name='corine_biotope',
# schema='ref_habitats',
# con = con_ps)
# df_rSitHab = df_habF \
# .drop(columns=['id_site', 'code_hab'])
# if not isin_bdd:
# df_rSitHab.to_sql(
# name='r_site_habitat',
# con=con_ps,
# schema='ps',
# if_exists='append',
# index=False,
# method='multi',
# # index_label='id'
# )
# sql = ('SELECT * FROM {sch}.{tab} '
# 'WHERE id_geom_site IN {lst} ORDER BY id').format(sch='ps', tab='r_site_habitat', lst=tuple(df_rSitHab.id_geom_site))
# index_return = pd.read_sql(
# sql = sql,
# con = con_ps) \
# .rename(columns={'id':'id_site_hab'})
# df_habF = pd.merge(df_habF, index_return, on=['id_geom_site','n_hab','index'], how='left')
# df_rHabCb = df_habF[['id_site_hab', 'code_hab']]
# if not isin_bdd:
# df_rHabCb.to_sql(
# name='r_hab_cb',
# con=con_ps,
# schema='ps',
# if_exists='append',
# index=False,
# method='multi',
# # index_label='id'
# )
# # parametre
# columns_select = ['id_site','statut','pratiques','niv_embro','recouvmnt']
# df_Sparam = df[columns_select].copy()
# if 's_p_brouss' in df.columns:
# df_Sparam = df[columns_select + ['s_p_brouss','surf_emb']].copy()
# df_Sparam = pd.merge(df_Sparam, idgeom[['id_geom_site','id_site']],how='left', on='id_site')
# df_prm = get_param()
# df_stt = df_Sparam[['id_site','id_geom_site','statut']].copy()
# df_prt = df_Sparam[['id_site','id_geom_site','pratiques']].copy()
# df_rcv = df_Sparam[['id_site','id_geom_site','recouvmnt']].copy()
# df_brs = df_Sparam[['id_site','id_geom_site','niv_embro']].copy()
# if 's_p_brouss' in df_Sparam.columns:
# df_brs = df_Sparam[['id_site','id_geom_site','niv_embro','s_p_brouss']].copy()
# if not df_brs.empty:
# param = df_prm[df_prm.type_param=='embroussaillement'].copy()
# # if not (df_brs.s_p_brouss.unique() < 3).all():
# # # Conversion des pourcentages en identifiant de fourchette
# # param[['min', 'max']] = param.desc.str.split(' à ',expand=True)
# # param.loc[param['min'].str.contains('<'),'max'] = param.loc[param['min'].str.contains('<'),'min']
# # param.loc[param['min'].str.contains('<'),'min'] = '0'
# # param.loc[param['min'].str.contains('>'),'max'] = '100'
# # param[['min', 'max']] = param[['min', 'max']].replace(r'[\%\<\>]','',regex=True).astype(int)
# # df_brs['tx_brouss'] = df_brs['s_p_brouss'].copy()
# # for r,row in param.iterrows():
# # df_brs.loc[(df_brs.tx_brouss > row['min']) & (df_brs.tx_brouss <= row['max']), 's_p_brouss'] = row.param
# df_brs.niv_embro = df_brs.niv_embro.replace(list(param.param.astype(int)), param.id.tolist())
# df_brs.rename(columns={'niv_embro':'id_param','s_p_brouss':'taux'},inplace=True)
# if not df_rcv.empty:
# param = df_prm[df_prm.type_param=='recouvrement']
# df_rcv.recouvmnt = df_rcv.recouvmnt.replace(list(param.param.astype(int)), param.id.tolist())
# df_rcv.rename(columns={'recouvmnt':'id_param'},inplace=True)
# if not df_prt.empty:
# param = df_prm[df_prm.type_param=='pratique']
# df_prt.pratiques = df_prt.pratiques.str.lower()
# df_prt.pratiques.replace(['paturage'],['pâturage'], regex=True, inplace=True)
# # dissociation des multi-pratiques
# d1 = df_prt.loc[df_prt.pratiques.str.contains('et'),].copy()
# d1[['p1', 'p2']] = d1['pratiques'].str.split(' et ', expand=True)
# d2 = df_prt.loc[df_prt.pratiques.str.contains('ou'),].copy()
# d2[['p1', 'p2']] = d2['pratiques'].str.split(' ou ', expand=True)
# d = pd.concat([d1,d2])
# # Uniformisation des champs p1 et p2 avant ajout de "?"
# d.loc[(d.pratiques.str.contains('\?')),['p1','p2']] = d.loc[(d.pratiques.str.contains('\?')),['p1','p2']].replace(' \?', '', regex=True)
# # Ajout de "?"
# d.loc[(d.pratiques.str.contains('\?|ou')),['p1','p2']] = d.loc[(d.pratiques.str.contains('\?|ou')),['p1','p2']] + ' ?'
# dd = pd.concat([
# d[['id_site','id_geom_site','p1']].rename(columns={'p1':'pratiques'}),
# d[['id_site','id_geom_site','p2']].rename(columns={'p2':'pratiques'})
# ]).sort_index()
# df_prt= pd.concat([
# df_prt.drop(index=dd.index.unique(),axis=0),
# dd
# ])
# df_prt.loc[
# df_prt.pratiques.str.contains('caprin|bovin|ovin|equin') &
# (~df_prt.pratiques.str.contains('pâturage')), ['pratiques']
# ] = 'pâturage ' + df_prt.loc[
# df_prt.pratiques.str.contains('caprin|bovin|ovin|equin') &
# (~df_prt.pratiques.str.contains('pâturage')), ['pratiques'] ]
# df_prt.pratiques = df_prt.pratiques.replace(list(param.param), param.id.tolist())
# df_prt.rename(columns={'pratiques':'id_param'},inplace=True)
# if not df_stt.empty:
# param = df_prm[df_prm.type_param=='statut']
# df_stt.statut = df_stt.statut.replace(list(param.param), param.id.tolist())
# df_stt.rename(columns={'statut':'id_param'},inplace=True)
# df_SparamF = pd.concat([df_stt,df_prt,df_rcv,df_brs])
# if not isin_bdd:
# df_SparamF.drop(columns=['id_site']).to_sql(
# name='r_site_param',
# con=con_ps,
# schema='ps',
# if_exists='append',
# index=False,
# method='multi',
# # index_label='id'
# )
# # Legende Carto !
# df_leg = pd.DataFrame()
# if 'legende' in df.columns and 'leg_carto' in df.columns:
# df_leg = df[['id_site', 'leg_carto', 'legende']].copy()
# df_leg.legende = df_leg.legende.replace(r'[\d+\.\|\(\)]','',regex=True).str.strip()
# df_leg.legende = df_leg.legende.replace(' ',' ',regex=True)
# df_leg[~df_leg.leg_carto.eq(df_leg.legende)]
# elif 'leg_carto' in df.columns and 'legende' not in df.columns :
# df_leg = df[['id_site', 'leg_carto']].copy()
# elif 'legende' in df.columns and 'legende' not in df.columns :
# df_leg = df[['id_site', 'legende']].copy()
# if 'legende' in df.columns:
# df_leg.legende = df_leg.legende.replace(r'[\d+\.\|\(\)]','',regex=True).str.strip()
# df_leg.legende = df_leg.legende.replace(' ',' ',regex=True)
# df_leg = pd.merge(df_leg, idgeom[['id_geom_site','id_site']],how='left', on='id_site')
# # ...
# # ...
# # ...
# if not isin_bdd:
# df_leg.drop(columns=['id_site']).to_sql(
# name='r_site_param',
# con=con_ps,
# schema='ps',
# if_exists='append',
# index=False,
# method='multi',
# # index_label='id'
# )
# lst_tab = ['"PS_4MONTAGNESNE_CEN38_2014"','"PS_CHAMBARAN_CEN38_2013"','"PS_CHARTREUSE_CEN38_2010"','"PS_DRAC_CEN38_2014"',
# '"PS_BELLEDONNE_CEN38_2014"','"PS_BIEVRE_CEN38_2014"','"PS_BIEVRE_CEN38_2016"','"PS_SUD-GRENOBLOIS_CEN38_2009"',
# '"PS_VERCORS_CEN38_2011"','"cr_PS_CBNA_habitats_aggreg_06_2020"','"c_ps_inventaire_bievre_valloire"'
# ]
# df = pd.DataFrame()
# for tab in lst_tab:
# sql = "SELECT * FROM {sch}.{tab}".format(sch=schema,tab=tab)
# tmp = gpd.read_postgis(
# sql = sql,
# con = con
# )
# df = pd.concat([df,tmp[['pratiques']] ])
# df.pratiques = df.pratiques.replace(['paturage', 'Paturage', 'Paturâge'],['pâturage', 'Pâturage', 'Pâturage'], regex=True)
# df.pratiques = df.pratiques.str[0].str.upper() + df.pratiques.str[1:]
# df.drop_duplicates(inplace=True)
# df.dropna(inplace=True)
# df.to_excel('~/Documents/tmp/pratiques_pelouses_seches.xlsx', index=False)
# ######################################
# ###### UPDATE EMBROUSSAILLEMENT ######
# ######################################
# from pycen import update_to_sql
# brs = df_brs.copy()
# sql = ('SELECT id,id_geom_site FROM {sch}.{tab} '
# 'WHERE id_geom_site IN {gsit} '
# 'AND id_param IN {param};').format(
# sch='ps', tab='r_site_param',
# gsit=tuple(df_brs.id_geom_site.unique()),
# param=tuple(df_brs.id_param.unique()) )
# data = pd.read_sql(sql=sql, con=con_ps)
# brs = pd.merge(brs,data, on='id_geom_site').drop(columns=['id_site'])
# update_to_sql(brs,con_ps,table_name='r_site_param',schema_name='ps',key_name=['id','id_geom_site'])
# # sql = 'SELECT *, ST_ClusterDBSCAN(geom::geometry, eps := 1000, minpoints := 1) over () AS cluster_id FROM pelouse_seche.c_ps_inventaire_agreg'
# # df = gpd.read_postgis(sql,con)
# # df.sort_values('cluster_id', inplace=True)
# # # df2 = gpd.GeoDataFrame({'geom':[]},geometry='geom', crs='EPSG:2154')
# # df2 = gpd.GeoSeries()
# # for i in df.cluster_id.unique().tolist():
# # print(i)
# # tmp = gpd.GeoSeries(df.loc[df.cluster_id == i, 'geom'].unary_union)
# # df2 = df2.append(tmp)
# # df2 = gpd.GeoDataFrame({'geom':df2},geometry='geom', crs='EPSG:2154')
# # df3 = df2.copy()
# # df3 = df3.buffer(500)
# # df3.to_file('/home/colas/Documents/tmp/cluster_500.shp')
# # df2.to_file('/home/colas/Documents/tmp/cluster_withoutbuffer.shp')