228 lines
9.3 KiB
Python
228 lines
9.3 KiB
Python
# 2024-11-26
|
|
# Le script permet d'intégrer de nouvelles géométries
# dans la table sites.r_sites_geom. Les auteurs ont été
# intégrés manuellement.
|
|
|
|
from os import path

import geopandas as gpd
from matplotlib import pyplot as plt
from shapely import wkb
from sqlalchemy import bindparam, create_engine, text
from sqlalchemy.engine import URL

import pycen
from pycen import con, zh as ZH
from pycen.tools import dropZ, Polygons_to_MultiPolygon
|
|
|
|
# isin_bdd = True
|
|
# # Parametres bdd CEN38 OUT
|
|
# user = 'cen_admin'
|
|
# pwd = "#CEN38@venir"
|
|
# adr = '91.134.194.221'
|
|
# base = 'azalee_restore'
|
|
# url = URL.create("postgresql+psycopg2", username=user, password=pwd, host=adr, database=base)
|
|
# con = create_engine(url)
|
|
|
|
|
|
# Load the geometry rows of the sites whose id_type_milieu is 1 and keep,
# for each site, the last row once sorted by `date`.
zh = ZH()
# NOTE(review): the original also ran `r_geom = zh.get_sitesGeom()` here, but
# its result was overwritten by the query below before ever being read, so the
# call was dropped. Restore it if it is relied on for a side effect.
r_geom = gpd.read_postgis('''
SELECT r_sites_geom.* FROM sites.r_sites_geom
JOIN sites.sites ON sites.id = r_sites_geom.id_site AND sites.id_type_milieu = 1
''',con)
# Remove the columns that are NULL on every row.
r_geom.dropna(how="all",axis=1,inplace=True)
# errors='ignore': the dropna above has already removed any of these columns
# that is entirely NULL, which would otherwise raise a KeyError here.
r_geom.drop(
    columns=['id','date_insert','id_origine','id_lot'],
    inplace=True,
    errors='ignore',
)
r_geom.sort_values(['id_site','date'],inplace=True)
r_geom.drop_duplicates(['id_site'],keep='last',inplace=True)
|
|
|
|
|
|
# Read the habitat layer and normalise it: geometry column named 'geom',
# 2D coordinates only, and no column that is empty on every row.
PATH = '/home/colas/Documents/9_PROJETS/1_ZH/MAJ/Actu 2024/CBNA/zh38'
file = 'hab_agreg.gpkg'
df = gpd.read_file(path.join(PATH, file))
df = df.rename_geometry('geom')
# Round-trip each geometry through 2D WKB so that any Z coordinate is dropped.
df['geom'] = [
    wkb.loads(wkb.dumps(shape, output_dimension=2))
    for shape in df['geom']
]
df = df.dropna(how='all', axis=1)
|
|
|
|
# gpd.GeoSeries([r_geom.unary_union],crs=2154).contains(df.geom)
|
|
# df.contains(r_geom.geom).any()
|
|
# res = df.within(r_geom.unary_union)
|
|
|
|
# For the rows flagged 'caractéristique', total `n06rechab` per final polygon
# (`idpolyfinal`), then list the polygons whose total reaches 75.
is_caract = df['zh'] == 'caractéristique'
zh_caract = df.loc[is_caract].copy()
zh_caract['surfzhall'] = (
    zh_caract.groupby('idpolyfinal')['n06rechab'].transform('sum')
)
reaches_75 = zh_caract['surfzhall'] >= 75
lst_idpoly = zh_caract.loc[reaches_75, 'idpolyfinal'].unique()
|
|
# zh_caract.within(r_geom.unary_union)
|
|
# zh_caract.contains(r_geom.unary_union).any()
|
|
|
|
# No link to a wetland, after 2015 - all habitat types.
# (A `zh == 'caractéristique'` condition exists in the original but is
# commented out.)
no_link = df['lien_zh'] == 'aucun'
after_2015 = df['n07anneehab'] > 2015
in_kept_polys = df['idpolyfinal'].isin(lst_idpoly)
is_invasive = df['n05lbhab'].str.contains('invasif', na=False, case=False)
no_inters = df.loc[no_link & after_2015 & in_kept_polys & ~is_invasive].copy()

# Replace the layer's own id_site with the id_site of the site geometry each
# row is spatially joined to (sjoin defaults are used).
site_geoms = r_geom[['id_site', 'geom']]
no_inters = no_inters.drop(columns=['id_site']).sjoin(site_geoms)
no_inters.to_file(
    path.join(PATH, 'inters.gpkg'),
    driver='GPKG',
    layer='no_inters75',
)
# Interactive leftover: number of distinct sites (discarded when run as a script).
len(no_inters['id_site'].unique())
|
|
|
|
# Intersecting a wetland, after 2015 - all habitat types.
# (Conditions on `zh == 'caractéristique'` and on `n06rechab > 45` exist in
# the original but are commented out.)
does_intersect = df['lien_zh'] == 'intersecte'
after_2015 = df['n07anneehab'] > 2015
in_kept_polys = df['idpolyfinal'].isin(lst_idpoly)
is_invasive = df['n05lbhab'].str.contains('invasif', na=False, case=False)
inters = df.loc[
    does_intersect & after_2015 & in_kept_polys & ~is_invasive
].copy()

# Replace the layer's own id_site with the id_site of the site geometry each
# row is spatially joined to (sjoin defaults are used).
site_geoms = r_geom[['id_site', 'geom']]
inters = inters.drop(columns=['id_site']).sjoin(site_geoms)
inters.to_file(
    path.join(PATH, 'inters.gpkg'),
    driver='GPKG',
    layer='inters75',
)
# Interactive leftover: number of distinct sites (discarded when run as a script).
len(inters['id_site'].unique())
|
|
|
|
# Intersecting a wetland - forest habitats only, restricted to polygons not
# already selected in `inters`.
# No year filter is applied here: the original `n07anneehab > 1999` and
# `n06rechab > 45` conditions are commented out.
does_intersect = df['lien_zh'] == 'intersecte'
is_caract = df['zh'] == 'caractéristique'
in_kept_polys = df['idpolyfinal'].isin(lst_idpoly)
# 'for.t' is a regex: any single character is accepted between 'for' and 't'.
is_forest = df['n05lbhab'].str.contains('for.t', na=False, case=False)
is_invasive = df['n05lbhab'].str.contains('invasif', na=False, case=False)
already_in_inters = df['idpolyfinal'].isin(inters['idpolyfinal'].tolist())
inters_foret = df.loc[
    does_intersect
    & is_caract
    & in_kept_polys
    & is_forest
    & ~is_invasive
    & ~already_in_inters
].copy()

# Replace the layer's own id_site with the id_site of the site geometry each
# row is spatially joined to (sjoin defaults are used).
site_geoms = r_geom[['id_site', 'geom']]
inters_foret = inters_foret.drop(columns=['id_site']).sjoin(site_geoms)
inters_foret.to_file(
    path.join(PATH, 'inters.gpkg'),
    driver='GPKG',
    layer='inters_forets',
)
|
|
|
|
# Merge geometries: every site matched by a selected habitat gets the union of
# its current geometry and of all habitat polygons joined to it.
intersF = gpd.pd.concat([inters, inters_foret])
matched_sites = intersF['id_site'].unique()
rgeo = r_geom.loc[r_geom['id_site'].isin(matched_sites)].copy()
for site in rgeo['id_site'].unique():
    site_rows = rgeo['id_site'] == site
    pieces = [
        *rgeo.loc[site_rows, 'geom'],
        *intersF.loc[intersF['id_site'] == site, 'geom'],
    ]
    merged = gpd.GeoSeries(pieces, crs=2154).unary_union
    # Wrap the merged shape in a GeoSeries carrying the target row labels so
    # that the assignment lines up with the rows being replaced.
    rgeo.loc[site_rows, 'geom'] = gpd.GeoSeries(
        [merged],
        crs=2154,
        index=rgeo.loc[site_rows].index,
    )
# All merged geometries are stamped with the same fixed date.
rgeo['date'] = '2024-01-21'
|
|
|
|
# Keep only the sites whose merged geometry is strictly larger than the
# geometry it started from.
# rgeo was copied from rows of r_geom, so both carry the same row labels and
# the areas can be compared row for row.
area_before = r_geom.loc[rgeo.index].area
area_after = rgeo.area
rgeo2 = rgeo.loc[area_after > area_before]
|
|
|
|
# Check described habitats: for every matched site, keep the habitat rows
# whose code (`n24cdcb`) is not already among the valid rows of
# zones_humides.r_site_habitat for that site.
site_ids = intersF.id_site.unique().tolist()
# The id list is bound as an expanding parameter. Formatting a Python tuple
# into the SQL text produced invalid SQL for a single site (trailing comma)
# and for no site at all (empty parentheses).
sql = text(
    'SELECT * FROM zones_humides.r_site_habitat '
    'WHERE valid IS True and id_site IN :lst'
).bindparams(bindparam('lst', expanding=True))
hab_bdd = gpd.pd.read_sql(sql, con, params={'lst': site_ids})

# Habitat codes are compared as strings (the cast does not depend on the site,
# so it is done once).
cbn_codes = intersF.n24cdcb.astype(str)
new_rows = []
for id_site in site_ids:
    lst_bddhab = hab_bdd.loc[hab_bdd.id_site == id_site, 'id_cb'].tolist()
    new_rows.append(
        intersF[(intersF.id_site == id_site) & ~cbn_codes.isin(lst_bddhab)]
    )
# Concatenate once instead of growing a frame inside the loop. With no site,
# fall back to an empty frame that still has the columns read further down.
insert_hab = gpd.pd.concat(new_rows) if new_rows else intersF.iloc[0:0]
|
|
|
|
# Shape the habitat rows for insertion: keep the observer, organisation, year,
# habitat code and site columns under their target names, harmonise the
# spelling of people and organisations, and drop exact duplicates.
source_cols = ['n03observat','n04organism','n07anneehab','n24cdcb','id_site']
target_names = {
    'n24cdcb':'id_cb',
    'n03observat':'observer',
    'n04organism':'organisme',
    'n07anneehab':'annee',
}
# Applied to every column: a cell equal to a key is replaced by its value.
spelling_fixes = {
    'AMODEI T':'AMODEI Thomas',
    'Lo Parvi':'LO PARVI',
    'ACER CAMPESTRE':'Acer Campestre',
    'Réflex environnement':'REFLEX Environnement',
    'Office National des Forets':'Office National des Forêts',
    'Conservatoire botanique national alpin':'Conservatoire Botanique National Alpin',
    'Mosaique Environnement, M. Voirin':'Mosaïque Environnement',
    'Boucard E.':'BOUCARD E.',
    'MALET, A., JOUD. D., LINOSSIER, T.':'MALET A., JOUD Didier, LINOSSIER T.',
    'AURAND T., TAIN C':'AURAND Theo, TAIN C.',
    'FOLCHER C.':'FOLCHER Caroline',
    "Conservatoire d'espaces naturels": 'CEN Isère',
}
ins_hab = insert_hab[source_cols].rename(columns=target_names)
ins_hab = ins_hab.replace(spelling_fixes).drop_duplicates()

# 'auteur' is "observer (organisation)", or the organisation alone when no
# observer is recorded.
has_observer = ins_hab['observer'].notna()
ins_hab['auteur'] = None
ins_hab.loc[~has_observer, ['auteur']] = ins_hab.loc[~has_observer, 'organisme']
named = ins_hab.loc[has_observer]
ins_hab.loc[has_observer, ['auteur']] = (
    named['observer'] + ' (' + named['organisme'] + ')'
)
# The date is 1 January of the year held in `annee`.
ins_hab['date'] = ins_hab['annee'].astype(str) + '-01-01'
ins_hab = ins_hab.drop(columns=['observer','organisme','annee'])
|
|
|
|
# Insert the enlarged geometries into sites.r_sites_geom, then link each
# site's newest geometry row to author id 96 in sites.r_geomsites_auteur.
rgeo2.to_postgis('r_sites_geom',pycen.con,'sites',if_exists='append',index=False)

# Read back, for every site just written, the geometry row with the highest id
# (presumably the row appended above, to be confirmed against the table's id
# sequence).
# The id list is bound as an expanding parameter. Formatting a Python tuple
# into the SQL text produced invalid SQL for a single site (trailing comma)
# and for no site at all (empty parentheses).
latest_geom_sql = text('''
SELECT DISTINCT ON (id_site) id,id_site
FROM sites.r_sites_geom
WHERE id_site IN :lst
ORDER BY id_site,id DESC
''').bindparams(bindparam('lst', expanding=True))
siteid = gpd.pd.read_sql(
    latest_geom_sql,
    pycen.con,
    params={'lst': rgeo2.id_site.unique().tolist()},
)
usr = siteid.drop(columns=['id_site']).copy()
usr.rename(columns={'id':'id_geom_site'},inplace=True)
usr['id_auteur'] = 96
usr.to_sql('r_geomsites_auteur',pycen.con,schema='sites',if_exists='append',index=False)
|
|
|
|
# Insert habitats: attach to every habitat row the newest geometry id of its
# site, keep one row per (site, habitat code), save a CSV copy and append the
# rows to zones_humides.r_site_habitat.
# The id list is bound as an expanding parameter. Formatting a Python tuple
# into the SQL text produced invalid SQL for a single site (trailing comma)
# and for no site at all (empty parentheses).
latest_geom_sql = text('''
SELECT DISTINCT ON (id_site) id id_geom_site,id_site
FROM sites.r_sites_geom
WHERE id_site IN :lst
ORDER BY id_site,id DESC
''').bindparams(bindparam('lst', expanding=True))
siteidgeo = gpd.pd.read_sql(
    latest_geom_sql,
    pycen.con,
    params={'lst': ins_hab.id_site.unique().tolist()},
)
ins_habF = ins_hab.merge(siteidgeo, on='id_site',how='left')
# After sorting, keep='last' retains the row with the greatest `date` string
# for each (id_site, id_cb) pair.
ins_habF.sort_values(['id_site','id_cb','date'],inplace=True)
ins_habF.drop_duplicates(subset=['id_site','id_cb'],keep='last',inplace=True)
ins_habF['valid'] = True
# The CSV copy keeps the 'auteur' column, which is dropped before the insert.
ins_habF.to_csv(path.join(PATH,'habs_inserted.csv'))
(ins_habF
    .drop(columns=['auteur'])
    .to_sql('r_site_habitat',pycen.con,schema='zones_humides',if_exists='append',index=False))
|
|
|
|
|
|
# Build the author lookup: load the author reference from pycen, rewrite some
# `nom_prenom` values, then map each name to its row label in `refpers`.
# NOTE(review): assumes the index of get_auteur2() holds the author id; confirm.
refpers = (pycen.pers.get_auteur2()
    # First pass: exact, whole-cell replacements.
    # (The original listed the 'REFLEX Environnement (...)' key twice; the
    # duplicate entry was removed, the mapping is unchanged.)
    .replace({'nom_prenom':{
        'ONF38 (ONF)':'Office National des Forêts',
        'CBNA (CBNA)':'Conservatoire Botanique National Alpin',
        'LO PARVI (LO PARVI)':'LO PARVI',
        'CEN Isère (CEN Isère)':'CEN Isère',
        'Ecosphère (Ecosphère)':'Ecosphère',
        'BURGEAP (BURGEAP)':'BURGEAP',
        'Acer Campestre (Acer Campestre)':'Acer Campestre',
        'REFLEX Environnement (REFLEX Environnement)':'REFLEX Environnement',
    }})
    # Second pass: regex replacements, so these keys also match inside a
    # longer name.
    .replace({'nom_prenom':{
        'CBNA':'Conservatoire Botanique National Alpin',
        'ONF':'Office National des Forêts',
        'BÉGUIN Lucile':'BEGUIN Lucile',
        'CEN AURA':'Conservatoire Régional des Espaces Naturels Rhône-Alpes',
        'CD Isère':"Département de l'Isère"
    }},regex=True))
pers_dict = dict(zip(refpers.nom_prenom,refpers.index))
|
|
# Read back the ids of the habitat rows matching ins_habF, then build one row
# per inserted habitat holding its author string and organisation.
key_cols = ['id_site','id_cb','date','id_geom_site']
key_rows = list(ins_habF[key_cols].itertuples(index=False,name=None))
sql_template = '''
SELECT DISTINCT id,id_site,id_cb,date,id_geom_site
FROM zones_humides.r_site_habitat
WHERE (id_site,id_cb,date,id_geom_site) IN ({lst})
ORDER BY id_site,id DESC
'''
if key_rows:
    # Join the row literals explicitly: formatting a tuple of tuples gives the
    # same text for two or more rows, but ends with a trailing comma for
    # exactly one row, which is invalid SQL.
    # NOTE(review): values are still interpolated with repr(), as in the
    # original; this is only safe for trusted data.
    siteidhab = gpd.pd.read_sql(
        sql_template.format(lst=', '.join(repr(row) for row in key_rows)),
        pycen.con,
    )
else:
    # Nothing to look up: an empty IN list is invalid SQL, so skip the query
    # and keep the expected columns.
    siteidhab = gpd.pd.DataFrame(columns=['id', *key_cols])

# Both sides are cast to str so that the join keys compare as text.
hab_auth = (ins_habF[['id_site','auteur','id_cb','date']]
    .astype(str)
    .merge(siteidhab.astype(str), on=['id_site','id_cb','date'],how='left')
    .rename(columns={'id':'id_sitehab'})
    .set_index(['id_sitehab']))
# Organisation = text after the last '(' up to the next ')'. An author string
# without parentheses is kept whole.
hab_auth['orga'] = (hab_auth.auteur.str.split('(').str[-1]
    .str.split(')').str[0])
|
|
|
|
# Translate author strings through pers_dict. Strings containing a comma list
# several people and are split first.
test_auth = hab_auth['auteur'].str.contains(',')

# Single author: the whole string is looked up as is.
single_author = hab_auth.loc[~test_auth, 'auteur']
hab_auth1 = single_author.replace(pers_dict).to_frame('id_auteur')

# Several authors: take the text before '(', split it on commas into one row
# per person, then re-attach the organisation of the originating row.
people = (
    hab_auth.loc[test_auth, 'auteur']
    .str.split('(').str[0]
    .str.split(',')
    .explode()
    .str.strip()
)
orga_by_row = hab_auth.loc[test_auth, ['orga']]
hab_auth_tmp = people.to_frame('auteur').merge(
    orga_by_row, left_index=True, right_index=True)

# Rebuild "person (organisation)", fix the two BURGEAP/ECOSPHERE combinations,
# then translate through pers_dict.
rebuilt = hab_auth_tmp['auteur'] + ' (' + hab_auth_tmp['orga'] + ')'
combined_fixes = {
    'BURGEAP (BURGEAP, ECOSPHERE)':'BURGEAP',
    'ECOSPHERE (BURGEAP, ECOSPHERE)':'Ecosphère',
}
hab_auth2 = (
    rebuilt
    .replace(combined_fixes)
    .replace(pers_dict)
    .to_frame('id_auteur')
)
|
|
|
|
# Stack the single-author and multi-author tables, turn their index back into
# a regular column, and append the result to zones_humides.r_rsitehab_auteur.
hab_authors = gpd.pd.concat([hab_auth1, hab_auth2]).reset_index()
hab_authors.to_sql(
    'r_rsitehab_auteur',
    pycen.con,
    schema='zones_humides',
    if_exists='append',
    index=False,
)
|