#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import geopandas as gpd
import pandas as pd
from sqlalchemy.engine import URL
from sqlalchemy import create_engine
# from shapely.geometry import Polygon
import pycen
from os import listdir, chdir, system
from pathlib import Path
from zipfile import ZipFile
from rasterstats import zonal_stats
import rasterio as rio  # used to open the clipped MNT near the end of the script
# MNT (DEM) raster paths
path0 = '/home/colas/Documents/9_PROJETS/2_PS/'
path0_mnt = '/home/colas/Documents/9_PROJETS/3_PGZH/'
path = path0_mnt + 'SIG/'
p_mltifct = 'multi_fonctions/IGN - BD Alti 25M/'
path2_mnt = path0+'MNT/'
file_mnt25 = 'MNT_25m.tif'
file_mnt5 = 'MNT_5m.tif'
# Source database ("bd-cen-38") connection parameters
user = 'cen_admin'
pwd = '#CEN38@venir'
adr = '192.168.0.189'
base = 'bd-cen-38'
url = URL.create(
    'postgresql+psycopg2',
    username=user,
    password=pwd,
    host=adr,
    database=base,
)
con = create_engine(url)
# con = create_engine('postgresql+psycopg2://{0}:{1}@{2}/{3}'.format(user,pwd,adr,base), echo=False)
def keepgeom_indept(df):
    """Keep only features intersecting the Isère natural districts, then drop all-NaN columns."""
    dnat = pycen.ref.get_districtNat().geom.unary_union
    df['in_isere'] = df.intersects(dnat)
    df = df[df.in_isere].copy()
    df.dropna(axis=1, how='all', inplace=True)
    del df['in_isere']
    return df
# df = gpd.read_postgis(
#     sql='SELECT * FROM pelouse_seche."cr_PS_CBNA_habitats_aggreg_06_2020"',
#     con=con,
# )
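# Load one CBNA habitat layer from the 'habitat' schema; change the index into
# lst_tab to process a different massif (3 = Chartreuse here).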
lst_tab = [
    '"cr_VERCORS_habitats_CBNA_1999-2007"',
    '"cr_TRIEVES+VERCORS_habitats_CBNA_2014"',
    '"cr_ECRIN_habitats_CBNA_2014"',
    '"cr_CHARTREUSE_habitats_CBNA_2000-2012"',
]
table = lst_tab[3]
df = gpd.read_postgis(
    sql='SELECT * FROM habitat.%s' % table,
    con=con,
)
df.set_index('idfinal', inplace=True)
# df.set_index('data_id', inplace=True)
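# Sanity check: the n_hab* columns are presumably cover percentages, so their
# row sums should be close to 100.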
colnhab = df.columns[df.columns.str.startswith('n_hab')]
nhab = df[colnhab].copy()
nhab['SUMM'] = nhab.sum(axis=1)
nhab['SUMM'].unique()
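# Keep polygons carrying a dry-grassland Corine Biotope code: either as the
# primary habitat (code_hab1), or as a secondary habitat covering at least 20 %.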
lst_hab = ['34.1','34.3','34.4','34.7','35.2','62.3','64.1']
df2 = pd.DataFrame()
for h in lst_hab:
    # regex=False: habitat codes contain '.', which would otherwise be a regex wildcard
    tmp = df[
        (df.code_hab1.str.contains(h, na=False, regex=False)) |
        (df.code_hab2.str.contains(h, na=False, regex=False) & (df.n_hab2 >= 20)) |
        (df.code_hab3.str.contains(h, na=False, regex=False) & (df.n_hab3 >= 20)) |
        (df.code_hab4.str.contains(h, na=False, regex=False) & (df.n_hab4 >= 20))
    ]
    df2 = pd.concat([df2, tmp])
df3 = df2.copy()
# Before 1385 / After 1422
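# Two exclusion variants: df2 keeps a fine-grained filter on dominant non-grassland
# codes, df3 a stricter one; their difference flags features missing from AZALEE below.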
# Corine Biotope classes to drop when dominant (n_hab1 >= 60)
lst_hab2 = ['41','42','43','44','81','82','83']
for h2 in lst_hab2:
    tmp2 = df2[
        df2.code_hab1.str.contains('&' + h2, na=False, regex=False) & (df2.n_hab1 >= 60)
    ]
    df2.drop(labels=tmp2.index, inplace=True)
for h2 in lst_hab2:
    # regex=False so the trailing '.' is matched literally (e.g. '41.')
    tmp2 = df2[
        df2.code_hab1.str.contains(h2 + '.', na=False, regex=False) & (df2.n_hab1 >= 60)
    ]
    df2.drop(labels=tmp2.index, inplace=True)
for h2 in lst_hab2:
    tmp3 = df3[
        df3.code_hab1.str.contains(h2, na=False) & (df3.n_hab1 >= 60)
    ]
    df3.drop(labels=tmp3.index, inplace=True)
df2 = keepgeom_indept(df2)
df3 = keepgeom_indept(df3)
df = df2.copy()
df.reset_index(inplace=True,drop=False)
# ps = pycen.ps.get_sitesGeom()
sql = 'SELECT * FROM ps.v_pelouseseches_all'
PS = gpd.read_postgis(sql,pycen.con)
ps = PS[PS.source==table.replace('"','')].copy()
# 1385
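# Split the df2 selection into features also retained by the stricter df3
# filter vs the ones missing from it, and export both as GeoPackage layers.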
not_in_azalee = df2[~df2.index.isin(df3.index)].copy()
in_azalee = df2[df2.index.isin(df3.index)].copy()
not_in_azalee.to_file(path0+'CBNA/MISSING_DATA_INDB/chartreuse.gpkg',layer='not_in_azalee')
in_azalee.to_file(path0+'CBNA/MISSING_DATA_INDB/chartreuse.gpkg',layer='in_azalee')
ps.columns
# Drop polygons that intersect non-CBNA polygons
# ps_autre = ps[~ps.auteur_geom.str.contains('CBNA')]
# intersect = gpd.sjoin(df, ps_autre, op='intersects').idfinal.tolist()
# drop_index = df[df.idfinal.isin(intersect)].index
# df.drop(drop_index, inplace=True)
# Drop polygons that intersect CBNA polygons (tested against their centroids)
# ps_cbna = ps[ps.auteur_geom.str.contains('CBNA')]
# intersect = gpd.sjoin(
#     df,
#     gpd.GeoDataFrame(geometry=ps_cbna.centroid),
#     op='intersects').idfinal.tolist()
# drop_index = df[df.idfinal.isin(intersect)].index
# df.drop(drop_index, inplace=True)
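# Resume from the GeoPackage exported above; derive 'date' from 'annee'
# (January 1st of the survey year) when no date column exists.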
from os.path import join
PATH = '/home/colas/Documents/9_PROJETS/2_PS/CBNA/MISSING_DATA_INDB'
FILE = 'CHARTREUSE.gpkg'
df = gpd.read_file(join(PATH, FILE))
if 'date' not in df.columns and 'annee' in df.columns:
    df.loc[~df.annee.isna(), 'date'] = df.loc[~df.annee.isna(), 'annee'].astype(str) + '-01-01'
    df['date'] = pd.to_datetime(df['date'])
colnhab2 = df2.columns[df2.columns.str.startswith('n_hab')]
nhab2 = df2[colnhab2].copy()
nhab2['SUMM'] = nhab2.sum(axis=1)
nhab2['SUMM'].unique()
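# Per-table metadata cleanup: observer names, structure, survey date.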
if table == '"cr_CHARTREUSE_habitats_CBNA_2000-2012"':
    # Expand observer initials to full names
    dic = {
        'JCV': 'VILLARET Jean-charles',
        'GP' : 'PACHE Gilles',
        'AM' : 'MIKOLAJCZAK Alexis',
        'TL' : 'LEGLAND Thomas',
        'HM' : 'MERLE Hugues',
        'CC' : 'Crassous Claire',
        # 'AL'
        # 'LA'
        # 'JC'
    }
    df.obs = df.obs.replace(dic)
    df.loc[~df.obs.isin(['ONF38', *dic.values()]), 'obs'] = 'CBNA'
    df.loc[df.structur == 'ONF 38', 'structur'] = 'ONF38'
if table == '"cr_ECRIN_habitats_CBNA_2014"':
    df['auteur'] = 'PnE'
    df['structure'] = 'PnE'
    df['date'] = '2014-01-01'
    df['date'] = pd.to_datetime(df['date'])
if table == '"cr_TRIEVES+VERCORS_habitats_CBNA_2014"':
    df['auteur'] = 'CBNA'
    df['structure'] = 'CBNA'
    df['date'] = '2014-01-01'
    df['date'] = pd.to_datetime(df['date'])
if table == '"cr_VERCORS_habitats_CBNA_1999-2007"':
    df['structure'] = 'CBNA'
# Harmonise the verbose 'statut' labels: several spelling variants map to the
# same status, so a single dict replace covers them all.
statut_map = {
    "Habitat d'intérêt communautaire": "Communautaire",
    "Habitat d'intérêt communautaire retenu prioritaire": "Prioritaire",
    "Habitat d'intérêt communautaire, retenu prioritaire": "Prioritaire",
    "Habitat d'intérêt communautaire, retenu prioritaire pour les sites riches en orchidées": "Prioritaire",
    "Habitat d'intérêt communautaire retenu prioritaire pour les sites riches en orchidées": "Prioritaire",
    "Habitat communautaire, retenu prioritaire pour les sites riches en orchidées": "Prioritaire",
}
if table in [
    '"cr_VERCORS_habitats_CBNA_1999-2007"',
    '"cr_TRIEVES+VERCORS_habitats_CBNA_2014"',
]:
    df.statut = df.statut.replace(statut_map)
if table in [
    '"cr_CHARTREUSE_habitats_CBNA_2000-2012"',
    '"cr_ECRIN_habitats_CBNA_2014"',
]:
    dic = {'PR': 'Prioritaire', 'IC': 'Communautaire', 'NC': 'Non communautaire'}
    df.statut.replace(dic, inplace=True)
df.reset_index(inplace=True, drop=True)
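# Push the cleaned habitat layer to the pelouse_seche schema of bd-cen-38.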
df.to_postgis(
'ps'+table[3:-1],pycen.con_bdcen,'pelouse_seche',geom_col='geom',if_exists='replace'
)
# [CROSS_MNT]
ddf = df[['idfinal','geom']].copy()
ddf.set_geometry('geom',inplace=True)
# home = str(Path.home())
# chdir(path + p_mltifct)
# Dir = listdir()
# Dir = [x for x in Dir if '.zip' in x]
# # stats = pd.DataFrame()
# for i, d in enumerate(Dir):
#     zip = ZipFile(d).namelist()
#     z = [z for z in zip if 'MNT' in z][0]
#     print(z)
#     tmp = zonal_stats(ddf.geom, '/vsizip/{zip}/{mnt}'.format(zip=d, mnt=z), stats="max")
#     tmp = pd.DataFrame(tmp)
#     tmp.columns = ['max%s' % i]
#     if i == 0:
#         stats = tmp
#     else:
#         stats = pd.merge(stats, tmp, how='left', left_index=True, right_index=True)
# stats['bilan'] = stats.max(axis=1)
# tmp = pd.merge(df, stats['bilan'], left_index=True, right_index=True)
# tmp.bilan = tmp.bilan.round()
# zonal_tmp = zonal_stats(ddf.geom, path2_mnt + file_mnt25, stats="mean max", nodata=0, all_touched=True)
# import rasterio
# with rasterio.open(path2_mnt + file_mnt5) as src:
#     affine = src.transform
#     array = src.read(1)
#     zonal_tmp3 = zonal_stats(ddf, array, affine=affine, stats="mean max", nodata=0, all_touched=True)
# zonal_tmp = pd.DataFrame(zonal_tmp)
# zonal_tmp3 = pd.DataFrame(zonal_tmp3)
# zonal_tmp.columns = ['max_alti', 'mean_alti']
# zonal_tmp3.columns = ['max_alti', 'mean_alti']
# tmp = pd.merge(df, zonal_tmp2, left_index=True, right_index=True)
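# Live computation: max altitude per dry-grassland polygon from the 5 m MNT,
# then flag sites whose highest point is at or below 1200 m.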
from pycen import con as con_aza
sql = 'SELECT * FROM ps.v_pelouseseches_noalti'
ddf = gpd.read_postgis(sql,con_aza)
zonal_tmp2 = zonal_stats(ddf,path2_mnt+file_mnt5,stats="max",nodata=0,all_touched=True)
zonal_tmp2 = pd.DataFrame(zonal_tmp2)
zonal_tmp2.columns = ['max_alti']
tmp = pd.concat([ddf,zonal_tmp2], axis=1)
tmp = tmp.set_geometry('geom', crs=2154)
tmp['infeq_1200'] = tmp.max_alti <= 1200
tmp2 = tmp[['site_code','infeq_1200']].copy()
tmp2.rename(columns={'site_code':'id_site'},inplace=True)
tmp2.to_sql('r_infeq_1200m',con_aza,'ps',if_exists='append',index=False)
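# The flags are appended to ps.r_infeq_1200m; the sites themselves are then
# split on the same 1200 m ceiling.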
PSinfeq1200 = tmp[tmp.max_alti <= 1200].copy()
PSsup1200 = tmp[tmp.max_alti > 1200].copy()
# PSinfeq1200 = tmp[tmp.bilan <= 1200].copy()
# PSsup1200 = tmp[tmp.bilan > 1200].copy()
# PSinfeq1200.rename(columns={'bilan':'max_alti'}, inplace=True)
# PSsup1200.rename(columns={'bilan':'max_alti'}, inplace=True)
PSinfeq1200.dropna(axis=1, how='all',inplace=True)
if table in [
    '"cr_VERCORS_habitats_CBNA_1999-2007"',
    '"cr_TRIEVES+VERCORS_habitats_CBNA_2014"',
]:
    # Same 'statut' harmonisation as above, on the <= 1200 m subset
    PSinfeq1200.statut = PSinfeq1200.statut.replace(statut_map)
if table in [
    '"cr_CHARTREUSE_habitats_CBNA_2000-2012"',
    '"cr_ECRIN_habitats_CBNA_2014"',
]:
    dic = {'PR': 'Prioritaire', 'IC': 'Communautaire', 'NC': 'Non communautaire'}
    PSinfeq1200.statut.replace(dic, inplace=True)
DF_INI = PSinfeq1200.copy()
df = DF_INI.copy()
# df['structure'] = 'CBNA'
# sql = """
# DELETE FROM sites.sites
# WHERE sites.id in (SELECT id_site from sites.r_sites_geom where id_lot = 1);
# """
# with pycen.con.begin() as cnx:
#     cnx.execute(sql)
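# Export both altitude classes as shapefiles for import; dates are cast to str
# because the shapefile driver has no datetime type. NB: the 'ECRIN' file names
# look carried over from a previous run; adjust them to the current table.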
path0 = '/home/colas/Documents/9_PROJETS/2_PS/TO IMPORT/'
cols_date = PSsup1200.columns[PSsup1200.columns.str.contains('date')]
PSsup1200[cols_date] = PSsup1200[cols_date].astype(str)
PSsup1200.to_file(path0 + 'PS_ECRIN_CBNA_2014_SUP1200.shp')
cols_date = DF_INI.columns[DF_INI.columns.str.contains('date')]
DF_INI[cols_date] = DF_INI[cols_date].astype(str)
DF_INI.to_file(path0 + 'PS_ECRIN_CBNA_2014_INF1200.shp')
# Clip the MNT to a single habitat polygon (idfinal 24060) with gdalwarp,
# using the PostGIS layer as cutline datasource.
op = '''
gdalwarp -overwrite -s_srs EPSG:2154 -t_srs EPSG:2154 -of GTiff -tr 25.0 25.0 -tap -cutline \
"PG:dbname='bd-cen-38' host=192.168.0.189 port=5432 sslmode=disable user='cgeier' password='adm1n*bdCen'" \
-csql 'SELECT * FROM habitat."cr_TRIEVES_VERCORS_habitats_CBNA_2014" WHERE idfinal in (24060)' \
"{mnt}" {out}
'''.format(
    mnt=path2_mnt + 'out.tif',
    out=path2_mnt + '24060.tif')
system(op)
mnt = rio.open(path2_mnt + '24060.tif')  # open the clipped raster produced above
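# Merge the two "habitat code absent from Corine Biotope" problem layers and
# push the union back to the pelouse_seche schema.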
sql = 'SELECT * FROM pelouse_seche."PB_codehab_nonPresent_dans_corineBiotope"'
df = gpd.read_postgis(sql,con)
lst = [
    'cr_TRIEVES+VERCORS_habitats_CBNA_2014',
    'cr_VERCORS_habitats_CBNA_1999-2007',
    'cr_ECRIN_habitats_CBNA_2014',
    'cr_CHARTREUSE_habitats_CBNA_2000-2012',
]
df = df[df.table_org.isin(lst)]
df.loc[df.auteur=='PnE','structure'] = 'PnE'
sql = 'SELECT * FROM pelouse_seche."PB_codehabCBNA_nonPresent_dans_corineBiotope"'
df2 = gpd.read_postgis(sql,con)
df3 = pd.concat([df,df2])
df3.set_geometry('geom',inplace=True,crs=2154)
df3.to_postgis(
    name='PB_codehabCBNA_nonPresent_dans_corineBiotope',
    con=con,
    schema='pelouse_seche',
    if_exists='replace',
    geom_col='geom',
)