#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
from pycen import con_gn, zh, pers
import pandas as pd
import geopandas as gpd
import uuid
import re
zh = zh()
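# Mapping of Azalée v_zoneshumides columns (keys) to the column names used for GeoNature pr_zh.t_zh (values).
# Commented-out entries are not renamed here; most of them are handled by dedicated functions below.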
DICT_TZH = {
'site_code':'code',
'nom':'main_name',
'autre_nom':'secondary_name',
'auteur_site':'create_author',
##'auteur_geom',
'auteur_last_maj':'update_author',
'date_site':'create_date',
##'date_geom',
'date_last_maj':'update_date',
# 'CEN Isère': 'id_org', # operating organisation
##'type_milieu',
##'type_site',
# 'typo_sdage':'id_sdage',
# 'id_sage' # to retrieve
# 'rmq_site':'remark_pres',
# 'rmq_fct_majeur':'',
# 'rmq_interet_patri':'',
# 'rmq_bilan_menace':'',
# 'rmq_orient_act':'',
# 'rmq_usage_process':'',
# 'code_cb':'',
# 'lib_cb':'',
# 'activ_hum':'',
# 'impact':'',
# 'position':'',
# 'rmq_activ_hum':'',
# 'connexion':'',
# 'subm_orig':'',
# 'subm_freq':'',
# 'subm_etend':'',
# 'fct_bio':'',
# 'fct_hydro':'',
# 'int_patri':'',
# 'val_socioEco':'',
# 'crit_delim':'',
# 'crit_def_esp':'',
# 'entree_eau_reg':'',
# 'entree_eau_perm':'',
# 'entree_eau_topo':'',
# 'sortie_eau_reg':'',
# 'sortie_eau_perm':'',
# 'sortie_eau_topo':'',
# 'geom':'geom'
}
def get_id_organisme(nom):
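"""Return the id_organisme values of utilisateurs.bib_organismes whose nom_organisme matches `nom`."""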
if "'" in nom:
nom = nom.replace("'","")
sql = "SELECT id_organisme FROM utilisateurs.bib_organismes WHERE nom_organisme = '%s'"%nom
return pd.read_sql_query(sql,con_gn)['id_organisme'].values
def remove_special_char(obj,space=False):
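"""Replace accented characters and typographic apostrophes/dashes in a pandas Series with ASCII equivalents; with space=True, spaces are removed as well."""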
dict_char = {
r'[’‘]': "'",
r'[àáâãäå]':'a',
r'[èéêë]':'e',
r'[ìíîï]':'i',
r'[òóôõö]':'o',
r'[ùúûü]':'u',
# r'[ ]':"",
r'[–—]': "-"
}
if space:
dict_char = {**dict_char, **{r'[ ]':""}}
return obj.replace(dict_char,regex=True)
def recup_cols_table(table,con,schema='pr_zh'):
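"""Return the column names of `schema`.`table`, read through the SQLAlchemy dialect of `con`."""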
lst_cols = con.dialect.get_columns(con,table,schema)
return [x['name'] for x in lst_cols]
def t_nomenclature_ZH(bib_mnemo=None,source='ZONES_HUMIDES'):
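"""Return ref_nomenclatures.t_nomenclatures joined with bib_nomenclatures_types, optionally filtered
on the nomenclature type mnemonic (`bib_mnemo`) and on the `source` column (default 'ZONES_HUMIDES')."""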
sch = 'ref_nomenclatures'
tab = ['t_nomenclatures','bib_nomenclatures_types']
sql = """
SELECT
a.id_nomenclature,
a.cd_nomenclature,
a.mnemonique mnemo,
a.label_default as label,
a.definition_default def,
b.mnemonique bib_mnemo,
b.label_default bib_label,
b.definition_default bib_def,
a.active
FROM {sch}.{tab0} a
JOIN {sch}.{tab1} b USING (id_type)
""".format(sch=sch,tab0=tab[0],tab1=tab[1])
if source is not None or bib_mnemo is not None:
sql += " WHERE "
if source is not None:
sql += """b."source" = '%s'"""%source
sql += " AND " if bib_mnemo is not None else ''
if bib_mnemo is not None:
sql += "b.mnemonique = '%s'"%bib_mnemo
return pd.read_sql_query(sql,con_gn).replace({r'[’]': "'"},regex=True)
def recup_sdage(df):
# Retrieve the SDAGE id from the typology
df['id_sdage'] = [str(int(re.search(r'\d+', x).group())) for x in df.typo_sdage.sort_index()]
sdage = t_nomenclature_ZH(bib_mnemo='SDAGE')
dict_sdage = dict(zip(sdage.cd_nomenclature,sdage.id_nomenclature))
df['id_sdage'].replace(dict_sdage,inplace=True)
return df
def recup_sage(df):
# Identify the SAGE precision level
df['id_sage'] = [df.loc[df.typo_sdage==x,'mnemo_sdage'].values[0] if re.search(r'\.\d+', x) else None for x in df.typo_sdage.sort_index()]
sage = t_nomenclature_ZH(bib_mnemo='SAGE')
dict_sage = dict(zip(sage.mnemo,sage.id_nomenclature))
df['id_sage'].replace(dict_sage,inplace=True)
return df
def cor_lim_list(crit_delim):
"""Remplis la table pr_zh.cor_lim_list et retourn les uuid associés
"""
delim = t_nomenclature_ZH(bib_mnemo='CRIT_DELIM')
delim.mnemo = delim.mnemo.str.replace(r'.\(.*\)','',regex=True)
dict_delim = dict(zip(delim.mnemo.str.lower(),delim.id_nomenclature))
serie = crit_delim\
.fillna('non déterminé')\
.str.split(';',expand=True).stack().droplevel(-1).reset_index()
serie.columns = ['id','delim']
serie.set_index('id',inplace=True)
serie['id_lim'] = serie.delim.str.replace(r'.\(.*\)','',regex=True)
serie.id_lim.replace(dict_delim,inplace=True)
genuuid = serie.index.to_frame().drop_duplicates()
del genuuid['id']
genuuid['id_lim_list'] = [uuid.uuid4() for x in genuuid.index]
_cor_lim_list = pd.merge(serie,genuuid,how='inner',right_index=True,left_index=True)
uuidreturn = pd.merge(crit_delim,genuuid,how='left',right_index=True,left_index=True)
# Fill the pr_zh.cor_lim_list table
_cor_lim_list[['id_lim_list','id_lim']].to_sql(
name='cor_lim_list',con=con_gn,schema='pr_zh',if_exists='append',index=False,
# dtype={
# 'id_lim_list':uuid.SafeUUID
# }
)
return uuidreturn.id_lim_list.sort_index()
def recup_delim_rmq(crit_delim):
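"""Extract the free-text remarks given in parentheses in the ';'-separated delimitation criteria
and return one concatenated remark per row (None when empty)."""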
serie = crit_delim.str.split(';',expand=True).stack().droplevel(-1).reset_index()
serie.columns = ['id','delim']
serie.set_index('id',inplace=True)
serie['remarks'] = [ x[x.find("(")+1:x.rfind(")")] if x.find("(") > -1 else None for x in serie.delim ]
uniserie = serie.groupby('id')['remarks'].apply(list).reset_index()
uniserie.columns = ['id','remarks']
uniserie.set_index('id',inplace=True)
uniserie.remarks = ['\n'.join(list(set(filter(None,x)))).strip() for x in uniserie.remarks]
uniserie.remarks.replace({'': None},inplace=True)
df_remarks = pd.merge(
crit_delim,
# serie.reset_index().drop_duplicates(subset='id').set_index('id'),
uniserie,
how='left',right_index=True,left_index=True
)
return df_remarks.remarks
def recup_subm(col_subm,typ_subm):
"""Correspondance subm--id_nomenclature.
@col_subm : Series. Colonne de submersion
@typ_subm : str. Type de submersion ['frequente','etendue','connexion']
"""
# The "partiellement submergé" frequency is missing
if typ_subm == 'frequente':
nom_subm = t_nomenclature_ZH(bib_mnemo='SUBMERSION_FREQ')
elif typ_subm == 'etendue':
nom_subm = t_nomenclature_ZH(bib_mnemo='SUBMERSION_ETENDUE')
elif typ_subm == 'connexion':
nom_subm = t_nomenclature_ZH(bib_mnemo='TYPE_CONNEXION')
dict_submfreq = dict(zip(nom_subm.mnemo,nom_subm.id_nomenclature))
serie = col_subm.replace({'Inconnu':'Non déterminé'})
return serie.replace(dict_submfreq)
def to_bib_organismes_util():
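"""Insert into utilisateurs.bib_organismes the Azalée organisations not already present (case-insensitive name comparison)."""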
table = 'bib_organismes'
isin_db = pd.read_sql_table(
table,con_gn,'utilisateurs',['id_organisme'],columns=['nom_organisme']
).replace({r'[’]': "'"},regex=True)
insert_from = pers.get_organisme()
to_insert = insert_from[~insert_from.nom.str.lower().isin(isin_db.nom_organisme.str.lower())]
to_insert\
.drop(columns='abbrev')\
.rename(columns={'nom':'nom_organisme'})\
.to_sql(name=table,con=con_gn,schema='utilisateurs',if_exists='append',index=False)
def to_bib_organismes_przh():
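"""Insert into pr_zh.bib_organismes the Azalée organisations not already present; abbreviations are
stripped of special characters and spaces, upper-cased and truncated to 6 characters, and 'Inconnu' becomes 'Autre'."""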
table = 'bib_organismes'
# if first_time:
# sql = 'DELETE FROM pr_zh.%s'%table
# with con_gn.begin() as cnx:
# cnx.execute(sql)
isin_db = pd.read_sql_table(
table,con_gn,'pr_zh',['id_org'],columns=['name']
).replace({r'[’]': "'"},regex=True)
insert_from = pers.get_organisme()\
.replace({'Inconnu':'Autre'})
to_insert = insert_from[~insert_from.nom.str.lower().isin(isin_db.name.str.lower())]
to_insert.abbrev = remove_special_char(to_insert.abbrev,space=True)\
.str.upper()\
.str[:6]
to_insert.loc[to_insert.abbrev.notna()]\
.rename(columns={'nom':'name','abbrev':'abbrevation'})\
.to_sql(name=table,con=con_gn,schema='pr_zh',if_exists='append',index=False)
to_insert.loc[to_insert.abbrev.isna()]\
.rename(columns={'nom':'name'})\
.drop(columns=['abbrev'])\
.to_sql(name=table,con=con_gn,schema='pr_zh',if_exists='append',index=False)
def get_bib_organismes(schema='utilisateurs'):
table = 'bib_organismes'
return pd.read_sql_table(
table,con_gn,schema,['id_organisme'],columns=['nom_organisme']
).replace({r'[’]': "'"},regex=True)
def to_t_roles():
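"""Insert into utilisateurs.t_roles the Azalée authors (nom, prenom, organisme) not already present;
organisation names are mapped to id_organisme, 'Inconnu' to -1."""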
table = 't_roles'
isin_db = pd.read_sql_table(
table,con_gn,'utilisateurs',['id_role'],columns=['nom_role','prenom_role','id_organisme']
).replace({r'[’]': "'"},regex=True)
bib_organismes = get_bib_organismes(schema='utilisateurs')
t_roles = pd.merge(isin_db,bib_organismes,'left',left_on='id_organisme',right_index=True)
insert_from = pers.get_auteur2().replace({'GENTIANA':'Gentiana'})
ident_notin_db = [
x for x in insert_from.index.sort_values()
if t_roles[(t_roles.nom_role==insert_from.nom[x]) & (t_roles.prenom_role==insert_from.prenom[x]) & (t_roles.nom_organisme==insert_from.organisme[x])].empty
]
dict_orga = dict(zip(bib_organismes.nom_organisme,bib_organismes.index))
to_insert = insert_from[insert_from.index.isin(ident_notin_db)]\
.drop(columns=['nom_prenom'])\
.rename(columns={'nom':'nom_role','prenom':'prenom_role','organisme':'id_organisme',})\
.replace({**dict_orga,**{'Inconnu':-1}})
to_insert.to_sql(
name=table,con=con_gn,schema='utilisateurs',if_exists='append',index=False
)
def get_t_roles(id_role=None):
table = 't_roles'
t_roles = pd.read_sql_table(
table,con_gn,'utilisateurs',['id_role'],columns=['nom_role','prenom_role','id_organisme']
).replace({r'[’]': "'"},regex=True).sort_index()
if id_role:
t_roles = t_roles.iloc[[id_role]]
return pd.merge(t_roles,get_bib_organismes(),'left',left_on='id_organisme',right_index=True)
def recup_id_role(author): # TODO: finish
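"""Map an author Series (Azalée 'nom_prenom' strings, possibly suffixed with ' (Inconnu)') to utilisateurs.t_roles id_role values."""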
adapt_auth = author.replace({r' \(Inconnu\)': '', '\xa0': ' '},regex=True).str.strip().unique()
azalee_auth = pers.get_auteur2().sort_index()#.replace({' ':' '},regex=True)
azalee_auth = azalee_auth[azalee_auth.nom_prenom.isin(adapt_auth)].replace({'Inconnu':'Autre'})
# azalee_auth.nom_prenom.replace({'Inconnu':'Autre'},regex=True,inplace=True)
tr = get_t_roles().reset_index().replace({'':None})
t_roles = pd.merge(tr,azalee_auth, how='inner',left_on=['nom_role','prenom_role','nom_organisme'],right_on=['nom','prenom','organisme'])
dict_role = dict(zip(t_roles.nom_prenom,t_roles.id_role))
return author.replace({r' \(Inconnu\)': '', '\xa0': ' '},regex=True).str.strip().replace(dict_role)
def get_id_t_zh(code=None):
"""@code : str, list, Serie, Index. Code à 12 characters maximum de la zone humide.
"""
sql = "SELECT id_zh,zh_uuid,code FROM pr_zh.t_zh"
if isinstance(code,str):
sql += " WHERE code='%s'"%code
elif isinstance(code,list) or isinstance(code,pd.Series) or isinstance(code,pd.Index):
sql += " WHERE code IN %s"%str(tuple(code))
return pd.read_sql_query(sql,con_gn)
def get_id_org_przh():
return pd.read_sql_table('bib_organismes',con_gn,'pr_zh')
def _cor_zh_hydro(tzh_code):
"""
@tzh_code : pd.Series. Values matching the
pr_zh.t_zh."code" column.
"""
table = 'cor_zh_hydro'
sql = '''
SELECT h.id_hydro,zh.id_zh
FROM pr_zh.t_hydro_area h, pr_zh.t_zh zh
WHERE zh."code" in {tzh_code}
AND ST_INTERSECTS( ST_SetSRID(h.geom,4326),ST_MakeValid(ST_SetSRID(zh.geom,4326)))
AND (h.id_hydro,zh.id_zh) NOT IN (SELECT id_hydro,id_zh FROM pr_zh.cor_zh_hydro)
'''.format(tzh_code=tuple(tzh_code))
df = pd.read_sql_query(sql,con_gn)
if not df.empty:
df.to_sql(
name=table, con=con_gn, schema='pr_zh',
if_exists='append', index=False
)
print('INSERT %i correspondences'%df.shape[0])
else:
print('NO new correspondences identified')
def _cor_zh_(tzh_code,typ):
"""
@tzh_code : pd.Series. Values matching the
pr_zh.t_zh."code" column.
@typ : str. One of ['hydro','rb'].
"""
typ = typ.lower()
table = 'cor_zh_%s'%typ
tab_typ = 't_hydro_area' if typ == 'hydro' else 't_river_basin'
id_typ = 'id_hydro' if typ == 'hydro' else 'id_rb'
sql = '''
SELECT h.{id_typ},zh.id_zh
FROM pr_zh.{tab_typ} h, pr_zh.t_zh zh
WHERE zh."code" in {tzh_code}
AND ST_INTERSECTS( ST_SetSRID(h.geom,4326),ST_MakeValid(ST_SetSRID(zh.geom,4326)))
AND (h.{id_typ},zh.id_zh) NOT IN (SELECT {id_typ},id_zh FROM pr_zh.{tab_to})
;'''.format(
tzh_code = tuple(tzh_code),
id_typ = id_typ,
tab_typ = tab_typ,
tab_to = table)
df = pd.read_sql_query(sql,con_gn)
if not df.empty:
df.to_sql(
name=table, con=con_gn, schema='pr_zh',
if_exists='append', index=False
)
print('INSERT %i correspondences'%df.shape[0])
else:
print('NO new correspondences identified')
def _calc_recouvrmt(df1,df2,how='inner'):
'''
Compute the coverage of df2 over df1,
for each geometry of df1.
Parameters
----------
df1 : GeoDataFrame.
df2 : GeoDataFrame.
'''
iddf1 = df1.columns[0]
iddf2 = df2.columns[0]
# Spatial join
tmp = gpd.sjoin(
df1,
df2[['geom']],
predicate = 'intersects',
how = how)
tmp.dropna(subset=['index_right'],inplace=True)
tmp.index_right = tmp.index_right.astype(int)
tmp.reset_index(inplace=True)
tmp = tmp.join(
df2[['geom',iddf2]].rename(columns={'geom': 'right_geom'}),
on=['index_right'], how='left')
tmp2 = tmp[['index_right','right_geom',iddf2]].copy() \
.rename(columns={'right_geom': 'geom'}) \
.set_geometry('geom')
tmp1 = tmp[[iddf1,'geom']].copy() \
.set_geometry('geom')
if not tmp1.geom.values.is_valid.all():
tmp1.loc[~tmp1.geom.values.is_valid,'geom'] = tmp1.loc[~tmp1.geom.values.is_valid,'geom'].buffer(0)
if not tmp2.geom.values.is_valid.all():
tmp2.loc[~tmp2.geom.values.is_valid,'geom'] = tmp2.loc[~tmp2.geom.values.is_valid,'geom'].buffer(0)
tmp['perc_rcvmt'] = (tmp1.intersection(tmp2).area/tmp1.area)*100
return tmp[[iddf1,iddf2,'perc_rcvmt']]
def _cor_zh_areaBis(tzh_code,typ,cover=False):
"""
@tzh_code : pd.Series. Values matching the
pr_zh.t_zh."code" column.
@typ : str. Area type code (COM, DEP, or any ref_geo type).
"""
from math import ceil
table = 'cor_zh_area'
sqltzh = """
SELECT zh.id_zh, zh.geom FROM pr_zh.t_zh zh WHERE zh."code" in {tzh_code}
""".format(tzh_code=tuple(tzh_code))
tzh = gpd.read_postgis(sqltzh,con_gn,crs=4326)
if tzh.crs.to_epsg() == 4326:
tzh.to_crs(2154,inplace=True)
sqllarea = """
SELECT l.id_area, l.geom FROM ref_geo.l_areas l
JOIN ref_geo.bib_areas_types bib USING (id_type)
WHERE bib.type_code='{typ}' and l."enable"
""".format(typ=typ)
larea = gpd.read_postgis(sqllarea,con_gn,crs=2154)
df = _calc_recouvrmt(larea,tzh).rename(columns={'perc_rcvmt':'cover'})
if cover:
df['cover'] = [ceil(x) for x in df.cover]
else :
df.drop(columns=['cover'],inplace=True)
# return df
if not df.empty:
df.to_sql(
name=table, con=con_gn, schema='pr_zh',
if_exists='append', index=False
)
print('INSERT %i correspondences'%df.shape[0])
else:
print('NO new correspondences identified')
def _cor_zh_area(tzh_code,typ):
"""
@tzh_code : pd.Series. Values matching the
pr_zh.t_zh."code" column.
@typ : str. Area type code (COM, DEP, or any ref_geo type).
"""
from math import ceil
table = 'cor_zh_area'
if typ == 'COM':
cd1 = """,
( ST_Area(ST_INTERSECTION( l.geom,ST_MakeValid(ST_Transform(ST_SetSRID(zh.geom,4326),2154)) )) *100) / ST_Area(l.geom) AS cover
"""
cd2 = ' AND l."enable"'
else: cd1 = cd2 = ''
sql = '''
SELECT l.id_area,zh.id_zh {cover1}
FROM ref_geo.l_areas l
JOIN ref_geo.bib_areas_types bib USING (id_type),
pr_zh.t_zh zh
WHERE zh."code" in {tzh_code}
AND bib.type_code='{typ}'
AND ST_INTERSECTS( ST_SetSRID(l.geom,2154), ST_MakeValid(ST_Transform(ST_SetSRID(zh.geom,4326),2154)) )
AND (l.id_area,zh.id_zh) NOT IN (SELECT id_area,id_zh FROM pr_zh.cor_zh_area)
{cd2}
'''.format(tzh_code=tuple(tzh_code),typ=typ,cover1=cd1,cd2=cd2)
df = pd.read_sql_query(sql,con_gn)
if cd1 != '':
df['cover'] = [ceil(x) for x in df.cover.sort_index()]
if not df.empty:
df.to_sql(
name=table, con=con_gn, schema='pr_zh',
if_exists='append', index=False
)
print('INSERT %i correspondences'%df.shape[0])
else:
sql = '''
SELECT l.id_area
FROM ref_geo.l_areas l
JOIN ref_geo.bib_areas_types bib USING (id_type)
WHERE bib.type_code='{typ}'
'''.format(typ=typ)
res = pd.read_sql_query(sql,con_gn)
if not res.empty:
print('NO new correspondences identified')
else :
print('NO geometry in table `ref_geo.l_areas` for `type_code` %s'%typ)
def find_nb_hab_bylbcode(df):
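"""Count, per wetland (id_zh), the habitat codes flagged is_ch in pr_zh.bib_cb and return them as an 'nb_hab' column."""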
to_corzhcb, not_bib = __filter_lb_code__(
df, join_ch=True
)
return to_corzhcb.groupby('id_zh',dropna=False)\
.agg({'is_ch':sum})\
.rename(columns={'is_ch':'nb_hab'})\
.reset_index()
def to_t_zh(DF):
"""Need IN : columns['typo_sdage','mnemo_sdage']
"""
from geoalchemy2 import Geometry
df = DF.copy()
table = 't_zh'
t_role = get_t_roles().sort_index()
org = get_id_org_przh()
dict_org = dict(zip(org.name,org.id_org))
# First change: split update_author out of create_author when both are present. Important.
test_auth = df.create_author.str.contains(';',na=False)
if test_auth.any():
df.loc[test_auth,'update_author'] = df.loc[test_auth,'create_author'].str.split(';',expand=True)[1]
df.loc[test_auth,'create_author'] = df.loc[test_auth,'create_author'].str.split(';',expand=True)[0]
df['create_author'] = recup_id_role(author=df['create_author'])
df['update_author'] = recup_id_role(author=df['update_author'])
df['id_org'] = [t_role.loc[t_role.index==x,'nom_organisme'].values[0] for x in df['create_author']]
df.id_org.replace(dict_org,inplace=True)
df['id_lim_list'] = cor_lim_list(crit_delim=df.crit_delim)
df['remark_lim'] = recup_delim_rmq(crit_delim=df.crit_delim)
recup_sdage(df)
recup_sage(df)
df['remark_pres'] = df.rmq_site.copy()
# df['v_habref'] = None
# df['ef_area'] = None # Do not fill in: our inventories did not record this.
# df['global_remark_activity'] = None # Do not fill in (MEDWET, to check): info not present in our inventories.
df['id_thread'] = None
df['id_frequency'] = recup_subm(col_subm=df.subm_freq,typ_subm='frequente') # The "partiellement submergé" frequency is missing
df['id_spread'] = recup_subm(col_subm=df.subm_etend,typ_subm='etendue')
df['id_connexion'] = recup_subm(col_subm=df.connexion,typ_subm='connexion')\
.replace({'Non déterminé':None})
# df['id_diag_hydro'] = None # Do not fill in (MEDWET, to check): info not present in our inventories.
# df['id_diag_bio'] = None # Do not fill in (MEDWET, to check): info not present in our inventories.
# df['id_strat_gestion'] = None # Do not fill in (MEDWET, to check): info not present in our inventories.
# df['remark_diag'] = None
# df['is_other_inventory'] = None # To update from MEDWET
# df['is_carto_hab'] = None # default: False
# df['nb_hab'] = [len(x) if x else None for x in df.code_cb.str.split(';')] # Refers to heritage habitats
df = df.merge(find_nb_hab_bylbcode(df),on='id_zh',how='left')
# df['total_hab_cover'] = 100 # Refers to heritage habitats; cannot be filled in.
df['remark_eval_functions'] = df.rmq_fct_majeur.copy()
df['remark_eval_heritage'] = df.rmq_interet_patri.copy()
df['remark_eval_thread'] = df.rmq_bilan_menace.copy()
df['remark_eval_actions'] = df.rmq_orient_act.copy()
df['area'] = round(df.geom.area,2)
tzh_cols = recup_cols_table(table,con_gn)
lst_cols = df.columns[df.columns.isin(tzh_cols)]
to_tzh = df[lst_cols].copy()
print('Columns not inserted: %s'%str([x for x in tzh_cols if x not in lst_cols]))
if to_tzh.crs.to_epsg() == 2154:
to_tzh.to_crs(4326,inplace=True)
# dict_crs = to_tzh.crs.to_json_dict()
# dict_crs['id']['code'] = 0
# to_tzh.crs.from_json_dict(dict_crs)
# to_tzh.geom = to_tzh.geom.to_wkt().copy()
to_tzh.to_wkt().to_sql(
name=table, con=con_gn, schema='pr_zh',
if_exists='append', index=False,
dtype={
'geom': Geometry(srid=4326)
# 'id_lim_list':uuid.SafeUUID
}
)
print('INSERT t_zh OK !')
_cor_zh_area(tzh_code=to_tzh.code,typ='DEP')
print('INSERT cor_zh_area DEP OK !')
_cor_zh_areaBis(tzh_code=to_tzh.code,typ='COM',cover=True)
print('INSERT cor_zh_area COM OK !')
_cor_zh_area(tzh_code=to_tzh.code,typ='ZPS')
print('INSERT cor_zh_area ZPS OK !')
_cor_zh_area(tzh_code=to_tzh.code,typ='SIC')
print('INSERT cor_zh_area SIC OK !')
# _cor_zh_area(tzh_code=to_tzh.code,typ='ZSC')
# print('INSERT cor_zh_area ZSC OK !')
# _cor_zh_area(tzh_code=to_tzh.code,typ='PSIC')
# print('INSERT cor_zh_area PSIC OK !')
_cor_zh_area(tzh_code=to_tzh.code,typ='ZNIEFF1')
print('INSERT cor_zh_area ZNIEFF1 OK !')
_cor_zh_area(tzh_code=to_tzh.code,typ='ZNIEFF2')
print('INSERT cor_zh_area ZNIEFF2 OK !')
_cor_zh_hydro(tzh_code=to_tzh.code)
print('INSERT cor_zh_hydro OK !')
_cor_zh_(tzh_code=to_tzh.code,typ='rb')
print('INSERT cor_zh_rb OK !')
def to_cor_zh_lim_fs(df):
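"""Fill pr_zh.cor_zh_lim_fs from the 'crit_def_esp' column, mapped to the CRIT_DEF_ESP_FCT nomenclature ('Non déterminé' when missing)."""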
df = df[['code','crit_def_esp']]\
.fillna('Non déterminé')
cor_zh_lim_fs = pd.merge(df,get_id_t_zh(df.code),on='code')\
.rename(columns={'crit_def_esp':'id_lim_fs'})
crit_def_esp_fct = t_nomenclature_ZH('CRIT_DEF_ESP_FCT')
dict_crit = dict(zip(crit_def_esp_fct.mnemo,crit_def_esp_fct.id_nomenclature))
cor_zh_lim_fs.id_lim_fs.replace(dict_crit, inplace=True)
cor_zh_lim_fs[['id_zh','id_lim_fs']].to_sql(
name='cor_zh_lim_fs',con=con_gn,schema='pr_zh',if_exists='append',index=False
)
def get_azalee_activity():
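"""Read from Azalée (schema zones_humides) the validated human-activity records (activity, position, impact, remarks) for sites without an end date."""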
sql = """
SELECT
g.id_site code,
CASE WHEN length(pa.id::varchar)=1
THEN '0'||pa.id::varchar||' - '||pa.nom
ELSE pa.id::varchar||' - '||pa.nom
END activ_hum,
pp.description "position",
pi.nom impact,
a.activ_hum_autre||'\n'||a.remarques rmq_activ_hum,
a."valid"
FROM zones_humides.r_site_usageprocess a
LEFT JOIN zones_humides.param_activ_hum pa ON pa.id = a.id_activ_hum
LEFT JOIN zones_humides.param_position pp ON pp.id = a.id_position
LEFT JOIN zones_humides.param_impact pi ON pi.id = a.id_impact
JOIN (sites.r_sites_geom g JOIN sites.sites s ON s.id = g.id_site)
ON g.id = a.id_geom_site
WHERE g.date = (SELECT max(dat) FROM (values (g.date)) AS value(dat))
and a."valid"
and s.date_fin is NULL
"""
return pd.read_sql_query(sql,zh.con)
def get_cor_impact_types():
return pd.read_sql_table('cor_impact_types',con_gn,'pr_zh')
def to_t_activity(df=None):
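"""Fill pr_zh.t_activity and pr_zh.cor_impact_list: activities, impacts and positions are mapped to their
nomenclatures, grouped per (code, activity), and each group receives a generated id_impact_list UUID."""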
table = 't_activity'
if df is None:
df = get_azalee_activity()\
.drop_duplicates()
else:
df = df[['code','activ_hum','impact','position','rmq_activ_hum']]
activ_hum = df.activ_hum.str.split(';',expand=True).stack()
impact = df.impact.str.split(';',expand=True).stack()
position = df.position.str.split(';',expand=True).stack()
rmq_activ_hum = df.rmq_activ_hum.str.split(';',expand=True).stack()
# df['activ_hum'] = remove_special_char(df['activ_hum'],space=True)
# df['impact'] = remove_special_char(df['impact'],space=True)
df['impact'] = remove_special_char(df['impact'].str.lower(),space=True)
df['impact'].fillna('aucun',inplace=True)
# df['position'] = remove_special_char(df['position'],space=True)
no_activ_hum = t_nomenclature_ZH('ACTIV_HUM')
dict_activ_hum = dict(zip(no_activ_hum.mnemo,no_activ_hum.id_nomenclature))
no_impact = t_nomenclature_ZH('IMPACTS').rename(columns={'id_nomenclature':'id_impact'})
cor_impact_types = pd.merge(get_cor_impact_types(),no_impact[['id_impact','mnemo']],on='id_impact')
dict_impact = dict(zip(remove_special_char(cor_impact_types.mnemo.str.lower(),space=True),cor_impact_types.id_cor_impact_types))
no_position = t_nomenclature_ZH('LOCALISATION')
dict_position = dict(zip(no_position.mnemo,no_position.id_nomenclature))
df['activ_hum'].replace(dict_activ_hum, inplace=True)
df['impact'].replace(dict_impact, inplace=True)
df['position'].replace(dict_position, inplace=True)
df.rename(columns={
'activ_hum':'id_activity',
'impact':'id_cor_impact_types',
'position':'id_position',
'rmq_activ_hum':'remark_activity'
},inplace=True)
# group_df = df.groupby(['code','id_activity','id_position'])['id_cor_impact_types'].apply(list).reset_index()
group_df = df.groupby(['code','id_activity'],dropna=False)\
.agg({'id_position':list,'id_cor_impact_types':list,'remark_activity':list}).reset_index()
group_df.id_position = [list(set(x)) for x in group_df.id_position ]
group_df.id_position = [
x[0] if len(x)==1 else no_position.loc[no_position.cd_nomenclature=='3','id_nomenclature'].values[0]
for x in group_df.id_position
]
group_df.remark_activity = ['\n'.join(list(set(x))) if list(set(x)) != [None] else None for x in group_df.remark_activity]
group_df['id_impact_list'] = [uuid.uuid4() for x in group_df.index]
cor_impact_list = group_df[['id_impact_list','id_cor_impact_types']]\
.explode('id_cor_impact_types')\
.drop_duplicates()
# activity = pd.merge(group_df[['code','id_activity','id_impact_list','id_position']],df,on=['code','id_activity','id_position'],how='left')
# t_activity = pd.merge(activity,get_id_t_zh(df.code),on='code')
t_activity = pd.merge(group_df,get_id_t_zh(df.code),on='code',how='left')
tactiv_cols = recup_cols_table(table,con_gn)
lst_cols = t_activity.columns[t_activity.columns.isin(tactiv_cols)]
to_tactiv = t_activity[lst_cols]
to_tactiv.to_sql(
name=table, con=con_gn, schema='pr_zh',
if_exists='append', index=False,
# dtype={
# 'id_impact_list':uuid.UUID
# }
)
cor_impact_list.to_sql(
name='cor_impact_list', con=con_gn, schema='pr_zh',
if_exists='append', index=False,
# dtype={
# 'id_impact_list':uuid.UUID
# }
)
def get_azalee_functions():
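"""Read from Azalée (schema zones_humides) the validated ecological / socio-economic / heritage function records for sites without an end date."""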
sql = """
SELECT
g.id_site code,
pa.nom id_function,
a."quantite",
a.description justification,
a."valid"
FROM zones_humides.r_site_fctecosociopatri a
LEFT JOIN zones_humides.param_fct_eco_socio_patri pa ON pa.id = a.id_fct
JOIN (sites.r_sites_geom g JOIN sites.sites s ON s.id = g.id_site)
ON g.id = a.id_geom_site
WHERE g.date = (SELECT max(dat) FROM (values (g.date)) AS value(dat))
and a."valid"
and s.date_fin is NULL
"""
return pd.read_sql_query(sql,zh.con)
def to_t_functions(df=None):
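"""Fill pr_zh.t_functions: function labels are mapped to the FONCTIONS_HYDRO, FONCTIONS_BIO, VAL_SOC_ECO and
INTERET_PATRIM nomenclatures; qualification defaults to 'Non évaluée' and knowledge to 'Lacunaire ou nulle'."""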
table = 't_functions'
if df is None:
func = get_azalee_functions()
func.id_function.replace({' / ':'/'},regex=True,inplace=True)
else:
func = df[['code','fct_bio','fct_hydro','int_patri','val_socioEco']].set_index('code').unstack()
func = func.str.split(';',expand=True).stack()\
.str.split(' \(',1,expand=True)
func.columns = ['id_function','justification']
func.justification = func.justification.str.rsplit(')', n=1, expand=True)[0]  # strip the trailing ')' (rsplit is literal, not regex)
functions = pd.concat([
t_nomenclature_ZH('FONCTIONS_HYDRO'),t_nomenclature_ZH('FONCTIONS_BIO'),
t_nomenclature_ZH('VAL_SOC_ECO'),t_nomenclature_ZH('INTERET_PATRIM'),
])
functions.mnemo.replace({
r'..- ':'',
r' \(.*\)':''
},regex=True,inplace=True)
dict_func = dict(zip(functions.mnemo,functions.id_nomenclature))
func.id_function.replace(dict_func,inplace=True)
not_idfunc = ['non documenté','aucune fonction hydrologique','aucune valeur socio-économique']
del_index = func[func.id_function.isin(not_idfunc)].index
func.drop(del_index,inplace=True)
funct = func.groupby(['code','id_function']).agg(list).reset_index()
funct.justification = ['\n'.join(x) if x != [None] else None for x in funct.justification]
qualif = t_nomenclature_ZH('FONCTIONS_QUALIF')
knowle = t_nomenclature_ZH('FONCTIONS_CONNAISSANCE')
funct['id_qualification'] = qualif.loc[qualif.mnemo=='Non évaluée','id_nomenclature'].values[0]
funct['id_knowledge'] = knowle.loc[knowle.mnemo=='Lacunaire ou nulle','id_nomenclature'].values[0]
t_func = pd.merge(funct,get_id_t_zh(funct.code),on='code')
tactiv_cols = recup_cols_table(table,con_gn)
lst_cols = t_func.columns[t_func.columns.isin(tactiv_cols)]
to_tfunction = t_func[lst_cols]
to_tfunction.to_sql(
name=table, con=con_gn, schema='pr_zh',
if_exists='append', index=False
)
def add_remark_pres(not_bib):
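"""Append the habitat codes listed in `not_bib` (codes absent from pr_zh.bib_cb) to t_zh.remark_pres."""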
table = 't_zh'
rmq_pres = pd.read_sql_table(table,con_gn,'pr_zh',columns=['id_zh','remark_pres'])
rmq_pres.remark_pres.replace({'':None},inplace=True)
not_bib = rmq_pres.merge(not_bib,on='id_zh')
not_bib.loc[not_bib.remark_pres.notna(),'remark_pres'] = \
not_bib[not_bib.remark_pres.notna()].remark_pres+ '\n' + \
not_bib[not_bib.remark_pres.notna()].lb_code
not_bib.loc[not_bib.remark_pres.isna(),'remark_pres'] = \
not_bib[not_bib.remark_pres.isna()].lb_code
not_bib.drop(columns='lb_code',inplace=True)
from pycen import update_to_sql
update_to_sql(
df=not_bib,
con=con_gn,
table_name=table,
schema_name='pr_zh',
key_name='id_zh',
)
def __format_lb_code__(t):
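"""Explode the ';'-separated 'code_cb' column into one (id_zh, lb_code) row per habitat code."""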
table = 'cor_zh_cb'
cols = ['code','code_cb']
df_cb = t[cols].copy()
df_cb.set_index('code',inplace=True)
zh_cb = df_cb.code_cb.str.split(';',expand=True)\
.stack()\
.droplevel(-1)\
.reset_index()
zh_cb.columns = cols
zh_cb.rename(columns={'code_cb':'lb_code'},inplace=True)
cor_zh_cb = pd.merge(zh_cb,get_id_t_zh(zh_cb.code.unique()),on='code')
tzhcb = recup_cols_table(table,con_gn)
lst_cols = cor_zh_cb.columns[cor_zh_cb.columns.isin(tzhcb)]
to_corzhcb = cor_zh_cb[lst_cols].copy()
to_corzhcb.lb_code = to_corzhcb.lb_code.astype(str)
return to_corzhcb
def __filter_lb_code__(t, join_ch=False):
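"""Split the habitat codes into those present in pr_zh.bib_cb (optionally joined with bib_cb when join_ch=True)
and those that are not, the latter grouped per id_zh."""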
to_corzhcb = __format_lb_code__(t)
bib_cb = pd.read_sql_table('bib_cb',con_gn,'pr_zh')
bib_cb.lb_code = bib_cb.lb_code.astype(str)
not_bib = to_corzhcb[~to_corzhcb.lb_code.isin(bib_cb.lb_code)]
to_corzhcb.drop(not_bib.index,inplace=True)
not_bib = not_bib\
.groupby('id_zh').agg(','.join)
if join_ch:
to_corzhcb = to_corzhcb.merge(
bib_cb.drop(columns='humidity'),on='lb_code',how='left'
)
return to_corzhcb, not_bib
def to_cor_zh_cb(t):
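"""Fill pr_zh.cor_zh_cb with the habitat codes known in bib_cb; unknown codes are appended to t_zh.remark_pres via add_remark_pres()."""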
table = 'cor_zh_cb'
to_corzhcb, not_bib = __filter_lb_code__(t, join_ch=False)
not_bib.lb_code = 'Autre(s) habitat(s) décrit(s) :\n' + not_bib.lb_code
add_remark_pres(not_bib)
to_corzhcb.to_sql(
name=table, con=con_gn, schema='pr_zh',
if_exists='append', index=False
)
def to_t_flow(DF, type_flow=None):
"""
@df : pd.DataFrame
@type_flow : str. ['inflow' or 'outflow']
"""
if type_flow=='inflow':
table = 't_inflow'
cols = ['entree_eau_reg', 'entree_eau_perm', 'entree_eau_topo']
_flow = t_nomenclature_ZH('ENTREE_EAU')
_perm = t_nomenclature_ZH('PERMANENCE_ENTREE')
dict_table = {
'flow':'id_inflow',
'perm':'id_permanance'}
elif type_flow=='outflow':
table = 't_outflow'
cols = ['sortie_eau_reg', 'sortie_eau_perm', 'sortie_eau_topo']
_flow = t_nomenclature_ZH('SORTIE_EAU')
_perm = t_nomenclature_ZH('PERMANENCE_SORTIE')
dict_table = {
'flow':'id_outflow',
'perm':'id_permanance'}
df = DF.copy().set_index('code')
# df[cols]
flow = df[cols[0]].str.split(';',expand=True).stack()
perm = df[cols[1]].str.split(';',expand=True).stack()
topo = df[cols[2]].str.split(';',expand=True).stack()
inflow = pd.concat(
[flow,perm,topo],axis=1,join='outer'
).droplevel(-1)
# inflow2 = flow.to_frame()\
# .merge(perm.to_frame(),left_index=True,right_index=True,how='outer')\
# .merge(topo.to_frame(),left_index=True,right_index=True,how='outer')
inflow.columns = ['flow','perm','topo']
inflow.reset_index(drop=False,inplace=True)
dict_flow = dict(zip(_flow.mnemo,_flow.id_nomenclature))
dict_perm = dict(zip(_perm.mnemo.str.lower(),_perm.id_nomenclature))
inflow.flow.replace(dict_flow,inplace=True)
inflow.perm.fillna('non déterminé',inplace=True)
inflow.perm.replace({'inconnu':'non déterminé','':'non déterminé'},inplace=True)
inflow.perm.replace(dict_perm,inplace=True)
inflow.rename(columns=dict_table, inplace=True)
t_flow = pd.merge(inflow,get_id_t_zh(inflow.code.unique()),on='code')
tflow = recup_cols_table(table,con_gn)
lst_cols = t_flow.columns[t_flow.columns.isin(tflow)]
to_tflow = t_flow[lst_cols]
to_tflow.to_sql(
name=table, con=con_gn, schema='pr_zh',
if_exists='append', index=False
)
def cor_zh_corine_cover():
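"""Fill pr_zh.cor_zh_corine_cover by intersecting the wetlands with the Corine Land Cover 2012 shapefile
(hard-coded local path) and mapping the CLC codes to the OCCUPATION_SOLS nomenclature."""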
table = 'cor_zh_corine_cover'
sql = 'SELECT id_zh,geom FROM pr_zh.t_zh'
df = gpd.read_postgis(sql,con_gn,crs=4326)
df.to_crs(2154,inplace=True)
df.geom = df.buffer(-0.5)
mask = df.to_crs(2154).unary_union
clc_path = '/home/colas/Documents/5_BDD/CLC/'
clc_file = clc_path+'CLC12_FR_RGF.shp'
clc_mfil = clc_path+'Metadonnees/CLC_nomenclature.xls'
clc = gpd.read_file(clc_file,mask=mask)
clc.rename_geometry('geom', inplace=True)
meta_clc0 = pd.read_excel(clc_mfil,0).rename(columns={'code_clc_niveau_1':'code_clc'})
meta_clc1 = pd.read_excel(clc_mfil,1).rename(columns={'code_clc_niveau_2':'code_clc'})
meta_clc2 = pd.read_excel(clc_mfil,2).rename(columns={'code_clc_niveau_3':'code_clc'})
meta_clc = pd.concat([meta_clc0,meta_clc1,meta_clc2])
meta_clc.code_clc = meta_clc.code_clc.astype(str)
gn_occsol = t_nomenclature_ZH('OCCUPATION_SOLS',source=None)
dict_clc1 = dict(zip(meta_clc.code_clc,['.'.join(x) for x in meta_clc.code_clc]))
dict_clc2 = dict(zip(gn_occsol.cd_nomenclature,gn_occsol.id_nomenclature))
tmp = gpd.sjoin(
df,
clc[['CODE_12','geom']],
predicate = 'intersects',
how = 'inner')
cor_zh_clc = tmp[['id_zh','CODE_12']]\
.drop_duplicates()\
.rename(columns={'CODE_12':'id_cover'})\
.replace({'id_cover':dict_clc1})\
.replace({'id_cover':dict_clc2})
cor_zh_clc.to_sql(
name=table, con=con_gn, schema='pr_zh',
if_exists='append', index=False
)
def migrate_to_gnZH(df:pd.DataFrame=None):
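"""Run the full Azalée -> GeoNature migration for the wetlands in `df`: organisations, t_zh and its cor_zh_*
tables, delimitation criteria, activities, functions, water inflows/outflows and Corine land cover."""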
to_bib_organismes_przh()
to_t_zh(df)
to_cor_zh_lim_fs(df)
to_t_activity(df)
to_t_functions(df)
to_t_flow(df,type_flow='inflow')
to_t_flow(df,type_flow='outflow')
cor_zh_corine_cover()
def to_t_references(db_file, suffixe_refnum=None):
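"""Import into pr_zh.t_references the MEDWET references (MWDREF table of the Access file `db_file`) cited in
SITEREF; when given, `suffixe_refnum` is prepended to ref_number."""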
import pandas_access as mdb
table = 't_references'
t_ref = pd.read_sql_table(table,con_gn,'pr_zh')
dic_col_ref = {
'REF_NO':'ref_number',
'REFERENCE':'reference',
'AUTHOR':'authors',
'TITLE':'title',
'YEAR':'pub_year',
'PUBLISHER':'editor',
'LOCATION':'editor_location',
}
df = mdb.read_table(db_file, 'MWDREF')\
.rename(columns=dic_col_ref)
df.ref_number = df.ref_number.astype(str)
siteref = mdb.read_table(db_file, 'SITEREF')
siteref.REF_NO = siteref.REF_NO.astype(str)
df = df[df.ref_number.isin(siteref.REF_NO)]
df.loc[df.title.isna(),'title'] = df[df.title.isna()].reference
if suffixe_refnum is not None:
df.ref_number = suffixe_refnum + df.ref_number
df.to_sql(name=table,con=con_gn,schema='pr_zh',if_exists='append',index=False)
def to_cor_zh_ref(db_file, suffixe_refnum=None):
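"""Fill pr_zh.cor_zh_ref by linking the MEDWET SITEREF rows (SITE_COD) to the matching id_zh and id_reference."""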
import pandas_access as mdb
dict_col_cor = {
'REF_NO':'id_ref'
}
table = 'cor_zh_ref'
ref = pd.read_sql_table('t_references',con_gn,'pr_zh')
dict_idref = dict(zip(ref.ref_number,ref.id_reference))
# SITEREF
cor = mdb.read_table(db_file, 'SITEREF')\
.rename(columns=dict_col_cor)
cor.id_ref = cor.id_ref.astype(str)
if suffixe_refnum is not None:
cor.id_ref = suffixe_refnum + cor.id_ref
t_zh = get_id_t_zh(cor.SITE_COD)
to_tab = cor.merge(
t_zh.drop(columns='zh_uuid'),
left_on='SITE_COD',
right_on='code',
).drop(columns=['SITE_COD','code'])\
.replace(dict_idref)
to_tab.to_sql(
name=table,con=con_gn,schema='pr_zh',
if_exists='append',index=False)
def OTHERINV_to_tref(db_file):
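"""Parse the MEDWET SITEINFO.OTHER_INV field, discard the ZNIEFF / Natura-style mentions, and write the
remaining inventory mentions to t_zh.remark_is_other_inventory, setting is_other_inventory to True."""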
import pandas_access as mdb
table = 't_zh'
dic = {
'FFn' :'FF n',
r'n \° ' :'',
r'n \°' :'',
r'n\+' :'',
r'n\° ':'',
r'n\° ':'',
r'n\° ' :'',
'\xa0' :' ',
}
sitinfo = mdb.read_table(db_file, 'SITEINFO')\
.set_index('SITE_COD')
otinv = sitinfo[['OTHER_INV']]\
.dropna().OTHER_INV\
.str.split(';',expand=True).stack()\
.str.strip()\
.replace(dic,regex=True)\
.str.split(', Z',expand=True,regex=True).stack()\
.str.strip()
znieff = otinv[otinv.str.startswith('ZNIEF')]
znieff = pd.concat([znieff,otinv[otinv.str.startswith('NIEF')]])
znieff = pd.concat([znieff,otinv[otinv.str.startswith('ZNEIF')]])
znieff = pd.concat([znieff,otinv[otinv.str.startswith('')]])
znieff = pd.concat([znieff,otinv[otinv.str.startswith('Site N')]])
znieff = pd.concat([znieff,otinv[otinv.str.startswith('Sites N')]])
znieff = pd.concat([znieff,otinv[otinv.str.startswith('2606')]])
znieff = pd.concat([znieff,otinv[otinv.str.startswith('3817')]])
znieff = pd.concat([znieff,otinv[otinv.str.startswith('Inventaire N')]])
znieff = pd.concat([znieff,otinv[otinv.str.startswith('Mais aussi ZPS')]])
filter_inv = otinv[~otinv.isin(znieff)]\
.droplevel([-1,-2])\
.reset_index()
other_inv = filter_inv.groupby('SITE_COD').agg('\n'.join)\
.reset_index()\
.rename(columns={0:'remark_is_other_inventory'})
t_zh = get_id_t_zh(other_inv.SITE_COD)
to_tab = other_inv.merge(
t_zh.drop(columns='zh_uuid'),
left_on='SITE_COD',
right_on='code',
).drop(columns=['SITE_COD','code'])
to_tab['is_other_inventory'] = True
from pycen import update_to_sql
update_to_sql(
df=to_tab,
con=con_gn,
table_name=table,
schema_name='pr_zh',
key_name='id_zh',
)
def trunc_table(table,cascade=False):
"""
Truncate the pr_zh.<table> table.
Parameters
----------
table : str
Name of the table to truncate
cascade : bool, optional
If True, the truncation is run with the CASCADE option, meaning that
rows referencing this table through foreign keys are deleted as well
(for example, truncating a wetland also removes the monitoring data
linked to that wetland). Defaults to False.
"""
cascade = 'CASCADE;' if cascade else ';'
sql = 'TRUNCATE pr_zh.%s %s'%(table,cascade)
with con_gn.begin() as cnx:
cnx.execute(sql)
if __name__ == "__main__":
# TRUNCATE TABLE
# trunc_table('t_zh',cascade=True)
# trunc_table('cor_zh_area')
# trunc_table('t_reference')
from pycen.geonature import pr_zh
t_zh = pr_zh.t_zh()
drop_cols = ['auteur_geom','date_geom','type_milieu','type_site',]
DF = zh.v_zoneshumides()
DF.rename(columns=DICT_TZH,inplace=True)
DF.drop(columns=drop_cols,inplace=True)
df = DF.copy()
df = DF[~DF.code.isin(t_zh.code)].copy()
migrate_to_gnZH(df)
# to_bib_organismes_util() # Done; schema 'utilisateurs'
# to_bib_organismes_przh() # Done; schema 'pr_zh'
# to_t_roles() # Done
# to_t_zh(df) # Done
# to_cor_zh_lim_fs(df) # Done
# to_t_activity(df) # Done
# to_t_functions(df) # Done
# to_t_flow(df,type_flow='inflow') # Done
# to_t_flow(df,type_flow='outflow') # Done
# t_river_basin # OK ! cf. insert_lareas.py (SDAGE sub-basins)
# t_hydro_area # OK ! cf. insert_lareas.py (topographic watersheds)
# cor_zh_area # OK with to_t_zh ! ; bib_area = [COM,DEP,ref for ref_geo_referentiels of conf_gn_module.toml]
# cor_zh_rb # OK with to_t_zh ! ;
# cor_zh_hydro # OK with to_t_zh ! ;
# cor_zh_fct_area # Depends on t_fct_area (empty): table of functional areas
# cor_zh_corine_cover # OK !
# fct_delim
# to_cor_zh_cb() # Ready
# DF[DF.sortie_eau_reg.str.contains('diffus',na=False)].code.tolist()
# ['38BB0109', '38BB0128', '38BB0129']
# get_cor_zh_corine_cover() # TO DO via MEDWET
# get_cor_zh_protection() # TO DO via MEDWET
# get_t_ownership() # TO DO via MEDWET
# get_t_table_heritage() # TO DO (not sure..)
# get_t_instruments() # TO DO (not sure..)
# get_t_management_structures() # TO DO (not sure..)
t_nomenclature_ZH(bib_mnemo='EVAL_GLOB_MENACES')
def drop_table(table):
sql = 'TRUNCATE pr_zh.%s'%table
with con_gn.begin() as cnx:
cnx.execute(sql)
# the con_gn.begin() context manager commits and closes the connection on exit