#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
from pycen import con_gn, zh, pers
import pandas as pd
import geopandas as gpd
import uuid
import re

zh = zh()

# Mapping of Azalée column names to pr_zh.t_zh column names.
# Commented-out keys are either dropped or handled separately below.
DICT_TZH = {
    'site_code': 'code',
    'nom': 'main_name',
    'autre_nom': 'secondary_name',
    'auteur_site': 'create_author',
    ## 'auteur_geom',
    'auteur_last_maj': 'update_author',
    'date_site': 'create_date',
    ## 'date_geom',
    'date_last_maj': 'update_date',
    # 'CEN Isère': 'id_org',  # operating organisation
    ## 'type_milieu',
    ## 'type_site',
    # 'typo_sdage': 'id_sdage',
    # 'id_sage'  # to fetch
    # 'rmq_site': 'remark_pres',
    # 'rmq_fct_majeur': '',
    # 'rmq_interet_patri': '',
    # 'rmq_bilan_menace': '',
    # 'rmq_orient_act': '',
    # 'rmq_usage_process': '',
    # 'code_cb': '',
    # 'lib_cb': '',
    # 'activ_hum': '',
    # 'impact': '',
    # 'position': '',
    # 'rmq_activ_hum': '',
    # 'connexion': '',
    # 'subm_orig': '',
    # 'subm_freq': '',
    # 'subm_etend': '',
    # 'fct_bio': '',
    # 'fct_hydro': '',
    # 'int_patri': '',
    # 'val_socioEco': '',
    # 'crit_delim': '',
    # 'crit_def_esp': '',
    # 'entree_eau_reg': '',
    # 'entree_eau_perm': '',
    # 'entree_eau_topo': '',
    # 'sortie_eau_reg': '',
    # 'sortie_eau_perm': '',
    # 'sortie_eau_topo': '',
    # 'geom': 'geom'
}


def get_id_organisme(nom):
    """Return the utilisateurs.bib_organismes id(s) matching an organisation name."""
    if "'" in nom:
        # names are stored with typographic apostrophes in the database
        nom = nom.replace("'", "’")
    sql = "SELECT id_organisme FROM utilisateurs.bib_organismes WHERE nom_organisme = '%s'" % nom
    return pd.read_sql_query(sql, con_gn)['id_organisme'].values


def remove_special_char(obj, space=False):
    """Strip accents and typographic characters from a pandas object (regex replace)."""
    dict_char = {
        r'[’]': "'",
        r'[àáâãäå]': 'a',
        r'[èéêë]': 'e',
        r'[ìíîï]': 'i',
        r'[òóôõö]': 'o',
        r'[ùúûü]': 'u',
        # r'[ ]': "",
        r'[–]': "-"
    }
    if space:
        dict_char = {**dict_char, **{r'[ ]': ""}}
    return obj.replace(dict_char, regex=True)


def recup_cols_table(table, con, schema='pr_zh'):
    """Return the column names of a table, via the SQLAlchemy dialect."""
    lst_cols = con.dialect.get_columns(con, table, schema)
    return [x['name'] for x in lst_cols]


def t_nomenclature_ZH(bib_mnemo=None, source='ZONES_HUMIDES'):
    """Read ref_nomenclatures, optionally filtered by nomenclature type and source."""
    sch = 'ref_nomenclatures'
    tab = ['t_nomenclatures', 'bib_nomenclatures_types']
    sql = """
    SELECT a.id_nomenclature, a.cd_nomenclature, a.mnemonique mnemo,
        a.label_default as label, a.definition_default def,
        b.mnemonique bib_mnemo, b.label_default bib_label,
        b.definition_default bib_def, a.active
    FROM {sch}.{tab0} a
    JOIN {sch}.{tab1} b USING (id_type)
    """.format(sch=sch, tab0=tab[0], tab1=tab[1])
    if source is not None or bib_mnemo is not None:
        sql += " WHERE "
    if source is not None:
        sql += """b."source" = '%s'""" % source
        sql += " AND " if bib_mnemo is not None else ''
    if bib_mnemo is not None:
        sql += "b.mnemonique = '%s'" % bib_mnemo
    return pd.read_sql_query(sql, con_gn).replace({r'[’]': "'"}, regex=True)


def recup_sdage(df):
    # Extract the SDAGE id from the typology label
    df['id_sdage'] = [str(int(re.search(r'\d+', x).group())) for x in df.typo_sdage.sort_index()]
    sdage = t_nomenclature_ZH(bib_mnemo='SDAGE')
    dict_sdage = dict(zip(sdage.cd_nomenclature, sdage.id_nomenclature))
    df['id_sdage'].replace(dict_sdage, inplace=True)
    return df


def recup_sage(df):
    # Identify the SAGE refinement (typologies carrying a '.N' suffix)
    df['id_sage'] = [
        df.loc[df.typo_sdage == x, 'mnemo_sdage'].values[0] if re.search(r'\.\d+', x) else None
        for x in df.typo_sdage.sort_index()
    ]
    sage = t_nomenclature_ZH(bib_mnemo='SAGE')
    dict_sage = dict(zip(sage.mnemo, sage.id_nomenclature))
    df['id_sage'].replace(dict_sage, inplace=True)
    return df

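
# Illustrative usage of the helpers above (assumes a live GeoNature connection
# `con_gn`; values shown are hypothetical):
#   >>> remove_special_char(pd.Series(['Fédération des étangs']))
#   0    Federation des etangs
#   dtype: object
#   >>> sdage = t_nomenclature_ZH(bib_mnemo='SDAGE')
#   >>> dict(zip(sdage.cd_nomenclature, sdage.id_nomenclature))  # cd -> id mapping
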
def cor_lim_list(crit_delim):
    """Fill the pr_zh.cor_lim_list table and return the associated UUIDs."""
    delim = t_nomenclature_ZH(bib_mnemo='CRIT_DELIM')
    delim.mnemo = delim.mnemo.str.replace(r'.\(.*\)', '', regex=True)
    dict_delim = dict(zip(delim.mnemo.str.lower(), delim.id_nomenclature))
    serie = crit_delim\
        .fillna('non déterminé')\
        .str.split(';', expand=True).stack().droplevel(-1).reset_index()
    serie.columns = ['id', 'delim']
    serie.set_index('id', inplace=True)
    serie['id_lim'] = serie.delim.str.replace(r'.\(.*\)', '', regex=True)
    serie.id_lim.replace(dict_delim, inplace=True)
    genuuid = serie.index.to_frame().drop_duplicates()
    del genuuid['id']
    genuuid['id_lim_list'] = [uuid.uuid4() for x in genuuid.index]
    _cor_lim_list = pd.merge(serie, genuuid, how='inner', right_index=True, left_index=True)
    uuidreturn = pd.merge(crit_delim, genuuid, how='left', right_index=True, left_index=True)
    # Fill the pr_zh.cor_lim_list table
    _cor_lim_list[['id_lim_list', 'id_lim']].to_sql(
        name='cor_lim_list', con=con_gn, schema='pr_zh', if_exists='append', index=False,
        # dtype={
        #     'id_lim_list': uuid.SafeUUID
        # }
    )
    return uuidreturn.id_lim_list.sort_index()


def recup_delim_rmq(crit_delim):
    """Extract the free-text remark found between parentheses in each delimitation criterion."""
    serie = crit_delim.str.split(';', expand=True).stack().droplevel(-1).reset_index()
    serie.columns = ['id', 'delim']
    serie.set_index('id', inplace=True)
    serie['remarks'] = [
        x[x.find("(") + 1:x.rfind(")")] if x.find("(") > -1 else None
        for x in serie.delim
    ]
    uniserie = serie.groupby('id')['remarks'].apply(list).reset_index()
    uniserie.columns = ['id', 'remarks']
    uniserie.set_index('id', inplace=True)
    uniserie.remarks = ['\n'.join(list(set(filter(None, x)))).strip() for x in uniserie.remarks]
    uniserie.remarks.replace({'': None}, inplace=True)
    df_remarks = pd.merge(
        crit_delim,
        # serie.reset_index().drop_duplicates(subset='id').set_index('id'),
        uniserie,
        how='left', right_index=True, left_index=True
    )
    return df_remarks.remarks


def recup_subm(col_subm, typ_subm):
    """Map submersion values to id_nomenclature.

    @col_subm : Series. Submersion column.
    @typ_subm : str. Submersion type, one of ['frequente', 'etendue', 'connexion'].
    """
    # The "partiellement submergé" frequency is missing from the nomenclature
    if typ_subm == 'frequente':
        nom_subm = t_nomenclature_ZH(bib_mnemo='SUBMERSION_FREQ')
    elif typ_subm == 'etendue':
        nom_subm = t_nomenclature_ZH(bib_mnemo='SUBMERSION_ETENDUE')
    elif typ_subm == 'connexion':
        nom_subm = t_nomenclature_ZH(bib_mnemo='TYPE_CONNEXION')
    dict_submfreq = dict(zip(nom_subm.mnemo, nom_subm.id_nomenclature))
    serie = col_subm.replace({'Inconnu': 'Non déterminé'})
    return serie.replace(dict_submfreq)


def to_bib_organismes_util():
    table = 'bib_organismes'
    isin_db = pd.read_sql_table(
        table, con_gn, 'utilisateurs', ['id_organisme'], columns=['nom_organisme']
    ).replace({r'[’]': "'"}, regex=True)
    insert_from = pers.get_organisme()
    to_insert = insert_from[~insert_from.nom.str.lower().isin(isin_db.nom_organisme.str.lower())]
    to_insert\
        .drop(columns='abbrev')\
        .rename(columns={'nom': 'nom_organisme'})\
        .to_sql(name=table, con=con_gn, schema='utilisateurs', if_exists='append', index=False)

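
# Sketch of the delimitation-criteria parsing above (toy Series, hypothetical value):
#   >>> crit = pd.Series(["Hydrophytes (roselière);Topographie"], name='crit_delim')
#   >>> recup_delim_rmq(crit).iloc[0]   # the text in parentheses becomes the remark
#   'roselière'
# cor_lim_list(crit) would additionally insert one cor_lim_list row per criterion
# and return a single shared UUID per wetland (the id_lim_list key).
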
def to_bib_organismes_przh():
    table = 'bib_organismes'
    # if first_time:
    #     sql = 'DELETE FROM pr_zh.%s' % table
    #     with con_gn.begin() as cnx:
    #         cnx.execute(sql)
    isin_db = pd.read_sql_table(
        table, con_gn, 'pr_zh', ['id_org'], columns=['name']
    ).replace({r'[’]': "'"}, regex=True)
    insert_from = pers.get_organisme()\
        .replace({'Inconnu': 'Autre'})
    to_insert = insert_from[~insert_from.nom.str.lower().isin(isin_db.name.str.lower())]
    to_insert.abbrev = remove_special_char(to_insert.abbrev, space=True)\
        .str.upper()\
        .str[:6]
    to_insert.loc[to_insert.abbrev.notna()]\
        .rename(columns={'nom': 'name', 'abbrev': 'abbrevation'})\
        .to_sql(name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False)
    to_insert.loc[to_insert.abbrev.isna()]\
        .rename(columns={'nom': 'name'})\
        .drop(columns=['abbrev'])\
        .to_sql(name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False)


def get_bib_organismes(schema='utilisateurs'):
    table = 'bib_organismes'
    return pd.read_sql_table(
        table, con_gn, schema, ['id_organisme'], columns=['nom_organisme']
    ).replace({r'[’]': "'"}, regex=True)


def to_t_roles():
    table = 't_roles'
    isin_db = pd.read_sql_table(
        table, con_gn, 'utilisateurs', ['id_role'], columns=['nom_role', 'prenom_role', 'id_organisme']
    ).replace({r'[’]': "'"}, regex=True)
    bib_organismes = get_bib_organismes(schema='utilisateurs')
    t_roles = pd.merge(isin_db, bib_organismes, 'left', left_on='id_organisme', right_index=True)
    insert_from = pers.get_auteur2().replace({'GENTIANA': 'Gentiana'})
    ident_notin_db = [
        x for x in insert_from.index.sort_values()
        if t_roles[(t_roles.nom_role == insert_from.nom[x])
                   & (t_roles.prenom_role == insert_from.prenom[x])
                   & (t_roles.nom_organisme == insert_from.organisme[x])].empty
    ]
    dict_orga = dict(zip(bib_organismes.nom_organisme, bib_organismes.index))
    to_insert = insert_from[insert_from.index.isin(ident_notin_db)]\
        .drop(columns=['nom_prenom'])\
        .rename(columns={'nom': 'nom_role', 'prenom': 'prenom_role', 'organisme': 'id_organisme'})\
        .replace({**dict_orga, **{'Inconnu': -1}})
    to_insert.to_sql(
        name=table, con=con_gn, schema='utilisateurs', if_exists='append', index=False
    )


def get_t_roles(id_role=None):
    table = 't_roles'
    t_roles = pd.read_sql_table(
        table, con_gn, 'utilisateurs', ['id_role'], columns=['nom_role', 'prenom_role', 'id_organisme']
    ).replace({r'[’]': "'"}, regex=True).sort_index()
    if id_role:
        t_roles = t_roles.iloc[[id_role]]
    return pd.merge(t_roles, get_bib_organismes(), 'left', left_on='id_organisme', right_index=True)


def recup_id_role(author):
    # TODO: unfinished!
    adapt_auth = author.replace({r' \(Inconnu\)': '', ' ': ' '}, regex=True).str.strip().unique()
    azalee_auth = pers.get_auteur2().sort_index()  # .replace({' ': ' '}, regex=True)
    azalee_auth = azalee_auth[azalee_auth.nom_prenom.isin(adapt_auth)].replace({'Inconnu': 'Autre'})
    # azalee_auth.nom_prenom.replace({'Inconnu': 'Autre'}, regex=True, inplace=True)
    t_roles = pd.merge(
        get_t_roles().reset_index(), azalee_auth,
        how='inner',
        left_on=['nom_role', 'prenom_role', 'nom_organisme'],
        right_on=['nom', 'prenom', 'organisme'])
    dict_role = dict(zip(t_roles.nom_prenom, t_roles.id_role))
    return author.replace({r' \(Inconnu\)': '', ' ': ' '}, regex=True).str.strip().replace(dict_role)


def get_id_t_zh(code=None):
    """@code : str, list, Series or Index. Wetland code, 12 characters max."""
    sql = "SELECT id_zh,zh_uuid,code FROM pr_zh.t_zh"
    if isinstance(code, str):
        sql += " WHERE code='%s'" % code
    elif isinstance(code, (list, pd.Series, pd.Index)):
        sql += " WHERE code IN %s" % str(tuple(code))
    return pd.read_sql_query(sql, con_gn)


def get_id_org_przh():
    return pd.read_sql_table('bib_organismes', con_gn, 'pr_zh')


def _cor_zh_hydro(tzh_code):
    """
    @tzh_code : pd.Series. Values matching the pr_zh.t_zh."code" column.
    """
    table = 'cor_zh_hydro'
    sql = '''
    SELECT h.id_hydro, zh.id_zh
    FROM pr_zh.t_hydro_area h, pr_zh.t_zh zh
    WHERE zh."code" in {tzh_code}
        AND ST_INTERSECTS(
            ST_SetSRID(h.geom,4326), ST_MakeValid(ST_SetSRID(zh.geom,4326)))
        AND (h.id_hydro, zh.id_zh) NOT IN (SELECT id_hydro, id_zh FROM pr_zh.cor_zh_hydro)
    '''.format(tzh_code=tuple(tzh_code))
    df = pd.read_sql_query(sql, con_gn)
    if not df.empty:
        df.to_sql(
            name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False
        )
        print('INSERT %i matches' % df.shape[0])
    else:
        print('No new matches identified')

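
# Illustrative call (codes are hypothetical):
#   >>> get_id_t_zh(['38BB0109', '38BB0128'])  # -> id_zh / zh_uuid / code DataFrame
# NB: a single-element list yields "IN ('38BB0109',)", which PostgreSQL rejects;
# pass a plain str for a single code instead.
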
def _cor_zh_(tzh_code, typ):
    """
    @tzh_code : pd.Series. Values matching the pr_zh.t_zh."code" column.
    @typ : str. One of ['hydro', 'rb'].
    """
    typ = typ.lower()
    table = 'cor_zh_%s' % typ
    tab_typ = 't_hydro_area' if typ == 'hydro' else 't_river_basin'
    id_typ = 'id_hydro' if typ == 'hydro' else 'id_rb'
    sql = '''
    SELECT h.{id_typ}, zh.id_zh
    FROM pr_zh.{tab_typ} h, pr_zh.t_zh zh
    WHERE zh."code" in {tzh_code}
        AND ST_INTERSECTS(
            ST_SetSRID(h.geom,4326), ST_MakeValid(ST_SetSRID(zh.geom,4326)))
        AND (h.{id_typ}, zh.id_zh) NOT IN (SELECT {id_typ}, id_zh FROM pr_zh.{tab_to})
    ;'''.format(
        tzh_code=tuple(tzh_code),
        id_typ=id_typ,
        tab_typ=tab_typ,
        tab_to=table)
    df = pd.read_sql_query(sql, con_gn)
    if not df.empty:
        df.to_sql(
            name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False
        )
        print('INSERT %i matches' % df.shape[0])
    else:
        print('No new matches identified')


def _calc_recouvrmt(df1, df2, how='inner'):
    '''
    Compute the coverage of df2 over df1, for each geometry of df1.

    Parameters
    ----------
    df1 : GeoDataFrame.
    df2 : GeoDataFrame.
    '''
    iddf1 = df1.columns[0]
    iddf2 = df2.columns[0]
    # Spatial join
    tmp = gpd.sjoin(
        df1,
        df2[['geom']],
        predicate='intersects',
        how=how)
    tmp.dropna(subset=['index_right'], inplace=True)
    tmp.index_right = tmp.index_right.astype(int)
    tmp.reset_index(inplace=True)
    tmp = tmp.join(
        df2[['geom', iddf2]].rename(columns={'geom': 'right_geom'}),
        on=['index_right'],
        how='left')
    tmp2 = tmp[['index_right', 'right_geom', iddf2]].copy() \
        .rename(columns={'right_geom': 'geom'}) \
        .set_geometry('geom')
    tmp1 = tmp[[iddf1, 'geom']].copy() \
        .set_geometry('geom')
    # Repair invalid geometries before measuring intersections
    if not tmp1.geom.values.is_valid.all():
        tmp1.loc[~tmp1.geom.values.is_valid, 'geom'] = tmp1.loc[~tmp1.geom.values.is_valid, 'geom'].buffer(0)
    if not tmp2.geom.values.is_valid.all():
        tmp2.loc[~tmp2.geom.values.is_valid, 'geom'] = tmp2.loc[~tmp2.geom.values.is_valid, 'geom'].buffer(0)
    tmp['perc_rcvmt'] = (tmp1.intersection(tmp2).area / tmp1.area) * 100
    return tmp[[iddf1, iddf2, 'perc_rcvmt']]


def _cor_zh_areaBis(tzh_code, typ, cover=False):
    """
    @tzh_code : pd.Series. Values matching the pr_zh.t_zh."code" column.
    @typ : str. COM, DEP, or any ref_geo type_code.
    """
    from math import ceil
    table = 'cor_zh_area'
    sqltzh = """
    SELECT zh.id_zh, zh.geom
    FROM pr_zh.t_zh zh
    WHERE zh."code" in {tzh_code}
    """.format(tzh_code=tuple(tzh_code))
    tzh = gpd.read_postgis(sqltzh, con_gn, crs=4326)
    if tzh.crs.srs == 'epsg:4326':
        tzh.to_crs(2154, inplace=True)
    sqllarea = """
    SELECT l.id_area, l.geom
    FROM ref_geo.l_areas l
    JOIN ref_geo.bib_areas_types bib USING (id_type)
    WHERE bib.type_code='{typ}' and l."enable"
    """.format(typ=typ)
    larea = gpd.read_postgis(sqllarea, con_gn, crs=2154)
    df = _calc_recouvrmt(larea, tzh).rename(columns={'perc_rcvmt': 'cover'})
    if cover:
        df['cover'] = [ceil(x) for x in df.cover]
    else:
        df.drop(columns=['cover'], inplace=True)
    # return df
    if not df.empty:
        df.to_sql(
            name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False
        )
        print('INSERT %i matches' % df.shape[0])
    else:
        print('No new matches identified')

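
# Minimal sketch of _calc_recouvrmt on toy data (self-contained, hypothetical ids):
#   >>> from shapely.geometry import box
#   >>> a = gpd.GeoDataFrame({'id_a': [1]}, geometry=[box(0, 0, 2, 2)]).rename_geometry('geom')
#   >>> b = gpd.GeoDataFrame({'id_b': [10]}, geometry=[box(1, 0, 3, 2)]).rename_geometry('geom')
#   >>> _calc_recouvrmt(a, b)   # box b covers half of box a
#      id_a  id_b  perc_rcvmt
#   0     1    10        50.0
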
def _cor_zh_area(tzh_code, typ):
    """
    @tzh_code : pd.Series. Values matching the pr_zh.t_zh."code" column.
    @typ : str. COM, DEP, or any ref_geo type_code.
    """
    from math import ceil
    table = 'cor_zh_area'
    if typ == 'COM':
        cd1 = """,
        ( ST_Area(ST_INTERSECTION(
            l.geom, ST_MakeValid(ST_Transform(ST_SetSRID(zh.geom,4326),2154))
        )) * 100) / ST_Area(l.geom) AS cover
        """
        cd2 = ' AND l."enable"'
    else:
        cd1 = cd2 = ''
    sql = '''
    SELECT l.id_area, zh.id_zh {cover1}
    FROM ref_geo.l_areas l
    JOIN ref_geo.bib_areas_types bib USING (id_type),
        pr_zh.t_zh zh
    WHERE zh."code" in {tzh_code}
        AND bib.type_code='{typ}'
        AND ST_INTERSECTS(
            ST_SetSRID(l.geom,2154),
            ST_MakeValid(ST_Transform(ST_SetSRID(zh.geom,4326),2154))
        )
        AND (l.id_area, zh.id_zh) NOT IN (SELECT id_area, id_zh FROM pr_zh.cor_zh_area)
        {cd2}
    '''.format(tzh_code=tuple(tzh_code), typ=typ, cover1=cd1, cd2=cd2)
    df = pd.read_sql_query(sql, con_gn)
    if cd1 != '':
        df['cover'] = [ceil(x) for x in df.cover.sort_index()]
    if not df.empty:
        df.to_sql(
            name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False
        )
        print('INSERT %i matches' % df.shape[0])
    else:
        sql = '''
        SELECT l.id_area
        FROM ref_geo.l_areas l
        JOIN ref_geo.bib_areas_types bib USING (id_type)
        WHERE bib.type_code='{typ}'
        '''.format(typ=typ)
        res = pd.read_sql_query(sql, con_gn)
        if not res.empty:
            print('No new matches identified')
        else:
            print('NO geometry in `ref_geo.l_areas` for type_code %s' % typ)


def find_nb_hab_bylbcode(df):
    """Derive nb_hab per wetland by summing the is_ch flag of the known bib_cb codes."""
    to_corzhcb, not_bib = __filter_lb_code__(
        df,
        join_ch=True
    )
    return to_corzhcb.groupby('id_zh', dropna=False)\
        .agg({'is_ch': sum})\
        .rename(columns={'is_ch': 'nb_hab'})\
        .reset_index()

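
# to_t_zh() below expects the Azalée export already renamed through DICT_TZH, plus
# the raw columns it uses internally (crit_delim, subm_*, rmq_*, typo_sdage,
# mnemo_sdage, geom). Minimal sketch mirroring the __main__ block at the bottom
# (`already_migrated_codes` is a hypothetical Series of codes already in pr_zh.t_zh):
#   DF = zh.v_zoneshumides().rename(columns=DICT_TZH)
#   to_t_zh(DF[~DF.code.isin(already_migrated_codes)].copy())
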
def to_t_zh(DF):
    """Required input columns: 'typo_sdage', 'mnemo_sdage' (plus the DICT_TZH-renamed ones)."""
    from geoalchemy2 import Geometry
    df = DF.copy()
    table = 't_zh'
    t_role = get_t_roles().sort_index()
    org = get_id_org_przh()
    dict_org = dict(zip(org.name, org.id_org))
    # 'create_author' may hold 'creator;updater': split it before mapping. Important
    test_auth = df.create_author.str.contains(';', na=False)
    if test_auth.any():
        df.loc[test_auth, 'update_author'] = df.loc[test_auth, 'create_author'].str.split(';', expand=True)[1]
        df.loc[test_auth, 'create_author'] = df.loc[test_auth, 'create_author'].str.split(';', expand=True)[0]
    df['create_author'] = recup_id_role(author=df['create_author'])
    df['update_author'] = recup_id_role(author=df['update_author'])
    df['id_org'] = [t_role.loc[t_role.index == x, 'nom_organisme'].values[0] for x in df['create_author']]
    df.id_org.replace(dict_org, inplace=True)
    df['id_lim_list'] = cor_lim_list(crit_delim=df.crit_delim)
    df['remark_lim'] = recup_delim_rmq(crit_delim=df.crit_delim)
    recup_sdage(df)
    recup_sage(df)
    df['remark_pres'] = df.rmq_site.copy()
    # df['v_habref'] = None
    # df['ef_area'] = None  # Do not fill: our inventories did not record it.
    # df['global_remark_activity'] = None  # Do not fill (MEDWET, to check): not in our inventories.
    df['id_thread'] = None
    df['id_frequency'] = recup_subm(col_subm=df.subm_freq, typ_subm='frequente')  # the "partiellement submergé" frequency is missing
    df['id_spread'] = recup_subm(col_subm=df.subm_etend, typ_subm='etendue')
    df['id_connexion'] = recup_subm(col_subm=df.connexion, typ_subm='connexion')\
        .replace({'Non déterminé': None})
    # df['id_diag_hydro'] = None  # Do not fill (MEDWET, to check): not in our inventories.
    # df['id_diag_bio'] = None  # Do not fill (MEDWET, to check): not in our inventories.
    # df['id_strat_gestion'] = None  # Do not fill (MEDWET, to check): not in our inventories.
    # df['remark_diag'] = None
    # df['is_other_inventory'] = None  # To update from MEDWET
    # df['is_carto_hab'] = None  # default: False
    # df['nb_hab'] = [len(x) if x else None for x in df.code_cb.str.split(';')]  # heritage habitats only
    df = df.merge(find_nb_hab_bylbcode(df), on='id_zh', how='left')
    # df['total_hab_cover'] = 100  # heritage habitats; cannot be filled.
    df['remark_eval_functions'] = df.rmq_fct_majeur.copy()
    df['remark_eval_heritage'] = df.rmq_interet_patri.copy()
    df['remark_eval_thread'] = df.rmq_bilan_menace.copy()
    df['remark_eval_actions'] = df.rmq_orient_act.copy()
    df['area'] = round(df.geom.area, 2)
    tzh_cols = recup_cols_table(table, con_gn)
    lst_cols = df.columns[df.columns.isin(tzh_cols)]
    to_tzh = df[lst_cols].copy()
    print('t_zh columns left unfilled: %s' % str([x for x in tzh_cols if x not in lst_cols]))
    if to_tzh.crs.srs == 'EPSG:2154':
        to_tzh.to_crs(4326, inplace=True)
    # dict_crs = to_tzh.crs.to_json_dict()
    # dict_crs['id']['code'] = 0
    # to_tzh.crs.from_json_dict(dict_crs)
    # to_tzh.geom = to_tzh.geom.to_wkt().copy()
    to_tzh.to_wkt().to_sql(
        name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False,
        dtype={
            'geom': Geometry(srid=4326)
            # 'id_lim_list': uuid.SafeUUID
        }
    )
    print('INSERT t_zh OK !')
    _cor_zh_area(tzh_code=to_tzh.code, typ='DEP')
    print('INSERT cor_zh_area DEP OK !')
    _cor_zh_areaBis(tzh_code=to_tzh.code, typ='COM', cover=True)
    print('INSERT cor_zh_area COM OK !')
    _cor_zh_area(tzh_code=to_tzh.code, typ='ZPS')
    print('INSERT cor_zh_area ZPS OK !')
    _cor_zh_area(tzh_code=to_tzh.code, typ='SIC')
    print('INSERT cor_zh_area SIC OK !')
    # _cor_zh_area(tzh_code=to_tzh.code, typ='ZSC')
    # print('INSERT cor_zh_area ZSC OK !')
    # _cor_zh_area(tzh_code=to_tzh.code, typ='PSIC')
    # print('INSERT cor_zh_area PSIC OK !')
    _cor_zh_area(tzh_code=to_tzh.code, typ='ZNIEFF1')
    print('INSERT cor_zh_area ZNIEFF1 OK !')
    _cor_zh_area(tzh_code=to_tzh.code, typ='ZNIEFF2')
    print('INSERT cor_zh_area ZNIEFF2 OK !')
    _cor_zh_hydro(tzh_code=to_tzh.code)
    print('INSERT cor_zh_hydro OK !')
    _cor_zh_(tzh_code=to_tzh.code, typ='rb')
    print('INSERT cor_zh_rb OK !')


def to_cor_zh_lim_fs(df):
    df = df[['code', 'crit_def_esp']]\
        .fillna('Non déterminé')
    cor_zh_lim_fs = pd.merge(df, get_id_t_zh(df.code), on='code')\
        .rename(columns={'crit_def_esp': 'id_lim_fs'})
    crit_def_esp_fct = t_nomenclature_ZH('CRIT_DEF_ESP_FCT')
    dict_crit = dict(zip(crit_def_esp_fct.mnemo, crit_def_esp_fct.id_nomenclature))
    cor_zh_lim_fs.id_lim_fs.replace(dict_crit, inplace=True)
    cor_zh_lim_fs[['id_zh', 'id_lim_fs']].to_sql(
        name='cor_zh_lim_fs', con=con_gn, schema='pr_zh', if_exists='append', index=False
    )


def get_azalee_activity():
    sql = """
    SELECT g.id_site code,
        CASE WHEN length(pa.id::varchar)=1
            THEN '0'||pa.id::varchar||' - '||pa.nom
            ELSE pa.id::varchar||' - '||pa.nom
        END activ_hum,
        pp.description "position",
        pi.nom impact,
        a.activ_hum_autre||'\n'||a.remarques rmq_activ_hum,
        a."valid"
    FROM zones_humides.r_site_usageprocess a
    LEFT JOIN zones_humides.param_activ_hum pa ON pa.id = a.id_activ_hum
    LEFT JOIN zones_humides.param_position pp ON pp.id = a.id_position
    LEFT JOIN zones_humides.param_impact pi ON pi.id = a.id_impact
    JOIN (sites.r_sites_geom g
        JOIN sites.sites s ON s.id = g.id_site) ON g.id = a.id_geom_site
    WHERE g.date = (SELECT max(dat) FROM (values (g.date)) AS value(dat))
        and a."valid" and s.date_fin is NULL
    """
    return pd.read_sql_query(sql, zh.con)


def get_cor_impact_types():
    return pd.read_sql_table('cor_impact_types', con_gn, 'pr_zh')

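
# get_azalee_activity() labels each activity as '<zero-padded id> - <name>'
# (e.g. a hypothetical row: '04 - agriculture'); these labels are expected to
# match the ACTIV_HUM mnemonics that to_t_activity() maps to id_nomenclature below.
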
def to_t_activity(df=None):
    table = 't_activity'
    if df is None:
        df = get_azalee_activity()\
            .drop_duplicates()
    else:
        df = df[['code', 'activ_hum', 'impact', 'position', 'rmq_activ_hum']]
    # NOTE: the four stacked series below are currently unused
    activ_hum = df.activ_hum.str.split(';', expand=True).stack()
    impact = df.impact.str.split(';', expand=True).stack()
    position = df.position.str.split(';', expand=True).stack()
    rmq_activ_hum = df.rmq_activ_hum.str.split(';', expand=True).stack()
    # df['activ_hum'] = remove_special_char(df['activ_hum'], space=True)
    # df['impact'] = remove_special_char(df['impact'], space=True)
    df['impact'] = remove_special_char(df['impact'].str.lower(), space=True)
    df['impact'].fillna('aucun', inplace=True)
    # df['position'] = remove_special_char(df['position'], space=True)
    no_activ_hum = t_nomenclature_ZH('ACTIV_HUM')
    dict_activ_hum = dict(zip(no_activ_hum.mnemo, no_activ_hum.id_nomenclature))
    no_impact = t_nomenclature_ZH('IMPACTS').rename(columns={'id_nomenclature': 'id_impact'})
    cor_impact_types = pd.merge(get_cor_impact_types(), no_impact[['id_impact', 'mnemo']], on='id_impact')
    dict_impact = dict(zip(remove_special_char(cor_impact_types.mnemo.str.lower(), space=True),
                           cor_impact_types.id_cor_impact_types))
    no_position = t_nomenclature_ZH('LOCALISATION')
    dict_position = dict(zip(no_position.mnemo, no_position.id_nomenclature))
    df['activ_hum'].replace(dict_activ_hum, inplace=True)
    df['impact'].replace(dict_impact, inplace=True)
    df['position'].replace(dict_position, inplace=True)
    df.rename(columns={
        'activ_hum': 'id_activity',
        'impact': 'id_cor_impact_types',
        'position': 'id_position',
        'rmq_activ_hum': 'remark_activity'
    }, inplace=True)
    # group_df = df.groupby(['code','id_activity','id_position'])['id_cor_impact_types'].apply(list).reset_index()
    group_df = df.groupby(['code', 'id_activity'], dropna=False)\
        .agg({'id_position': list, 'id_cor_impact_types': list, 'remark_activity': list}).reset_index()
    group_df.id_position = [list(set(x)) for x in group_df.id_position]
    group_df.id_position = [
        x[0] if len(x) == 1
        else no_position.loc[no_position.cd_nomenclature == '3', 'id_nomenclature'].values[0]
        for x in group_df.id_position
    ]
    group_df.remark_activity = ['\n'.join(list(set(x))) if list(set(x)) != [None] else None
                                for x in group_df.remark_activity]
    group_df['id_impact_list'] = [uuid.uuid4() for x in group_df.index]
    cor_impact_list = group_df[['id_impact_list', 'id_cor_impact_types']]\
        .explode('id_cor_impact_types')\
        .drop_duplicates()
    # activity = pd.merge(group_df[['code','id_activity','id_impact_list','id_position']], df,
    #                     on=['code','id_activity','id_position'], how='left')
    # t_activity = pd.merge(activity, get_id_t_zh(df.code), on='code')
    t_activity = pd.merge(group_df, get_id_t_zh(df.code), on='code', how='left')
    tactiv_cols = recup_cols_table(table, con_gn)
    lst_cols = t_activity.columns[t_activity.columns.isin(tactiv_cols)]
    to_tactiv = t_activity[lst_cols]
    to_tactiv.to_sql(
        name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False,
        # dtype={
        #     'id_impact_list': uuid.UUID
        # }
    )
    cor_impact_list.to_sql(
        name='cor_impact_list', con=con_gn, schema='pr_zh', if_exists='append', index=False,
        # dtype={
        #     'id_impact_list': uuid.UUID
        # }
    )


def get_azalee_functions():
    sql = """
    SELECT g.id_site code,
        pa.nom id_function,
        a."quantite",
        a.description justification,
        a."valid"
    FROM zones_humides.r_site_fctecosociopatri a
    LEFT JOIN zones_humides.param_fct_eco_socio_patri pa ON pa.id = a.id_fct
    JOIN (sites.r_sites_geom g
        JOIN sites.sites s ON s.id = g.id_site) ON g.id = a.id_geom_site
    WHERE g.date = (SELECT max(dat) FROM (values (g.date)) AS value(dat))
        and a."valid" and s.date_fin is NULL
    """
    return pd.read_sql_query(sql, zh.con)

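
# Sketch of the group-then-explode pattern used in to_t_activity() above
# (toy frame, hypothetical ids):
#   >>> g = pd.DataFrame({'code': ['A'], 'id_cor_impact_types': [[1, 2]]})
#   >>> g['id_impact_list'] = [uuid.uuid4() for _ in g.index]
#   >>> g[['id_impact_list', 'id_cor_impact_types']].explode('id_cor_impact_types')
# -> one shared id_impact_list per grouped activity, one row per impact
#    in pr_zh.cor_impact_list.
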
def to_t_functions(df=None):
    table = 't_functions'
    if df is None:
        func = get_azalee_functions()
        func.id_function.replace({' / ': '/'}, regex=True, inplace=True)
    else:
        func = df[['code', 'fct_bio', 'fct_hydro', 'int_patri', 'val_socioEco']].set_index('code').unstack()
    func = func.str.split(';', expand=True).stack()\
        .str.split(r' \(', n=1, expand=True)
    func.columns = ['id_function', 'justification']
    # keep the text before the closing parenthesis
    func.justification = func.justification.str.rsplit(')', n=1, expand=True)[0]
    functions = pd.concat([
        t_nomenclature_ZH('FONCTIONS_HYDRO'), t_nomenclature_ZH('FONCTIONS_BIO'),
        t_nomenclature_ZH('VAL_SOC_ECO'), t_nomenclature_ZH('INTERET_PATRIM'),
    ])
    functions.mnemo.replace({
        r'..- ': '',
        r' \(.*\)': ''
    }, regex=True, inplace=True)
    dict_func = dict(zip(functions.mnemo, functions.id_nomenclature))
    func.id_function.replace(dict_func, inplace=True)
    not_idfunc = ['non documenté', 'aucune fonction hydrologique', 'aucune valeur socio-économique']
    del_index = func[func.id_function.isin(not_idfunc)].index
    func.drop(del_index, inplace=True)
    funct = func.groupby(['code', 'id_function']).agg(list).reset_index()
    funct.justification = ['\n'.join(x) if x != [None] else None for x in funct.justification]
    qualif = t_nomenclature_ZH('FONCTIONS_QUALIF')
    knowle = t_nomenclature_ZH('FONCTIONS_CONNAISSANCE')
    funct['id_qualification'] = qualif.loc[qualif.mnemo == 'Non évaluée', 'id_nomenclature'].values[0]
    funct['id_knowledge'] = knowle.loc[knowle.mnemo == 'Lacunaire ou nulle', 'id_nomenclature'].values[0]
    t_func = pd.merge(funct, get_id_t_zh(funct.code), on='code')
    tactiv_cols = recup_cols_table(table, con_gn)
    lst_cols = t_func.columns[t_func.columns.isin(tactiv_cols)]
    to_tfunction = t_func[lst_cols]
    to_tfunction.to_sql(
        name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False
    )


def add_remark_pres(not_bib):
    table = 't_zh'
    rmq_pres = pd.read_sql_table(table, con_gn, 'pr_zh', columns=['id_zh', 'remark_pres'])
    rmq_pres.remark_pres.replace({'': None}, inplace=True)
    not_bib = rmq_pres.merge(not_bib, on='id_zh')
    not_bib.loc[not_bib.remark_pres.notna(), 'remark_pres'] = \
        not_bib[not_bib.remark_pres.notna()].remark_pres + '\n' + \
        not_bib[not_bib.remark_pres.notna()].lb_code
    not_bib.loc[not_bib.remark_pres.isna(), 'remark_pres'] = \
        not_bib[not_bib.remark_pres.isna()].lb_code
    not_bib.drop(columns='lb_code', inplace=True)
    from pycen import update_to_sql
    update_to_sql(
        df=not_bib,
        con=con_gn,
        table_name=table,
        schema_name='pr_zh',
        key_name='id_zh',
    )


def __format_lb_code__(t):
    table = 'cor_zh_cb'
    cols = ['code', 'code_cb']
    df_cb = t[cols].copy()
    df_cb.set_index('code', inplace=True)
    zh_cb = df_cb.code_cb.str.split(';', expand=True)\
        .stack()\
        .droplevel(-1)\
        .reset_index()
    zh_cb.columns = cols
    zh_cb.rename(columns={'code_cb': 'lb_code'}, inplace=True)
    cor_zh_cb = pd.merge(zh_cb, get_id_t_zh(zh_cb.code.unique()), on='code')
    tzhcb = recup_cols_table(table, con_gn)
    lst_cols = cor_zh_cb.columns[cor_zh_cb.columns.isin(tzhcb)]
    to_corzhcb = cor_zh_cb[lst_cols].copy()
    to_corzhcb.lb_code = to_corzhcb.lb_code.astype(str)
    return to_corzhcb


def __filter_lb_code__(t, join_ch=False):
    to_corzhcb = __format_lb_code__(t)
    bib_cb = pd.read_sql_table('bib_cb', con_gn, 'pr_zh')
    bib_cb.lb_code = bib_cb.lb_code.astype(str)
    not_bib = to_corzhcb[~to_corzhcb.lb_code.isin(bib_cb.lb_code)]
    to_corzhcb.drop(not_bib.index, inplace=True)
    not_bib = not_bib\
        .groupby('id_zh').agg(','.join)
    if join_ch:
        to_corzhcb = to_corzhcb.merge(
            bib_cb.drop(columns='humidity'), on='lb_code', how='left'
        )
    return to_corzhcb, not_bib

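
# The split/stack idiom used by __format_lb_code__ above (toy data):
#   >>> pd.Series({'38XX0001': '22.1;44.3'}).str.split(';', expand=True).stack().droplevel(-1)
#   38XX0001    22.1
#   38XX0001    44.3
#   dtype: object
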
def to_cor_zh_cb(t):
    table = 'cor_zh_cb'
    to_corzhcb, not_bib = __filter_lb_code__(t, join_ch=False)
    not_bib.lb_code = 'Autre(s) habitat(s) décrit(s) :\n' + not_bib.lb_code
    add_remark_pres(not_bib)
    to_corzhcb.to_sql(
        name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False
    )


def to_t_flow(DF, type_flow=None):
    """
    @DF : pd.DataFrame
    @type_flow : str. 'inflow' or 'outflow'.
    """
    if type_flow == 'inflow':
        table = 't_inflow'
        cols = ['entree_eau_reg', 'entree_eau_perm', 'entree_eau_topo']
        _flow = t_nomenclature_ZH('ENTREE_EAU')
        _perm = t_nomenclature_ZH('PERMANENCE_ENTREE')
        dict_table = {
            'flow': 'id_inflow',
            'perm': 'id_permanance'}
    elif type_flow == 'outflow':
        table = 't_outflow'
        cols = ['sortie_eau_reg', 'sortie_eau_perm', 'sortie_eau_topo']
        _flow = t_nomenclature_ZH('SORTIE_EAU')
        _perm = t_nomenclature_ZH('PERMANENCE_SORTIE')
        dict_table = {
            'flow': 'id_outflow',
            'perm': 'id_permanance'}
    df = DF.copy().set_index('code')
    # df[cols]
    flow = df[cols[0]].str.split(';', expand=True).stack()
    perm = df[cols[1]].str.split(';', expand=True).stack()
    topo = df[cols[2]].str.split(';', expand=True).stack()
    inflow = pd.concat(
        [flow, perm, topo], axis=1, join='outer'
    ).droplevel(-1)
    # inflow2 = flow.to_frame()\
    #     .merge(perm.to_frame(), left_index=True, right_index=True, how='outer')\
    #     .merge(topo.to_frame(), left_index=True, right_index=True, how='outer')
    inflow.columns = ['flow', 'perm', 'topo']
    inflow.reset_index(drop=False, inplace=True)
    dict_flow = dict(zip(_flow.mnemo, _flow.id_nomenclature))
    dict_perm = dict(zip(_perm.mnemo.str.lower(), _perm.id_nomenclature))
    inflow.flow.replace(dict_flow, inplace=True)
    inflow.perm.fillna('non déterminé', inplace=True)
    inflow.perm.replace({'inconnu': 'non déterminé', '': 'non déterminé'}, inplace=True)
    inflow.perm.replace(dict_perm, inplace=True)
    inflow.rename(columns=dict_table, inplace=True)
    t_flow = pd.merge(inflow, get_id_t_zh(inflow.code.unique()), on='code')
    tflow = recup_cols_table(table, con_gn)
    lst_cols = t_flow.columns[t_flow.columns.isin(tflow)]
    to_tflow = t_flow[lst_cols]
    to_tflow.to_sql(
        name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False
    )


def cor_zh_corine_cover():
    table = 'cor_zh_corine_cover'
    sql = 'SELECT id_zh,geom FROM pr_zh.t_zh'
    df = gpd.read_postgis(sql, con_gn, crs=4326)
    df.to_crs(2154, inplace=True)
    df.geom = df.buffer(-0.5)
    mask = df.to_crs(2154).unary_union
    clc_path = '/home/colas/Documents/5_BDD/CLC/'
    clc_file = clc_path + 'CLC12_FR_RGF.shp'
    clc_mfil = clc_path + 'Metadonnees/CLC_nomenclature.xls'
    clc = gpd.read_file(clc_file, mask=mask)
    clc.rename_geometry('geom', inplace=True)
    meta_clc0 = pd.read_excel(clc_mfil, 0).rename(columns={'code_clc_niveau_1': 'code_clc'})
    meta_clc1 = pd.read_excel(clc_mfil, 1).rename(columns={'code_clc_niveau_2': 'code_clc'})
    meta_clc2 = pd.read_excel(clc_mfil, 2).rename(columns={'code_clc_niveau_3': 'code_clc'})
    meta_clc = pd.concat([meta_clc0, meta_clc1, meta_clc2])
    meta_clc.code_clc = meta_clc.code_clc.astype(str)
    gn_occsol = t_nomenclature_ZH('OCCUPATION_SOLS', source=None)
    # '112' -> '1.1.2' -> id_nomenclature
    dict_clc1 = dict(zip(meta_clc.code_clc, ['.'.join(x) for x in meta_clc.code_clc]))
    dict_clc2 = dict(zip(gn_occsol.cd_nomenclature, gn_occsol.id_nomenclature))
    tmp = gpd.sjoin(
        df,
        clc[['CODE_12', 'geom']],
        predicate='intersects',
        how='inner')
    cor_zh_clc = tmp[['id_zh', 'CODE_12']]\
        .drop_duplicates()\
        .rename(columns={'CODE_12': 'id_cover'})\
        .replace({'id_cover': dict_clc1})\
        .replace({'id_cover': dict_clc2})
    cor_zh_clc.to_sql(
        name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False
    )

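
# to_t_flow() above assumes the three *_reg / *_perm / *_topo columns hold
# ';'-separated lists aligned by position, e.g. (hypothetical row):
#   entree_eau_reg  = "cours d'eau;nappe"
#   entree_eau_perm = 'permanent;inconnu'
# After stacking, each pair becomes one t_inflow/t_outflow row, with 'inconnu'
# and blanks normalised to 'non déterminé' before the nomenclature mapping.
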
def migrate_to_gnZH(df: pd.DataFrame = None):
    to_bib_organismes_przh()
    to_t_zh(df)
    to_cor_zh_lim_fs(df)
    to_t_activity(df)
    to_t_functions(df)
    to_t_flow(df, type_flow='inflow')
    to_t_flow(df, type_flow='outflow')
    cor_zh_corine_cover()


def to_t_references(db_file, suffixe_refnum=None):
    import pandas_access as mdb
    table = 't_references'
    t_ref = pd.read_sql_table(table, con_gn, 'pr_zh')
    dic_col_ref = {
        'REF_NO': 'ref_number',
        'REFERENCE': 'reference',
        'AUTHOR': 'authors',
        'TITLE': 'title',
        'YEAR': 'pub_year',
        'PUBLISHER': 'editor',
        'LOCATION': 'editor_location',
    }
    df = mdb.read_table(db_file, 'MWDREF')\
        .rename(columns=dic_col_ref)
    df.ref_number = df.ref_number.astype(str)
    siteref = mdb.read_table(db_file, 'SITEREF')
    siteref.REF_NO = siteref.REF_NO.astype(str)
    df = df[df.ref_number.isin(siteref.REF_NO)]
    df.loc[df.title.isna(), 'title'] = df[df.title.isna()].reference
    if suffixe_refnum is not None:
        df.ref_number = suffixe_refnum + df.ref_number
    df.to_sql(name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False)


def to_cor_zh_ref(db_file, suffixe_refnum=None):
    import pandas_access as mdb
    dict_col_cor = {
        'REF_NO': 'id_ref'
    }
    table = 'cor_zh_ref'
    ref = pd.read_sql_table('t_references', con_gn, 'pr_zh')
    dict_idref = dict(zip(ref.ref_number, ref.id_reference))
    # SITEREF
    cor = mdb.read_table(db_file, 'SITEREF')\
        .rename(columns=dict_col_cor)
    cor.id_ref = cor.id_ref.astype(str)
    if suffixe_refnum is not None:
        cor.id_ref = suffixe_refnum + cor.id_ref
    t_zh = get_id_t_zh(cor.SITE_COD)
    to_tab = cor.merge(
        t_zh.drop(columns='zh_uuid'),
        left_on='SITE_COD',
        right_on='code',
    ).drop(columns=['SITE_COD', 'code'])\
        .replace(dict_idref)
    to_tab.to_sql(
        name=table, con=con_gn, schema='pr_zh',
        if_exists='append', index=False)


def OTHERINV_to_tref(db_file):
    import pandas_access as mdb
    table = 't_zh'
    # NB: some keys may differ only by (non-breaking) space variants
    dic = {
        'FFn': 'FF n',
        r'n \° ': 'n°',
        r'n \°': 'n°',
        r'n\+': 'n°',
        r'n\° ': 'n°',
        r'n\° ': 'n°',
        r'n\° ': 'n°',
        r' ': ' ',
    }
    sitinfo = mdb.read_table(db_file, 'SITEINFO')\
        .set_index('SITE_COD')
    otinv = sitinfo[['OTHER_INV']]\
        .dropna().OTHER_INV\
        .str.split(';', expand=True).stack()\
        .str.strip()\
        .replace(dic, regex=True)\
        .str.split(', Z', expand=True, regex=True).stack()\
        .str.strip()
    znieff = otinv[otinv.str.startswith('ZNIEF')]
    znieff = pd.concat([znieff, otinv[otinv.str.startswith('NIEF')]])
    znieff = pd.concat([znieff, otinv[otinv.str.startswith('ZNEIF')]])
    znieff = pd.concat([znieff, otinv[otinv.str.startswith('n°')]])
    znieff = pd.concat([znieff, otinv[otinv.str.startswith('Site N')]])
    znieff = pd.concat([znieff, otinv[otinv.str.startswith('Sites N')]])
    znieff = pd.concat([znieff, otinv[otinv.str.startswith('2606')]])
    znieff = pd.concat([znieff, otinv[otinv.str.startswith('3817')]])
    znieff = pd.concat([znieff, otinv[otinv.str.startswith('Inventaire N')]])
    znieff = pd.concat([znieff, otinv[otinv.str.startswith('Mais aussi ZPS')]])
    filter_inv = otinv[~otinv.isin(znieff)]\
        .droplevel([-1, -2])\
        .reset_index()
    other_inv = filter_inv.groupby('SITE_COD').agg('\n'.join)\
        .reset_index()\
        .rename(columns={0: 'remark_is_other_inventory'})
    t_zh = get_id_t_zh(other_inv.SITE_COD)
    to_tab = other_inv.merge(
        t_zh.drop(columns='zh_uuid'),
        left_on='SITE_COD',
        right_on='code',
    ).drop(columns=['SITE_COD', 'code'])
    to_tab['is_other_inventory'] = True
    from pycen import update_to_sql
    update_to_sql(
        df=to_tab,
        con=con_gn,
        table_name=table,
        schema_name='pr_zh',
        key_name='id_zh',
    )


if __name__ == "__main__":
    from pycen.geonature import pr_zh
    t_zh = pr_zh.t_zh()
    drop_cols = ['auteur_geom', 'date_geom', 'type_milieu', 'type_site']
    DF = zh.v_zoneshumides()
    DF.rename(columns=DICT_TZH, inplace=True)
    DF.drop(columns=drop_cols, inplace=True)
    # only migrate wetlands not already present in pr_zh.t_zh
    df = DF[~DF.code.isin(t_zh.code)].copy()
    migrate_to_gnZH(df)
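
    # Illustrative MEDWET import calls (hypothetical .mdb path; note that
    # suffixe_refnum is *prepended* to ref_number despite its name):
    #   to_t_references('/path/to/medwet.mdb', suffixe_refnum='MW')
    #   to_cor_zh_ref('/path/to/medwet.mdb', suffixe_refnum='MW')
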
    # Migration checklist:
    # to_bib_organismes_util()  # done, schema 'utilisateurs'
    # to_bib_organismes_przh()  # done, schema 'pr_zh'
    # to_t_roles()              # done
    # to_t_zh(df)               # done
    # to_cor_zh_lim_fs(df)      # done
    # to_t_activity(df)         # done
    # to_t_functions(df)        # done
    # to_t_flow(df, type_flow='inflow')   # done
    # to_t_flow(df, type_flow='outflow')  # done

    # t_river_basin        # OK! cf. insert_lareas.py (SDAGE sub-catchments)
    # t_hydro_area         # OK! cf. insert_lareas.py (topographic catchments)
    # cor_zh_area          # OK with to_t_zh!; bib_area = [COM, DEP, ref for ref_geo_referentiels of conf_gn_module.toml]
    # cor_zh_rb            # OK with to_t_zh!
    # cor_zh_hydro         # OK with to_t_zh!
    # cor_zh_fct_area      # depends on t_fct_area (empty): functional-areas table
    # cor_zh_corine_cover  # OK!
    # fct_delim
    # to_cor_zh_cb()       # ready

    # DF[DF.sortie_eau_reg.str.contains('diffus', na=False)].code.tolist()
    # ['38BB0109', '38BB0128', '38BB0129']

    # get_cor_zh_corine_cover()      # TODO via MEDWET
    # get_cor_zh_protection()        # TODO via MEDWET
    # get_t_ownership()              # TODO via MEDWET
    # get_t_table_heritage()         # TODO (not sure..)
    # get_t_instruments()            # TODO (not sure..)
    # get_t_management_structures()  # TODO (not sure..)

    # quick check of the EVAL_GLOB_MENACES nomenclature (result unused)
    t_nomenclature_ZH(bib_mnemo='EVAL_GLOB_MENACES')


def drop_table(table):
    """Empty a pr_zh table (despite the name, this TRUNCATEs rather than DROPs)."""
    sql = 'TRUNCATE pr_zh.%s' % table
    with con_gn.begin() as cnx:
        # the transaction commits on exit; no explicit commit()/close() needed
        cnx.execute(sql)
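
# Example (destructive: removes all rows from the target table):
#   drop_table('cor_zh_area')
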