#!/usr/bin/env python3 # -*- coding: UTF-8 -*-. from pycen import con_gn, zh, pers import pandas as pd import geopandas as gpd import uuid import re zh = zh() DICT_TZH = { 'site_code':'code', 'nom':'main_name', 'autre_nom':'secondary_name', 'auteur_site':'create_author', ##'auteur_geom', 'auteur_last_maj':'update_author', 'date_site':'create_date', ##'date_geom', 'date_last_maj':'update_date', # 'CEN Isère': 'id_org', # organisme opérateur ##'type_milieu', ##'type_site', # 'typo_sdage':'id_sdage', # 'id_sage' #A récup # 'rmq_site':'remark_pres', # 'rmq_fct_majeur':'', # 'rmq_interet_patri':'', # 'rmq_bilan_menace':'', # 'rmq_orient_act':'', # 'rmq_usage_process':'', # 'code_cb':'', # 'lib_cb':'', # 'activ_hum':'', # 'impact':'', # 'position':'', # 'rmq_activ_hum':'', # 'connexion':'', # 'subm_orig':'', # 'subm_freq':'', # 'subm_etend':'', # 'fct_bio':'', # 'fct_hydro':'', # 'int_patri':'', # 'val_socioEco':'', # 'crit_delim':'', # 'crit_def_esp':'', # 'entree_eau_reg':'', # 'entree_eau_perm':'', # 'entree_eau_topo':'', # 'sortie_eau_reg':'', # 'sortie_eau_perm':'', # 'sortie_eau_topo':'', # 'geom':'geom' } def get_id_organisme(nom): if "'" in nom: nom = nom.replace("'","’") sql = "SELECT id_organisme FROM utilisateurs.bib_organismes WHERE nom_organisme = '%s'"%nom return pd.read_sql_query(sql,con_gn)['id_organisme'].values def remove_special_char(obj,space=False): dict_char = { r'[’]':"'", r'[àáâãäå]':'a', r'[èéêë]':'e', r'[ìíîï]':'i', r'[òóôõö]':'o', r'[ùúûü]':'u', # r'[ ]':"", r'[–]':"-" } if space: dict_char = {**dict_char, **{r'[ ]':""}} return obj.replace(dict_char,regex=True) def recup_cols_table(table,con,schema='pr_zh'): lst_cols = con.dialect.get_columns(con,table,schema) return [x['name'] for x in lst_cols] def t_nomenclature_ZH(bib_mnemo=None,source='ZONES_HUMIDES'): sch = 'ref_nomenclatures' tab = ['t_nomenclatures','bib_nomenclatures_types'] sql = """ SELECT a.id_nomenclature, a.cd_nomenclature, a.mnemonique mnemo, a.label_default as label, a.definition_default def, b.mnemonique bib_mnemo, b.label_default bib_label, b.definition_default bib_def, a.active FROM {sch}.{tab0} a JOIN {sch}.{tab1} b USING (id_type) """.format(sch=sch,tab0=tab[0],tab1=tab[1]) if source is not None or bib_mnemo is not None: sql += " WHERE " if source is not None: sql += """b."source" = '%s'"""%source sql += " AND " if bib_mnemo is not None else '' if bib_mnemo is not None: sql += "b.mnemonique = '%s'"%bib_mnemo return pd.read_sql_query(sql,con_gn).replace({r'[’]':"'"},regex=True) def recup_sdage(df): # Récupération de l'id SDAGE dans la typo df['id_sdage'] = [str(int(re.search(r'\d+', x).group())) for x in df.typo_sdage.sort_index()] sdage = t_nomenclature_ZH(bib_mnemo='SDAGE') dict_sdage = dict(zip(sdage.cd_nomenclature,sdage.id_nomenclature)) df['id_sdage'].replace(dict_sdage,inplace=True) return df def recup_sage(df): # Identification de la précision SAGE df['id_sage'] = [df.loc[df.typo_sdage==x,'mnemo_sdage'].values[0] if re.search(r'\.\d+', x) else None for x in df.typo_sdage.sort_index()] sage = t_nomenclature_ZH(bib_mnemo='SAGE') dict_sage = dict(zip(sage.mnemo,sage.id_nomenclature)) df['id_sage'].replace(dict_sage,inplace=True) return df def cor_lim_list(crit_delim): """Remplis la table pr_zh.cor_lim_list et retourn les uuid associés """ delim = t_nomenclature_ZH(bib_mnemo='CRIT_DELIM') delim.mnemo = delim.mnemo.str.replace(r'.\(.*\)','',regex=True) dict_delim = dict(zip(delim.mnemo.str.lower(),delim.id_nomenclature)) serie = crit_delim\ .fillna('non déterminé')\ .str.split(';',expand=True).stack().droplevel(-1).reset_index() serie.columns = ['id','delim'] serie.set_index('id',inplace=True) serie['id_lim'] = serie.delim.str.replace(r'.\(.*\)','',regex=True) serie.id_lim.replace(dict_delim,inplace=True) genuuid = serie.index.to_frame().drop_duplicates() del genuuid['id'] genuuid['id_lim_list'] = [uuid.uuid4() for x in genuuid.index] _cor_lim_list = pd.merge(serie,genuuid,how='inner',right_index=True,left_index=True) uuidreturn = pd.merge(crit_delim,genuuid,how='left',right_index=True,left_index=True) # Remplissage de la table pr_zh.cor_lim_list _cor_lim_list[['id_lim_list','id_lim']].to_sql( name='cor_lim_list',con=con_gn,schema='pr_zh',if_exists='append',index=False, # dtype={ # 'id_lim_list':uuid.SafeUUID # } ) return uuidreturn.id_lim_list.sort_index() def recup_delim_rmq(crit_delim): serie = crit_delim.str.split(';',expand=True).stack().droplevel(-1).reset_index() serie.columns = ['id','delim'] serie.set_index('id',inplace=True) serie['remarks'] = [ x[x.find("(")+1:x.rfind(")")] if x.find("(") > -1 else None for x in serie.delim ] uniserie = serie.groupby('id')['remarks'].apply(list).reset_index() uniserie.columns = ['id','remarks'] uniserie.set_index('id',inplace=True) uniserie.remarks = ['\n'.join(list(set(filter(None,x)))).strip() for x in uniserie.remarks] uniserie.remarks.replace({'': None},inplace=True) df_remarks = pd.merge( crit_delim, # serie.reset_index().drop_duplicates(subset='id').set_index('id'), uniserie, how='left',right_index=True,left_index=True ) return df_remarks.remarks def recup_subm(col_subm,typ_subm): """Correspondance subm--id_nomenclature. @col_subm : Series. Colonne de submersion @typ_subm : str. Type de submersion ['frequente','etendue','connexion'] """ # Manque la fréquence "partiellement submergé" if typ_subm == 'frequente': nom_subm = t_nomenclature_ZH(bib_mnemo='SUBMERSION_FREQ') elif typ_subm == 'etendue': nom_subm = t_nomenclature_ZH(bib_mnemo='SUBMERSION_ETENDUE') elif typ_subm == 'connexion': nom_subm = t_nomenclature_ZH(bib_mnemo='TYPE_CONNEXION') dict_submfreq = dict(zip(nom_subm.mnemo,nom_subm.id_nomenclature)) serie = col_subm.replace({'Inconnu':'Non déterminé'}) return serie.replace(dict_submfreq) def to_bib_organismes_util(): table = 'bib_organismes' isin_db = pd.read_sql_table( table,con_gn,'utilisateurs',['id_organisme'],columns=['nom_organisme'] ).replace({r'[’]':"'"},regex=True) insert_from = pers.get_organisme() to_insert = insert_from[~insert_from.nom.str.lower().isin(isin_db.nom_organisme.str.lower())] to_insert\ .drop(columns='abbrev')\ .rename(columns={'nom':'nom_organisme'})\ .to_sql(name=table,con=con_gn,schema='utilisateurs',if_exists='append',index=False) def to_bib_organismes_przh(): table = 'bib_organismes' # if first_time: # sql = 'DELETE FROM pr_zh.%s'%table # with con_gn.begin() as cnx: # cnx.execute(sql) isin_db = pd.read_sql_table( table,con_gn,'pr_zh',['id_org'],columns=['name'] ).replace({r'[’]':"'"},regex=True) insert_from = pers.get_organisme()\ .replace({'Inconnu':'Autre'}) to_insert = insert_from[~insert_from.nom.str.lower().isin(isin_db.name.str.lower())] to_insert.abbrev = remove_special_char(to_insert.abbrev,space=True)\ .str.upper()\ .str[:6] to_insert.loc[to_insert.abbrev.notna()]\ .rename(columns={'nom':'name','abbrev':'abbrevation'})\ .to_sql(name=table,con=con_gn,schema='pr_zh',if_exists='append',index=False) to_insert.loc[to_insert.abbrev.isna()]\ .rename(columns={'nom':'name'})\ .drop(columns=['abbrev'])\ .to_sql(name=table,con=con_gn,schema='pr_zh',if_exists='append',index=False) def get_bib_organismes(schema='utilisateurs'): table = 'bib_organismes' return pd.read_sql_table( table,con_gn,schema,['id_organisme'],columns=['nom_organisme'] ).replace({r'[’]':"'"},regex=True) def to_t_roles(): table = 't_roles' isin_db = pd.read_sql_table( table,con_gn,'utilisateurs',['id_role'],columns=['nom_role','prenom_role','id_organisme'] ).replace({r'[’]':"'"},regex=True) bib_organismes = get_bib_organismes(schema='utilisateurs') t_roles = pd.merge(isin_db,bib_organismes,'left',left_on='id_organisme',right_index=True) insert_from = pers.get_auteur2().replace({'GENTIANA':'Gentiana'}) ident_notin_db = [ x for x in insert_from.index.sort_values() if t_roles[(t_roles.nom_role==insert_from.nom[x]) & (t_roles.prenom_role==insert_from.prenom[x]) & (t_roles.nom_organisme==insert_from.organisme[x])].empty ] dict_orga = dict(zip(bib_organismes.nom_organisme,bib_organismes.index)) to_insert = insert_from[insert_from.index.isin(ident_notin_db)]\ .drop(columns=['nom_prenom'])\ .rename(columns={'nom':'nom_role','prenom':'prenom_role','organisme':'id_organisme',})\ .replace({**dict_orga,**{'Inconnu':-1}}) to_insert.to_sql( name=table,con=con_gn,schema='utilisateurs',if_exists='append',index=False ) def get_t_roles(id_role=None): table = 't_roles' t_roles = pd.read_sql_table( table,con_gn,'utilisateurs',['id_role'],columns=['nom_role','prenom_role','id_organisme'] ).replace({r'[’]':"'"},regex=True).sort_index() if id_role: t_roles = t_roles.iloc[[id_role]] return pd.merge(t_roles,get_bib_organismes(),'left',left_on='id_organisme',right_index=True) def recup_id_role(author): # A finir ! adapt_auth = author.replace({' \(Inconnu\)':'',' ':' '},regex=True).str.strip().unique() azalee_auth = pers.get_auteur2().sort_index()#.replace({' ':' '},regex=True) azalee_auth = azalee_auth[azalee_auth.nom_prenom.isin(adapt_auth)].replace({'Inconnu':'Autre'}) # azalee_auth.nom_prenom.replace({'Inconnu':'Autre'},regex=True,inplace=True) tr = get_t_roles().reset_index().replace({'':None}) t_roles = pd.merge(tr,azalee_auth, how='inner',left_on=['nom_role','prenom_role','nom_organisme'],right_on=['nom','prenom','organisme']) dict_role = dict(zip(t_roles.nom_prenom,t_roles.id_role)) return author.replace({' \(Inconnu\)':'',' ':' '},regex=True).str.strip().replace(dict_role) def get_id_t_zh(code=None): """@code : str, list, Serie, Index. Code à 12 characters maximum de la zone humide. """ sql = "SELECT id_zh,zh_uuid,code FROM pr_zh.t_zh" if isinstance(code,str): sql += " WHERE code='%s'"%code elif isinstance(code,list) or isinstance(code,pd.Series) or isinstance(code,pd.Index): sql += " WHERE code IN %s"%str(tuple(code)) return pd.read_sql_query(sql,con_gn) def get_id_org_przh(): return pd.read_sql_table('bib_organismes',con_gn,'pr_zh') def _cor_zh_hydro(tzh_code): """ @tzh : pd.Serie. Série de valeurs correspondants à la colonne pr_zh.t_zh."code". """ table = 'cor_zh_hydro' sql = ''' SELECT h.id_hydro,zh.id_zh FROM pr_zh.t_hydro_area h, pr_zh.t_zh zh WHERE zh."code" in {tzh_code} AND ST_INTERSECTS( ST_SetSRID(h.geom,4326),ST_MakeValid(ST_SetSRID(zh.geom,4326))) AND (h.id_hydro,zh.id_zh) NOT IN (SELECT id_hydro,id_zh FROM pr_zh.cor_zh_hydro) '''.format(tzh_code=tuple(tzh_code)) df = pd.read_sql_query(sql,con_gn) if not df.empty: df.to_sql( name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False ) print('INSERT %i correspondances'%df.shape[0]) else: print('AUCUNE nouvelles correspondances identifiées') def _cor_zh_(tzh_code,typ): """ @tzh : pd.Serie. Série de valeurs correspondants à la colonne pr_zh.t_zh."code". @typ : str. [hydro,rb] """ typ = typ.lower() table = 'cor_zh_%s'%typ tab_typ = 't_hydro_area' if typ == 'hydro' else 't_river_basin' id_typ = 'id_hydro' if typ == 'hydro' else 'id_rb' sql = ''' SELECT h.{id_typ},zh.id_zh FROM pr_zh.{tab_typ} h, pr_zh.t_zh zh WHERE zh."code" in {tzh_code} AND ST_INTERSECTS( ST_SetSRID(h.geom,4326),ST_MakeValid(ST_SetSRID(zh.geom,4326))) AND (h.{id_typ},zh.id_zh) NOT IN (SELECT {id_typ},id_zh FROM pr_zh.{tab_to}) ;'''.format( tzh_code = tuple(tzh_code), id_typ = id_typ, tab_typ = tab_typ, tab_to = table) df = pd.read_sql_query(sql,con_gn) if not df.empty: df.to_sql( name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False ) print('INSERT %i correspondances'%df.shape[0]) else: print('AUCUNE nouvelles correspondances identifiées') def _calc_recouvrmt(df1,df2,how='inner'): ''' Calcule le recouvrement de df2 sur df1 pour chaque géométrie de df1: Parameters ---------- df1 : GeoDataFrame. df2 : GeoDataFrame. ''' iddf1 = df1.columns[0] iddf2 = df2.columns[0] # Jointure spaciale tmp = gpd.sjoin( df1, df2[['geom']], predicate = 'intersects', how = how) tmp.dropna(subset=['index_right'],inplace=True) tmp.index_right = tmp.index_right.astype(int) tmp.reset_index(inplace=True) tmp = tmp.join( df2[['geom',iddf2]].rename(columns={'geom': 'right_geom'}), on=['index_right'], how='left') tmp2 = tmp[['index_right','right_geom',iddf2]].copy() \ .rename(columns={'right_geom': 'geom'}) \ .set_geometry('geom') tmp1 = tmp[[iddf1,'geom']].copy() \ .set_geometry('geom') if not tmp1.geom.values.is_valid.all(): tmp1.loc[~tmp1.geom.values.is_valid,'geom'] = tmp1.loc[~tmp1.geom.values.is_valid,'geom'].buffer(0) if not tmp2.geom.values.is_valid.all(): tmp2.loc[~tmp2.geom.values.is_valid,'geom'] = tmp2.loc[~tmp2.geom.values.is_valid,'geom'].buffer(0) tmp['perc_rcvmt'] = (tmp1.intersection(tmp2).area/tmp1.area)*100 return tmp[[iddf1,iddf2,'perc_rcvmt']] def _cor_zh_areaBis(tzh_code,typ,cover=False): """ @tzh : pd.Serie. Série de valeurs correspondants à la colonne pr_zh.t_zh."code". @typ : str. COM, DEP, ref_geo """ from math import ceil table = 'cor_zh_area' sqltzh = """ SELECT zh.id_zh, zh.geom FROM pr_zh.t_zh zh WHERE zh."code" in {tzh_code} """.format(tzh_code=tuple(tzh_code)) tzh = gpd.read_postgis(sqltzh,con_gn,crs=4326) if tzh.crs.srs=='epsg:4326': tzh.to_crs(2154,inplace=True) sqllarea = """ SELECT l.id_area, l.geom FROM ref_geo.l_areas l JOIN ref_geo.bib_areas_types bib USING (id_type) WHERE bib.type_code='{typ}' and l."enable" """.format(typ=typ) larea = gpd.read_postgis(sqllarea,con_gn,crs=2154) df = _calc_recouvrmt(larea,tzh).rename(columns={'perc_rcvmt':'cover'}) if cover: df['cover'] = [ceil(x) for x in df.cover] else : df.drop(columns=['cover'],inplace=True) # return df if not df.empty: df.to_sql( name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False ) print('INSERT %i correspondances'%df.shape[0]) else: print('AUCUNE nouvelles correspondances identifiées') def _cor_zh_area(tzh_code,typ): """ @tzh : pd.Serie. Série de valeurs correspondants à la colonne pr_zh.t_zh."code". @typ : str. COM, DEP, ref_geo """ from math import ceil table = 'cor_zh_area' if typ == 'COM': cd1 = """, ( ST_Area(ST_INTERSECTION( l.geom,ST_MakeValid(ST_Transform(ST_SetSRID(zh.geom,4326),2154)) )) *100) / ST_Area(l.geom) AS cover """ cd2 = ' AND l."enable"' else: cd1 = cd2 = '' sql = ''' SELECT l.id_area,zh.id_zh {cover1} FROM ref_geo.l_areas l JOIN ref_geo.bib_areas_types bib USING (id_type), pr_zh.t_zh zh WHERE zh."code" in {tzh_code} AND bib.type_code='{typ}' AND ST_INTERSECTS( ST_SetSRID(l.geom,2154), ST_MakeValid(ST_Transform(ST_SetSRID(zh.geom,4326),2154)) ) AND (l.id_area,zh.id_zh) NOT IN (SELECT id_area,id_zh FROM pr_zh.cor_zh_area) {cd2} '''.format(tzh_code=tuple(tzh_code),typ=typ,cover1=cd1,cd2=cd2) df = pd.read_sql_query(sql,con_gn) if cd1 != '': df['cover'] = [ceil(x) for x in df.cover.sort_index()] if not df.empty: df.to_sql( name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False ) print('INSERT %i correspondances'%df.shape[0]) else: sql = ''' SELECT l.id_area FROM ref_geo.l_areas l JOIN ref_geo.bib_areas_types bib USING (id_type) WHERE bib.type_code='{typ}' '''.format(typ=typ) res = pd.read_sql_query(sql,con_gn) if not res.empty: print('AUCUNE nouvelles correspondances identifiées') else : print('AUCUNE geometrie dans la table `ref_geo.l_areas` pour le `type_code` %s'%typ) def find_nb_hab_bylbcode(df): to_corzhcb, not_bib = __filter_lb_code__( df, join_ch=True ) return to_corzhcb.groupby('id_zh',dropna=False)\ .agg({'is_ch':sum})\ .rename(columns={'is_ch':'nb_hab'})\ .reset_index() def to_t_zh(DF): """Need IN : columns['typo_sdage','mnemo_sdage'] """ from geoalchemy2 import Geometry df = DF.copy() table = 't_zh' t_role = get_t_roles().sort_index() org = get_id_org_przh() dict_org = dict(zip(org.name,org.id_org)) # First modif = update_author. Important test_auth = df.create_author.str.contains(';',na=False) if test_auth.any(): df.loc[test_auth,'update_author'] = df.loc[test_auth,'create_author'].str.split(';',expand=True)[1] df.loc[test_auth,'create_author'] = df.loc[test_auth,'create_author'].str.split(';',expand=True)[0] df['create_author'] = recup_id_role(author=df['create_author']) df['update_author'] = recup_id_role(author=df['update_author']) df['id_org'] = [t_role.loc[t_role.index==x,'nom_organisme'].values[0] for x in df['create_author']] df.id_org.replace(dict_org,inplace=True) df['id_lim_list'] = cor_lim_list(crit_delim=df.crit_delim) df['remark_lim'] = recup_delim_rmq(crit_delim=df.crit_delim) recup_sdage(df) recup_sage(df) df['remark_pres'] = df.rmq_site.copy() # df['v_habref'] = None # df['ef_area'] = None # A ne pas remplir. Nos inventaires ne s'en sont pas préocupé. # df['global_remark_activity'] = None # A ne pas remplir. (MEDWET a vérifier) Infos non présente dans nos inventaires. df['id_thread'] = None df['id_frequency'] = recup_subm(col_subm=df.subm_freq,typ_subm='frequente') # Manque la fréquence "partiellement submergé" df['id_spread'] = recup_subm(col_subm=df.subm_etend,typ_subm='etendue') df['id_connexion'] = recup_subm(col_subm=df.connexion,typ_subm='connexion')\ .replace({'Non déterminé':None}) # df['id_diag_hydro'] = None # A ne pas remplir. (MEDWET a vérifier) Infos non présente dans nos inventaires. # df['id_diag_bio'] = None # A ne pas remplir. (MEDWET a vérifier) Infos non présente dans nos inventaires. # df['id_strat_gestion'] = None # A ne pas remplir. (MEDWET a vérifier) Infos non présente dans nos inventaires. # df['remark_diag'] = None # df['is_other_inventory'] = None # A mettre à jour depuis MEDWET # df['is_carto_hab'] = None # défault : False # df['nb_hab'] = [len(x) if x else None for x in df.code_cb.str.split(';')] # Concerne les HABs Patrimoniaux df = df.merge(find_nb_hab_bylbcode(df),on='id_zh',how='left') # df['total_hab_cover'] = 100 # Concerne les HABs Patrimoniaux ; Ne peut pas être rempli. df['remark_eval_functions'] = df.rmq_fct_majeur.copy() df['remark_eval_heritage'] = df.rmq_interet_patri.copy() df['remark_eval_thread'] = df.rmq_bilan_menace.copy() df['remark_eval_actions'] = df.rmq_orient_act.copy() df['area'] = round(df.geom.area,2) tzh_cols = recup_cols_table(table,con_gn) lst_cols = df.columns[df.columns.isin(tzh_cols)] to_tzh = df[lst_cols].copy() print('Columns non intégrés : %s'%str([x for x in tzh_cols if x not in lst_cols])) if to_tzh.crs.srs=='EPSG:2154': to_tzh.to_crs(4326,inplace=True) # dict_crs = to_tzh.crs.to_json_dict() # dict_crs['id']['code'] = 0 # to_tzh.crs.from_json_dict(dict_crs) # to_tzh.geom = to_tzh.geom.to_wkt().copy() to_tzh.to_wkt().to_sql( name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False, dtype={ 'geom': Geometry(srid=4326) # 'id_lim_list':uuid.SafeUUID } ) print('INSERT t_zh OK !') _cor_zh_area(tzh_code=to_tzh.code,typ='DEP') print('INSERT cor_zh_area DEP OK !') _cor_zh_areaBis(tzh_code=to_tzh.code,typ='COM',cover=True) print('INSERT cor_zh_area COM OK !') _cor_zh_area(tzh_code=to_tzh.code,typ='ZPS') print('INSERT cor_zh_area ZPS OK !') _cor_zh_area(tzh_code=to_tzh.code,typ='SIC') print('INSERT cor_zh_area SIC OK !') # _cor_zh_area(tzh_code=to_tzh.code,typ='ZSC') # print('INSERT cor_zh_area ZSC OK !') # _cor_zh_area(tzh_code=to_tzh.code,typ='PSIC') # print('INSERT cor_zh_area PSIC OK !') _cor_zh_area(tzh_code=to_tzh.code,typ='ZNIEFF1') print('INSERT cor_zh_area ZNIEFF1 OK !') _cor_zh_area(tzh_code=to_tzh.code,typ='ZNIEFF2') print('INSERT cor_zh_area ZNIEFF2 OK !') _cor_zh_hydro(tzh_code=to_tzh.code) print('INSERT cor_zh_hydro OK !') _cor_zh_(tzh_code=to_tzh.code,typ='rb') print('INSERT cor_zh_rb OK !') def to_cor_zh_lim_fs(df): df = df[['code','crit_def_esp']]\ .fillna('Non déterminé') cor_zh_lim_fs = pd.merge(df,get_id_t_zh(df.code),on='code')\ .rename(columns={'crit_def_esp':'id_lim_fs'}) crit_def_esp_fct = t_nomenclature_ZH('CRIT_DEF_ESP_FCT') dict_crit = dict(zip(crit_def_esp_fct.mnemo,crit_def_esp_fct.id_nomenclature)) cor_zh_lim_fs.id_lim_fs.replace(dict_crit, inplace=True) cor_zh_lim_fs[['id_zh','id_lim_fs']].to_sql( name='cor_zh_lim_fs',con=con_gn,schema='pr_zh',if_exists='append',index=False ) def get_azalee_activity(): sql = """ SELECT g.id_site code, CASE WHEN length(pa.id::varchar)=1 THEN '0'||pa.id::varchar||' - '||pa.nom ELSE pa.id::varchar||' - '||pa.nom END activ_hum, pp.description "position", pi.nom impact, a.activ_hum_autre||'\n'||a.remarques rmq_activ_hum, a."valid" FROM zones_humides.r_site_usageprocess a LEFT JOIN zones_humides.param_activ_hum pa ON pa.id = a.id_activ_hum LEFT JOIN zones_humides.param_position pp ON pp.id = a.id_position LEFT JOIN zones_humides.param_impact pi ON pi.id = a.id_impact JOIN (sites.r_sites_geom g JOIN sites.sites s ON s.id = g.id_site) ON g.id = a.id_geom_site WHERE g.date = (SELECT max(dat) FROM (values (g.date)) AS value(dat)) and a."valid" and s.date_fin is NULL """ return pd.read_sql_query(sql,zh.con) def get_cor_impact_types(): return pd.read_sql_table('cor_impact_types',con_gn,'pr_zh') def to_t_activity(df=None): table = 't_activity' if df is None: df = get_azalee_activity()\ .drop_duplicates() else: df = df[['code','activ_hum','impact','position','rmq_activ_hum']] activ_hum = df.activ_hum.str.split(';',expand=True).stack() impact = df.impact.str.split(';',expand=True).stack() position = df.position.str.split(';',expand=True).stack() rmq_activ_hum = df.rmq_activ_hum.str.split(';',expand=True).stack() # df['activ_hum'] = remove_special_char(df['activ_hum'],space=True) # df['impact'] = remove_special_char(df['impact'],space=True) df['impact'] = remove_special_char(df['impact'].str.lower(),space=True) df['impact'].fillna('aucun',inplace=True) # df['position'] = remove_special_char(df['position'],space=True) no_activ_hum = t_nomenclature_ZH('ACTIV_HUM') dict_activ_hum = dict(zip(no_activ_hum.mnemo,no_activ_hum.id_nomenclature)) no_impact = t_nomenclature_ZH('IMPACTS').rename(columns={'id_nomenclature':'id_impact'}) cor_impact_types = pd.merge(get_cor_impact_types(),no_impact[['id_impact','mnemo']],on='id_impact') dict_impact = dict(zip(remove_special_char(cor_impact_types.mnemo.str.lower(),space=True),cor_impact_types.id_cor_impact_types)) no_position = t_nomenclature_ZH('LOCALISATION') dict_position = dict(zip(no_position.mnemo,no_position.id_nomenclature)) df['activ_hum'].replace(dict_activ_hum, inplace=True) df['impact'].replace(dict_impact, inplace=True) df['position'].replace(dict_position, inplace=True) df.rename(columns={ 'activ_hum':'id_activity', 'impact':'id_cor_impact_types', 'position':'id_position', 'rmq_activ_hum':'remark_activity' },inplace=True) # group_df = df.groupby(['code','id_activity','id_position'])['id_cor_impact_types'].apply(list).reset_index() group_df = df.groupby(['code','id_activity'],dropna=False)\ .agg({'id_position':list,'id_cor_impact_types':list,'remark_activity':list}).reset_index() group_df.id_position = [list(set(x)) for x in group_df.id_position ] group_df.id_position = [ x[0] if len(x)==1 else no_position.loc[no_position.cd_nomenclature=='3','id_nomenclature'].values[0] for x in group_df.id_position ] group_df.remark_activity = ['\n'.join(list(set(x))) if list(set(x)) != [None] else None for x in group_df.remark_activity] group_df['id_impact_list'] = [uuid.uuid4() for x in group_df.index] cor_impact_list = group_df[['id_impact_list','id_cor_impact_types']]\ .explode('id_cor_impact_types')\ .drop_duplicates() # activity = pd.merge(group_df[['code','id_activity','id_impact_list','id_position']],df,on=['code','id_activity','id_position'],how='left') # t_activity = pd.merge(activity,get_id_t_zh(df.code),on='code') t_activity = pd.merge(group_df,get_id_t_zh(df.code),on='code',how='left') tactiv_cols = recup_cols_table(table,con_gn) lst_cols = t_activity.columns[t_activity.columns.isin(tactiv_cols)] to_tactiv = t_activity[lst_cols] to_tactiv.to_sql( name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False, # dtype={ # 'id_impact_list':uuid.UUID # } ) cor_impact_list.to_sql( name='cor_impact_list', con=con_gn, schema='pr_zh', if_exists='append', index=False, # dtype={ # 'id_impact_list':uuid.UUID # } ) def get_azalee_functions(): sql = """ SELECT g.id_site code, pa.nom id_function, a."quantite", a.description justification, a."valid" FROM zones_humides.r_site_fctecosociopatri a LEFT JOIN zones_humides.param_fct_eco_socio_patri pa ON pa.id = a.id_fct JOIN (sites.r_sites_geom g JOIN sites.sites s ON s.id = g.id_site) ON g.id = a.id_geom_site WHERE g.date = (SELECT max(dat) FROM (values (g.date)) AS value(dat)) and a."valid" and s.date_fin is NULL """ return pd.read_sql_query(sql,zh.con) def to_t_functions(df=None): table = 't_functions' if df is None: func = get_azalee_functions() func.id_function.replace({' / ':'/'},regex=True,inplace=True) else: func = df[['code','fct_bio','fct_hydro','int_patri','val_socioEco']].set_index('code').unstack() func = func.str.split(';',expand=True).stack()\ .str.split(' \(',1,expand=True) func.columns = ['id_function','justification'] func.justification = func.justification.str.rsplit('\)',1,expand=True)[0] functions = pd.concat([ t_nomenclature_ZH('FONCTIONS_HYDRO'),t_nomenclature_ZH('FONCTIONS_BIO'), t_nomenclature_ZH('VAL_SOC_ECO'),t_nomenclature_ZH('INTERET_PATRIM'), ]) functions.mnemo.replace({ r'..- ':'', r' \(.*\)':'' },regex=True,inplace=True) dict_func = dict(zip(functions.mnemo,functions.id_nomenclature)) func.id_function.replace(dict_func,inplace=True) not_idfunc = ['non documenté','aucune fonction hydrologique','aucune valeur socio-économique'] del_index = func[func.id_function.isin(not_idfunc)].index func.drop(del_index,inplace=True) funct = func.groupby(['code','id_function']).agg(list).reset_index() funct.justification = ['\n'.join(x) if x != [None] else None for x in funct.justification] qualif = t_nomenclature_ZH('FONCTIONS_QUALIF') knowle = t_nomenclature_ZH('FONCTIONS_CONNAISSANCE') funct['id_qualification'] = qualif.loc[qualif.mnemo=='Non évaluée','id_nomenclature'].values[0] funct['id_knowledge'] = knowle.loc[knowle.mnemo=='Lacunaire ou nulle','id_nomenclature'].values[0] t_func = pd.merge(funct,get_id_t_zh(funct.code),on='code') tactiv_cols = recup_cols_table(table,con_gn) lst_cols = t_func.columns[t_func.columns.isin(tactiv_cols)] to_tfunction = t_func[lst_cols] to_tfunction.to_sql( name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False ) def add_remark_pres(not_bib): table = 't_zh' rmq_pres = pd.read_sql_table(table,con_gn,'pr_zh',columns=['id_zh','remark_pres']) rmq_pres.remark_pres.replace({'':None},inplace=True) not_bib = rmq_pres.merge(not_bib,on='id_zh') not_bib.loc[not_bib.remark_pres.notna(),'remark_pres'] = \ not_bib[not_bib.remark_pres.notna()].remark_pres+ '\n' + \ not_bib[not_bib.remark_pres.notna()].lb_code not_bib.loc[not_bib.remark_pres.isna(),'remark_pres'] = \ not_bib[not_bib.remark_pres.isna()].lb_code not_bib.drop(columns='lb_code',inplace=True) from pycen import update_to_sql update_to_sql( df=not_bib, con=con_gn, table_name=table, schema_name='pr_zh', key_name='id_zh', ) def __format_lb_code__(t): table = 'cor_zh_cb' cols = ['code','code_cb'] df_cb = t[cols].copy() df_cb.set_index('code',inplace=True) zh_cb = df_cb.code_cb.str.split(';',expand=True)\ .stack()\ .droplevel(-1)\ .reset_index() zh_cb.columns = cols zh_cb.rename(columns={'code_cb':'lb_code'},inplace=True) cor_zh_cb = pd.merge(zh_cb,get_id_t_zh(zh_cb.code.unique()),on='code') tzhcb = recup_cols_table(table,con_gn) lst_cols = cor_zh_cb.columns[cor_zh_cb.columns.isin(tzhcb)] to_corzhcb = cor_zh_cb[lst_cols].copy() to_corzhcb.lb_code = to_corzhcb.lb_code.astype(str) return to_corzhcb def __filter_lb_code__(t, join_ch=False): to_corzhcb = __format_lb_code__(t) bib_cb = pd.read_sql_table('bib_cb',con_gn,'pr_zh') bib_cb.lb_code = bib_cb.lb_code.astype(str) not_bib = to_corzhcb[~to_corzhcb.lb_code.isin(bib_cb.lb_code)] to_corzhcb.drop(not_bib.index,inplace=True) not_bib = not_bib\ .groupby('id_zh').agg(','.join) if join_ch: to_corzhcb = to_corzhcb.merge( bib_cb.drop(columns='humidity'),on='lb_code',how='left' ) return to_corzhcb, not_bib def to_cor_zh_cb(t): table = 'cor_zh_cb' to_corzhcb, not_bib = __filter_lb_code__(t, join_ch=False) not_bib.lb_code = 'Autre(s) habitat(s) décrit(s) :\n' + not_bib.lb_code add_remark_pres(not_bib) to_corzhcb.to_sql( name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False ) def to_t_flow(DF, type_flow=None): """ @df : pd.DataFrame @type_flow : str. ['inflow' or 'outflow'] """ if type_flow=='inflow': table = 't_inflow' cols = ['entree_eau_reg', 'entree_eau_perm', 'entree_eau_topo'] _flow = t_nomenclature_ZH('ENTREE_EAU') _perm = t_nomenclature_ZH('PERMANENCE_ENTREE') dict_table = { 'flow':'id_inflow', 'perm':'id_permanance'} elif type_flow=='outflow': table = 't_outflow' cols = ['sortie_eau_reg', 'sortie_eau_perm', 'sortie_eau_topo'] _flow = t_nomenclature_ZH('SORTIE_EAU') _perm = t_nomenclature_ZH('PERMANENCE_SORTIE') dict_table = { 'flow':'id_outflow', 'perm':'id_permanance'} df = DF.copy().set_index('code') # df[cols] flow = df[cols[0]].str.split(';',expand=True).stack() perm = df[cols[1]].str.split(';',expand=True).stack() topo = df[cols[2]].str.split(';',expand=True).stack() inflow = pd.concat( [flow,perm,topo],axis=1,join='outer' ).droplevel(-1) # inflow2 = flow.to_frame()\ # .merge(perm.to_frame(),left_index=True,right_index=True,how='outer')\ # .merge(topo.to_frame(),left_index=True,right_index=True,how='outer') inflow.columns = ['flow','perm','topo'] inflow.reset_index(drop=False,inplace=True) dict_flow = dict(zip(_flow.mnemo,_flow.id_nomenclature)) dict_perm = dict(zip(_perm.mnemo.str.lower(),_perm.id_nomenclature)) inflow.flow.replace(dict_flow,inplace=True) inflow.perm.fillna('non déterminé',inplace=True) inflow.perm.replace({'inconnu':'non déterminé','':'non déterminé'},inplace=True) inflow.perm.replace(dict_perm,inplace=True) inflow.rename(columns=dict_table, inplace=True) t_flow = pd.merge(inflow,get_id_t_zh(inflow.code.unique()),on='code') tflow = recup_cols_table(table,con_gn) lst_cols = t_flow.columns[t_flow.columns.isin(tflow)] to_tflow = t_flow[lst_cols] to_tflow.to_sql( name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False ) def cor_zh_corine_cover(): table = 'cor_zh_corine_cover' sql = 'SELECT id_zh,geom FROM pr_zh.t_zh' df = gpd.read_postgis(sql,con_gn,crs=4326) df.to_crs(2154,inplace=True) df.geom = df.buffer(-0.5) mask = df.to_crs(2154).unary_union clc_path = '/home/colas/Documents/5_BDD/CLC/' clc_file = clc_path+'CLC12_FR_RGF.shp' clc_mfil = clc_path+'Metadonnees/CLC_nomenclature.xls' clc = gpd.read_file(clc_file,mask=mask) clc.rename_geometry('geom', inplace=True) meta_clc0 = pd.read_excel(clc_mfil,0).rename(columns={'code_clc_niveau_1':'code_clc'}) meta_clc1 = pd.read_excel(clc_mfil,1).rename(columns={'code_clc_niveau_2':'code_clc'}) meta_clc2 = pd.read_excel(clc_mfil,2).rename(columns={'code_clc_niveau_3':'code_clc'}) meta_clc = pd.concat([meta_clc0,meta_clc1,meta_clc2]) meta_clc.code_clc = meta_clc.code_clc.astype(str) gn_occsol = t_nomenclature_ZH('OCCUPATION_SOLS',source=None) dict_clc1 = dict(zip(meta_clc.code_clc,['.'.join(x) for x in meta_clc.code_clc])) dict_clc2 = dict(zip(gn_occsol.cd_nomenclature,gn_occsol.id_nomenclature)) tmp = gpd.sjoin( df, clc[['CODE_12','geom']], predicate = 'intersects', how = 'inner') cor_zh_clc = tmp[['id_zh','CODE_12']]\ .drop_duplicates()\ .rename(columns={'CODE_12':'id_cover'})\ .replace({'id_cover':dict_clc1})\ .replace({'id_cover':dict_clc2}) cor_zh_clc.to_sql( name=table, con=con_gn, schema='pr_zh', if_exists='append', index=False ) def migrate_to_gnZH(df:pd.DataFrame=None): to_bib_organismes_przh() to_t_zh(df) to_cor_zh_lim_fs(df) to_t_activity(df) to_t_functions(df) to_t_flow(df,type_flow='inflow') to_t_flow(df,type_flow='outflow') cor_zh_corine_cover() def to_t_references(db_file, suffixe_refnum=None): import pandas_access as mdb table = 't_references' t_ref = pd.read_sql_table(table,con_gn,'pr_zh') dic_col_ref = { 'REF_NO':'ref_number', 'REFERENCE':'reference', 'AUTHOR':'authors', 'TITLE':'title', 'YEAR':'pub_year', 'PUBLISHER':'editor', 'LOCATION':'editor_location', } df = mdb.read_table(db_file, 'MWDREF')\ .rename(columns=dic_col_ref) df.ref_number = df.ref_number.astype(str) siteref = mdb.read_table(db_file, 'SITEREF') siteref.REF_NO = siteref.REF_NO.astype(str) df = df[df.ref_number.isin(siteref.REF_NO)] df.loc[df.title.isna(),'title'] = df[df.title.isna()].reference if suffixe_refnum is not None: df.ref_number = suffixe_refnum + df.ref_number df.to_sql(name=table,con=con_gn,schema='pr_zh',if_exists='append',index=False) def to_cor_zh_ref(db_file, suffixe_refnum=None): import pandas_access as mdb dict_col_cor = { 'REF_NO':'id_ref' } table = 'cor_zh_ref' ref = pd.read_sql_table('t_references',con_gn,'pr_zh') dict_idref = dict(zip(ref.ref_number,ref.id_reference)) # SITEREF cor = mdb.read_table(db_file, 'SITEREF')\ .rename(columns=dict_col_cor) cor.id_ref = cor.id_ref.astype(str) if suffixe_refnum is not None: cor.id_ref = suffixe_refnum + cor.id_ref t_zh = get_id_t_zh(cor.SITE_COD) to_tab = cor.merge( t_zh.drop(columns='zh_uuid'), left_on='SITE_COD', right_on='code', ).drop(columns=['SITE_COD','code'])\ .replace(dict_idref) to_tab.to_sql( name=table,con=con_gn,schema='pr_zh', if_exists='append',index=False) def OTHERINV_to_tref(db_file): import pandas_access as mdb table = 't_zh' dic = { 'FFn' :'FF n', r'n \° ' :'n°', r'n \°' :'n°', r'n\+' :'n°', r'n\° ':'n°', r'n\° ':'n°', r'n\° ' :'n°', r' ' :' ', } sitinfo = mdb.read_table(db_file, 'SITEINFO')\ .set_index('SITE_COD') otinv = sitinfo[['OTHER_INV']]\ .dropna().OTHER_INV\ .str.split(';',expand=True).stack()\ .str.strip()\ .replace(dic,regex=True)\ .str.split(', Z',expand=True,regex=True).stack()\ .str.strip() znieff = otinv[otinv.str.startswith('ZNIEF')] znieff = pd.concat([znieff,otinv[otinv.str.startswith('NIEF')]]) znieff = pd.concat([znieff,otinv[otinv.str.startswith('ZNEIF')]]) znieff = pd.concat([znieff,otinv[otinv.str.startswith('n°')]]) znieff = pd.concat([znieff,otinv[otinv.str.startswith('Site N')]]) znieff = pd.concat([znieff,otinv[otinv.str.startswith('Sites N')]]) znieff = pd.concat([znieff,otinv[otinv.str.startswith('2606')]]) znieff = pd.concat([znieff,otinv[otinv.str.startswith('3817')]]) znieff = pd.concat([znieff,otinv[otinv.str.startswith('Inventaire N')]]) znieff = pd.concat([znieff,otinv[otinv.str.startswith('Mais aussi ZPS')]]) filter_inv = otinv[~otinv.isin(znieff)]\ .droplevel([-1,-2])\ .reset_index() other_inv = filter_inv.groupby('SITE_COD').agg('\n'.join)\ .reset_index()\ .rename(columns={0:'remark_is_other_inventory'}) t_zh = get_id_t_zh(other_inv.SITE_COD) to_tab = other_inv.merge( t_zh.drop(columns='zh_uuid'), left_on='SITE_COD', right_on='code', ).drop(columns=['SITE_COD','code']) to_tab['is_other_inventory'] = True from pycen import update_to_sql update_to_sql( df=to_tab, con=con_gn, table_name=table, schema_name='pr_zh', key_name='id_zh', ) def trunc_table(table,cascade=False): """ Tronque la table pr_zh.table Parameters ---------- table : str Nom de la table à tronquer cascade : bool, optional Si True, la suppression se fait avec l'option CASCADE. Cela signifie que les clés étrangères pointant vers cette table seront également supprimées (par exemple, si vous supprimez une zone humide, vous supprimez automatiquement les données de suivis liées à cette zone). Par défaut, cette option est à False. """ cascade = 'CASCADE;' if cascade else ';' sql = 'TRUNCATE pr_zh.%s %s'%(table,cascade) with con_gn.begin() as cnx: cnx.execute(sql) if __name__ == "__main__": # TRUNCATE TABLE # trunc_table('t_zh',cascade=True) # trunc_table('cor_zh_area') # trunc_table('t_reference') from pycen.geonature import pr_zh t_zh = pr_zh.t_zh() drop_cols = ['auteur_geom','date_geom','type_milieu','type_site',] DF = zh.v_zoneshumides() DF.rename(columns=DICT_TZH,inplace=True) DF.drop(columns=drop_cols,inplace=True) df = DF.copy() df = DF[~DF.code.isin(t_zh.code)].copy() migrate_to_gnZH(df) # to_bib_organismes_util() # Fait sch:'utilisateurs' # to_bib_organismes_przh() # Fait sch:'pr_zh' # to_t_roles() # Fait # to_t_zh(df) # Fait # to_cor_zh_lim_fs(df) # Fait # to_t_activity(df) # Fait # to_t_functions(df) # Fait # to_t_flow(df,type_flow='inflow') # Fait # to_t_flow(df,type_flow='outflow') # Fait # t_river_basin # OK ! cf.insert_lareas.py (sous bassin-versant SDAGE) # t_hydro_area # OK ! cf.insert_lareas.py (bassin-versant Topographique) # cor_zh_area # OK with to_t_zh ! ; bib_area = [COM,DEP,ref for ref_geo_referentiels of conf_gn_module.toml] # cor_zh_rb # OK with to_t_zh ! ; # cor_zh_hydro # OK with to_t_zh ! ; # cor_zh_fct_area # Dépendand de t_fct_area (vide) : table des aires de fonctionnalités # cor_zh_corine_cover # OK ! # fct_delim # to_cor_zh_cb() # Prêt # DF[DF.sortie_eau_reg.str.contains('diffus',na=False)].code.tolist() # ['38BB0109', '38BB0128', '38BB0129'] # get_cor_zh_corine_cover() # A FAIRE via MEDWET # get_cor_zh_protection() # A FAIRE via MEDWET # get_t_ownership() # A FAIRE via MEDWET # get_t_table_heritage() # A FAIRE (pas sûre..) # get_t_instruments() # A FAIRE (pas sûre..) # get_t_management_structures() # A FAIRE (pas sûre..) t_nomenclature_ZH(bib_mnemo='EVAL_GLOB_MENACES') def drop_table(table): sql = 'TRUNCATE pr_zh.%s'%table with con_gn.begin() as cnx: cnx.execute(sql) cnx.commit() cnx.close()