#!/usr/bin/env python3 # -*- coding: UTF-8 -*- #Nom : : recup_ps.py #Description : #Copyright : 2021, CEN38 #Auteur : Colas Geier #Version : 1.0 # import re from os import register_at_fork import types from numpy.lib.shape_base import expand_dims import pandas as pd import geopandas as gpd from pandas.io.pytables import incompatibility_doc from shapely import wkb import numpy as np from sqlalchemy.sql.expression import column # from pycen import bdd import pycen from sqlalchemy import create_engine from sqlalchemy.engine import URL from geoalchemy2 import Geometry isin_bdd = True lst_tab = ['"PS_4MONTAGNESNE_CEN38_2014"','"PS_CHAMBARAN_CEN38_2013"','"PS_CHARTREUSE_CEN38_2010"','"PS_DRAC_CEN38_2014"', '"PS_BELLEDONNE_CEN38_2014"','"PS_BIEVRE_CEN38_2014"','"PS_BIEVRE_CEN38_2016"','"PS_SUD-GRENOBLOIS_CEN38_2009"', '"PS_VERCORS_CEN38_2011"' ] # Parametres bdd IN user = 'cen_admin' pwd = '#CEN38@venir' adr = '192.168.0.189' base = 'bd-cen-38' schema = 'pelouse_seche' schema = 'habitat' url = URL.create('postgresql+psycopg2', username=user, password=pwd, host=adr, database=base, ) con = create_engine(url) table = '"cr_VERCORS_habitats_CBNA_1999-2007"' # table = '"PS_CHAMBARAN_CEN38_2013"' # bd = bdd.CEN( # user = user, # pwd = pwd, # adr = adr, # base = base # # schema = schema # ) # Parametres bdd OUT user_ps = 'colas_g' pwd_ps = 'adm1n*38*' adr_ps = '192.168.60.10' base_ps = 'bd_cen' url_ps = URL.create('postgresql+psycopg2', username=user_ps, password=pwd_ps, host=adr_ps, database=base_ps, ) con_ps = create_engine(url_ps) con_ps = pycen.con def get_pers(): sql = ('SELECT t1.id id, t1.nom nom, t1.prenom prenom, t2.nom organisme FROM {sch}.{tab1} t1' ' JOIN {sch}.{tab2} t2 ON t1.id_organisme = t2.id ORDER BY id').format(sch='personnes', tab1='personne', tab2='organisme') df_pers = pd.read_sql( sql = sql, con = con_ps) df_pers.replace([None],'',inplace=True) df_pers['auteur'] = df_pers.prenom + ' ' + df_pers.nom df_pers['auteur'] = df_pers['auteur'].str.strip() df_pers['organisme'] = df_pers['organisme'].str.strip() return pycen.pers.get_auteur() # def get_idgeomsite(lst_site): # ''' # :lst_site: tuple. # ''' # sql = ('SELECT id, id_site, date FROM {sch}.{tab} t0 ' # 'JOIN {sch}.type_milieu t1 WITH t0.id_type_milieu = t1.id ' # 'WHERE t1.nom = "Pelouses sèches" AND t0.id_site IN {lst} ORDER BY id').format(sch='sites', tab='r_sites_geom', lst=lst_site) # df = pd.read_sql( # sql = sql, # con = con_ps) # return df def format_legende(leg): dic = { } df = pd.DataFrame({'leg':leg}) df.replace( ['/',' x ','X',' x','fasciès','faciés', ' ','erigé','érigéé','Troêne','Troëne','brachypode','dominée','dominé','érigé_et_brachypode','cyperacées'], ['|','|','|','|','faciès','faciès',' ','érigé','érigé','Troène','Troène','Brachypode','dominé','dominée','érigé et brachypode','Cyperacées'], regex=True, inplace=True) return df def format_date(df): df = df.copy() if table in ['"PS_4MONTAGNESNE_CEN38_2014"','"PS_BIEVRE_CEN38_2014"', '"PS_CHAMBARAN_CEN38_2013"']: df.date = pd.to_datetime(df.date,format='%d/%m/%Y') if table == '"PS_BELLEDONNE_CEN38_2014"': df.date = pd.to_datetime(df.date,format='%d%b%Y') if table == '"PS_BIEVRE_CEN38_2016"': df.date = df.date.astype(str).replace('\.','',regex=True) df.date = df.date.str.rsplit(' ',1).str[0]\ .replace(['avr','mai','juin','juil'],['apr','may','jun','jul'],regex=True) df.date = pd.to_datetime(df.date,format='%d %b %Y') if table == '"PS_SUD-GRENOBLOIS_CEN38_2009"': # df.loc[df.date.str.len() < 6,'date'] = df.loc[df.date.str.len() < 6,'date'].astype(int) df.loc[df.date.str.len() < 6,'date'] = '01/06/' + df.loc[df.date.str.len() < 6,'date'].astype(int).max().astype(str) df.loc[df.date.str.len() > 6,'date'] = pd.to_datetime(df.loc[df.date.str.len() > 6,'date']) if 'annee' in df.columns and 'date' not in df.columns: # Si pas de date mais année précisée df['annee'] = df['annee'].astype(int) df['date'] = df['annee'].astype(str) + '-06-01' df.date = pd.to_datetime(df.date,format='%Y-%m-%d') elif any(df.date.astype(str).str.len() <= 4): # Si dates non-homogènes, cohexistance date&année d = df.loc[df.date.astype(str).str.len() <= 4,'date'].unique() if None in d: df.loc[df.date.astype(str).str.len() <= 4,'date'] = df.loc[df.date.astype(str).str.len() <= 4,'annee'].astype(str) + '-06-01' else: df.loc[df.date.astype(str).str.len() <= 4,'date'] = df.loc[df.date.astype(str).str.len() <= 4,'date'].astype(str) + '-06-01' df.date = pd.to_datetime(df.date,format='%Y-%m-%d') else: df.date = pd.to_datetime(df.date) return df def normalize_auteur(lst_auteur): ''' lst_auteur : pd.Series ''' dic = { 'Lise Duconte':'DUCONTE Lise', 'COSQUER Mélanie (AVENIR)':'COSQUER Mélanie', 'CHABERT Chloé (AVENIR)':'CHABERT Chloé', 'CHABERT Chloé (AVNEIR)':'CHABERT Chloé', 'PACHE Gilles (CBNA)':'PACHE Gilles', 'Gilles PACHE':'PACHE Gilles', 'JAMEAU Laura (AVENIR)':'JAMEAU Laura', 'MARCIAU Roger (AVENIR)':'MARCIAU Roger', 'Roger MARCIAU':'MARCIAU Roger', 'Aude Massa':'MASSA Aude', 'Aude MASSA':'MASSA Aude', 'Bilkisse ABOUDOU AVENIR':'ABOUDOU Bilkisse', 'Romain BARTHELD':'BARTHELD Romain', 'VILLARET J.C.':'VILLARET Jean-charles', 'Jean-Charles VILLARET':'VILLARET Jean-charles', 'Fabien ANTHELME':'ANTHELME Fabien', 'Jean-Christophe GATTUS':'GATTUS Jean-christophe', 'Alexis MIKOLAJCZAK':'MIKOLAJCZAK Alexis', 'Jonathan MALINEAU':'MALINEAU Jonathan', 'Aurélien DAUTREY':'DAUTREY Aurélien', 'Etienne MARY':'MARY Etienne', 'Alix GUEDOU': 'GUEDOU Alix', } lst_aut = lst_auteur.copy() lst_aut.replace(' & ',',',regex=True,inplace=True) if any(lst_aut.str.contains(',')): lst_aut = lst_aut.str.split(',',expand=True).stack().str.strip().droplevel(-1) lst_aut.replace(dic,inplace=True) df_aut = lst_aut.str.rsplit(' ',n=1,expand=True) df_aut[1] = df_aut[1].str[0].str.upper() + df_aut[1].str[1:].str.lower() df_aut[0] = df_aut[0].str.upper() df_aut[2] = df_aut[0] df_aut.loc[~df_aut[1].isna(),2] = df_aut.loc[~df_aut[1].isna(),0] + ' ' + df_aut.loc[~df_aut[1].isna(),1] lst_aut = df_aut[2].copy() aut = lst_aut.unique() res = [x for x in aut if x not in [*get_pers().nom_prenom]] if res: print('Les auteurs suivant ne figurent pas dans la BDD contact : %s'%res) new = new_auteur(res) lst_aut.replace([*new['aut'].keys()],[*new['aut'].values()], inplace=True) lst_aut = lst_aut.groupby(axis=0,level=0).apply(lambda x: "%s" % ','.join(x)) return lst_aut def new_auteur(new): ''' new : list ''' df = pd.DataFrame({'auteur': new}) dic = {'aut':{}, 'org':[]} # Pour chaque auteur for pers in df.auteur: print(pers) while True: newaut = input("Insérer un nouvel auteur ? (Y/N)\n") if not newaut[0].upper() in ['Y','N']: continue else: break # Si : pas d'insertion d'un nouvel auteur if newaut[0].upper() == 'N': while True: rep = input("Remplacer l'auteur par un auteur existant ? (Y/N)\n") if not rep[0].upper() in ['Y','N']: continue else: break if rep[0].upper() == 'Y': while True: org = pycen.pers.get_organisme().sort_index().to_dict()['nom'] reporg = int(input("Sélectionner le numéro de l'organisme :\n %s \n"%org)) if not reporg in [*org.keys()]: continue else: break while True: aut = get_pers().sort_values('id') aut = aut[aut.id_organisme == reporg] aut = aut.nom_prenom.to_dict() repaut = int(input("Sélectionner le numéro de l'auteur :\n %s \n"%aut)) if not repaut in [*aut.keys()]: continue else: break dic['aut'] = {**dic['aut'],pers:aut[repaut]} dic['org'] = [*dic['org'],org[reporg]] # Autrement : insertion d'un nouvel auteur else: while True: print("Auteur : %s"%pers) rep = input("L'auteur doit être sous le format NOM Prénom.\nRenommer l'auteur ? (Y/N)\n") if not rep[0].upper() in ['Y','N']: continue elif rep[0].upper() == 'Y': repaut = input("Saisisser un nouveau nom :\n") dic['aut'] = {**dic['aut'],pers:repaut} pers = repaut break else: dic['aut'] = {**dic['aut'],pers:pers} break while True: org = pycen.pers.get_organisme().sort_index().to_dict()['nom'] org = {-1: 'Autre', **org} reporg = int(input("Sélectionner le numéro de l'organisme :\n %s \n"%org)) if not reporg in [*org.keys()]: continue elif reporg == -1: reporg = input("Saisisser un nouvel organisme :\n") dic['org'] = [*dic['org'],reporg] insert_newOrganisme(reporg) org = pycen.pers.get_organisme() idorg = org[org.nom==reporg].index[0] insert_newAuteur(pers,idorg) break else: dic['org'] = [*dic['org'],org[reporg]] insert_newAuteur(pers,reporg) break return dic def insert_newAuteur(aut,idorg): aut = pd.Series(aut) idorg = pd.Series(idorg) df = pd.DataFrame({'aut':aut,'id_organisme':idorg}) # print(df) tmp = df.aut.str.rsplit(' ',1,expand=True) if tmp.shape[1] == 1: df[['nom']] = tmp elif tmp.shape[1] == 2: df[['nom', 'prenom']] = tmp else: raise ValueError('NOM Prénom est de longueur inconnu :\ntmp') del df['aut'] try: df.to_sql(name='personne',con=pycen.con,schema=pycen.pers.schema, if_exists='append', index=False) print("Nouvel auteur inséré : '%s'"%aut) except: print("ERROR : impossible d'insérer le nouvel auteur :\n '%s'"%aut) def insert_newOrganisme(nom): nom = pd.Series(nom) df = pd.DataFrame({'nom':nom}) try: df.to_sql(name='organisme',con=pycen.con,schema=pycen.pers.schema, if_exists='append', index=False) print("Nouvel organisme inséré : '%s'"%nom) except: print("ERROR : impossible d'insérer le nouvel organisme :\n '%s'"%nom) def normalize_colname(df): df.columns = df.columns.str.lower() df.columns = df.columns.str.replace('__','_',regex=True) return df.rename(columns={ 'n_polygone':'ident', 'id_site':'ident', 'ident_':'ident', 'id':'ident', 'idfinal':'id_origine', 'date_':'date', 'obs':'auteur', 's_p_brous' :'%_embrous', 's_p_brouss':'%_embrous', 'niv__embro':'niv_embrous', 'niv_embro' :'niv_embrous', 'niv_emb' :'niv_embrous', 'recouvreme':'recouvmnt', 'recouvr':'recouvmnt', 'recouv' :'recouvmnt', 'recouvr_' :'recouvmnt', 'remarque' :'remarques', 'remarq_' :'remarques', 'legendes' :'legende', 'legend' :'legende', 'sources' :'source', 'surf' :'surface', 'geometry' :'geom', }) def get_id_auteur(lst_autor): sch = 'personnes' tab = 'personne' sql = """ WITH tmp as (SELECT id, TRIM(CONCAT(nom,' ',prenom)) as auteur FROM {sch}.{tab}) SELECT * FROM tmp WHERE auteur IN %(lst_autor)s""".format(sch=sch,tab=tab) df = pd.read_sql( sql = sql, con = pycen.con, params = {'lst_autor': tuple(lst_autor)}) return df def existing_ps(df): gdf = pycen.ps.get_sitesGeom() gdf['area'] = gdf.area ovl = gpd.overlay(df,gdf) ovl = gpd.overlay(df,gdf,how='intersection') #'intersection', 'union', 'identity', 'symmetric_difference' or 'difference' ovl['ovl_area'] = ovl.area ovl['ovl%'] = ovl['ovl_area'] * 100 / ovl['area'] return df def define_siteOrg(df): gdf = pycen.get_districtNat()[['abrev','geom']] tmp = df[['geom']].copy().set_geometry('geom') tmp = gpd.overlay(tmp,gdf, how='intersection') tmp['area'] = tmp.area tmp.reset_index(0, inplace=True) tmp.set_index('area',inplace=True) tmp.sort_index(inplace=True) tmp.drop_duplicates(subset=['index'],keep='last',inplace=True) tmp.reset_index(drop=True,inplace=True) del tmp['geometry'] df = df.merge(tmp,how='left',left_index=True,right_on='index') del df['index'] df.rename(columns={'abrev':'org'}, inplace=True) return df # tmp[tmp.id_site==334] # 'touches', None, 'contains', 'overlaps', 'contains_properly', 'within', 'intersects', 'crosses', 'covers', 'covered_by' def define_siteName(df): df = define_siteOrg(df.copy()) df['dept'] = '38' # df['num'] = None if 'org' in df.columns: for org in df.org.unique(): tmp = pycen.sites.get_sitesInfos(milieu=pycen.ps.milieu) # tmp = pycen.sites.get_sitesInfos(milieu=pycen.zh().typ_milieux.nom[0]) tmp = tmp[tmp.org == org] if not tmp.empty: num = tmp.num.astype(int).max() seq = range(num+1, num + df.loc[df.org==org].shape[0]+1) df.loc[df.org==org,['num']] = list(seq) else: seq = range(1, df.loc[df.org==org].shape[0]+1) df.loc[df.org==org,['num']] = list(seq) else: tmp = pycen.sites.get_sitesInfos(milieu=pycen.ps.milieu) df.loc[~df.num.isna(),'num'] = df[~df.num.isna()].num.astype(int).astype(str) df.loc[df.num.str.len() == 1,'num'] = '000'+ df.loc[df.num.str.len() == 1,'num'] df.loc[df.num.str.len() == 2,'num'] = '00' + df.loc[df.num.str.len() == 2,'num'] df.loc[df.num.str.len() == 3,'num'] = '0' + df.loc[df.num.str.len() == 3,'num'] df['id_site'] = df['dept'] + df['org'] + df['num'] return df def normalize_formatIdsite(id_site): import re df = pd.DataFrame({'id_site':id_site}) df['dept'] = '38' df[['org','num','other']] = [re.split('(\d+)',s) for s in [*df.id_site]] del df['other'] df.loc[df.num.str.len() == 1,'num'] = '000'+ df.loc[df.num.str.len() == 1,'num'] df.loc[df.num.str.len() == 2,'num'] = '00' + df.loc[df.num.str.len() == 2,'num'] df.loc[df.num.str.len() == 3,'num'] = '0' + df.loc[df.num.str.len() == 3,'num'] df['id_site'] = df['dept'] + df['org'] + df['num'] return df.id_site def normalize_paturage(pat): return pat.str.lower().replace( ['paturage','ânes','vaches','moutons','cheval','chevaux','poneys','sangliers','chèvres'], ['pâturage','asin', 'bovin','ovin','équin','équin','équin',None,'caprin'],regex=True) def normalize_pratiques(prat): dic = { 'pâturage equin':'pâturage équin', 'pâturage bovin':'pâturage bovin', 'paturâge cervidé':'pâturage cervidé', 'broyer': 'broyage', 'truffier': 'broyage', 'plantation': 'broyage', 'culture' : 'labourage', 'abandon recent': 'abandon', 'abandon verger': 'abandon', 'remblais et feu' : 'incertain', 'remblais, feu' : 'incertain', 'caprin': 'pâturage caprin', 'ovin': 'pâturage ovin', 'équin': 'pâturage équin', 'equin': 'pâturage équin', 'bovin': 'pâturage bovin', 'paturage': 'pâturage', 'paturâge': 'pâturage', 'f' : 'fauche', 'p' : 'pâturage', '0' : 'N.D', 'i' : 'incertain', 'a' : 'abandon', 'pv': 'pâturage', 'b' : 'broyage', 'pc': 'pâturage caprin', 'po': 'pâturage ovin', 'pe': 'pâturage équin', 'pb': 'pâturage bovin', '-' : 'N.D', '0' : 'N.D', 'ND' : 'N.D', 'n.d' : 'N.D', 'chemin carrossable non goudronné' : 'N.D', 'plantation de châtaigniers' : 'broyage', } p = prat.replace(['.et','/'],[',',', '],regex=True).copy() p = p.str.lower() p = normalize_paturage(p) p = p.replace(dic) p.fillna('N.D', inplace=True) p[p.str.contains('\?| ou ',na=False)] = 'incertain' pp = p.str.split(',',expand=True) pp.columns = 'p' + pp.columns.astype(str) for c in pp.columns: pp[c] = pp[c].str.strip() pp[c] = pp[c].replace(dic) return pp def select_ID(df,sch,tab): df = df.copy() sql = 'SELECT * FROM %s.%s'%(sch,tab) # df = df.copy().dropna(axis=1,how='all') df.replace("'","''",regex=True,inplace=True) if 'geom' in df.columns: del df['geom'] if 'date' in df.columns: df['date'] = df['date'].astype(str) elif 'date_deb' in df.columns: df['date_deb'] = df['date_deb'].astype(str) if 'id_cb' in df.columns: df['id_cb'] = df['id_cb'].astype(str) column = '(%s)'%','.join(df.columns) df_tuple = tuple(df.itertuples(index=False, name=None)) sql = sql + ' WHERE %s IN %s'%(column,df_tuple) if 'nan' in sql: sql = sql.replace('nan','NULL') if ',)' in sql: sql = sql.replace(',)',')') if '"' in sql: sql = sql.replace('"',"'") # print(sql) return pd.read_sql( sql=sql, con=pycen.con,) def insertAutorAttrs(df,colname_rsite, sch, tab): df = df.copy() df = df[['id', 'auteur']].copy() if any(df.auteur.str.contains(',')): tmp = df.auteur.str.split(',',expand=True).stack().droplevel(-1) tmp = pd.DataFrame({'auteur':tmp}) df = pd.merge(df[['id']],tmp,how='left',left_index=True,right_index=True) df_autor = get_id_auteur([*df.auteur.unique()]) df.replace([*df_autor.auteur],[*df_autor.id], inplace=True) df.columns = [colname_rsite, 'id_auteur'] try : df.to_sql( name=tab, con=pycen.con, schema=sch, index=False, if_exists='append', method='multi' ) print('Insert author sites data OK !') return 'Insert author sites data OK !' except Exception as e: print('ERROR : News author insert impossible !') print(e) def _altertype(df1,df2): same_col = df1.columns[df1.columns.isin(df2.columns)] if 'date' in df2.columns: df2['date'] = df2['date'].astype(str) if 'geom' in same_col: same_col = same_col.drop('geom') for col in same_col: if df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == int: df1[col] = df1[col].astype(int) elif df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == float: df1[col] = df1[col].astype(float) elif df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == str: df1[col] = df1[col].astype(str) return [df1,df2] def insert_authordata(df_ins,tab_data,id_data,colname_rsite, tab_auteur): df_ins = df_ins.copy() if 'date' in df_ins.columns: df_ins['date'] = df_ins['date'].astype(str) elif 'date_deb' in df_ins.columns: df_ins['date_deb'] = df_ins['date_deb'].astype(str) tab_sit = pycen.con.dialect.get_table_names(pycen.con,pycen.sites.schema) tab_ps = pycen.con.dialect.get_table_names(pycen.con,pycen.ps.schema) if tab_data in tab_sit: sch = pycen.sites.schema elif tab_data in tab_ps: sch = pycen.ps.schema # ids = select_ID(df_ins[df_ins.columns.drop('auteur')],sch,tab_data) ids = select_ID(df_ins[[id_data]],sch,tab_data) if 'date' in ids.columns: ids['date'] = ids['date'].astype(str) same_col = df_ins.columns[df_ins.columns.isin(ids.columns)] if 'geom' in same_col: same_col = same_col.drop('geom') df_ins,ids = _altertype(df_ins,ids) df_ins = df_ins[[*same_col,'auteur']].merge(ids, on=[*same_col], how='left') insertAutorAttrs(df=df_ins,colname_rsite=colname_rsite, sch=sch, tab=tab_auteur) def insert_site(df): df = df.copy() if 'nom' not in df.columns: df['nom'] = 'Inconnu' if 'type_site' not in df.columns: df['type_site'] = 'N.D.' if 'type_milieu' not in df.columns: df['type_milieu'] = 'Pelouses sèches' # if 'remarques' not in df.columns: # df['remarques'] = None df = df[['id_site','nom','date','auteur','type_site','type_milieu', # 'remarques' ]] typs = pycen.sites._get_typ_site() typm = pycen.sites._get_typ_milieux() for t in df.type_milieu.unique(): df.loc[df.type_milieu == t, 'id_type_milieu'] = typm.loc[typm.nom == t, 'id'].values[0] for s in df.type_site.unique(): df.loc[df.type_site == s, 'id_type_site'] = typs.loc[typs.nom == s, 'id'].values[0] df['id_type_milieu'] = df['id_type_milieu'].astype(int) # df['id_type_site'] = df['id_type_site'].astype(str) df['dept'] = df.id_site.str[:2] df['org'] = df.id_site.str[2:-4] df['num'] = df.id_site.str[-4:] df.rename(columns={'date':'date_deb', 'id_site':'id'}, inplace=True) columns = [col['name'] for col in pycen.sites.columns_sitetab] + ['auteur'] c = df.columns[df.columns.isin(columns)] df = df[c] print(df) try: df[df.columns.drop('auteur')].to_sql( name = 'sites', con = pycen.con, schema = pycen.sites.schema, if_exists = 'append', index = False, method = 'multi' ) print('Insert news sites OK !') return [df,True] except Exception as e: print(e) print('PS : Insert news sites impossible !') return [df,False] def check_org_lot(lot): """ :lot: list. liste des lots return ------- DataFrame des id_lot de la liste lot. """ tab_lot = pycen.sites._get_lots() not_intab = [ l for l in lot if l not in [*tab_lot.libelle] ] if not_intab: df = pd.DataFrame({'libelle':not_intab}) df.to_sql( name='lots', con=con_ps, schema='sites', index=False, if_exists='append', method='multi', ) tab_lot = pycen.sites._get_lots() tab_lot = tab_lot[tab_lot.libelle.isin(lot)] return tab_lot[['id_lot','libelle']] def insert_geom(df): dic = {'remarques':'rmq_interet_patri','legende':'rmq_usage_process'} c = ['id_site','geom','auteur','date','remarques','legende','table_org','id_origine'] cc = df.columns[df.columns.isin(c)] df = df[cc].copy() df.set_geometry('geom', inplace=True) colindict = [ *df.columns[df.columns.isin(dic.keys())] ] if colindict: df.rename( columns={'remarques':'rmq_interet_patri','legende':'rmq_usage_process'}, inplace=True, ) if 'table_org' in df.columns: tab = [*df.table_org.unique()] lots = check_org_lot(tab) df = pd.merge(df,lots,left_on='table_org',right_on='libelle',how='left') del df['table_org'] del df['libelle'] try: df[df.columns.drop('auteur')].to_postgis( name = 'r_sites_geom', con = pycen.con, schema = pycen.sites.schema, if_exists = 'append', index = False, geom_col = 'geom' ) print('Insert news geom OK !') return [df,True] except Exception as e: print(e) print('PS : Insert news geom impossible !') return [df,False] def get_idGeomSite(lst_site): ''' :lst_site: list or pd.Series . ''' id_geom = pycen.ps.get_sitesGeom( id_site=[*lst_site]) if not id_geom.empty: id_geom = id_geom[['id','id_site']] id_geom.columns = ['id_geom_site','id_site'] return id_geom def insert_attrsPS(df): c = ['id_site','auteur','date','%_embrous','niv_embrous','recouvmnt','pratiques','statut'] cc = df.columns[df.columns.isin(c)] df = df[cc].copy() df = pd.concat([df,normalize_pratiques(df['pratiques'])],axis='columns') del df['pratiques'] for col in ['%_embrous','niv_embrous','recouvmnt']: df[col] = df[col].astype(str).replace(['\.0'],[''],regex=True) df.niv_embrous.replace(['nan'],['N.D'],inplace=True) df.recouvmnt.replace(['nan','None','0'],['N.D','N.D','N.D'],inplace=True) df.statut.replace(['ND'],['N.D'],inplace=True) # for t in df.dtypes[df.dtypes == int].index: # df.loc[~df[t].isna(),t] = df.loc[~df[t].isna(),t].astype(int).astype(str) if '%_embrous' in df.columns: taux = True df.niv_embrous = df.niv_embrous + ';' + df['%_embrous'] del df['%_embrous'] else : taux = False df.set_index(['id_site','auteur','date'], inplace=True) df = df.stack().reset_index(-1) df.columns = ['type','param'] df.loc[df.type.str.len() < 4,'type'] = 'pratique' if taux: df[['param','taux']] = df.param.str.split(';',expand=True) df.taux.replace(['nan'],[None],inplace=True) df.type.replace({'niv_embrous':'embroussaillement','recouvmnt':'recouvrement'}, inplace=True) df.param.replace({'ND':'N.D'}, inplace=True) df.reset_index(inplace=True,drop=False) param = pycen.ps.get_param() typ_p = pycen.ps._get_table(con=pycen.con,schema=pycen.ps.schema,table='type_param') for t in df.type.unique(): tmp = param[param.type == t] df.loc[df.type==t,'param'] = df.loc[df.type==t,'param'].replace([*tmp.nom], [*tmp.id]) df['type'].replace([*typ_p.nom], [*typ_p.id], inplace=True) df = df.merge(get_idGeomSite(df.id_site), on=['id_site'], how='left') del df['id_site'] del df['type'] df.rename(columns={'param':'id_param'}, inplace=True) try: df[df.columns.drop('auteur')].to_sql( name = 'r_site_param', con = pycen.con, schema = pycen.ps.schema, if_exists = 'append', index = False, method = 'multi' ) print('Insert news attrs OK !') return [df,True] except Exception as e: print(e) print('PS : Insert news attrs impossible !') return [df,False] def split_codehab(lst_codehab): lst = lst_codehab.copy() lst = lst.str.split('x|,|&|/',expand=True) \ .stack().str.strip() \ .droplevel(-1).reset_index(drop=False) return lst def format_codehab2insert(lst_codehab): lst = lst_codehab.copy() lst.replace('x|,','&',regex=True, inplace=True) lst.replace('&','&;',regex=True, inplace=True) lst.replace('/','/;',regex=True, inplace=True) lst = lst.str.split(';',expand=True).stack().str.strip().reset_index(-1) lst.columns = ['ordre', 'code_hab'] lst.ordre = lst.ordre + 1 lst.loc[lst.code_hab.str.contains('&'),'sep'] = '&' lst.loc[lst.code_hab.str.contains('/'),'sep'] = '/' lst.code_hab = lst.code_hab.replace('&|/','',regex=True).str.strip() suspect = lst.code_hab.str.split('(',expand=True) if suspect.shape[1] > 1: raise Exception('habitat suspecté en développement') else: suspect.columns = ['code_hab'] del suspect['code_hab'] lst = pd.merge(lst,suspect,right_index=True,left_index=True) return lst def insert_cbPS(df): df = df.copy() df.date = df.date.astype(str) df.code_hab = df.code_hab.replace(['\*'],[''],regex=True).str.strip() ids = select_ID(df[df.columns.drop(['code_hab','auteur'])],sch=pycen.ps.schema,tab='r_site_habitat') same_col = df.columns[df.columns.isin(ids.columns)] df,ids = _altertype(df,ids) df = df.merge(ids, on=[*same_col], how='left') df = df[['id', 'code_hab']].copy() df.set_index('id',inplace=True) # df = df.code_hab.str.split('x|,|&',expand=True) \ # .stack().str.strip() \ # .droplevel(-1).reset_index(drop=False) # df = split_codehab(df.code_hab) # df.columns = ['id_sitehab', 'code_hab'] df = format_codehab2insert(df.code_hab) try: df.to_sql( name = 'r_hab_cb', con = pycen.con, schema = pycen.ps.schema, if_exists = 'append', index = False, method = 'multi' ) print('Insert news codes habitats OK !') except Exception as e: print(e) print('PS : Insert news codes habitats impossible !') def insert_habPS(df): import re c = ['id_site','auteur','date'] cc = df.columns[(df.columns.isin(c)) | (df.columns.str.contains('code_hab|n_hab')) ] df = df[cc].copy() cc = df.columns[(~df.columns.str.contains('hab')) ] df.set_index([*cc], inplace=True) # df.set_index(['id_site','auteur','date'], inplace=True) # for t in df.dtypes[df.dtypes == int].index: # df[t] = df[t].astype(str) df = df.stack().reset_index(-1) df.columns = ['param','value'] df[['param','index','other']] = [re.split('(\d+)',s) for s in [*df.param]] del df['other'] df['index'] = df['index'].astype(int) df['value'] = df['value'].replace(['-'],[None]) df.dropna(subset=['value'], inplace=True) df = df.set_index(['index','param'], append=True).unstack() df = df.droplevel(0,axis=1) df.code_hab.replace(['0'],[None], inplace=True) df.dropna(subset=['code_hab'],inplace=True) df['n_hab'] = df['n_hab'].astype(int) df.reset_index(inplace=True,drop=False) df = df.merge(get_idGeomSite(df.id_site), on=['id_site'], how='left') del df['id_site'] try: df[df.columns.drop(['auteur','code_hab'])].to_sql( name = 'r_site_habitat', con = pycen.con, schema = pycen.ps.schema, if_exists = 'append', index = False, method = 'multi' ) print('Insert news attrs OK !') return [df,True] except Exception as e: print(e) print('PS : Insert news attrs impossible !') return [df,False] def insert_legendCartoPS(df): c = ['id_site','auteur','date','leg_carto'] #,'legende'] cc = df.columns[df.columns.isin(c)] df = df[cc].copy() param = pycen.ps.get_listLegendePS() df['id_param_leg'] = df['leg_carto'].replace([*param.nom_court],[*param.id]) df = df.merge(get_idGeomSite(df.id_site), on=['id_site'], how='left') del df['id_site'] del df['leg_carto'] try: df[df.columns.drop('auteur')].to_sql( name = 'r_site_legcarto', con = pycen.con, schema = pycen.ps.schema, if_exists = 'append', index = False, method = 'multi' ) print('Insert news legendes OK !') return [df,True] except Exception as e: print(e) print('PS : Insert news legendes impossible !') return [df,False] def filter_saisierror(df): import re tmp = df.copy() lst_iderror = [] del tmp['geom'] cc = tmp.columns[(~tmp.columns.str.contains('hab')) ] tmp.set_index([*cc], inplace=True) tmp = tmp.stack().reset_index(-1) tmp.columns = ['param','value'] tmp[['param','index','other']] = [re.split('(\d+)',s) for s in [*tmp.param]] del tmp['other'] tmp['index'] = tmp['index'].astype(int) tmp['value'] = tmp['value'].replace(['-'],[None]) tmp.dropna(subset=['value'], inplace=True) tmp = tmp.set_index(['index','param'], append=True).unstack() tmp = tmp.droplevel(0,axis=1) tmp.reset_index(inplace=True,drop=False) lst_error1 = [] if '_hab' in tmp.columns: tmp.hab_.replace( ['Xerobromenion erecti', 'Mesobromion', 'Carpino betuli prunion spinosae','Carpino-betuli prunion spinosae',], ['Xerobromion erecti', 'Mesobromion erecti', 'Carpino betuli-prunion spinosae','Carpino betuli-prunion spinosae',], inplace=True) for hab in tmp.hab_.sort_values().unique(): print(hab) print(tmp[tmp.hab_ == hab].code_hab.unique()) for cod in tmp[tmp.hab_ == hab].code_hab.unique(): perc = tmp[(tmp.hab_ == hab)&(tmp.code_hab==cod)].code_hab.count() / tmp[tmp.hab_ == hab].code_hab.count() if perc < 0.6: if hab == 'Mesobromion erecti' and cod in ['34.322','34.323']: continue print('\t'+cod + ' : '+ perc.astype(str)) lst_error1 = [*lst_error1,*tmp[(tmp.hab_ == hab)&(tmp.code_hab==cod)].sort_values('id_site').id_site.unique()] df[df.id_site.isin(lst_error1)].to_postgis( name='incoherence_habVScodehab', con=con, schema=schema, if_exists='replace', index=False, geom_col='geom', index_label='id_site', ) cb = pycen.ref_hab().get_CB() tmp.code_hab = tmp.code_hab.replace(['\*'],[''],regex=True).str.strip() tmp.code_hab = tmp.code_hab.replace(['0'],[None]) tmp.dropna(subset=['code_hab'], inplace=True) tmpp = tmp[['id_site','code_hab']].copy() tmpp.set_index('id_site', inplace=True) # if any(tmpp.code_hab.str.contains('x|,|&')): # tmpp = tmpp.code_hab.str.split('x|,|&',expand=True).stack().str.strip().droplevel(-1) tmpp = split_codehab(tmpp.code_hab) tmpp.columns = ['id_site', 'code_hab'] tmpp = tmpp[~tmpp.code_hab.isin(cb.id)] lst_error2 = [*tmpp.sort_values('id_site').id_site.unique()] lst_error2 = [*lst_error2,*tmp[tmp.n_hab.isna()].sort_values('id_site').id_site.unique()] df[df.id_site.isin(lst_error2)].to_postgis( name='PB_codehabCBNA_nonPresent_dans_corineBiotope', con=con, schema=schema, if_exists='append', index=False, geom_col='geom', index_label='id_site', ) lst_iderror = [*lst_error1,*lst_error2] lst_iderrorindex = df[df.id_site.isin(lst_iderror)].index df.drop(lst_iderrorindex,inplace=True) return df.sort_values('id_site') if __name__ == "__main__": tutu = pd.DataFrame() for table in lst_tab: sql = "SELECT * FROM {sch}.{tab}".format(sch=schema,tab=table) df = gpd.read_postgis( sql = sql, con = con ) if 'id' in df.columns and 'n_polygone' in df.columns: df['id'] = df['n_polygone'] del df['n_polygone'] elif 'id' in df.columns and 'polygon' in df.columns: df['id'] = df['polygon'] del df['polygon'] elif 'id' in df.columns and 'ident' in df.columns: df['id'] = df['ident'] del df['ident'] df = normalize_colname(df) if 'remarques' in df.columns and 'legende' not in df.columns: df['legende'] = df.remarques # df.rename(columns={'id':'id_site'}, inplace=True) if table == '"PS_VERCORS_CEN38_2011"': df = df[df.date != '?'].copy() df = format_date(df) # df.legende = format_legende(df.legende) if table == '"PS_CHAMBARAN_CEN38_2013"': df.type_patur = normalize_paturage(df.type_patur) df[['p1','p2']] = df.pratiques.str.split('/|.et.',expand=True) df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'p1'] = \ df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'p1'] + ' ' + \ df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'type_patur'] df.loc[(~df.type_patur.isna()) & df.p2.str.lower()=='pâturage', 'p2'] = \ df.loc[(~df.type_patur.isna()) & df.p2.str.lower()=='pâturage', 'p2'] + ' ' + \ df.loc[(~df.type_patur.isna()) & df.p2.str.lower()=='pâturage', 'type_patur'] df['pratiques'] = df.p1 df.loc[~df.p2.isna(),'pratiques'] = df.p1 + '/' + df.p2 df.drop(columns=['type_patur','p1','p2'], inplace=True) df['table_org'] = table[1:-1] # df['table_org'] = table tutu = pd.concat([tutu,df]) c = ['ident','id_origine','geom','auteur','hab_1','code_hab1','n_hab1', 'hab_2','code_hab2','n_hab2','hab_3','code_hab3','n_hab3', 'hab_5','code_hab5','n_hab5','hab_6','code_hab6','n_hab6', 'hab_7','code_hab7','n_hab7','hab_8','code_hab8','n_hab8', 'hab_9','code_hab9','n_hab9','hab_10','code_hab10','n_hab10', 'hab_11','code_hab11','n_hab11','hab_12','code_hab12','n_hab12', 'hab_13','code_hab13','n_hab13','hab_14','code_hab14','n_hab14', 'hab_4','code_hab4','n_hab4','pratiques','niv_embrous', 'recouvmnt','remarques','phyto','statut','h_remarq','legende', 'leg_carto','date','annee','source','%_embrous','surf_emb', # 'org','dept','num', 'table_org' ] cols = tutu.columns[tutu.columns.isin(c)] df = tutu[cols].copy() df = define_siteName(df) df = df[~df.id_site.isna()].copy() df.sort_index(inplace=True) df['auteur'] = normalize_auteur(df.auteur) # ERROR : Longueur de sortie != df = filter_saisierror(df) # for d in df.dtypes[df.dtypes == object].index: # # df[d].replace([' '], [' '],regex=True,inplace=True) # df[d] = df[d].str.normalize('NFKC') DF = df.copy() # df = define_siteName(df) # tmp = df.copy() print(df) while True: res = input('Voulez-vous insérer le tableau de donnée ? (Y/N)\n') if not res[0].upper() in ['Y','N']: continue else: break if res[0].upper() == 'Y': df_ins, ins = insert_site(df) if ins: insert_authordata(df_ins,id_data='id',tab_data='sites',colname_rsite='id_site', tab_auteur='r_sites_auteur') df_ins, ins = insert_geom(df) if ins: insert_authordata(df_ins,id_data='id_site',tab_data='r_sites_geom',colname_rsite='id_geom_site', tab_auteur='r_geomsites_auteur') df_ins, ins = insert_attrsPS(df) if ins: insert_authordata(df_ins,id_data='id_geom_site',tab_data='r_site_param',colname_rsite='id_siteparam', tab_auteur='r_siteparam_auteur') df_ins, ins = insert_habPS(df) if ins: if df_ins.shape[0] > 4000: df_ins1 = df_ins.iloc[:round(df_ins.shape[0]/2)].copy() df_ins2 = df_ins.iloc[round(df_ins.shape[0]/2):].copy() insert_cbPS(df_ins1) insert_cbPS(df_ins2) else: insert_cbPS(df_ins) insert_authordata( df_ins[df_ins.columns.drop('code_hab')],id_data='id_geom_site',tab_data='r_site_habitat', colname_rsite='id_sitehab', tab_auteur='r_sitehab_auteur') # df_ins, ins = insert_legendCartoPS(df) # if ins: # insert_authordata(df_ins,tab_data='r_site_legcarto',colname_rsite='id_sitecarto', tab_auteur='r_sitecarto_auteur') table = '' df = df2.copy() col_hab = df.columns[df.columns.str.contains('hab')] c = ['ident','id_origine','geom','auteur',*col_hab, # 'hab_1','code_hab1','n_hab1', # 'hab_2','code_hab2','n_hab2','hab_3','code_hab3','n_hab3', # 'hab_4','code_hab4','n_hab4', 'pratiques','niv_embrous', 'recouvmnt','remarques','phyto','statut','h_remarq','legende', 'leg_carto','date','annee','source','%_embrous','surf_emb', 'table_org' ] df.reset_index(drop=False,inplace=True) df = normalize_colname(df) df = format_date(df) # df.legende = format_legende(df.legende) cols = df.columns[df.columns.isin(c)] df = df[cols].copy() df = define_siteName(df) df['auteur'] = normalize_auteur(df.auteur) df.set_index('id_site', inplace=True) df.dropna(axis=1, how='all', inplace=True) col_hab = df.columns[df.columns.str.startswith('code_hab')] rm = pd.DataFrame() for col in col_hab: tmp = df[df[col].str.contains('/|\(',na=False)] if not tmp.empty: rm = pd.concat([rm,tmp]) rm.to_postgis( 'CBNA_codehab_error', con, schema='pelouse_seche', if_exists='replace', index=True, geom_col='geom' ) df.drop(tmp.index,inplace=True) # filter_saisierror(df) # if 'id_site' not in df.columns: # df.rename(columns={ # 'id':'id_site',}, # inplace=True) # df.sort_values('id_site', inplace=True) # df_site = df[['id_site', 'auteur', 'commune','lieu_dit', 'date']].copy() # if 'sources' in df.columns: # df_site = df[['id_site', 'auteur', 'sources', 'commune','lieu_dit', 'date']].copy() # df_autre = df[['id_site', 'h_remarq', 'remarques']].copy() # # sql = ('SELECT t1.nom nom, t1.prenom prenom, t2.nom organisme FROM {sch}.{tab1} t1' # # ' JOIN {sch}.{tab2} t2 ON t1.id_organisme = t2.id').format(sch='personnes', tab1='personne', tab2='organisme') # # df_pers = pd.read_sql( # # sql = sql, # # con = con_ps # # ) # df_pers = get_pers() # tmp = df_site[['auteur']].copy() # if 'sources' in df_site.columns: # tmp = df_site[['auteur', 'sources']].copy() # tmp[['prenom', 'nom']] = df_site['auteur'].str.split(' ', expand=True) # tmp.drop_duplicates(inplace=True) # pers_add = tmp.loc[~tmp.nom.isin(df_pers.nom),['prenom', 'nom']] # if 'sources' in df_site.columns: # pers_add = tmp.loc[~tmp.nom.isin(df_pers.nom),['prenom', 'nom', 'sources']] # if not pers_add.empty: # pers_add.replace( # ['CEN Isere AVENIR'], # [1], # inplace=True # ) # pers_add.rename(columns={'sources':'id_organisme'}, inplace=True) # pers_add['id'] = max(df_pers.index)+1 # pers_add.set_index('id', inplace=True) # pers_add.to_sql( # name='personne', # con=con_ps, # schema='personnes', # if_exists='append', # index=True, # index_label='id' # ) # df_pers = get_pers() # # SITE # df_site['nom'] = 'Inconnu' # df_site['date_deb'] = pd.to_datetime(df_site.date) # df_site['id_type_milieu'] = 2 # df_site['id_type_site'] = 0 # df_site['dept'] = 38 # df_site['org'] = df_site.id_site.str[:2] # df_site['num'] = df_site.id_site.str[2:] # df_site['id_auteur'] = df_site['auteur'].copy() # df_site['id'] = df_site['id_site'].copy() # df_site.id_auteur.replace( # df_pers.auteur.tolist(), # df_pers.index.tolist(), # inplace=True) # df_site.drop( # columns=['id_site','auteur','sources','date','commune','lieu_dit'], # inplace=True) # if not isin_bdd: # df_site.to_sql( # name='sites', # con=con_ps, # schema='sites', # if_exists='append', # index=False, # # index_label='id' # ) # # df_geom # df_geom = df[['id_site', 'auteur', 'geom', 'date']].copy() # df_geom['date'] = pd.to_datetime(df_geom.date) # df_geom['id_auteur'] = df_geom['auteur'].copy() # df_geom.id_auteur.replace( # df_pers.auteur.tolist(), # df_pers.index.tolist(), # inplace=True) # df_geom.drop( # columns=['auteur'], # inplace=True) # df_geom.set_geometry('geom', crs='EPSG:2154', inplace=True) # df_geom.reset_index(drop=True, inplace=True) # if not isin_bdd: # df_geom.to_postgis( # name='r_sites_geom', # con=con_ps, # schema='sites', # if_exists='append', # index=False, # geom_col='geom' # # index_label='id' # ) # # HABITAT # df_hab = df[['id_site', 'code_hab1','n_hab1','code_hab2','n_hab2','code_hab3','n_hab3','code_hab4','n_hab4']].copy() # df_hab1 = df[['id_site', 'code_hab1','n_hab1']].copy() # df_hab2 = df[['id_site', 'code_hab2','n_hab2']].copy() # df_hab3 = df[['id_site', 'code_hab3','n_hab3']].copy() # df_hab4 = df[['id_site', 'code_hab4','n_hab4']].copy() # df_hab1.columns = ['id_site', 'code_hab', 'n_hab'] # df_hab2.columns = ['id_site', 'code_hab', 'n_hab'] # df_hab3.columns = ['id_site', 'code_hab', 'n_hab'] # df_hab4.columns = ['id_site', 'code_hab', 'n_hab'] # df_hab1['index'] = 1 # df_hab2['index'] = 2 # df_hab3['index'] = 3 # df_hab4['index'] = 4 # df_habF = pd.concat([df_hab1,df_hab2,df_hab3,df_hab4]) # df_habF.reset_index(drop=True, inplace=True) # idgeom = get_idgeomsite(tuple(df_hab.id_site)) \ # .rename(columns={'id':'id_geom_site'}) # idgeom.drop_duplicates(['id','id_site'],keep='last',inplace=True) # df_habF['id_geom_site'] = df_habF['id_site'].copy() # df_habF.id_geom_site.replace( # idgeom.id_site.tolist(), # idgeom.id.tolist(), # inplace=True) # df_habF.replace(['-'],[None], inplace=True) # df_habF.dropna(subset=['code_hab'],inplace=True) # df_cb = pd.read_sql_table( # table_name='corine_biotope', # schema='ref_habitats', # con = con_ps) # df_rSitHab = df_habF \ # .drop(columns=['id_site', 'code_hab']) # if not isin_bdd: # df_rSitHab.to_sql( # name='r_site_habitat', # con=con_ps, # schema='ps', # if_exists='append', # index=False, # method='multi', # # index_label='id' # ) # sql = ('SELECT * FROM {sch}.{tab} ' # 'WHERE id_geom_site IN {lst} ORDER BY id').format(sch='ps', tab='r_site_habitat', lst=tuple(df_rSitHab.id_geom_site)) # index_return = pd.read_sql( # sql = sql, # con = con_ps) \ # .rename(columns={'id':'id_site_hab'}) # df_habF = pd.merge(df_habF, index_return, on=['id_geom_site','n_hab','index'], how='left') # df_rHabCb = df_habF[['id_site_hab', 'code_hab']] # if not isin_bdd: # df_rHabCb.to_sql( # name='r_hab_cb', # con=con_ps, # schema='ps', # if_exists='append', # index=False, # method='multi', # # index_label='id' # ) # # parametre # columns_select = ['id_site','statut','pratiques','niv_embro','recouvmnt'] # df_Sparam = df[columns_select].copy() # if 's_p_brouss' in df.columns: # df_Sparam = df[columns_select + ['s_p_brouss','surf_emb']].copy() # df_Sparam = pd.merge(df_Sparam, idgeom[['id_geom_site','id_site']],how='left', on='id_site') # df_prm = get_param() # df_stt = df_Sparam[['id_site','id_geom_site','statut']].copy() # df_prt = df_Sparam[['id_site','id_geom_site','pratiques']].copy() # df_rcv = df_Sparam[['id_site','id_geom_site','recouvmnt']].copy() # df_brs = df_Sparam[['id_site','id_geom_site','niv_embro']].copy() # if 's_p_brouss' in df_Sparam.columns: # df_brs = df_Sparam[['id_site','id_geom_site','niv_embro','s_p_brouss']].copy() # if not df_brs.empty: # param = df_prm[df_prm.type_param=='embroussaillement'].copy() # # if not (df_brs.s_p_brouss.unique() < 3).all(): # # # Conversion des pourcentages en identifiant de fourchette # # param[['min', 'max']] = param.desc.str.split(' à ',expand=True) # # param.loc[param['min'].str.contains('<'),'max'] = param.loc[param['min'].str.contains('<'),'min'] # # param.loc[param['min'].str.contains('<'),'min'] = '0' # # param.loc[param['min'].str.contains('>'),'max'] = '100' # # param[['min', 'max']] = param[['min', 'max']].replace(r'[\%\<\>]','',regex=True).astype(int) # # df_brs['tx_brouss'] = df_brs['s_p_brouss'].copy() # # for r,row in param.iterrows(): # # df_brs.loc[(df_brs.tx_brouss > row['min']) & (df_brs.tx_brouss <= row['max']), 's_p_brouss'] = row.param # df_brs.niv_embro = df_brs.niv_embro.replace(list(param.param.astype(int)), param.id.tolist()) # df_brs.rename(columns={'niv_embro':'id_param','s_p_brouss':'taux'},inplace=True) # if not df_rcv.empty: # param = df_prm[df_prm.type_param=='recouvrement'] # df_rcv.recouvmnt = df_rcv.recouvmnt.replace(list(param.param.astype(int)), param.id.tolist()) # df_rcv.rename(columns={'recouvmnt':'id_param'},inplace=True) # if not df_prt.empty: # param = df_prm[df_prm.type_param=='pratique'] # df_prt.pratiques = df_prt.pratiques.str.lower() # df_prt.pratiques.replace(['paturage'],['pâturage'], regex=True, inplace=True) # # dissociation des multi-pratiques # d1 = df_prt.loc[df_prt.pratiques.str.contains('et'),].copy() # d1[['p1', 'p2']] = d1['pratiques'].str.split(' et ', expand=True) # d2 = df_prt.loc[df_prt.pratiques.str.contains('ou'),].copy() # d2[['p1', 'p2']] = d2['pratiques'].str.split(' ou ', expand=True) # d = pd.concat([d1,d2]) # # Uniformisation des champs p1 et p2 avant ajout de "?" # d.loc[(d.pratiques.str.contains('\?')),['p1','p2']] = d.loc[(d.pratiques.str.contains('\?')),['p1','p2']].replace(' \?', '', regex=True) # # Ajout de "?" # d.loc[(d.pratiques.str.contains('\?|ou')),['p1','p2']] = d.loc[(d.pratiques.str.contains('\?|ou')),['p1','p2']] + ' ?' # dd = pd.concat([ # d[['id_site','id_geom_site','p1']].rename(columns={'p1':'pratiques'}), # d[['id_site','id_geom_site','p2']].rename(columns={'p2':'pratiques'}) # ]).sort_index() # df_prt= pd.concat([ # df_prt.drop(index=dd.index.unique(),axis=0), # dd # ]) # df_prt.loc[ # df_prt.pratiques.str.contains('caprin|bovin|ovin|equin') & # (~df_prt.pratiques.str.contains('pâturage')), ['pratiques'] # ] = 'pâturage ' + df_prt.loc[ # df_prt.pratiques.str.contains('caprin|bovin|ovin|equin') & # (~df_prt.pratiques.str.contains('pâturage')), ['pratiques'] ] # df_prt.pratiques = df_prt.pratiques.replace(list(param.param), param.id.tolist()) # df_prt.rename(columns={'pratiques':'id_param'},inplace=True) # if not df_stt.empty: # param = df_prm[df_prm.type_param=='statut'] # df_stt.statut = df_stt.statut.replace(list(param.param), param.id.tolist()) # df_stt.rename(columns={'statut':'id_param'},inplace=True) # df_SparamF = pd.concat([df_stt,df_prt,df_rcv,df_brs]) # if not isin_bdd: # df_SparamF.drop(columns=['id_site']).to_sql( # name='r_site_param', # con=con_ps, # schema='ps', # if_exists='append', # index=False, # method='multi', # # index_label='id' # ) # # Legende Carto ! # df_leg = pd.DataFrame() # if 'legende' in df.columns and 'leg_carto' in df.columns: # df_leg = df[['id_site', 'leg_carto', 'legende']].copy() # df_leg.legende = df_leg.legende.replace(r'[\d+\.\|\(\)]','',regex=True).str.strip() # df_leg.legende = df_leg.legende.replace(' ',' ',regex=True) # df_leg[~df_leg.leg_carto.eq(df_leg.legende)] # elif 'leg_carto' in df.columns and 'legende' not in df.columns : # df_leg = df[['id_site', 'leg_carto']].copy() # elif 'legende' in df.columns and 'legende' not in df.columns : # df_leg = df[['id_site', 'legende']].copy() # if 'legende' in df.columns: # df_leg.legende = df_leg.legende.replace(r'[\d+\.\|\(\)]','',regex=True).str.strip() # df_leg.legende = df_leg.legende.replace(' ',' ',regex=True) # df_leg = pd.merge(df_leg, idgeom[['id_geom_site','id_site']],how='left', on='id_site') # # ... # # ... # # ... # if not isin_bdd: # df_leg.drop(columns=['id_site']).to_sql( # name='r_site_param', # con=con_ps, # schema='ps', # if_exists='append', # index=False, # method='multi', # # index_label='id' # ) # lst_tab = ['"PS_4MONTAGNESNE_CEN38_2014"','"PS_CHAMBARAN_CEN38_2013"','"PS_CHARTREUSE_CEN38_2010"','"PS_DRAC_CEN38_2014"', # '"PS_BELLEDONNE_CEN38_2014"','"PS_BIEVRE_CEN38_2014"','"PS_BIEVRE_CEN38_2016"','"PS_SUD-GRENOBLOIS_CEN38_2009"', # '"PS_VERCORS_CEN38_2011"','"cr_PS_CBNA_habitats_aggreg_06_2020"','"c_ps_inventaire_bievre_valloire"' # ] # df = pd.DataFrame() # for tab in lst_tab: # sql = "SELECT * FROM {sch}.{tab}".format(sch=schema,tab=tab) # tmp = gpd.read_postgis( # sql = sql, # con = con # ) # df = pd.concat([df,tmp[['pratiques']] ]) # df.pratiques = df.pratiques.replace(['paturage', 'Paturage', 'Paturâge'],['pâturage', 'Pâturage', 'Pâturage'], regex=True) # df.pratiques = df.pratiques.str[0].str.upper() + df.pratiques.str[1:] # df.drop_duplicates(inplace=True) # df.dropna(inplace=True) # df.to_excel('~/Documents/tmp/pratiques_pelouses_seches.xlsx', index=False) # ###################################### # ###### UPDATE EMBROUSSAILLEMENT ###### # ###################################### # from pycen import update_to_sql # brs = df_brs.copy() # sql = ('SELECT id,id_geom_site FROM {sch}.{tab} ' # 'WHERE id_geom_site IN {gsit} ' # 'AND id_param IN {param};').format( # sch='ps', tab='r_site_param', # gsit=tuple(df_brs.id_geom_site.unique()), # param=tuple(df_brs.id_param.unique()) ) # data = pd.read_sql(sql=sql, con=con_ps) # brs = pd.merge(brs,data, on='id_geom_site').drop(columns=['id_site']) # update_to_sql(brs,con_ps,table_name='r_site_param',schema_name='ps',key_name=['id','id_geom_site']) # # sql = 'SELECT *, ST_ClusterDBSCAN(geom::geometry, eps := 1000, minpoints := 1) over () AS cluster_id FROM pelouse_seche.c_ps_inventaire_agreg' # # df = gpd.read_postgis(sql,con) # # df.sort_values('cluster_id', inplace=True) # # # df2 = gpd.GeoDataFrame({'geom':[]},geometry='geom', crs='EPSG:2154') # # df2 = gpd.GeoSeries() # # for i in df.cluster_id.unique().tolist(): # # print(i) # # tmp = gpd.GeoSeries(df.loc[df.cluster_id == i, 'geom'].unary_union) # # df2 = df2.append(tmp) # # df2 = gpd.GeoDataFrame({'geom':df2},geometry='geom', crs='EPSG:2154') # # df3 = df2.copy() # # df3 = df3.buffer(500) # # df3.to_file('/home/colas/Documents/tmp/cluster_500.shp') # # df2.to_file('/home/colas/Documents/tmp/cluster_withoutbuffer.shp')