#!/usr/bin/env python3 # -*- coding: UTF-8 -*- #Nom : : recup_ps.py #Description : #Copyright : 2021, CEN38 #Auteur : Colas Geier #Version : 1.0 # import re from contextlib import AsyncExitStack from os import register_at_fork import types from numpy.lib.shape_base import expand_dims import pandas as pd import geopandas as gpd from pandas.io.pytables import incompatibility_doc from shapely import wkb import numpy as np from sqlalchemy.sql.expression import column # from pycen import bdd from sys import exit import pycen # from sqlalchemy import create_engine # from sqlalchemy.engine import URL # from geoalchemy2 import Geometry # Parametres bdd IN # user = 'cen_admin' # pwd = '#CEN38@venir' # adr = '192.168.0.189' # base = 'bd-cen-38' # schema = 'pelouse_seche' # # schema = 'habitat' # url = URL.create('postgresql+psycopg2', # username=user, # password=pwd, # host=adr, # database=base, # ) # con = create_engine(url) con = pycen.con_bdcen con_ps = pycen.con def get_pers(): sql = ('SELECT t1.id id, t1.nom nom, t1.prenom prenom, t2.nom organisme FROM {sch}.{tab1} t1' ' JOIN {sch}.{tab2} t2 ON t1.id_organisme = t2.id ORDER BY id').format(sch='personnes', tab1='personne', tab2='organisme') df_pers = pd.read_sql( sql = sql, con = pycen.con) df_pers.replace([None],'',inplace=True) df_pers['auteur'] = df_pers.prenom + ' ' + df_pers.nom df_pers['auteur'] = df_pers['auteur'].str.strip() df_pers['organisme'] = df_pers['organisme'].str.strip() return pycen.pers.get_auteur() # def get_idgeomsite(lst_site): # ''' # :lst_site: tuple. # ''' # sql = ('SELECT id, id_site, date FROM {sch}.{tab} t0 ' # 'JOIN {sch}.type_milieu t1 WITH t0.id_type_milieu = t1.id ' # 'WHERE t1.nom = "Pelouses sèches" AND t0.id_site IN {lst} ORDER BY id').format(sch='sites', tab='r_sites_geom', lst=lst_site) # df = pd.read_sql( # sql = sql, # con = pycen.con) # return df def format_legende(leg): dic = { } df = pd.DataFrame({'leg':leg}) df.replace( ['/',' x ','X',' x','fasciès','faciés', ' ','erigé','érigéé','Troêne','Troëne','brachypode','dominée','dominé','érigé_et_brachypode','cyperacées'], ['|','|','|','|','faciès','faciès',' ','érigé','érigé','Troène','Troène','Brachypode','dominé','dominée','érigé et brachypode','Cyperacées'], regex=True, inplace=True) return df def ident_format_date(col): return # "%d/%m/%Y" def format_date(df): df = df.copy() check_date = [x.startswith('date') or x.endswith('date') for x in df.columns] cols_date = df.columns[check_date] if 'date' not in df.columns and not cols_date.empty: # df[cols_date].replace({'/':None},inplace=True) for col in cols_date: df[col].replace({'/':None},inplace=True) df[col] = pd.to_datetime(df[col]) # df.loc[df[col].notna(),col] = pd.to_datetime(df[df[col].notna()][col]) df['date'] = df[cols_date].max(axis=1) if table in ['"PS_4MONTAGNESNE_CEN38_2014"','"PS_BIEVRE_CEN38_2014"', '"PS_CHAMBARAN_CEN38_2013"']: df.date = pd.to_datetime(df.date,format='%d/%m/%Y') if table == '"PS_BELLEDONNE_CEN38_2014"': df.date = pd.to_datetime(df.date,format='%d%b%Y') if table == '"PS_BIEVRE_CEN38_2016"': df.date = df.date.astype(str).replace('\.','',regex=True) df.date = df.date.str.rsplit(' ',1).str[0]\ .replace(['avr','mai','juin','juil'],['apr','may','jun','jul'],regex=True) df.date = pd.to_datetime(df.date,format='%d %b %Y') if table == '"PS_SUD-GRENOBLOIS_CEN38_2009"': # df.loc[df.date.str.len() < 6,'date'] = df.loc[df.date.str.len() < 6,'date'].astype(int) df.loc[df.date.str.len() < 6,'date'] = '01/06/' + df.loc[df.date.str.len() < 6,'date'].astype(int).max().astype(str) df.loc[df.date.str.len() > 6,'date'] = pd.to_datetime(df.loc[df.date.str.len() > 6,'date']) if 'annee' in df.columns and 'date' not in df.columns: # Si pas de date mais année précisée df['annee'] = df['annee'].astype(int) df['date'] = df['annee'].astype(str) + '-06-01' df.date = pd.to_datetime(df.date,format='%Y-%m-%d') elif any(df.date.astype(str).str.len() <= 4): # Si dates non-homogènes, cohexistance date&année d = df.loc[df.date.astype(str).str.len() <= 4,'date'].unique() if None in d: df.loc[df.date.astype(str).str.len() <= 4,'date'] = df.loc[df.date.astype(str).str.len() <= 4,'annee'].astype(str) + '-06-01' else: df.loc[df.date.astype(str).str.len() <= 4,'date'] = df.loc[df.date.astype(str).str.len() <= 4,'date'].astype(str) + '-06-01' df.date = pd.to_datetime(df.date,format='%Y-%m-%d') else: df.date = pd.to_datetime(df.date) return df def normalize_auteur(lst_auteur): ''' lst_auteur : pd.Series ''' dic = { 'Lise Duconte':'DUCONTE Lise', 'COSQUER Mélanie (AVENIR)':'COSQUER Mélanie', 'CHABERT Chloé (AVENIR)':'CHABERT Chloé', 'CHABERT Chloé (AVNEIR)':'CHABERT Chloé', 'PACHE Gilles (CBNA)':'PACHE Gilles', 'Gilles PACHE':'PACHE Gilles', 'JAMEAU Laura (AVENIR)':'JAMEAU Laura', 'MARCIAU Roger (AVENIR)':'MARCIAU Roger', 'Roger MARCIAU':'MARCIAU Roger', 'Aude Massa':'MASSA Aude', 'Aude MASSA':'MASSA Aude', 'Bilkisse ABOUDOU AVENIR':'ABOUDOU Bilkisse', 'Romain BARTHELD':'BARTHELD Romain', 'VILLARET J.C.':'VILLARET Jean-charles', 'Jean-Charles VILLARET':'VILLARET Jean-charles', 'Fabien ANTHELME':'ANTHELME Fabien', 'Jean-Christophe GATTUS':'GATTUS Jean-christophe', 'Alexis MIKOLAJCZAK':'MIKOLAJCZAK Alexis', 'Jonathan MALINEAU':'MALINEAU Jonathan', 'Aurélien DAUTREY':'DAUTREY Aurélien', 'Etienne MARY':'MARY Etienne', 'Alix GUEDOU': 'GUEDOU Alix', 'BEGUIN Lucile': 'BÉGUIN Lucile', } lst_aut = lst_auteur.copy() lst_aut.replace(' & | - ',',',regex=True,inplace=True) if any(lst_aut.str.contains(',')): lst_aut = lst_aut.str.split(',',expand=True).stack().str.strip().droplevel(-1) lst_aut.replace(dic,inplace=True) df_aut = lst_aut.str.rsplit(' ',n=1,expand=True) if df_aut.shape[1]==2: df_aut[1] = df_aut[1].str[0].str.upper() + df_aut[1].str[1:].str.lower() df_aut[0] = df_aut[0].str.upper() df_aut[2] = df_aut[0] df_aut.loc[~df_aut[1].isna(),2] = df_aut.loc[~df_aut[1].isna(),0] + ' ' + df_aut.loc[~df_aut[1].isna(),1] lst_aut = df_aut[2].copy() else : lst_aut = df_aut[0].copy() aut = lst_aut.unique() res = [x for x in aut if x not in [*get_pers().nom_prenom]] if res: print('Les auteurs suivant ne figurent pas dans la BDD contact : %s'%res) new = new_auteur(res) lst_aut.replace([*new['aut'].keys()],[*new['aut'].values()], inplace=True) lst_aut = lst_aut.groupby(axis=0,level=0).apply(lambda x: "%s" % ','.join(x)) return lst_aut def new_auteur(new): ''' new : list ''' df = pd.DataFrame({'auteur': new}) dic = {'aut':{}, 'org':[]} # Pour chaque auteur for pers in df.auteur: print(pers) while True: newaut = input("Insérer un nouvel auteur ? (Y/N)\n") if not newaut[0].upper() in ['Y','N']: continue else: break # Si : pas d'insertion d'un nouvel auteur if newaut[0].upper() == 'N': while True: rep = input("Remplacer l'auteur par un auteur existant ? (Y/N)\n") if not rep[0].upper() in ['Y','N']: continue else: break if rep[0].upper() == 'Y': while True: org = pycen.pers.get_organisme().sort_index().to_dict()['nom'] reporg = int(input("Sélectionner le numéro de l'organisme :\n %s \n"%org)) if not reporg in [*org.keys()]: continue else: break while True: aut = get_pers().sort_values('id') aut = aut[aut.id_organisme == reporg] aut = aut.nom_prenom.to_dict() repaut = int(input("Sélectionner le numéro de l'auteur :\n %s \n"%aut)) if not repaut in [*aut.keys()]: continue else: break dic['aut'] = {**dic['aut'],pers:aut[repaut]} dic['org'] = [*dic['org'],org[reporg]] # Autrement : insertion d'un nouvel auteur else: while True: print("Auteur : %s"%pers) rep = input("L'auteur doit être sous le format NOM Prénom.\nRenommer l'auteur ? (Y/N)\n") if not rep[0].upper() in ['Y','N']: continue elif rep[0].upper() == 'Y': repaut = input("Saisisser un nouveau nom :\n") dic['aut'] = {**dic['aut'],pers:repaut} pers = repaut break else: dic['aut'] = {**dic['aut'],pers:pers} break while True: org = pycen.pers.get_organisme().sort_index().to_dict()['nom'] org = {-1: 'Autre', **org} reporg = int(input("Sélectionner le numéro de l'organisme :\n %s \n"%org)) if not reporg in [*org.keys()]: continue elif reporg == -1: reporg = input("Saisisser un nouvel organisme :\n") dic['org'] = [*dic['org'],reporg] insert_newOrganisme(reporg) org = pycen.pers.get_organisme() idorg = org[org.nom==reporg].index[0] insert_newAuteur(pers,idorg) break else: dic['org'] = [*dic['org'],org[reporg]] insert_newAuteur(pers,reporg) break return dic def insert_newAuteur(aut,idorg): aut = pd.Series(aut) idorg = pd.Series(idorg) df = pd.DataFrame({'aut':aut,'id_organisme':idorg}) # print(df) tmp = df.aut.str.rsplit(' ',1,expand=True) if tmp.shape[1] == 1: df[['nom']] = tmp elif tmp.shape[1] == 2: df[['nom', 'prenom']] = tmp else: raise ValueError('NOM Prénom est de longueur inconnu :\ntmp') del df['aut'] try: df.to_sql(name='personne',con=pycen.con,schema=pycen.pers.schema, if_exists='append', index=False) print("Nouvel auteur inséré : '%s'"%aut) except: print("ERROR : impossible d'insérer le nouvel auteur :\n '%s'"%aut) def insert_newOrganisme(nom): nom = pd.Series(nom) df = pd.DataFrame({'nom':nom}) try: df.to_sql(name='organisme',con=pycen.con,schema=pycen.pers.schema, if_exists='append', index=False) print("Nouvel organisme inséré : '%s'"%nom) except: print("ERROR : impossible d'insérer le nouvel organisme :\n '%s'"%nom) def normalize_colname(df): df.columns = df.columns.str.lower() df.columns = df.columns.str.replace('__','_',regex=True) if df.columns.str.contains('cb_hab').any(): df.columns = df.columns.str.replace('cb_hab','code_hab',regex=True) if df.columns.str.contains('cod_hab').any(): df.columns = df.columns.str.replace('cod_hab','code_hab',regex=True) return df.rename(columns={ 'n_polygone':'ident', # 'id_site':'ident', 'code-ug':'ident', 'ident_':'ident', 'id':'ident', 'idfinal':'id_origine', 'site_code':'id_site', 'date_':'date', 'obs':'auteur', 'structur':'structure', 'pratiq_' :'pratiques', 'pratique' :'pratiques', 's_p_brous' :'%_embrous', 's_p_brouss':'%_embrous', 'taux_embrou':'%_embrous', 'tx_embrous':'%_embrous', 'niv__embro':'niv_embrous', 'niv_embro' :'niv_embrous', 'niv_embrou' :'niv_embrous', 'niv_emb' :'niv_embrous', 'embroussaillement' :'niv_embrous', 'embrouss' :'niv_embrous', 'taux_recvmt':'%_recouvmnt', 'recouvrement':'recouvmnt', 'recouvreme':'recouvmnt', 'recouvr':'recouvmnt', 'recouv' :'recouvmnt', 'recouvr_' :'recouvmnt', 'remarque' :'remarques', 'remarq_' :'remarques', 'rmq_intere' :'remarques', 'legendes' :'legende', 'legend' :'legende', 'sources' :'source', 'surf' :'surface', 'geometry' :'geom', }) def get_id_auteur(lst_author): sch = 'personnes' tabP = 'personne' tabO = 'organisme' # sql = """ # WITH str as # (SELECT id FROM {sch}.{tabO} # WHERE lower(nom) = lower(%(structure)s) OR lower(abbrev) = lower(%(structure)s)), # tmp as # (SELECT id, TRIM(CONCAT(nom,' ',prenom)) as auteur, id_organisme FROM {sch}.{tabP}) # SELECT tmp.* FROM tmp,str WHERE auteur IN %(lst_author)s # AND id_organisme IN (str.id) """.format(sch=sch,tabP=tabP,tabO=tabO) sql = """ WITH tmp AS ( SELECT pers.id, TRIM(CONCAT(pers.nom,' ',pers.prenom)) AS auteur, pers.id_organisme, orga.nom organisme, orga.abbrev orga_abbrev FROM {sch}.{tab_auth} pers JOIN {sch}.{tab_orga} orga ON pers.id_organisme = orga.id ) SELECT * FROM tmp WHERE (auteur,organisme) IN %(lst_author)s OR (auteur,orga_abbrev) IN %(lst_author)s """.format(sch=sch, tab_auth=tabP, tab_orga=tabO) X = lst_author.to_dict('index') XX = [tuple(X[x].values()) for x in X] df = pd.read_sql( sql = sql, con = pycen.con, params = { 'lst_author': tuple(XX) }) return df def existing_ps(df): gdf = pycen.ps.get_sitesGeom() gdf['area'] = gdf.area # ovl = gpd.overlay(df,gdf) ovl = gpd.overlay(df,gdf,how='intersection') #'intersection', 'union', 'identity', 'symmetric_difference' or 'difference' ovl['ovl_area'] = ovl.area ovl['ovl%'] = ovl['ovl_area'] * 100 / ovl['area'] return df def define_siteOrg(df): gdf = pycen.get_districtNat()[['abrev','geom']] tmp = df[['geom']].copy().set_geometry('geom') tmp.reset_index(0, inplace=True) tmp = gpd.overlay(tmp,gdf, how='intersection') tmp['area'] = tmp.area tmp.set_index('area',inplace=True) tmp.sort_index(inplace=True) tmp.drop_duplicates(subset=['index'],keep='last',inplace=True) tmp.reset_index(drop=True,inplace=True) del tmp['geometry'] df = df.merge(tmp,how='left',left_index=True,right_on='index') del df['index'] df.rename(columns={'abrev':'org'}, inplace=True) return df # tmp[tmp.id_site==334] # 'touches', None, 'contains', 'overlaps', 'contains_properly', 'within', 'intersects', 'crosses', 'covers', 'covered_by' def ident_newsite(df,rcvmt=10): ''' Identification des nouvelles géometries ou des mises à jours des géométries par recouvrement. MAJ (1:1) / Remplacement (1:n) / Nouveau (1:0) ''' maj = [] if 'id_site' in df.columns: maj = df[~df.id_site.isna()] df = df[df.id_site.isna()] if df.empty: return df,df,maj # Récupération de la couche pelouses_seches en bdd. sql = 'SELECT site_code,geom FROM ps.v_pelouseseches' v_ps = gpd.read_postgis(sql, pycen.con) # Identification des superpositions new_site/old_site df_inters = gpd.sjoin(df,v_ps, how='left') del df_inters['index_right'] # news1 = df_inters[df_inters.site_code.isna()].id_origine lst_old_site = df_inters.site_code.unique() v_ps = v_ps[v_ps.site_code.isin(lst_old_site)].copy() v_ps.loc[:,'surf'] = v_ps.area # Re-définition des columns géometriques # if not isinstance(df,gpd.GeoDataFrame): # df.set_geometry('geom', inplace=True, crs=2154) # if not isinstance(v_ps,gpd.GeoDataFrame): # v_ps.set_geometry('geom', inplace=True, crs=2154) # Calcul du recouvrement tmp = gpd.overlay(v_ps,df[['id_origine','geom']],how='intersection',make_valid=True,keep_geom_type=False) if isinstance(tmp, gpd.GeoDataFrame) and tmp.geometry.name !='geom': tmp.rename_geometry('geom',inplace=True) tmp['perc_rcvmt'] = 100*tmp.area/tmp['surf'] # Identification des sites : MAJ (1:1) / Remplacement (1:n) / Nouveau (1:0) # Limite recouvrement = 10% tmpp = tmp[tmp.perc_rcvmt > rcvmt] site_maj = tmpp[ (~tmpp.id_origine.duplicated(keep=False)) & (~tmpp.site_code.duplicated(keep=False)) ].id_origine # site_repl = tmp[(tmp.perc_rcvmt > 10)&(tmp.id_origine.duplicated(keep=False))].id_origine.unique() # news2 = tmp[(tmp.perc_rcvmt <= 10)&(~tmp.id_origine.isin([*site_maj,*site_repl]))].id_origine # Isolement des sites à mettre à jour # maj2 = gpd.sjoin( # df[df.id_origine.isin(site_maj)], # v_ps.loc[v_ps.site_code.isin(tmpp.site_code),['site_code','geom']]) \ # .drop(columns=['index_right']) maj2 = df[df.id_origine.isin(site_maj)].merge( tmpp[['site_code','id_origine']], on=['id_origine']) maj2['id_site'] = maj2['site_code'] del maj2['site_code'] if maj : df_maj = pd.concat([maj,maj2]) else : df_maj = maj2 # Isolement des nouveaux sites orig_maj_all = df_maj.id_origine id_maj_all = df_maj.id_site df_new = df[~df.id_origine.isin(orig_maj_all)] # Isolement des correspondance new_site / old_site_toClose df_cor = tmpp.loc[ (~tmpp.site_code.isin(id_maj_all))&(~tmpp.id_origine.isin(orig_maj_all)), ['site_code','id_origine'] ] df_cor.rename(columns={'site_code':'id_site_old'}, inplace=True) return df_new, df_cor, df_maj def define_siteName(df,rcvmt=10): ''' Return df_new, df_replace, df_maj ''' dfnew, dfcor, df_maj = ident_newsite(df,rcvmt) df2 = df[~df.index.isin(df_maj.index)].copy() if not df2.empty: df2 = define_siteOrg(dfnew.copy()) df2['dept'] = '38' # df2['num'] = None if 'org' in df2.columns: for org in df2.org.unique(): tmp = pd.concat([ pycen.sites.get_sitesInfos(milieu=pycen.ps.milieu), pycen.sites.get_sitesInfos(milieu='Trame thermophiles') ]) # tmp = pycen.sites.get_sitesInfos(milieu=pycen.zh().typ_milieux.nom[0]) tmp = tmp[tmp.org == org] if not tmp.empty: num = tmp.num.astype(int).max() seq = range(num+1, num + df2.loc[df2.org==org].shape[0]+1) df2.loc[df2.org==org,['num']] = list(seq) else: seq = range(1, df2.loc[df2.org==org].shape[0]+1) df2.loc[df2.org==org,['num']] = list(seq) else: tmp = pycen.sites.get_sitesInfos(milieu=pycen.ps.milieu) df2.loc[~df2.num.isna(),'num'] = df2[~df2.num.isna()].num.astype(int).astype(str) df2.loc[df2.num.str.len() == 1,'num'] = '000'+ df2.loc[df2.num.str.len() == 1,'num'] df2.loc[df2.num.str.len() == 2,'num'] = '00' + df2.loc[df2.num.str.len() == 2,'num'] df2.loc[df2.num.str.len() == 3,'num'] = '0' + df2.loc[df2.num.str.len() == 3,'num'] df2['id_site'] = df2['dept'] + df2['org'] + df2['num'] # Jointure id_site_new / id_site_old df_new = df2[~df2.id_origine.isin(dfcor.id_origine)] df_rep = df2[df2.id_origine.isin(dfcor.id_origine)] df_cor = pd.merge(dfcor,df2[['id_origine','id_site']], on='id_origine') df_cor.drop(columns=['id_origine'], inplace=True) df_cor.rename(columns={'id_site':'id_site_new'}, inplace=True) df_replace = { 'df' : df_rep, 'cor': df_cor } else: df_new = dfnew df_replace = {'df' : dfcor} return df_new, df_replace, df_maj def normalize_formatIdsite(id_site): import re df = pd.DataFrame({'id_site':id_site}) df['dept'] = '38' df[['org','num','other']] = [re.split('(\d+)',s) for s in [*df.id_site]] del df['other'] df.loc[df.num.str.len() == 1,'num'] = '000'+ df.loc[df.num.str.len() == 1,'num'] df.loc[df.num.str.len() == 2,'num'] = '00' + df.loc[df.num.str.len() == 2,'num'] df.loc[df.num.str.len() == 3,'num'] = '0' + df.loc[df.num.str.len() == 3,'num'] df['id_site'] = df['dept'] + df['org'] + df['num'] return df.id_site def normalize_paturage(pat): return pat.str.lower().replace( ['paturage','ânes','vaches','moutons','cheval','chevaux','poneys','sangliers','chèvres'], ['pâturage','asin', 'bovin','ovin','équin','équin','équin',None,'caprin'],regex=True) def normalize_pratiques(prat): dic = { 'pâturage equin':'pâturage équin', 'pâturage bovin':'pâturage bovin', 'paturâge cervidé':'pâturage cervidé', 'paturâge anin':'pâturage asin', 'broyer': 'broyage', 'truffier': 'broyage', 'plantation': 'broyage', 'culture' : 'labourage', 'abandon recent': 'abandon', 'abandon verger': 'abandon', 'remblais et feu' : 'incertain', 'remblais, feu' : 'incertain', 'caprin': 'pâturage caprin', 'ovin': 'pâturage ovin', 'équin': 'pâturage équin', 'equin': 'pâturage équin', 'bovin': 'pâturage bovin', 'paturage': 'pâturage', 'paturâge': 'pâturage', 'pâturage difficile': 'pâturage', 'f' : 'fauche', 'p' : 'pâturage', '0' : 'N.D', 'i' : 'incertain', 'a' : 'abandon', 'pv': 'pâturage', 'b' : 'broyage', 'pc': 'pâturage caprin', 'po': 'pâturage ovin', 'pe': 'pâturage équin', 'pb': 'pâturage bovin', '-' : 'N.D', '0' : 'N.D', 'ND' : 'N.D', 'n.d' : 'N.D', 'chemin carrossable non goudronné' : 'N.D', 'plantation de châtaigniers' : 'broyage', } p = prat.replace(['.et','/'],[',',', '],regex=True).copy() p = p.str.lower() p = normalize_paturage(p) p = p.replace(dic) p.fillna('N.D', inplace=True) p[p.str.contains('\?| ou ',na=False)] = 'incertain' pp = p.str.split(',',expand=True) pp.columns = 'p' + pp.columns.astype(str) for c in pp.columns: pp[c] = pp[c].str.strip() pp[c] = pp[c].replace(dic) return pp def select_ID(df,sch,tab): df = df.copy().dropna(axis=1,how='any') pk = pycen.tools.__get_pkey__(pycen.con,tab,sch) if pk['constrained_columns']: pkey = pk['constrained_columns'] else : pkey = [] sql = 'SELECT %s FROM %s.%s'%(','.join([*pkey,*df.columns]),sch,tab) df.replace("'","''",regex=True,inplace=True) if 'geom' in df.columns: del df['geom'] if 'date' in df.columns: df['date'] = df['date'].astype(str) elif 'date_deb' in df.columns: df['date_deb'] = df['date_deb'].astype(str) if 'id_cb' in df.columns: df['id_cb'] = df['id_cb'].astype(str) column = '(%s)'%','.join(df.columns) df_tuple = tuple(df.itertuples(index=False, name=None)) sql = sql + ' WHERE %s IN %s'%(column,df_tuple) if 'nan' in sql: sql = sql.replace('nan','NULL') if ',)' in sql: sql = sql.replace(',)',')') if '"' in sql: sql = sql.replace('"',"'") # print(sql) return pd.read_sql( sql=sql, con=pycen.con) def insertAutorAttrs(df,colname_rsite, sch, tab): df = df.copy() df = df[['id','auteur','structure']].copy() # structure = [df.structure.unique()] # lst_author = df[['auteur','structure']].drop_duplicates() if any(df.auteur.str.contains(',')): tmp = df.auteur.str.split(',',expand=True)\ .stack().droplevel(-1)\ .str.strip() tmp = pd.DataFrame({'auteur':tmp}) df = pd.merge(df[['id','structure']],tmp,how='left',left_index=True,right_index=True) lst_author = df[['auteur','structure']]\ .drop_duplicates()\ .reset_index(drop=True) df_autor = get_id_auteur(lst_author=lst_author) for r,row in df_autor.iterrows(): df.loc[ (df.auteur.str.contains(row.auteur)) & ((df.structure==row.organisme) | (df.structure==row.orga_abbrev)), 'auteur'] = row.id del df['structure'] # df.replace([*df_autor.auteur],[*df_autor.id], inplace=True) df.columns = [colname_rsite, 'id_auteur'] df.drop_duplicates(inplace=True) try : df.to_sql( name=tab, con=pycen.con, schema=sch, index=False, if_exists='append', method='multi' ) print('Insert author sites data OK !') return 'Insert author sites data OK !' except Exception as e: print('ERROR : News author insert impossible !') print(e) def _altertype(df1,df2,con=None,table_name=None, schema=None): if con and table_name and schema: tp = pycen.tools.__get_dtype__(con,table_name, schema) same_col = df1.columns[df1.columns.isin(df2.columns)] if 'date' in df2.columns: df2['date'] = df2['date'].astype(str) if 'geom' in same_col: same_col = same_col.drop('geom') for col in same_col: if df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == int: if df1.shape[0] == df2.shape[0]: df1[col] = df1[col].astype(int) else : df2[col] = df2[col].astype(float) elif df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == float: df1[col] = df1[col].astype(float) elif df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == str: df1[col] = df1[col].astype(str) elif df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == object: df1[col] = df1[col].astype(str) return [df1,df2] def insert_authordata(df_ins,tab_data,id_data,colname_rsite, tab_auteur): df_ins = df_ins.copy() col_auth = ['auteur','structure'] if 'date' in df_ins.columns: df_ins['date'] = df_ins['date'].astype(str) elif 'date_deb' in df_ins.columns: df_ins['date_deb'] = df_ins['date_deb'].astype(str) tab_sit = pycen.con.dialect.get_table_names(pycen.con,pycen.sites.schema) tab_ps = pycen.con.dialect.get_table_names(pycen.con,pycen.ps.schema) if tab_data in tab_sit: sch = pycen.sites.schema elif tab_data in tab_ps: sch = pycen.ps.schema # ids = select_ID(df_ins[df_ins.columns.drop('auteur')],sch,tab_data) ids = select_ID(df_ins[[id_data]],sch,tab_data) if 'id_site' in ids.columns: if tab_data == 'r_sites_geom': col = 'id' else: col = 'id_geom_site' tp = ids.groupby(['id_site'])[col].max() ids = ids[ids[col].isin(tp)] # join_on = [id_data] same_col = df_ins.columns[df_ins.columns.isin(ids.columns)] if len(same_col) > 1 and ids[same_col].duplicated().any(): raise ValueError('Doublon détecté dans la jointure [df_ins VS ids] !') if 'geom' in same_col: same_col = same_col.drop('geom') join_on = pd.Series([*same_col,id_data]).unique() # if df_ins.shape[0] != ids.shape[0]: # join_on = same_col if 'date' in ids.columns: ids['date'] = ids['date'].astype(str) df_ins,ids = _altertype(df_ins,ids) if tab_data != 'sites': # df_ins = df_ins[[*same_col,*col_auth]].merge(ids, on=[*same_col], how='left') # A REVOIR df_ins = df_ins[[*same_col,*col_auth]].merge(ids, on=[*join_on], how='left') # A REVOIR # df_ins = df_ins[[*same_col,'auteur']].merge(ids, on=['id_site'], how='left') # A REVOIR insertAutorAttrs(df=df_ins,colname_rsite=colname_rsite, sch=sch, tab=tab_auteur) def insert_site(df): df = df.copy() if 'nom' not in df.columns: df['nom'] = 'Inconnu' if 'type_site' not in df.columns: df['type_site'] = 'N.D.' if 'type_milieu' not in df.columns: df['type_milieu'] = 'Pelouses sèches' # if 'remarques' not in df.columns: # df['remarques'] = None df = df[['id_site','nom','date','auteur','type_site','type_milieu', 'structure' # 'remarques' ]] drop_col = ['auteur','structure'] typs = pycen.sites._get_typ_site() typm = pycen.sites._get_typ_milieux() for t in df.type_milieu.unique(): df.loc[df.type_milieu == t, 'id_type_milieu'] = typm.loc[typm.nom == t, 'id'].values[0] for s in df.type_site.unique(): df.loc[df.type_site == s, 'id_type_site'] = typs.loc[typs.nom == s, 'id'].values[0] df['id_type_milieu'] = df['id_type_milieu'].astype(int) # df['id_type_site'] = df['id_type_site'].astype(str) df['dept'] = df.id_site.str[:2] df['org'] = df.id_site.str[2:-4] df['num'] = df.id_site.str[-4:] df.rename(columns={'date':'date_deb', 'id_site':'id'}, inplace=True) columns = [col['name'] for col in pycen.sites.columns_sitetab] + drop_col c = df.columns[df.columns.isin(columns)] df = df[c] print(df) try: df[df.columns.drop(drop_col)].to_sql( name = 'sites', con = pycen.con, schema = pycen.sites.schema, if_exists = 'append', index = False, method = 'multi' ) print('Insert news sites OK !') return [df,True] except Exception as e: print(e) print('PS : Insert news sites impossible !') return [df,False] def check_org_lot(lot): """ :lot: list. liste des lots return ------- DataFrame des id_lot de la liste lot. """ tab_lot = pycen.sites._get_lots() not_intab = [ l for l in lot if l not in [*tab_lot.libelle] ] if not_intab: df = pd.DataFrame({'libelle':not_intab}) df.to_sql( name='lots', con=pycen.con, schema='sites', index=False, if_exists='append', method='multi', ) tab_lot = pycen.sites._get_lots() tab_lot = tab_lot[tab_lot.libelle.isin(lot)] return tab_lot[['id_lot','libelle']] def insert_geom(df): drop_col = ['auteur','structure'] dic = {'remarques':'rmq_interet_patri','legende':'rmq_usage_process'} c = ['id_site','geom','date','remarques','legende','table_org','id_origine',*drop_col] cc = df.columns[df.columns.isin(c)] df = df[cc].copy() df.set_geometry('geom', inplace=True) colindict = [ *df.columns[df.columns.isin(dic.keys())] ] if colindict: df.rename( columns={'remarques':'rmq_interet_patri','legende':'rmq_usage_process'}, inplace=True, ) if 'table_org' in df.columns: tab = [*df.table_org.unique()] lots = check_org_lot(tab) df = pd.merge(df,lots,left_on='table_org',right_on='libelle',how='left') del df['table_org'] del df['libelle'] try: df[df.columns.drop(drop_col)].to_postgis( name = 'r_sites_geom', con = pycen.con, schema = pycen.sites.schema, if_exists = 'append', index = False, # geom_col = 'geom' ) print('Insert news geom OK !') except Exception as e: print(e) print('PS : Insert news geom impossible !') return [df,False] return [df,True] def get_idGeomSite(lst_site,ps_statut='actif'): ''' :lst_site: list or pd.Series . :ps_statut: str. ['actif', 'history', 'all']. Statut des sites à récupérer, 'actif'; Date_fin IS NULL 'history'; A une Date_fin 'all'; Tous les Sites ''' id_geom = pycen.ps.get_sitesGeom( id_site=[*lst_site], statut=ps_statut) tp = id_geom.groupby('id_site').id.max() id_geom = id_geom[id_geom.id.isin(tp)] if not id_geom.empty: id_geom = id_geom[['id','id_site']] id_geom.columns = ['id_geom_site','id_site'] return id_geom def insert_attrsPS(df,ps_statut='actif'): params = [] drop_col = ['auteur','structure'] for p in ['%_embrous','niv_embrous','%_recouvmnt','recouvmnt']: if p in df.columns: params = [*params, p] c = ['id_site','date','pratiques',*params,*drop_col] for p in ['statut','pratiques']: if p in df.columns: c = [*c, p] cc = df.columns[df.columns.isin(c)] df = df[cc].copy() if 'pratiques' in df.columns and not all(df.pratiques.isna()): df = pd.concat([df,normalize_pratiques(df['pratiques'])],axis='columns') del df['pratiques'] for col in params: df[col] = df[col].astype(str).replace(['\.0'],[''],regex=True) if 'niv_embrous' in df.columns: df.niv_embrous.replace(['nan'],['N.D'],inplace=True) if 'recouvmnt' in df.columns: df.recouvmnt.replace(['nan','None','0'],['N.D','N.D','N.D'],inplace=True) if 'statut' in df.columns: df.statut.replace(['ND'],['N.D'],inplace=True) # for t in df.dtypes[df.dtypes == int].index: # df.loc[~df[t].isna(),t] = df.loc[~df[t].isna(),t].astype(int).astype(str) taux = False if '%_embrous' in df.columns: taux = True df.niv_embrous = df.niv_embrous + ';' + df['%_embrous'] del df['%_embrous'] if '%_recouvmnt' in df.columns: taux = True df.recouvmnt = df.recouvmnt + ';' + df['%_recouvmnt'] del df['%_recouvmnt'] df.set_index(['id_site','date',*drop_col], inplace=True) df = df.stack().reset_index(-1) df.columns = ['type','param'] df.loc[df.type.str.len() < 4,'type'] = 'pratique' if taux: df[['param','taux']] = df.param.str.split(';',expand=True) df.taux.replace(['nan','None'],[None,None],inplace=True) df.type.replace({'niv_embrous':'embroussaillement','recouvmnt':'recouvrement'}, inplace=True) df.param.replace({'ND':'N.D'}, inplace=True) df.reset_index(inplace=True,drop=False) param = pycen.ps.get_param() typ_p = pycen.ps._get_table(con=pycen.con,schema=pycen.ps.schema,table='type_param') for t in df.type.unique(): tmp = param[param.type == t] df.loc[df.type==t,'param'] = df.loc[df.type==t,'param'].replace([*tmp.nom], [*tmp.id]) df.loc[df.type==t,'param'] = df.loc[df.type==t,'param'].replace([*tmp.description], [*tmp.id]) df.loc[df.type==t,'param'] = df.loc[df.type==t,'param'].replace([*tmp.description.str.lower()], [*tmp.id]) df['type'].replace([*typ_p.nom], [*typ_p.id], inplace=True) df = df.merge(get_idGeomSite(df.id_site,ps_statut=ps_statut), on=['id_site'], how='left') # del df['id_site'] # id_site inséré dans la table depuis 19-07-2022 del df['type'] df.rename(columns={'param':'id_param'}, inplace=True) # df.loc[df.id_param=='None','id_param'] = None df.replace({'None':None}, inplace=True) df.dropna(subset=['id_param'], inplace=True) try: df[df.columns.drop(drop_col)].to_sql( name = 'r_site_param', con = pycen.con, schema = pycen.ps.schema, if_exists = 'append', index = False, method = 'multi' ) print('Insert news attrs OK !') return [df,True] except Exception as e: print(e) print('PS : Insert news attrs impossible !') return [df,False] def split_codehab(lst_codehab): lst = lst_codehab.copy() lst = lst.str.split('x|X|,|&|/|\(|et',expand=True) \ .stack().str.strip() \ .str.replace('\)','',regex=True)\ .droplevel(-1).reset_index(drop=False) return lst def format_codehab2insert(lst_codehab): lst = lst_codehab.copy() lst.replace('x|X|,|et','&',regex=True, inplace=True) lst.replace('&','&;',regex=True, inplace=True) lst.replace('/','/;',regex=True, inplace=True) lst = lst.str.split(';',expand=True).stack().str.strip().reset_index(-1) lst.columns = ['ordre', 'code_hab'] lst.ordre = lst.ordre + 1 lst.loc[lst.code_hab.str.contains('&'),'sep'] = '&' lst.loc[lst.code_hab.str.contains('/'),'sep'] = '/' lst.code_hab = lst.code_hab.replace('&|/','',regex=True).str.strip() lst.reset_index(drop=False, inplace=True) suspect = lst.code_hab.str.split('(',expand=True) if suspect.shape[1] > 1: suspect[0] = suspect[0].str.strip() suspect[1] = suspect[1].str.replace('\)','',regex=True) suspect.columns = ['code_hab','suspect'] del lst['code_hab'] # raise Exception('habitat suspecté en développement') else: suspect.columns = ['code_hab'] del suspect['code_hab'] lst = pd.merge(lst,suspect,right_index=True,left_index=True) lst.set_index('id', inplace=True) return lst def insert_cbPS(df): drop_col = ['auteur','structure'] df = df.copy() df.date = df.date.astype(str) df.code_hab = df.code_hab.replace(['\*'],[''],regex=True).str.strip() df.code_hab = df.code_hab.replace(['None','nan','0'],[None,None,None]) df.dropna(subset=['code_hab'],inplace=True) ids = select_ID(df[df.columns.drop(['code_hab',*drop_col])],sch=pycen.ps.schema,tab='r_site_habitat') same_col = df.columns[df.columns.isin(ids.columns)] if ids[same_col].duplicated().any(): raise ValueError('Doublon détecté dans la jointure [df VS ids] !') df,ids = _altertype(df,ids) df = df.merge(ids, on=[*same_col], how='left') df = df[['id', 'code_hab']].copy() df.set_index('id',inplace=True) # df = df.code_hab.str.split('x|,|&',expand=True) \ # .stack().str.strip() \ # .droplevel(-1).reset_index(drop=False) # df = split_codehab(df.code_hab) # df.columns = ['id_sitehab', 'code_hab'] df = format_codehab2insert(df.code_hab) df.index.name = 'id_sitehab' df.reset_index(drop=False,inplace=True) try: df.to_sql( name = 'r_hab_cb', con = pycen.con, schema = pycen.ps.schema, if_exists = 'append', index = False, method = 'multi' ) print('Insert news codes habitats OK !') except Exception as e: print(e) print('PS : Insert news codes habitats impossible !') def insert_habPS(df,ps_statut='actif'): import re drop_col = ['auteur','structure'] c = ['id_site','date',*drop_col] cc = df.columns[(df.columns.isin(c)) | (df.columns.str.contains('code_hab|n_hab')) ] df = df[cc].copy() c_nhab = df.columns[df.columns.str.contains('n_hab')] df[c_nhab] = df[c_nhab].astype(float) cc = df.columns[(~df.columns.str.contains('hab')) ] df.set_index([*cc], inplace=True) # df.set_index(['id_site','auteur','date'], inplace=True) # for t in df.dtypes[df.dtypes == int].index: # df[t] = df[t].astype(str) df = df.stack().reset_index(-1) df.columns = ['param','value'] df[['param','index','other']] = [re.split('(\d+)',s) for s in [*df.param]] del df['other'] df['index'] = df['index'].astype(int) df['value'] = df['value'].replace(['-'],[None]) df.dropna(subset=['value'], inplace=True) df = df.set_index(['index','param'], append=True).unstack() df = df.droplevel(0,axis=1) df.code_hab.replace(['0','None','nan'],[None,None,None], inplace=True) df.dropna(subset=['code_hab'],inplace=True) # df['n_hab'] = df['n_hab'].astype(int) # Désactiver pour insertion de données sans n_hab df.reset_index(inplace=True,drop=False) df = df.merge(get_idGeomSite(df.id_site,ps_statut=ps_statut), on=['id_site'], how='left') # del df['id_site'] # id_site inséré dans la table depuis 19-07-2022 # Reclassification des codes habitats / proportions d'habitats if 'n_hab' in df.columns: df.sort_values(['id_geom_site','n_hab','index'],ascending=[True,False,True], inplace=True) else: df.sort_values(['id_geom_site','index'],ascending=[True,True], inplace=True) lst = [ list(range(1, df[df.id_geom_site==d].shape[0]+1 )) for d in df.id_geom_site.unique()] flat_list = [item for sublist in lst for item in sublist] df['index'] = flat_list try: df[df.columns.drop([*drop_col,'code_hab'])].to_sql( name = 'r_site_habitat', con = pycen.con, schema = pycen.ps.schema, if_exists = 'append', index = False, method = 'multi' ) print('Insert news attrs habitats OK !') return [df,True] except Exception as e: print(e) print('PS : Insert news attrs habitats impossible !') return [df,False] def insert_legendCartoPS(df,ps_statut='actif'): c = ['id_site','auteur','date','leg_carto'] #,'legende'] cc = df.columns[df.columns.isin(c)] df = df[cc].copy() param = pycen.ps.get_listLegendePS() df['id_param_leg'] = df['leg_carto'].replace([*param.nom_court],[*param.id]) df = df.merge(get_idGeomSite(df.id_site,ps_statut=ps_statut), on=['id_site'], how='left') del df['id_site'] del df['leg_carto'] try: df[df.columns.drop('auteur')].to_sql( name = 'r_site_legcarto', con = pycen.con, schema = pycen.ps.schema, if_exists = 'append', index = False, method = 'multi' ) print('Insert news legendes OK !') return [df,True] except Exception as e: print(e) print('PS : Insert news legendes impossible !') return [df,False] def filter_saisierror(df): import re tmp = df.copy() lst_iderror = [] del tmp['geom'] cc = tmp.columns[(~tmp.columns.str.contains('hab')) ] tmp.set_index([*cc], inplace=True) tmp = tmp.stack().reset_index(-1) tmp.columns = ['param','value'] tmp[['param','index','other']] = [re.split('(\d+)',s) for s in [*tmp.param]] del tmp['other'] tmp['index'] = tmp['index'].astype(int) tmp['value'] = tmp['value'].replace(['-'],[None]) tmp.dropna(subset=['value'], inplace=True) tmp = tmp.set_index(['index','param'], append=True).unstack() tmp = tmp.droplevel(0,axis=1) tmp.reset_index(inplace=True,drop=False) lst_error1 = [] if '_hab' in tmp.columns: tmp.hab_.replace( ['Xerobromenion erecti', 'Mesobromion', 'Carpino betuli prunion spinosae','Carpino-betuli prunion spinosae',], ['Xerobromion erecti', 'Mesobromion erecti', 'Carpino betuli-prunion spinosae','Carpino betuli-prunion spinosae',], inplace=True) for hab in tmp.hab_.sort_values().unique(): print(hab) print(tmp[tmp.hab_ == hab].code_hab.unique()) for cod in tmp[tmp.hab_ == hab].code_hab.unique(): perc = tmp[(tmp.hab_ == hab)&(tmp.code_hab==cod)].code_hab.count() / tmp[tmp.hab_ == hab].code_hab.count() if perc < 0.6: if hab == 'Mesobromion erecti' and cod in ['34.322','34.323']: continue print('\t'+cod + ' : '+ perc.astype(str)) lst_error1 = [*lst_error1,*tmp[(tmp.hab_ == hab)&(tmp.code_hab==cod)].sort_values('id_site').id_site.unique()] df[df.id_site.isin(lst_error1)].to_postgis( name='incoherence_habVScodehab', con=con, schema=schema, if_exists='replace', index=False, # geom_col='geom', index_label='id_site', ) if lst_error1: print('%i'%len(lst_error1)) cb = pycen.ref_hab().get_CB() tmp.code_hab = tmp.code_hab.replace(['\*'],[''],regex=True).str.strip() tmp.code_hab = tmp.code_hab.replace(['0'],[None]) tmp.dropna(subset=['code_hab'], inplace=True) tmpp = tmp[['id_site','index','code_hab']].copy() tmpp.set_index(['id_site','index'], inplace=True) # if any(tmpp.code_hab.str.contains('x|,|&')): # tmpp = tmpp.code_hab.str.split('x|,|&',expand=True).stack().str.strip().droplevel(-1) tmpp = split_codehab(tmpp.code_hab) tmpp.columns = ['id_site','index','code_hab'] tmpp = tmpp[~tmpp.code_hab.isin(cb.id)] lst_error2 = [*tmpp.sort_values('id_site').id_site.unique()] if 'n_hab' in tmp.columns: lst_error2 = [*lst_error2,*tmp[tmp.n_hab.isna()].sort_values('id_site').id_site.unique()] if lst_error2: print('ERROR code_hab : %i'%len(lst_error2)) df[df.id_site.isin(lst_error2)].to_postgis( name='PB_codehab_nonPresent_dans_corineBiotope', con=con, schema=schema, if_exists='append', index=False, # geom_col='geom', index_label='id_site', ) lst_iderror = [*lst_error1,*lst_error2] lst_iderrorindex = df[df.id_site.isin(lst_iderror)].index df.drop(lst_iderrorindex,inplace=True) return df.sort_values('id_site') def _insert_site_(df,ps_statut='actif'): df_ins, ins = insert_geom(df) if ins: insert_authordata(df_ins,id_data='id_site',tab_data='r_sites_geom',colname_rsite='id_geom_site', tab_auteur='r_geomsites_auteur') df_ins, ins = insert_attrsPS(df,ps_statut) if ins: insert_authordata(df_ins,id_data='id_geom_site',tab_data='r_site_param',colname_rsite='id_siteparam', tab_auteur='r_siteparam_auteur') df_ins, ins = insert_habPS(df,ps_statut) if ins: if df_ins.shape[0] > 4000: df_ins1 = df_ins.iloc[:round(df_ins.shape[0]/2)].copy() df_ins2 = df_ins.iloc[round(df_ins.shape[0]/2):].copy() insert_cbPS(df_ins1) insert_cbPS(df_ins2) else: insert_cbPS(df_ins) insert_authordata( df_ins[df_ins.columns.drop('code_hab')],id_data='id_geom_site',tab_data='r_site_habitat', colname_rsite='id_sitehab', tab_auteur='r_sitehab_auteur') def maj_site(df): _insert_site_(df) def new_site(df): df_ins, ins = insert_site(df) if ins: insert_authordata(df_ins,id_data='id',tab_data='sites',colname_rsite='id_site', tab_auteur='r_sites_auteur') _insert_site_(df) def replace_site(df, cor): # Insertion des nouveaux codes site df_ins, ins = insert_site(df) if ins: insert_authordata(df_ins,id_data='id',tab_data='sites',colname_rsite='id_site', tab_auteur='r_sites_auteur') # remplissage de la table de correspondance old/new site cor.to_sql( name = 'r_site_maj', con = pycen.con, schema = 'sites', if_exists = 'append', index = False, method = 'multi' ) # Insertion de la date de cloture des anciens sites df_updt = pd.merge( cor, df[['id_site','date']], how='left', left_on='id_site_new', right_on='id_site') df_updt = df_updt[['id_site_old','date']] df_updt.columns = ['id','date_fin'] pycen.update_to_sql( df = df_updt, con = pycen.con, table_name='sites', schema_name='sites', key_name='id' ) # Suite de l'insertion des sites _insert_site_(df) def check_overlaps(): ''' Vérification de la superposition des polygons ''' sql = ''' SELECT v1.site_code site_code_old, v1.geom geom_old, v2.site_code site_code_new, v2.geom geom_new FROM ps.v_pelouseseches v1, ps.v_pelouseseches v2 WHERE ST_OVERLAPS(v1.geom,v2.geom) = TRUE AND v1.date_geom < v2.date_geom AND v1.site_code <> v2.site_code; ''' res = gpd.read_postgis(sql,con_ps,geom_col='geom_old') return res if __name__ == "__main__": isin_bdd = True schema = "habitat" lst_tab = ['"PS_4MONTAGNESNE_CEN38_2014"','"PS_CHAMBARAN_CEN38_2013"','"PS_CHARTREUSE_CEN38_2010"','"PS_DRAC_CEN38_2014"', '"PS_BELLEDONNE_CEN38_2014"','"PS_BIEVRE_CEN38_2014"','"PS_BIEVRE_CEN38_2016"','"PS_SUD-GRENOBLOIS_CEN38_2009"', '"PS_VERCORS_CEN38_2011"' ] # from_table = '"cr_ECRIN_habitats_CBNA_2014"' from_table = None from_file = 'PS38_modifs_AG_2024.gpkg' path0 = '/home/colas/Documents/9_PROJETS/2_PS/TO IMPORT/2024/' # org = from_file.split('/')[1] tutu = pd.DataFrame() if from_table is not None: for table in lst_tab: sql = "SELECT * FROM {sch}.{tab}".format(sch=schema,tab=table) df = gpd.read_postgis( sql = sql, con = con ) if 'id' in df.columns and 'n_polygone' in df.columns: df['id'] = df['n_polygone'] del df['n_polygone'] elif 'id' in df.columns and 'polygon' in df.columns: df['id'] = df['polygon'] del df['polygon'] elif 'id' in df.columns and 'ident' in df.columns: df['id'] = df['ident'] del df['ident'] df = normalize_colname(df) if 'remarques' in df.columns and 'legende' not in df.columns: df['legende'] = df.remarques # df.rename(columns={'id':'id_site'}, inplace=True) if table == '"PS_VERCORS_CEN38_2011"': df = df[df.date != '?'].copy() # df.legende = format_legende(df.legende) df = format_date(df) if table == '"PS_CHAMBARAN_CEN38_2013"': df.type_patur = normalize_paturage(df.type_patur) df[['p1','p2']] = df.pratiques.str.split('/|.et.',expand=True) df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'p1'] = \ df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'p1'] + ' ' + \ df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'type_patur'] df.loc[(~df.type_patur.isna()) & df.p2.str.lower()=='pâturage', 'p2'] = \ df.loc[(~df.type_patur.isna()) & df.p2.str.lower()=='pâturage', 'p2'] + ' ' + \ df.loc[(~df.type_patur.isna()) & df.p2.str.lower()=='pâturage', 'type_patur'] df['pratiques'] = df.p1 df.loc[~df.p2.isna(),'pratiques'] = df.p1 + '/' + df.p2 df.drop(columns=['type_patur','p1','p2'], inplace=True) df['table_org'] = table[1:-1] tutu = pd.concat([tutu,df]) if from_file is not None: table = from_file.split('/')[-1]#.split('.')[0] df = gpd.read_file(path0+from_file) df = normalize_colname(df) df = format_date(df) df['table_org'] = table df['structure'] = 'CEN Isère' # df['type_pat'].replace(['Indéterminé','/'],None,inplace=True) # df.loc[df.type_pat.notna(),'pratiques'] = \ # df[df.type_pat.notna()].pratiques + ' ' + df[df.type_pat.notna()].type_pat.str.lower() tutu = pd.concat([tutu,df]) c = ['ident','id_origine','id_site','geom','auteur','hab_1','code_hab1','n_hab1', 'hab_2','code_hab2','n_hab2','hab_3','code_hab3','n_hab3', 'hab_5','code_hab5','n_hab5','hab_6','code_hab6','n_hab6', 'hab_7','code_hab7','n_hab7','hab_8','code_hab8','n_hab8', 'hab_9','code_hab9','n_hab9','hab_10','code_hab10','n_hab10', 'hab_11','code_hab11','n_hab11','hab_12','code_hab12','n_hab12', 'hab_13','code_hab13','n_hab13','hab_14','code_hab14','n_hab14', 'hab_4','code_hab4','n_hab4','pratiques','niv_embrous', 'recouvmnt','remarques','phyto','statut','h_remarq','legende', 'leg_carto','date','annee','%_embrous','%_recouvmnt','surf_emb', 'source','structure', # 'org','dept','num', 'table_org' ] if 'ident' in df.columns and 'id_origine' not in df.columns: tutu.rename(columns={'ident':'id_origine'}, inplace=True) cols = tutu.columns[tutu.columns.isin(c)] df = tutu[cols].copy() df = df.set_geometry('geom',crs=2154) # df.code_hab3.replace({ # '43.171':'41.171', # '31.4B':'31.4' # }, inplace=True) df = pycen.tools.Polygons_to_MultiPolygon(df) df_new, df_replace, df_maj = define_siteName(df,rcvmt=10) df_new_tmp = df_new.copy() df_new = df_new_tmp[df_new_tmp.id_site.notna()].copy() if not df_new.empty: df_new = df_new[~df_new.id_site.isna()].copy() df_new.sort_index(inplace=True) df_new['auteur'] = normalize_auteur(df_new.auteur) df_new['id_origine'] = df_new['id_origine'].astype(str) df_new = filter_saisierror(df_new) if not df_maj.empty: df_maj['auteur'] = normalize_auteur(df_maj.auteur) id_col = 'id_origine' if 'id_origine' in df_maj.columns else 'id_site' df_maj['id_origine'] = df_maj[id_col].astype(str) df_maj = filter_saisierror(df_maj) if not df_replace['df'].empty: df_replace['df']['auteur'] = normalize_auteur(df_replace['df']['auteur']) df_replace['df']['id_origine'] = df_replace['df']['id_origine'].astype(str) df_replace['df'] = filter_saisierror(df_replace['df']) # for d in df.dtypes[df.dtypes == object].index: # # df[d].replace([' '], [' '],regex=True,inplace=True) # df[d] = df[d].str.normalize('NFKC') DF = df_maj.copy() Cnhab = DF.columns[DF.columns.str.startswith('n_hab')] if not all(DF[[*Cnhab]].astype(float).sum(axis=1) == 100): print( DF[ ~(DF[[*Cnhab]].astype(float).sum(axis=1) == 100) ] ) # DF.loc[ (~(DF[[*Cnhab]].sum(axis=1) == 100)),'n_hab1' ] = 20 if not df_new.empty: new_site(df_new) if not df_maj.empty: maj_site(df_maj) if not df_replace['df'].empty: replace_site( df = df_replace['df'], cor = df_replace['cor'] ) exit() ################ ## END SCRIPT ## ################ print(df) while True: res = input('Voulez-vous insérer le tableau de donnée ? (Y/N)\n') if not res[0].upper() in ['Y','N']: continue else: break if res[0].upper() == 'Y': df_ins, ins = insert_site(df) if ins: insert_authordata(df_ins,id_data='id',tab_data='sites',colname_rsite='id_site', tab_auteur='r_sites_auteur') df_ins, ins = insert_geom(df) if ins: insert_authordata(df_ins,id_data='id_site',tab_data='r_sites_geom',colname_rsite='id_geom_site', tab_auteur='r_geomsites_auteur') df_ins, ins = insert_attrsPS(df) if ins: insert_authordata(df_ins,id_data='id_geom_site',tab_data='r_site_param',colname_rsite='id_siteparam', tab_auteur='r_siteparam_auteur') df_ins, ins = insert_habPS(df) if ins: if df_ins.shape[0] > 4000: df_ins1 = df_ins.iloc[:round(df_ins.shape[0]/2)].copy() df_ins2 = df_ins.iloc[round(df_ins.shape[0]/2):].copy() insert_cbPS(df_ins1) insert_cbPS(df_ins2) else: insert_cbPS(df_ins) insert_authordata( df_ins[df_ins.columns.drop('code_hab')],id_data='id_geom_site',tab_data='r_site_habitat', colname_rsite='id_sitehab', tab_auteur='r_sitehab_auteur') # df_ins, ins = insert_legendCartoPS(df)%_embrous # if ins: # insert_authordata(df_ins,tab_data='r_site_legcarto',colname_rsite='id_sitecarto', tab_auteur='r_sitecarto_auteur') sql = """ DELETE FROM sites.sites WHERE id in (SELECT id_site FROM sites.r_sites_geom where id_lot = 20); """ with pycen.con.begin() as cnx: cnx.execute(sql)