from os import replace import pandas as pd import numpy as np from geoalchemy2 import Geometry # import numpy as np from sqlalchemy import create_engine, text from sqlalchemy.engine import URL import geopandas as gpd # Parametres bdd user = 'cgeier' pwd = 'adm1n*bdCen' adr = '91.134.194.221' port = '5432' base = 'sicen2' bckup_user = 'postgres' bckup_pwd = '1234' bckup_adr = '192.168.0.81' bckup_port = '5455' bckup_base = 'sicen2' url = URL.create('postgresql+psycopg2', username=user, password=pwd, host=adr, database=base, ) con = create_engine(url) bckup_url = URL.create('postgresql+psycopg2', username=bckup_user, password=bckup_pwd, host=bckup_adr, database=bckup_base, ) bckup_con = create_engine(bckup_url) # con = create_engine('postgresql+psycopg2://{1}:{0}@{2}:{3}/{4}'.format(pwd, user, adr, port, base), echo=False) # bckup_con = create_engine('postgresql+psycopg2://{1}:{0}@{2}:{3}/{4}'.format(bckup_pwd, bckup_user, bckup_adr, bckup_port, bckup_base), echo=False) def update_data(df, con, sch, tab, epsg=None): columns = df.columns.to_list() frame = df.copy() frame.replace("'","''", regex=True, inplace=True) pkey = con.dialect.get_pk_constraint(con, table_name=tab, schema=sch)['constrained_columns'] for col in frame.select_dtypes(include=['object']).columns: test = frame[frame[col].str.contains("'", na=False)] if not test.empty: frame[col].replace("'", "''", inplace=True, regex=True) # print(frame[col]) if 'geom' in columns or 'geometry' in columns: if epsg or df.crs: if not epsg: epsg = df.crs.to_epsg() name_geom = df.geometry.name frame[name_geom] = 'SRID={epsg};'.format(epsg=epsg) + df[name_geom].map(str) # else: return 'No crs define in update_data or in gdf' for c, col in enumerate(columns): if c == 0: frame['insert'] = "('" + frame[col].map(str) # break else: frame['insert'] = frame['insert'] + "','" + frame[col].map(str) if c == len(columns)-1: frame['insert'] = frame['insert'] + "')" # if c == 0: # frame['insert'] = '("' + frame[col].map(str) # # break # else: # frame['insert'] = frame['insert'] + '","' + frame[col].map(str) # if c == len(columns)-1: # frame['insert'] = frame['insert'] + '")' lst_cols = ', '.join(columns) lst_vals = ','.join(frame['insert']) lst_vals.replace("\'None\'", 'NULL', inplace=True) lst_dupKey = ', '.join([col + '=EXCLUDED.' + col for col in columns]) lst_pkey = ','.join(pkey) sql = '''INSERT INTO {sch}.{tab} ({lst_cols}) VALUES {lst_vals} ON CONFLICT ({lst_pkey}) DO UPDATE SET {lst_dupKey} ;'''.format( sch=sch, tab=tab, lst_cols=lst_cols, lst_vals=lst_vals, lst_dupKey=lst_dupKey, lst_pkey=lst_pkey) # sql = '''INSERT INTO {sch}.{tab} ({lst_cols}) # VALUES {lst_vals} # ON CONFLICT DO NOTHING; # '''.format(sch=sch, tab=tab, lst_cols=lst_cols, lst_vals=lst_vals) try: con.execute(sql) # con.execute(text(sql)) print(''' Update OK !''') except Exception as exept: print(exept) df = pd.read_sql_table( table_name = 'personne', con = con, schema = 'md', index_col='id_personne') tmp = pd.DataFrame(df.nom.str.split(' ',1).tolist(), columns = ['nom','prenom'], index=df.index) tmp = tmp[~tmp.prenom.isnull()] df.loc[tmp.index,'prenom'] = tmp['prenom'] df.loc[tmp.index,'nom'] = tmp['nom'] df['nom'] = [nom.replace('CEN38_','') for nom in df['nom']] df['nom'] = [nom.replace('GENTIANA_','') for nom in df['nom']] df['nom'] = [nom.replace('LPO38_','') for nom in df['nom']] df['nom'] = [nom.replace('LPO_','') for nom in df['nom']] df['nom'] = [nom.replace('CENRA_','') for nom in df['nom']] df['nom'] = [nom.replace('GRPLS_','') for nom in df['nom']] rm_row = ['ISÈRE', 'Ecrin', 'FUR', 'BRIQUIR', 'BORGNE', '(FMBDS)', 'Isère', 'Rhône', '(Presonnel)', 'Monsieur', 'Batrachologique de France', '(Ecosphère)', 'PIC VERT Association', 'BAGOUSSE', '(BIOTOPE)', '(Dauphinelle)', 'Cras', '(GRPLS)', 'et Vie Sociale', '(ONCFS)', 'campagne sauvetage amphibiens'] df = df[~df.prenom.isin(rm_row)] tmp = pd.DataFrame(df.prenom.str.split(' ',1).tolist(), columns = ['nom','prenom'], index=df.index) tmp = tmp[~tmp.prenom.isnull()] tmp.drop(index=[100032,8628,8645,4238,8058,8070,8353,1099,1081,1323,1324], inplace=True) df.loc[tmp.index,'nom'] = df.loc[tmp.index,'nom'] + ' ' + tmp['nom'] df.loc[tmp.index,'prenom'] = tmp['prenom'] tmp = pd.DataFrame( df.loc[df.nom=='Abbé',['nom','prenom']].prenom.str.split(' ',1).tolist(), columns = ['prenom','nom'], index=df.loc[df.nom=='Abbé',['nom','prenom']].index) tmp2 = df.loc[df.nom=='Abbé',['nom','prenom']] df.loc[tmp.index,'nom'] = tmp['nom'] df.loc[tmp.index,'prenom'] = tmp2['nom'] + ' ' + tmp['prenom'] df.reset_index(inplace=True) tmp = df.copy() tmp = tmp[['id_personne','nom','prenom', 'id_structure']] # update_data(tmp,con,sch='md',tab='personne') id_old = '8044' id_new = '1000014' sql='''SELECT * FROM saisie.saisie_observation WHERE observateur = '{0}' OR validateur = {0};'''.format(id_old) # sql = 'SELECT nom, prenom FROM md.personne WHERE id_personne= 6191;' pd.read_sql( sql=sql, con=con )[['observateur','validateur']] # df[['prenom', 'nom']].values.tolist() # sql = '''UPDATE saisie.saisie_observation SET validateur = 100077 # WHERE validateur = 1110;''' try: con.execute(sql) # con.execute(text(sql)) print(''' Update OK !''') except Exception as exept: print(exept) tmpx = df.copy() tmpx = tmpx[['id_personne','nom','prenom','id_structure']] tutux = tmpx.drop_duplicates(['nom','prenom','id_structure'], keep=False) tomx = tmpx[~tmpx.id_personne.isin(tutux.id_personne)] tomx = tomx.sort_values('nom') for nom in tomx.nom.unique().tolist(): tmp1 = tomx[tomx.nom==nom] keep = max(tmp1.id_personne) rep = min(tmp1.id_personne) sql = '''UPDATE saisie.saisie_observation SET observateur = '{keep}' WHERE observateur = '{replace}';'''.format( keep=keep, replace=rep ) # sql = '''UPDATE saisie.saisie_observation SET validateur = 100077 # WHERE validateur = 1110;''' try: con.execute(sql) # con.execute(text(sql)) print(''' Update OK !''') except Exception as exept: print(exept) sql='''SELECT * FROM saisie.saisie_observation WHERE observateur = '{replace}' OR validateur = {replace};'''.format( replace=rep ) # sql = 'SELECT nom, prenom FROM md.personne WHERE id_personne= 6191;' test = pd.read_sql( sql=sql, con=con )[['observateur','validateur']] if test.empty: print(''' OK for %s'''%nom) else: print('ERROR !!!!!!!!!') break print(nom) sql='''SELECT * FROM md.personne WHERE nom like '{nom}%';'''.format(nom=nom) tutu = pd.read_sql( sql=sql, con=con ) if tutu.shape[0] > 1: df print(''' {id_personne},{nom},{prenom},{id_structure}'''.format( id_personne=id_personne,nom=nom,prenom=prenom,id_structure=id_structure)) from sqlalchemy import create_engine, text from sqlalchemy.engine import URL from shapely.geometry import Point import matplotlib.pyplot as plt import contextily as cx import geopandas as gpd import pandas as pd # Parametres bdd user = 'cgeier' pwd = 'adm1n*bdCen' adr = '91.134.194.221' port = '5432' base = 'sicen2' url = URL.create('postgresql+psycopg2', username=user, password=pwd, host=adr, database=base, ) con = create_engine(url) df1 = pd.read_sql_table( table_name = 'saisie_observation', con = con, schema = 'saisie', index_col='id_obs') geometry = [Point(xy) for xy in zip(df1['longitude'], df['latitude'])] crs = 'epsg:4326' df = gpd.GeoDataFrame(df1, crs=crs, geometry=geometry) df = df.to_crs(epsg=3857) df_unkeep = df.drop_duplicates(keep=False) df_clean = df.drop_duplicates(keep='first') df[~df.index.isin(df_clean.index)].sort_values('date_obs').index.values df[~df.index.isin(df_unkeep.index)].sort_values('date_obs').index.values ax = df[~df.index.isin(df_unkeep.index)][['geometry']].plot() cx.add_basemap(ax) plt.show() df.sort_index(inplace=True) data_drop = df[~df.index.isin(df.drop_duplicates(keep=False).index)] tmp = [i.split('&') for i in df['observateur'].unique() ] lst_pers = [item for sublist in tmp for item in sublist] df_pers = pd.DataFrame(lst_pers, columns=['id_pers']).drop_duplicates() sql='''SELECT * FROM md.personne WHERE id_personne IN ({ids});'''.format(ids=','.join(df_pers.id_pers)) tutu = pd.read_sql( sql=sql, con=con ) df_pers['id_pers'] = df_pers['id_pers'].astype(int) df_pers[~df_pers.id_pers.isin(tutu.id_personne)] tom = toto.copy() tom = tom[tom.observateur.str.contains('&')] tom['observateur'] = [idx.replace('1043','100086') for idx in tom['observateur']] tom['observateur'] = [idx.replace('6191','1000002') for idx in tom['observateur']] tom['observateur'] = [idx.replace('6051','1000013') for idx in tom['observateur']] tom['observateur'] = [idx.replace('1070','1000011') for idx in tom['observateur']] tom['observateur'] = [idx.replace('1088','100043') for idx in tom['observateur']] tom['observateur'] = [idx.replace('6045','1000004') for idx in tom['observateur']] tom['observateur'] = [idx.replace('6121','1000009') for idx in tom['observateur']] tom['observateur'] = [idx.replace('6355','1000003') for idx in tom['observateur']] tom['observateur'] = [idx.replace('8632','1000005') for idx in tom['observateur']] tom['observateur'] = [idx.replace('1034','100007') for idx in tom['observateur']] tom['observateur'] = [idx.replace('1110','100077') for idx in tom['observateur']] tom['observateur'] = [idx.replace('1075','1000022') for idx in tom['observateur']] tom['observateur'] = [idx.replace('1188','100038') for idx in tom['observateur']] tom['observateur'] = [idx.replace('8271','100017') for idx in tom['observateur']] tom['observateur'] = [idx.replace('1049','100053') for idx in tom['observateur']] tom['observateur'] = [idx.replace('8328','100034') for idx in tom['observateur']] tom['observateur'] = [idx.replace('1102','100062') for idx in tom['observateur']] tom['observateur'] = [idx.replace('1016','1000001') for idx in tom['observateur']] tom['observateur'] = [idx.replace('1187','100037') for idx in tom['observateur']] tom['observateur'] = [idx.replace('8018','1000008') for idx in tom['observateur']] tom['observateur'] = [idx.replace('4233','100067') for idx in tom['observateur']] tom['observateur'] = [idx.replace('8254','1000012') for idx in tom['observateur']] tom['observateur'] = [idx.replace('8015','100016') for idx in tom['observateur']] df.sort_index(inplace=True) tom.sort_index(inplace=True) df.loc[df.index.isin(tom.index), 'observateur'] = tom['observateur'] df.reset_index(inplace=True) tmp = df.copy() tmp = tmp[['id_obs','observateur']] update_data(tmp,con,sch='saisie',tab='saisie_observation') # Check user in suivi_saisie_observation existent tous! from datetime import datetime as dt df = pd.read_sql_table( table_name = 'suivi_saisie_observation', con = con, schema = 'saisie', ) df.sort_values('date_operation', ascending=False, inplace=True) x = '2021-05-25 22:00:00' y = '2021-05-21 12:30:00' x = dt.strptime(x, '%Y-%m-%d %H:%M:%S') y = dt.strptime(y, '%Y-%m-%d %H:%M:%S') ddf = df[(df.date_operation < x) & (df.date_operation > y)] ddf = ddf[['operation','utilisateur', 'id_obs', 'observateur', 'validateur', 'numerisateur']] # Get all "id" observateur tmp_obs = [i.split('&') for i in ddf['observateur'].dropna().unique() ] lst_pers_obs = [item for sublist in tmp_obs for item in sublist] tmp_val = [i.split('&') for i in ddf['validateur'].dropna().unique().astype(int).astype(str) ] lst_pers_val = [item for sublist in tmp_val for item in sublist] tmp_num = [i.split('&') for i in ddf['numerisateur'].dropna().unique().astype(str) ] lst_pers_num = [item for sublist in tmp_num for item in sublist] lst_pers = lst_pers_obs + lst_pers_val + lst_pers_num df_pers = pd.DataFrame(lst_pers, columns=['id_pers']).drop_duplicates() # Check if personnes exist pers_not_exist = [] for i in df_pers.id_pers: sql = 'SELECT id_personne,prenom, nom, id_structure FROM md.personne WHERE id_personne = %s;'%i res = pd.read_sql( sql = sql, con = con ) if res.empty: print(i) pers_not_exist += [i] sql = '''SELECT * FROM saisie.suivi_saisie_observation WHERE utilisateur = 'inconnu' AND date_operation < '2021-05-25 22:00:00' AND date_operation > '2021-05-21 12:30:00';''' pd.read_sql( sql=sql, con=con ) sql = '''UPDATE saisie.suivi_saisie_observation SET utilisateur = 'colas.geier@cen-isere.org' WHERE utilisateur = 'inconnu' AND date_operation < '2021-05-25 22:00:00' AND date_operation > '2021-05-21 12:30:00';''' try: con.execute(sql) # con.execute(text(sql)) print(''' Update OK !''') except Exception as exept: print(exept) rep = '8044' keep = '1000014' sql_check ='''SELECT * FROM saisie.saisie_observation WHERE observateur like '%{replace}%' OR validateur = {replace};'''.format( replace=rep ) sql_check ='''SELECT * FROM saisie.saisie_observation WHERE observateur = '{replace}' OR validateur = {replace};'''.format( replace=rep ) sql_updt = ''' UPDATE saisie.saisie_observation SET observateur = '{keep}' WHERE observateur = '{replace}'; UPDATE saisie.saisie_observation SET numerisateur = {keep} WHERE numerisateur = {replace}; '''.format( keep=keep, replace=rep ) # sql = '''UPDATE saisie.saisie_observation SET validateur = 100077 # WHERE validateur = 1110;''' test = pd.read_sql( sql=sql_check, con=con )[['observateur','validateur']] if test.empty: print(''' OK for %s in table saisie'''%nom) else: try: con.execute(sql_updt) # con.execute(text(sql)) print(''' Update OK !''') except Exception as exept: print(exept) sql_check2 ='''SELECT * FROM saisie.suivi_saisie_observation WHERE observateur = '{replace}' OR validateur = '{replace}';'''.format( replace=rep ) sql_updt2 = ''' UPDATE saisie.suivi_saisie_observation SET observateur = '{keep}' WHERE observateur = '{replace}'; UPDATE saisie.suivi_saisie_observation SET numerisateur = {keep} WHERE numerisateur = {replace}; '''.format( keep=keep, replace=rep ) test2 = pd.read_sql( sql=sql_check2, con=con )[['observateur','validateur']] if test2.empty: print(''' OK for %s in table saisie'''%nom) else: try: con.execute(sql_updt2) # con.execute(text(sql)) print(''' Update OK !''') except Exception as exept: print(exept) sql='''SELECT * FROM saisie.saisie_observation WHERE observateur = '{replace}' OR validateur = {replace};'''.format( replace=rep ) # sql = 'SELECT nom, prenom FROM md.personne WHERE id_personne= 6191;' test = pd.read_sql( sql=sql, con=con )[['observateur','validateur']] if test.empty: print(''' OK for %s'''%nom) else: print('ERROR !!!!!!!!!') # break