from os import replace

import pandas as pd
import numpy as np
from geoalchemy2 import Geometry
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL
import geopandas as gpd

# Database connection parameters
user = 'cgeier'
pwd = 'adm1n*bdCen'
adr = '91.134.194.221'
port = '5432'
base = 'sicen2'

# Backup database connection parameters
bckup_user = 'postgres'
bckup_pwd = '1234'
bckup_adr = '192.168.0.81'
bckup_port = '5455'
bckup_base = 'sicen2'

url = URL.create('postgresql+psycopg2',
                 username=user,
                 password=pwd,
                 host=adr,
                 port=port,
                 database=base,
                 )
con = create_engine(url)

bckup_url = URL.create('postgresql+psycopg2',
                       username=bckup_user,
                       password=bckup_pwd,
                       host=bckup_adr,
                       port=bckup_port,
                       database=bckup_base,
                       )
bckup_con = create_engine(bckup_url)

# con = create_engine('postgresql+psycopg2://{1}:{0}@{2}:{3}/{4}'.format(pwd, user, adr, port, base), echo=False)
# bckup_con = create_engine('postgresql+psycopg2://{1}:{0}@{2}:{3}/{4}'.format(bckup_pwd, bckup_user, bckup_adr, bckup_port, bckup_base), echo=False)
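
# Optional sanity check, a minimal sketch: confirm the main engine can reach the
# server before running any update below (uses only the objects defined above).
# with con.connect() as check_con:
#     print(check_con.execute(text('SELECT 1;')).scalar())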


def update_data(df, con, sch, tab, epsg=None):
    """Upsert the rows of df into sch.tab (INSERT ... ON CONFLICT ... DO UPDATE)."""
    columns = df.columns.to_list()
    frame = df.copy()
    # Escape single quotes so the values can be embedded in the SQL literals below.
    frame.replace("'", "''", regex=True, inplace=True)
    # Primary-key columns of the target table, used for the ON CONFLICT clause.
    pkey = con.dialect.get_pk_constraint(con, table_name=tab, schema=sch)['constrained_columns']
    # Quotes are already escaped frame-wide above; this loop only flags the columns concerned.
    for col in frame.select_dtypes(include=['object']).columns:
        test = frame[frame[col].str.contains("'", na=False)]
        if not test.empty:
            # print(frame[col])
            pass

    if 'geom' in columns or 'geometry' in columns:
        if epsg or df.crs:
            if not epsg:
                epsg = df.crs.to_epsg()
            name_geom = df.geometry.name
            # Prefix the WKT with its SRID so PostGIS parses the value as EWKT.
            frame[name_geom] = 'SRID={epsg};'.format(epsg=epsg) + df[name_geom].map(str)
        # else: return 'No crs define in update_data or in gdf'

    # Build one quoted value tuple per row: ('v1','v2',...).
    for c, col in enumerate(columns):
        if c == 0:
            frame['insert'] = "('" + frame[col].map(str)
            # break
        else:
            frame['insert'] = frame['insert'] + "','" + frame[col].map(str)
        if c == len(columns)-1:
            frame['insert'] = frame['insert'] + "')"
    # if c == 0:
    #     frame['insert'] = '("' + frame[col].map(str)
    #     # break
    # else:
    #     frame['insert'] = frame['insert'] + '","' + frame[col].map(str)
    # if c == len(columns)-1:
    #     frame['insert'] = frame['insert'] + '")'

    lst_cols = ', '.join(columns)
    lst_vals = ','.join(frame['insert'])
    # Turn the string 'None' into SQL NULL in the VALUES list.
    lst_vals = lst_vals.replace("'None'", 'NULL')
    lst_dupKey = ', '.join([col + '=EXCLUDED.' + col for col in columns])
    lst_pkey = ','.join(pkey)

    sql = '''INSERT INTO {sch}.{tab} ({lst_cols}) VALUES {lst_vals} ON CONFLICT ({lst_pkey}) DO UPDATE SET {lst_dupKey} ;'''.format(
        sch=sch, tab=tab, lst_cols=lst_cols, lst_vals=lst_vals, lst_dupKey=lst_dupKey, lst_pkey=lst_pkey)
    # sql = '''INSERT INTO {sch}.{tab} ({lst_cols})
    #          VALUES {lst_vals}
    #          ON CONFLICT DO NOTHING;
    #          '''.format(sch=sch, tab=tab, lst_cols=lst_cols, lst_vals=lst_vals)
    try:
        con.execute(sql)
        # con.execute(text(sql))
        print('''
        Update OK !''')
    except Exception as exept:
        print(exept)
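
# Example usage, a minimal sketch: upsert two rows into md.personne, assuming
# id_personne is that table's primary key (ids and names below are illustrative).
# demo = pd.DataFrame({'id_personne': [1, 2], 'nom': ["Durand", "O'Neil"], 'prenom': ['Anne', 'Bob']})
# update_data(demo, con, sch='md', tab='personne')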


df = pd.read_sql_table(
    table_name='personne',
    con=con,
    schema='md',
    index_col='id_personne')

# Split 'nom' on the first space into nom / prenom.
tmp = pd.DataFrame(df.nom.str.split(' ', 1).tolist(), columns=['nom', 'prenom'], index=df.index)
tmp = tmp[~tmp.prenom.isnull()]
df.loc[tmp.index, 'prenom'] = tmp['prenom']
df.loc[tmp.index, 'nom'] = tmp['nom']
# Strip the structure prefixes from the names.
df['nom'] = [nom.replace('CEN38_', '') for nom in df['nom']]
df['nom'] = [nom.replace('GENTIANA_', '') for nom in df['nom']]
df['nom'] = [nom.replace('LPO38_', '') for nom in df['nom']]
df['nom'] = [nom.replace('LPO_', '') for nom in df['nom']]
df['nom'] = [nom.replace('CENRA_', '') for nom in df['nom']]
df['nom'] = [nom.replace('GRPLS_', '') for nom in df['nom']]
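
# One-pass alternative (sketch), equivalent only if these prefixes always appear
# at the start of 'nom' (an assumption; the calls above replace them anywhere):
# df['nom'] = df['nom'].str.replace(r'^(CEN38|GENTIANA|LPO38|LPO|CENRA|GRPLS)_', '', regex=True)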

# Rows whose 'prenom' is actually not a first name: drop them.
rm_row = ['ISÈRE', 'Ecrin', 'FUR', 'BRIQUIR', 'BORGNE', '(FMBDS)', 'Isère', 'Rhône', '(Presonnel)', 'Monsieur', 'Batrachologique de France', '(Ecosphère)',
          'PIC VERT Association', 'BAGOUSSE', '(BIOTOPE)', '(Dauphinelle)', 'Cras', '(GRPLS)', 'et Vie Sociale', '(ONCFS)', 'campagne sauvetage amphibiens']
df = df[~df.prenom.isin(rm_row)]

# Where 'prenom' still holds two words, move the first word onto 'nom'
# (a fixed list of ids is excluded from this correction).
tmp = pd.DataFrame(df.prenom.str.split(' ', 1).tolist(), columns=['nom', 'prenom'], index=df.index)
tmp = tmp[~tmp.prenom.isnull()]
tmp.drop(index=[100032, 8628, 8645, 4238, 8058, 8070, 8353, 1099, 1081, 1323, 1324], inplace=True)
df.loc[tmp.index, 'nom'] = df.loc[tmp.index, 'nom'] + ' ' + tmp['nom']
df.loc[tmp.index, 'prenom'] = tmp['prenom']

# 'Abbé' was parsed as the family name: rebuild prenom as "Abbé <first name>"
# and take the real family name from the rest of the 'prenom' column.
tmp = pd.DataFrame(
    df.loc[df.nom == 'Abbé', ['nom', 'prenom']].prenom.str.split(' ', 1).tolist(),
    columns=['prenom', 'nom'],
    index=df.loc[df.nom == 'Abbé', ['nom', 'prenom']].index)
tmp2 = df.loc[df.nom == 'Abbé', ['nom', 'prenom']]
df.loc[tmp.index, 'nom'] = tmp['nom']
df.loc[tmp.index, 'prenom'] = tmp2['nom'] + ' ' + tmp['prenom']

df.reset_index(inplace=True)
tmp = df.copy()
tmp = tmp[['id_personne', 'nom', 'prenom', 'id_structure']]
# update_data(tmp, con, sch='md', tab='personne')

# Check which observations still reference the old person id.
id_old = '8044'
id_new = '1000014'
sql = '''SELECT * FROM saisie.saisie_observation WHERE observateur = '{0}' OR validateur = {0};'''.format(id_old)
# sql = 'SELECT nom, prenom FROM md.personne WHERE id_personne= 6191;'
pd.read_sql(
    sql=sql,
    con=con
)[['observateur', 'validateur']]
# df[['prenom', 'nom']].values.tolist()

# sql = '''UPDATE saisie.saisie_observation SET validateur = 100077
#          WHERE validateur = 1110;'''
try:
    # Note: sql still holds the SELECT above unless the commented UPDATE is used.
    con.execute(sql)
    # con.execute(text(sql))
    print('''
    Update OK !''')
except Exception as exept:
    print(exept)


# Merge duplicated persons: for each duplicated 'nom', keep the highest id and
# repoint the observations of the lowest id onto it.
tmpx = df.copy()
tmpx = tmpx[['id_personne', 'nom', 'prenom', 'id_structure']]
tutux = tmpx.drop_duplicates(['nom', 'prenom', 'id_structure'], keep=False)
tomx = tmpx[~tmpx.id_personne.isin(tutux.id_personne)]
tomx = tomx.sort_values('nom')

for nom in tomx.nom.unique().tolist():
    tmp1 = tomx[tomx.nom == nom]
    keep = max(tmp1.id_personne)
    rep = min(tmp1.id_personne)
    sql = '''UPDATE saisie.saisie_observation SET observateur = '{keep}' WHERE observateur = '{replace}';'''.format(
        keep=keep, replace=rep
    )
    # sql = '''UPDATE saisie.saisie_observation SET validateur = 100077
    #          WHERE validateur = 1110;'''
    try:
        con.execute(sql)
        # con.execute(text(sql))
        print('''
        Update OK !''')
    except Exception as exept:
        print(exept)

    # Check that no observation still references the replaced id.
    sql = '''SELECT * FROM saisie.saisie_observation WHERE observateur = '{replace}' OR validateur = {replace};'''.format(
        replace=rep
    )
    # sql = 'SELECT nom, prenom FROM md.personne WHERE id_personne= 6191;'
    test = pd.read_sql(
        sql=sql,
        con=con
    )[['observateur', 'validateur']]
    if test.empty:
        print(''' OK for %s''' % nom)
    else:
        print('ERROR !!!!!!!!!')
        break

    print(nom)
    sql = '''SELECT * FROM md.personne WHERE nom like '{nom}%';'''.format(nom=nom)
    tutu = pd.read_sql(
        sql=sql,
        con=con
    )
    if tutu.shape[0] > 1:
        # Several md.personne rows still match this name: list them for manual review.
        for _, row in tutu.iterrows():
            print('''
            {id_personne},{nom},{prenom},{id_structure}'''.format(
                id_personne=row['id_personne'], nom=row['nom'], prenom=row['prenom'], id_structure=row['id_structure']))


# Deduplicate saisie.saisie_observation and remap observer ids.
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL
from shapely.geometry import Point
import matplotlib.pyplot as plt
import contextily as cx
import geopandas as gpd
import pandas as pd

# Database connection parameters
user = 'cgeier'
pwd = 'adm1n*bdCen'
adr = '91.134.194.221'
port = '5432'
base = 'sicen2'
url = URL.create('postgresql+psycopg2',
                 username=user,
                 password=pwd,
                 host=adr,
                 port=port,
                 database=base,
                 )
con = create_engine(url)

df1 = pd.read_sql_table(
    table_name='saisie_observation',
    con=con,
    schema='saisie',
    index_col='id_obs')

# Build point geometries from the longitude/latitude columns (WGS84), then
# project to web mercator for plotting on a basemap.
geometry = [Point(xy) for xy in zip(df1['longitude'], df1['latitude'])]
crs = 'epsg:4326'
df = gpd.GeoDataFrame(df1, crs=crs, geometry=geometry)
df = df.to_crs(epsg=3857)
df_unkeep = df.drop_duplicates(keep=False)
df_clean = df.drop_duplicates(keep='first')

df[~df.index.isin(df_clean.index)].sort_values('date_obs').index.values
df[~df.index.isin(df_unkeep.index)].sort_values('date_obs').index.values

# Map the duplicated observations.
ax = df[~df.index.isin(df_unkeep.index)][['geometry']].plot()
cx.add_basemap(ax)
plt.show()

df.sort_index(inplace=True)
data_drop = df[~df.index.isin(df.drop_duplicates(keep=False).index)]

# Collect every person id referenced in 'observateur' ('&'-separated) and check
# that each one exists in md.personne.
tmp = [i.split('&') for i in df['observateur'].unique()]
lst_pers = [item for sublist in tmp for item in sublist]
df_pers = pd.DataFrame(lst_pers, columns=['id_pers']).drop_duplicates()

sql = '''SELECT * FROM md.personne WHERE id_personne IN ({ids});'''.format(ids=','.join(df_pers.id_pers))
tutu = pd.read_sql(
    sql=sql,
    con=con
)
df_pers['id_pers'] = df_pers['id_pers'].astype(int)
df_pers[~df_pers.id_pers.isin(tutu.id_personne)]


# Remap the person ids inside multi-observer strings ('&'-separated) from the
# old ids to the ids kept after the merge.
tom = df.copy()
tom = tom[tom.observateur.str.contains('&', na=False)]
tom['observateur'] = [idx.replace('1043', '100086') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('6191', '1000002') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('6051', '1000013') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('1070', '1000011') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('1088', '100043') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('6045', '1000004') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('6121', '1000009') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('6355', '1000003') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('8632', '1000005') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('1034', '100007') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('1110', '100077') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('1075', '1000022') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('1188', '100038') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('8271', '100017') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('1049', '100053') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('8328', '100034') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('1102', '100062') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('1016', '1000001') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('1187', '100037') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('8018', '1000008') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('4233', '100067') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('8254', '1000012') for idx in tom['observateur']]
tom['observateur'] = [idx.replace('8015', '100016') for idx in tom['observateur']]
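
# Dict-based alternative (sketch): remap whole '&'-separated tokens so a short id
# can never match inside a longer one; the pairs are the same as the replace()
# calls above (only the first two are shown here).
# id_map = {'1043': '100086', '6191': '1000002'}  # ...and so on for the other pairs
# tom['observateur'] = tom['observateur'].map(
#     lambda s: '&'.join(id_map.get(tok, tok) for tok in s.split('&')))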

df.sort_index(inplace=True)
tom.sort_index(inplace=True)
df.loc[df.index.isin(tom.index), 'observateur'] = tom['observateur']
df.reset_index(inplace=True)
tmp = df.copy()
tmp = tmp[['id_obs', 'observateur']]
update_data(tmp, con, sch='saisie', tab='saisie_observation')


# Check that every user referenced in suivi_saisie_observation actually exists.
from datetime import datetime as dt

df = pd.read_sql_table(
    table_name='suivi_saisie_observation',
    con=con,
    schema='saisie',
)
df.sort_values('date_operation', ascending=False, inplace=True)
x = '2021-05-25 22:00:00'
y = '2021-05-21 12:30:00'
x = dt.strptime(x, '%Y-%m-%d %H:%M:%S')
y = dt.strptime(y, '%Y-%m-%d %H:%M:%S')

ddf = df[(df.date_operation < x) & (df.date_operation > y)]
ddf = ddf[['operation', 'utilisateur', 'id_obs', 'observateur', 'validateur', 'numerisateur']]

# Get all person ids referenced as observateur, validateur or numerisateur.
tmp_obs = [i.split('&') for i in ddf['observateur'].dropna().unique()]
lst_pers_obs = [item for sublist in tmp_obs for item in sublist]
tmp_val = [i.split('&') for i in ddf['validateur'].dropna().unique().astype(int).astype(str)]
lst_pers_val = [item for sublist in tmp_val for item in sublist]
tmp_num = [i.split('&') for i in ddf['numerisateur'].dropna().unique().astype(str)]
lst_pers_num = [item for sublist in tmp_num for item in sublist]
lst_pers = lst_pers_obs + lst_pers_val + lst_pers_num
df_pers = pd.DataFrame(lst_pers, columns=['id_pers']).drop_duplicates()

# Check that each person id exists in md.personne.
pers_not_exist = []
for i in df_pers.id_pers:
    sql = 'SELECT id_personne,prenom, nom, id_structure FROM md.personne WHERE id_personne = %s;' % i
    res = pd.read_sql(
        sql=sql,
        con=con
    )
    if res.empty:
        print(i)
        pers_not_exist += [i]
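
# Single-query alternative (sketch), assuming every value in df_pers.id_pers is a
# numeric id string:
# existing = pd.read_sql(
#     'SELECT id_personne FROM md.personne WHERE id_personne IN (%s);' % ','.join(df_pers.id_pers),
#     con=con)
# pers_not_exist = df_pers[~df_pers.id_pers.astype(int).isin(existing.id_personne)].id_pers.tolist()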

# Operations logged with an unknown user in the time window: reassign them.
sql = '''SELECT * FROM saisie.suivi_saisie_observation
         WHERE utilisateur = 'inconnu'
         AND date_operation < '2021-05-25 22:00:00'
         AND date_operation > '2021-05-21 12:30:00';'''
pd.read_sql(
    sql=sql,
    con=con
)
sql = '''UPDATE saisie.suivi_saisie_observation SET utilisateur = 'colas.geier@cen-isere.org'
         WHERE utilisateur = 'inconnu'
         AND date_operation < '2021-05-25 22:00:00'
         AND date_operation > '2021-05-21 12:30:00';'''
try:
    con.execute(sql)
    # con.execute(text(sql))
    print('''
    Update OK !''')
except Exception as exept:
    print(exept)


# Repoint observations from the replaced person id to the kept one, in both
# saisie_observation and suivi_saisie_observation.
rep = '8044'
keep = '1000014'

sql_check = '''SELECT * FROM saisie.saisie_observation WHERE observateur like '%{replace}%' OR validateur = {replace};'''.format(
    replace=rep
)
sql_check = '''SELECT * FROM saisie.saisie_observation WHERE observateur = '{replace}' OR validateur = {replace};'''.format(
    replace=rep
)
sql_updt = '''
UPDATE saisie.saisie_observation SET observateur = '{keep}' WHERE observateur = '{replace}';
UPDATE saisie.saisie_observation SET numerisateur = {keep} WHERE numerisateur = {replace};
'''.format(
    keep=keep, replace=rep
)
# sql = '''UPDATE saisie.saisie_observation SET validateur = 100077
#          WHERE validateur = 1110;'''
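
# Parameterized variant (sketch): let the driver quote the values instead of
# str.format (same statement as the first line of sql_updt above).
# con.execute(
#     text("UPDATE saisie.saisie_observation SET observateur = :keep WHERE observateur = :rep"),
#     {'keep': keep, 'rep': rep})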

test = pd.read_sql(
    sql=sql_check,
    con=con
)[['observateur', 'validateur']]
if test.empty:
    print(''' OK for %s in table saisie''' % nom)
else:
    try:
        con.execute(sql_updt)
        # con.execute(text(sql))
        print('''
        Update OK !''')
    except Exception as exept:
        print(exept)

sql_check2 = '''SELECT * FROM saisie.suivi_saisie_observation WHERE observateur = '{replace}' OR validateur = '{replace}';'''.format(
    replace=rep
)
sql_updt2 = '''
UPDATE saisie.suivi_saisie_observation SET observateur = '{keep}' WHERE observateur = '{replace}';
UPDATE saisie.suivi_saisie_observation SET numerisateur = {keep} WHERE numerisateur = {replace};
'''.format(
    keep=keep, replace=rep
)
test2 = pd.read_sql(
    sql=sql_check2,
    con=con
)[['observateur', 'validateur']]
if test2.empty:
    print(''' OK for %s in table suivi_saisie_observation''' % nom)
else:
    try:
        con.execute(sql_updt2)
        # con.execute(text(sql))
        print('''
        Update OK !''')
    except Exception as exept:
        print(exept)


# Final check: no observation should still reference the replaced id.
sql = '''SELECT * FROM saisie.saisie_observation WHERE observateur = '{replace}' OR validateur = {replace};'''.format(
    replace=rep
)
# sql = 'SELECT nom, prenom FROM md.personne WHERE id_personne= 6191;'
test = pd.read_sql(
    sql=sql,
    con=con
)[['observateur', 'validateur']]
if test.empty:
    print(''' OK for %s''' % nom)
else:
    print('ERROR !!!!!!!!!')
    # break