Python_scripts/1_SICEN/import_fauneisereTOsicen.py

405 lines
13 KiB
Python

#!/usr/bin/env python3
# -*- coding: UTF-8 -*-.
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL
from datetime import datetime as dt
import pandas as pd
import geopandas as gpd
nom_numerateur = 'GEIER'
pnom_numerator = 'Colas'
today = dt.now().date().isoformat()
# Parametres bdd
user = 'cen_admin'
pwd = '#CEN38@venir'
adr = '91.134.194.221'
port = '5432'
base = 'sicen2'
url = URL.create('postgresql+psycopg2',
username=user,
password=pwd,
host=adr,
database=base,
)
con = create_engine(url)
nom_numerateur = nom_numerateur.upper()
pnom_numerator = pnom_numerator[0].upper() + pnom_numerator[1:]
query_get_numerator = """
SELECT * FROM md.personne
WHERE nom = '%s'
AND prenom = '%s'
AND id_structure = 1
""" % (nom_numerateur,pnom_numerator)
numerateur = pd.read_sql_query(
sql = query_get_numerator,
con = con
)
create_view='''
DROP VIEW import.test_import_faune_isere;
# CREATE OR REPLACE VIEW import.test_import_faune_isere AS
# SELECT DISTINCT
# *,
# st_setsrid(st_makepoint(coord_lon_l93, coord_lat_l93),2154)::geometry(POINT,2154) AS geom
# FROM import.import_data_fauneisere;
'''
with con.begin() as cnx:
cnx.execute(create_view)
select_import = """
SELECT * FROM import.import_data_fauneisere;
"""
data_import = pd.read_sql_query(
sql = select_import,
con = con)
verif_taxref = '''
SELECT CASE WHEN b.cdnom_taxref IS NULL THEN bb.cd_nom
ELSE b.cdnom_taxref END cdnom_taxref,
b.cdnom_taxref,
CASE WHEN c.cd_nom IS NULL THEN cc.cd_nom
ELSE c.cd_nom END cd_nom,
--c.cd_nom,
CASE WHEN c.cd_ref IS NULL THEN cc.cd_ref
ELSE c.cd_ref END cd_ref,
--c.cd_ref,
a.id_sighting, a.id_species, a.name_species, a.id_observer, a.latin_species,
a.taxonomy_name, a.family_name, a.sys_order, a.date, a.date_day, a.date_month, a.date_year,
a.date_jday, a.date_pentade, a.date_decade, a.date_week, a.time_start, a.time_start_hour, a.time_start_min, a.time_stop, a.time_stop_hour, a.time_stop_min,
a.full_form, a.timing, a.id_place, a.place, a.municipality, a.county, a.country, a.insee, a.coord_lon_liie, a.coord_lat_liie, a.coord_lon_l93, a.coord_lat_l93,
a.coord_lat, a.coord_lon, a.coord_lat_dms, a.coord_lon_dms, a.coord_f, a.coord_e, a.coord_n, a."precision", a.altitude, a.grid_name,
a.estimation_code, a.total_count,
a.detail, a.atlas_code,
a.hidden, a.admin_hidden, a.behaviour,
--a.committee_chn, a.committee_chr,
a.search_export_species_sheet_dir_oiseaux, a.search_export_species_sheet_protection_nat, a.search_export_species_sheet_dir_habitat,
a.search_export_species_sheet_uicn, a.search_export_species_sheet_lr_nat, a.search_export_species_sheet_znieff, a.search_export_species_sheet_id_fnat,
a.surname, a.name, a.search_export_entity_short_name, a.search_export_entity_full_name, a.tra_surname, a.tra_name, a.id_form,
a.search_export_entity_tra_full_name, a.second_hand, a.comment, a.private_comment, a.daily_text_comment_rem, a.insert_date, a.update_date, a.protocol, a.has_death_info
FROM import.import_data_fauneisere a
LEFT JOIN (ref_lpo.taxons_faune_isere_taxref b
JOIN inpn.taxref c ON c.cd_nom::integer = b.cdnom_taxref)
ON a.latin_species = b.latin_species
LEFT JOIN (inpn.taxons_isere_absents_taxref bb
JOIN inpn.taxref cc ON cc.cd_nom::integer = bb.cd_nom)
ON a.id_species = bb.id_taxon
;
'''
df_verif = pd.read_sql_query(
sql = verif_taxref,
con = con)
data_import[~data_import.id_species.isin(df_verif.id_species)] \
[['name_species','latin_species','id_species']].drop_duplicates()
update_structure_name = """
UPDATE import.import_data_fauneisere a
SET search_export_entity_short_name = REPLACE(search_export_entity_short_name, 'CEN38', 'CEN Isère');
"""
with con.begin() as cnx:
cnx.execute(update_structure_name)
create_table_pers = """
DROP TABLE IF EXISTS md.temp_import_personne;
CREATE TABLE md.temp_import_personne AS
WITH nom_complet_identifies AS
(SELECT DISTINCT
ltrim(unnest(string_to_array(a.surname,'&'))) as prenom,
ltrim(unnest(CASE WHEN search_export_entity_short_name IN ('CEN Isère','-') THEN string_to_array(''|| upper(split_part(a.name,' /',1)),'&')
ELSE string_to_array(upper(split_part(a.name,' (',1)),'&') END)) as nom,
CASE WHEN search_export_entity_short_name ILIKE '-' THEN 'CEN Isère'
ELSE search_export_entity_short_name END as structure,
a.name || ' ' || a.surname as full_name
FROM import.import_data_fauneisere a
ORDER BY full_name),
personne_sicen AS
(SELECT a.id_personne, prenom, nom, nom || ' ' || prenom as nom_complet, role, specialite,
a.id_structure, b.nom_structure
FROM md.personne a
JOIN md.structure b ON a.id_structure = b.id_structure
--WHERE personne.id_personne BETWEEN 100000 AND 999998
)
SELECT DISTINCT
CASE WHEN b.id_personne IS NULL THEN nextval('md.personne_id_personne_hors_cen38_seq'::regclass)
ELSE b.id_personne END as id_personne,
b.id_personne id_personne_exist,
'Observateur Faune-Isère' as remarque,
a.prenom as prenom,
a.nom as nom,
a.full_name as nom_complet,
b.nom_structure,
a.structure,
CASE WHEN a.structure ILIKE 'CEN Isère' THEN 1
ELSE 1 END AS id_structure
FROM nom_complet_identifies a
LEFT JOIN personne_sicen b ON lower(a.nom || ' ' || a.prenom) = lower(b.nom_complet) AND b.nom_structure = a.structure
;
"""
with con.begin() as cnx:
cnx.execute(create_table_pers)
select_persTOinsert = """
SELECT DISTINCT
a.id_personne,
a.remarque,
a.prenom as prenom,
a.nom as nom, --full_name
NULL as email,
'observ'::md.enum_role,
NULL::md.enum_specialite as specialite,
0 as mot_de_passe,
{id_creator} as createur,
'Mme / M.'::md.enum_titre as titre,
now()::date as date_maj,
a.id_structure
FROM md.temp_import_personne a
WHERE NOT EXISTS (SELECT DISTINCT id_personne FROM md.personne)
;
""".format(id_creator = numerateur.id_personne.item())
df_pers_insert = pd.read_sql_query(
sql = select_persTOinsert,
con = con)
if not df_pers_insert.empty:
raise Exception("DEV en cours ...")
insert_pers = """
INSERT INTO md.personne(
id_personne,
remarque,
prenom,
nom,
email,
role,
specialite,
mot_de_passe,
createur,
titre,
date_maj,
id_structure)
""" + select_persTOinsert
with con.begin() as cnx:
cnx.execute(insert_pers)
else:
pass
select_pers = """
WITH observateurs_multiples AS
(SELECT DISTINCT
string_agg(a.id_personne::text,'&') as id_personne,
--a.remarque,
--a.prenom as prenom,
--a.nom as nom,
a.nom_complet,
a.id_structure,
s.nom_structure
FROM md.temp_import_personne a
LEFT JOIN md.structure s ON a.id_structure = s.id_structure
GROUP BY a.nom_complet, a.id_structure,s.nom_structure)
"""
select_dataTOinsert = """
SELECT DISTINCT
--a.id_species,
--a.name_species,
--a.latin_species,
--a.protegee,
--a.embr,
--a.classe1,
--a.classe2,
a.date::date,
CASE WHEN a.timing <> '00:00:00' THEN a.timing::time
ELSE NULL END as heure_obs,
--a.date_year,
a.time_start,
a.time_stop,
CASE WHEN t.regne IS NULL THEN tt.regne
ELSE t.regne END regne,
CASE WHEN t.phylum IS NULL THEN tt.phylum
ELSE t.phylum END phylum,
CASE WHEN t.classe IS NULL THEN tt.classe
ELSE t.classe END classe,
CASE WHEN t.ordre IS NULL THEN tt.ordre
ELSE t.ordre END ordre,
CASE WHEN t.famille IS NULL THEN tt.famille
ELSE t.famille END famille,
CASE WHEN t.nom_valide IS NULL THEN tt.nom_valide
ELSE t.nom_valide END nom_valide,
CASE WHEN t.nom_vern IS NULL THEN tt.nom_vern
ELSE t.nom_vern END nom_vern,
CASE WHEN t.nom_complet IS NULL THEN tt.nom_complet
ELSE t.nom_complet END nom_complet,
CASE WHEN t.cd_nom IS NULL THEN tt.cd_nom
ELSE t.cd_nom END cd_nom,
--t.cd_nom cd_nom_2,
--b.cdnom_taxref,
CASE WHEN a.estimation_code IS NULL AND (a.total_count IS NOT NULL OR a.total_count > 0) THEN a.total_count
WHEN a.estimation_code LIKE 'x' THEN '1'
ELSE NULL END AS effectif,
CASE WHEN a.estimation_code LIKE '>' AND (a.total_count IS NOT NULL OR a.total_count > 0) THEN a.total_count
WHEN a.estimation_code LIKE '~' AND (a.total_count IS NOT NULL OR a.total_count > 0) THEN a.total_count
ELSE NULL END AS effectif_min,
CASE WHEN a.estimation_code LIKE '~' AND (a.total_count IS NOT NULL OR a.total_count > 0) THEN (a.total_count + 10)
ELSE NULL END AS effectif_max,
a.coord_lon_l93 as longitude,
a.coord_lat_l93 as latitude,
a.place as localisation,
o.id_personne as observateur,
/*a.surname,
a.name,
a.entity_short_name,
a.entity_full_name,
a.tra_surname,
a.tra_name,
a.tra_full_name,*/
{id_numerateur} as numerisateur,
o.id_structure,
concat_ws(' | ',
CASE WHEN a.detail IS NOT NULL THEN 'Détails : '|| a.detail ELSE NULL END,
--CASE WHEN a.has_death_info ILIKE 'oui' THEN 'Mortalité' ELSE NULL END,
CASE WHEN a.behaviour IS NOT NULL THEN 'Comportement : '|| a.behaviour ELSE NULL END,
CASE WHEN a.atlas_code IS NOT NULL THEN 'Code atlas : '|| a.atlas_code ELSE NULL END,
CASE WHEN a.comment IS NOT NULL THEN 'Commentaires : '|| a.comment ELSE NULL END
) as remarque_obs,
--a.id_place,
--a.municipality,
--a.county,
--a.country,
a.insee as code_insee,
TRUE as diffusable,
--CASE WHEN a.hidden ILIKE 'oui' THEN TRUE ELSE FALSE END as diffusable,
CASE WHEN lower(a."precision") ILIKE 'lieu-dit' THEN 'lieu-dit'
WHEN lower(a."precision") ILIKE 'localisation précise' THEN '0 à 10m'
ELSE NULL END::saisie.enum_precision AS "precision",
--a.grid_name,
--a.estimation_code,
'validée'::saisie.enum_statut_validation as statut_validation,
99 as id_etude, -- Echange de données (partenaires)
3 as id_protocole, -- Observation aléatoire
a.altitude as elevation,
--a.hidden,
--a.admin_hidden,
--a.committee_chn,
--a.committee_chr,
--a.second_hand,
--a.private_comment,
--a.insert_date,
--a.update_date,
--a.project_code,
st_setsrid(st_makepoint(coord_lon_l93, coord_lat_l93),2154)::geometry(POINT,2154) AS geometrie,
CASE WHEN (a.total_count IS NULL OR a.total_count = 0) THEN TRUE ELSE FALSE END AS obs_null,
8 as id_lot, -- Code du lot de données : Import Faune-Isère (exporté par LPO)
a.id_sighting as id_origine
FROM import.import_data_fauneisere a
LEFT JOIN (ref_lpo.taxons_faune_isere_taxref b
JOIN inpn.taxref t ON t.cd_nom::integer = b.cdnom_taxref)
ON a.latin_species = b.latin_species
LEFT JOIN (inpn.taxons_isere_absents_taxref bb
JOIN inpn.taxref tt ON tt.cd_nom::integer = bb.cd_nom)
ON a.id_species = bb.id_taxon
JOIN observateurs_multiples o ON lower(a.name || ' ' || a.surname) = lower(o.nom_complet)
--LEFT JOIN saisie.saisie_observation s
--ON ((a.id_sighting::text = s.id_origine) AND (id_lot IN (7.8)))
--AND s.id_origine IS NULL
;
""".format(id_numerateur=numerateur.id_personne.item())
df_data_insert = gpd.read_postgis(
sql = text(select_pers+select_dataTOinsert),
con = con,
geom_col='geometrie')
if df_data_insert.shape[0] == data_import.shape[0]:
insert_data = """
INSERT INTO saisie.saisie_observation(
--id_obs,
date_obs,
--date_debut_obs,
--date_fin_obs,
heure_obs,
--date_textuelle,
regne,
phylum,
classe,
ordre,
famille,
nom_valide,
nom_vern,
nom_complet,
cd_nom,
--effectif_textuel,
effectif,
effectif_min,
effectif_max,
--type_effectif,
--phenologie,
--id_waypoint,
longitude,
latitude,
localisation,
observateur,
numerisateur,
--validateur,
structure,
remarque_obs,
code_insee,
--id_lieu_dit,
diffusable,
"precision",
statut_validation,
id_etude,
id_protocole,
--url_photo,
--commentaire_photo,
--decision_validation,
--determination,
elevation,
geometrie,
--qualification,
--reprostatut,
obs_null,
--uuid,
id_lot,
id_origine)
"""
with con.begin() as cnx:
cnx.execute(
text(select_pers+
insert_data+
select_dataTOinsert)
)
update_structure_name = """
UPDATE saisie.suivi_saisie_observation a
SET utilisateur = REPLACE(utilisateur, 'inconnu', '{email_numerator}')
WHERE date_operation > '{today}'
;
""".format(
email_numerator=numerateur.email.item(),
today = today
)
drop_temp_table = """
DROP TABLE IF EXISTS md.temp_import_personne;
"""
with con.begin() as cnx:
cnx.execute(update_structure_name)
cnx.execute(drop_temp_table)