From 825a87f8842367b9fcac99713ad5c82a8f87cd83 Mon Sep 17 00:00:00 2001
From: Colas Geier
Date: Wed, 22 Nov 2023 10:33:20 +0100
Subject: [PATCH] [CG] : init repository - load all files

---
 0_FONCIER/CADASTRE/agregg_cadastre.py | 47 +
 0_FONCIER/TdB_FEDE/__init__.py | 31 +
 0_FONCIER/TdB_FEDE/parcelles_TdB_fede .py | 732 +++++
 0_FONCIER/TdB_FEDE/parcelles_TdB_fede2.py | 373 +++
 0_FONCIER/TdB_FEDE/sites_TdB_fede.py | 389 +++
 0_FONCIER/add_c_anim_foncier_v2.py | 91 +
 0_FONCIER/config_insert_cadastre.py | 118 +
 0_FONCIER/create_c_anim_foncier_v2.py | 187 ++
 0_FONCIER/create_foreign_zh.sql | 126 +
 0_FONCIER/cutfantoir_bydep.py | 28 +
 0_FONCIER/delete_foncier_acte.py | 15 +
 0_FONCIER/extract_cadastre.py | 117 +
 0_FONCIER/foncier_call_fonctions.py | 136 +
 0_FONCIER/foncier_insert_administratif.py | 437 +++
 0_FONCIER/foncier_insert_administratif_V2.py | 573 ++++
 0_FONCIER/foncier_insert_cadastre.py | 770 +++++
 0_FONCIER/foncier_insert_cadastre_V2.py | 613 ++++
 0_FONCIER/foncier_insert_cadastre_V3.py | 540 ++++
 0_FONCIER/foncier_insert_site.py | 221 ++
 0_FONCIER/get_infos_parcelle.py | 18 +
 0_FONCIER/grant_table.py | 59 +
 0_FONCIER/remove_parcelles_site.py | 39 +
 11_CALC_RHOMEO/insert_bdtopo3.py | 172 ++
 11_CALC_RHOMEO/insert_bvmdo.py | 40 +
 11_CALC_RHOMEO/run_indicRhomeo.py | 366 +++
 .../classification_group.py | 322 +++
 1_SICEN/MIGRATION GEONATURE/sicen_to_gn.py | 41 +
 1_SICEN/SERENA_siege/recovery_geo.py | 50 +
 1_SICEN/SERENA_siege/recovery_relv.py | 190 ++
 1_SICEN/SERENA_siege/recovery_rhomeo.py | 464 +++
 1_SICEN/extract_chiro.py | 39 +
 1_SICEN/extract_obs.py | 69 +
 1_SICEN/extract_obs_bysites_toGeonature.py | 88 +
 1_SICEN/import_dataTOsicenIMPORT.py | 80 +
 1_SICEN/import_donneesaisieTOsicen.py | 332 +++
 .../import_fauneisereTOsicen (biolovision).py | 470 ++++
 1_SICEN/import_fauneisereTOsicen.py | 404 +++
 1_SICEN/import_serena_to_sicen.py | 499 ++++
 1_SICEN/refresh_vm.py | 12 +
 1_SICEN/sicen2_clean.py | 449 +++
 1_SICEN/sicen2_extract.py | 96 +
 1_SICEN/taxonAbsentTaxref_TO_taxref.py | 38 +
 1_SICEN/update_codeInsee.py | 91 +
 1_SICEN/update_dataONsicen.py | 63 +
 1_SICEN/update_useroperator_suivisaisie.py | 43 +
 1_SICEN/v_saisie_.py | 273 ++
 1_SICEN/v_synthese[pole].py | 211 ++
 2_MEDWET/0_BROUILLONS/medwet.py | 87 +
 2_MEDWET/0_BROUILLONS/recup_zh.py | 2256 +++++++++++++++
 2_MEDWET/get_medwet.py | 71 +
 2_MEDWET/get_zh_cen.py | 511 ++++
 2_MEDWET/recup_zh_from_medwet.py | 939 +++++++
 3_AZALEE/RHEZO-TUF/identification_tufiere.py | 31 +
 3_AZALEE/azaleeTOgeonature.py | 1157 ++++++++
 3_AZALEE/create_view.py | 465 ++++
 3_AZALEE/create_view_pers.py | 46 +
 3_AZALEE/create_view_ps.py | 513 ++++
 3_AZALEE/create_view_ref.py | 69 +
 3_AZALEE/create_view_site.py | 76 +
 3_AZALEE/create_view_zh.py | 567 ++++
 3_AZALEE/create_view_zh2.py | 592 ++++
 3_AZALEE/del_psduplicated.py | 187 ++
 3_AZALEE/get_ps_byhab.py | 67 +
 3_AZALEE/import_habCBNA_bdcen38.py | 172 ++
 3_AZALEE/insert_ps.py | 1479 ++++++++++
 3_AZALEE/insert_zh.py | 1031 +++++++
 3_AZALEE/recup_CBNA_habPS.py | 355 +++
 3_AZALEE/recup_ps.py | 1480 ++++++++++
 3_AZALEE/refHydro_to_db.py | 118 +
 3_AZALEE/tmp/correct&maj_idsite_ps.py | 116 +
 3_AZALEE/tmp/maj_tmpv.date_geom | 0
 3_AZALEE/tmp/manip_invPS_CDIsère2016.py | 146 +
 .../manip_invPS_PS_AGGREGATION_NB_AG_2022.py | 65 +
 3_AZALEE/tmp/manip_invPS_platière.py | 65 +
 3_AZALEE/tmp/test.py | 14 +
 3_AZALEE/tmp/zh_plu_metro.py | 1025 +++++++
 3_AZALEE/update_geomcover.py | 173 ++
 3_AZALEE/update_psInf1200.py | 63 +
 4_CARMEN/send_view_to_carmen.py | 63 +
 5_GEONATURE/GN_ZH/MEDWET2Geonat.py | 1337 +++++++++
 5_GEONATURE/GN_ZH/Notes gn_ZH.md | 19 +
 5_GEONATURE/GN_ZH/correct_geom.py | 19 +
 5_GEONATURE/GN_ZH/tools/__init__.py | 1 +
 5_GEONATURE/GN_ZH/tools/pr_zh.py | 263 ++
 5_GEONATURE/GN_ZH/tools/source_zone.py | 222 ++
 5_GEONATURE/GN_ZH/tools/zh2gn.py | 31 +
 .../MIGRATION/PLATIERE/reproject_data.py | 148 +
 5_GEONATURE/MONITORINGS/get_data.py | 103 +
 5_GEONATURE/create_listTaxon.py | 131 +
 5_GEONATURE/crsp_taxref_old.py | 29 +
 5_GEONATURE/get_refNomencalture.py | 99 +
 5_GEONATURE/insert_lareas.py | 177 ++
 5_GEONATURE/insert_utilisateurs.py | 76 +
 5_GEONATURE/pivot_bdc_status.py | 78 +
 5_GEONATURE/pivot_bdc_status_v2.py | 78 +
 5_GEONATURE/source_zone.py | 222 ++
 6_SQL/foreign_table.py | 121 +
 6_SQL/manage_user.py | 100 +
 6_SQL/memo.py | 13 +
 6_SQL/trigger.py | 50 +
 6_SQL/vm.py | 22 +
 7_LOGEPROJ/funcs_macro (pyxll).py | 7 +
 7_LOGEPROJ/funcs_macro (xlwings).py | 19 +
 7_LOGEPROJ/recup_backups.py | 194 ++
 7_LOGEPROJ/restaure_sauvegarde.py | 242 ++
 7_LOGEPROJ/test_xlwings.xlsx | Bin 0 -> 4353 bytes
 8_TRAVAUX/clean_travaux.py | 39 +
 8_TRAVAUX/views_saisie_travaux.py | 52 +
 CVB_bourbre_MC_penible.py | 250 ++
 bdd_ZNIEFF.py | 72 +
 bdd_connect.py | 53 +
 correct_geom.py | 20 +
 createMNT_intersects.py | 83 +
 create_ref_habitat.py | 165 ++
 cross_MNT.py | 88 +
 get_obs_znieff.py | 118 +
 hydro_analyse.py | 191 ++
 intersection.py | 22 +
 maj_site.py | 57 +
 multi_auteurTOrelation_table.py | 73 +
 raster2pgsql_multithread.py | 59 +
 search_col.py | 31 +
 taxref.py | 95 +
 tmp_save/README.md | 58 +
 tmp_save/agreg_zone.py | 31 +
 tmp_save/pgszh_Belledo.py | 2475 +++++++++++++++++
 tmp_save/pgszh_SudGres.py | 2374 ++++++++++++++++
 tutu.py | 8 +
 update_to_sql.py | 76 +
 update_zonage_site.py | 66 +
 130 files changed, 34918 insertions(+)
 create mode 100644 0_FONCIER/CADASTRE/agregg_cadastre.py
 create mode 100644 0_FONCIER/TdB_FEDE/__init__.py
 create mode 100644 0_FONCIER/TdB_FEDE/parcelles_TdB_fede .py
 create mode 100644 0_FONCIER/TdB_FEDE/parcelles_TdB_fede2.py
 create mode 100644 0_FONCIER/TdB_FEDE/sites_TdB_fede.py
 create mode 100644 0_FONCIER/add_c_anim_foncier_v2.py
 create mode 100644 0_FONCIER/config_insert_cadastre.py
 create mode 100644 0_FONCIER/create_c_anim_foncier_v2.py
 create mode 100644 0_FONCIER/create_foreign_zh.sql
 create mode 100644 0_FONCIER/cutfantoir_bydep.py
 create mode 100644 0_FONCIER/delete_foncier_acte.py
 create mode 100644 0_FONCIER/extract_cadastre.py
 create mode 100755 0_FONCIER/foncier_call_fonctions.py
 create mode 100644 0_FONCIER/foncier_insert_administratif.py
 create mode 100755 0_FONCIER/foncier_insert_administratif_V2.py
 create mode 100755 0_FONCIER/foncier_insert_cadastre.py
 create mode 100755 0_FONCIER/foncier_insert_cadastre_V2.py
 create mode 100644 0_FONCIER/foncier_insert_cadastre_V3.py
 create mode 100755 0_FONCIER/foncier_insert_site.py
 create mode 100644 0_FONCIER/get_infos_parcelle.py
 create mode 100644 0_FONCIER/grant_table.py
 create mode 100644 0_FONCIER/remove_parcelles_site.py
 create mode 100644 11_CALC_RHOMEO/insert_bdtopo3.py
 create mode 100644 11_CALC_RHOMEO/insert_bvmdo.py
 create mode 100644 11_CALC_RHOMEO/run_indicRhomeo.py
 create mode 100644 1_SICEN/MIGRATION GEONATURE/classification_group.py
 create mode 100644 1_SICEN/MIGRATION GEONATURE/sicen_to_gn.py
 create mode 100644 1_SICEN/SERENA_siege/recovery_geo.py
 create mode 100644 1_SICEN/SERENA_siege/recovery_relv.py
 create mode 100644 1_SICEN/SERENA_siege/recovery_rhomeo.py
 create mode 100644 1_SICEN/extract_chiro.py
 create mode 100644 1_SICEN/extract_obs.py
 create mode 100644 1_SICEN/extract_obs_bysites_toGeonature.py
 create mode 100644 1_SICEN/import_dataTOsicenIMPORT.py
 create mode 100644 1_SICEN/import_donneesaisieTOsicen.py
 create mode 100644 1_SICEN/import_fauneisereTOsicen (biolovision).py
 create mode 100644 1_SICEN/import_fauneisereTOsicen.py
 create mode 100644 1_SICEN/import_serena_to_sicen.py
 create mode 100644 1_SICEN/refresh_vm.py
 create mode 100644 1_SICEN/sicen2_clean.py
 create mode 100644 1_SICEN/sicen2_extract.py
 create mode 100644 1_SICEN/taxonAbsentTaxref_TO_taxref.py
 create mode 100644 1_SICEN/update_codeInsee.py
 create mode 100644 1_SICEN/update_dataONsicen.py
 create mode 100644 1_SICEN/update_useroperator_suivisaisie.py
 create mode 100644 1_SICEN/v_saisie_.py
 create mode 100644 1_SICEN/v_synthese[pole].py
 create mode 100644 2_MEDWET/0_BROUILLONS/medwet.py
 create mode 100644 2_MEDWET/0_BROUILLONS/recup_zh.py
 create mode 100644 2_MEDWET/get_medwet.py
 create mode 100644 2_MEDWET/get_zh_cen.py
 create mode 100644 2_MEDWET/recup_zh_from_medwet.py
 create mode 100644 3_AZALEE/RHEZO-TUF/identification_tufiere.py
 create mode 100644 3_AZALEE/azaleeTOgeonature.py
 create mode 100644 3_AZALEE/create_view.py
 create mode 100644 3_AZALEE/create_view_pers.py
 create mode 100644 3_AZALEE/create_view_ps.py
 create mode 100644 3_AZALEE/create_view_ref.py
 create mode 100644 3_AZALEE/create_view_site.py
 create mode 100644 3_AZALEE/create_view_zh.py
 create mode 100644 3_AZALEE/create_view_zh2.py
 create mode 100644 3_AZALEE/del_psduplicated.py
 create mode 100644 3_AZALEE/get_ps_byhab.py
 create mode 100644 3_AZALEE/import_habCBNA_bdcen38.py
 create mode 100644 3_AZALEE/insert_ps.py
 create mode 100644 3_AZALEE/insert_zh.py
 create mode 100644 3_AZALEE/recup_CBNA_habPS.py
 create mode 100644 3_AZALEE/recup_ps.py
 create mode 100644 3_AZALEE/refHydro_to_db.py
 create mode 100644 3_AZALEE/tmp/correct&maj_idsite_ps.py
 create mode 100644 3_AZALEE/tmp/maj_tmpv.date_geom
 create mode 100644 3_AZALEE/tmp/manip_invPS_CDIsère2016.py
 create mode 100644 3_AZALEE/tmp/manip_invPS_PS_AGGREGATION_NB_AG_2022.py
 create mode 100644 3_AZALEE/tmp/manip_invPS_platière.py
 create mode 100644 3_AZALEE/tmp/test.py
 create mode 100644 3_AZALEE/tmp/zh_plu_metro.py
 create mode 100644 3_AZALEE/update_geomcover.py
 create mode 100644 3_AZALEE/update_psInf1200.py
 create mode 100644 4_CARMEN/send_view_to_carmen.py
 create mode 100755 5_GEONATURE/GN_ZH/MEDWET2Geonat.py
 create mode 100644 5_GEONATURE/GN_ZH/Notes gn_ZH.md
 create mode 100644 5_GEONATURE/GN_ZH/correct_geom.py
 create mode 100755 5_GEONATURE/GN_ZH/tools/__init__.py
 create mode 100755 5_GEONATURE/GN_ZH/tools/pr_zh.py
 create mode 100755 5_GEONATURE/GN_ZH/tools/source_zone.py
 create mode 100755 5_GEONATURE/GN_ZH/tools/zh2gn.py
 create mode 100644 5_GEONATURE/MIGRATION/PLATIERE/reproject_data.py
 create mode 100644 5_GEONATURE/MONITORINGS/get_data.py
 create mode 100644 5_GEONATURE/create_listTaxon.py
 create mode 100644 5_GEONATURE/crsp_taxref_old.py
 create mode 100644 5_GEONATURE/get_refNomencalture.py
 create mode 100644 5_GEONATURE/insert_lareas.py
 create mode 100644 5_GEONATURE/insert_utilisateurs.py
 create mode 100644 5_GEONATURE/pivot_bdc_status.py
 create mode 100644 5_GEONATURE/pivot_bdc_status_v2.py
 create mode 100644 5_GEONATURE/source_zone.py
 create mode 100644 6_SQL/foreign_table.py
 create mode 100644 6_SQL/manage_user.py
 create mode 100644 6_SQL/memo.py
 create mode 100644 6_SQL/trigger.py
 create mode 100644 6_SQL/vm.py
 create mode 100644 7_LOGEPROJ/funcs_macro (pyxll).py
 create mode 100644 7_LOGEPROJ/funcs_macro (xlwings).py
 create mode 100644 7_LOGEPROJ/recup_backups.py
 create mode 100644 7_LOGEPROJ/restaure_sauvegarde.py
 create mode 100644 7_LOGEPROJ/test_xlwings.xlsx
 create mode 100644 8_TRAVAUX/clean_travaux.py
 create mode 100644 8_TRAVAUX/views_saisie_travaux.py
 create mode 100644 CVB_bourbre_MC_penible.py
 create mode 100644 bdd_ZNIEFF.py
 create mode 100644 bdd_connect.py
 create mode 100644 correct_geom.py
 create mode 100644 createMNT_intersects.py
 create mode 100644 create_ref_habitat.py
 create mode 100644 cross_MNT.py
 create mode 100644 get_obs_znieff.py
 create mode 100644 hydro_analyse.py
 create mode 100644 intersection.py
 create mode 100644 maj_site.py
 create mode 100644 multi_auteurTOrelation_table.py
 create mode 100644 raster2pgsql_multithread.py
 create mode 100644 search_col.py
 create mode 100644 taxref.py
 create mode 100644 tmp_save/README.md
 create mode 100644 tmp_save/agreg_zone.py
 create mode 100644 tmp_save/pgszh_Belledo.py
 create mode 100644 tmp_save/pgszh_SudGres.py
 create mode 100644 tutu.py
 create mode 100644 update_to_sql.py
 create mode 100644 update_zonage_site.py

diff --git a/0_FONCIER/CADASTRE/agregg_cadastre.py b/0_FONCIER/CADASTRE/agregg_cadastre.py
new file mode 100644
index 0000000..86aa907
--- /dev/null
+++ b/0_FONCIER/CADASTRE/agregg_cadastre.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+from sqlalchemy.sql import text
+from sqlalchemy.engine import URL
+from sqlalchemy import create_engine
+import pandas as pd
+import geopandas as gpd
+
+
+# pg_dump -h 91.134.194.221 -d cadastre -U cgeier --schema="38_202207" -s > 38_202207.sql
+# psql -h 91.134.194.221 -U cgeier -d cadastre -a -f 38_202207.sql
+
+user = 'cgeier'
+pwd = 'adm1n*bdCen'
+adr = '91.134.194.221'
+base = 'cadastre'
+url = URL.create('postgresql+psycopg2',
+    username=user,
+    password=pwd,
+    host=adr,
+    database=base,
+)
+con_cad = create_engine(url)
+
+
+lst_sch = con_cad.dialect.get_schema_names(con_cad)
+lst_sch = ['07_202207','26_202207','42_202207']  # hardcoded subset overrides the introspected list above
+
+# Copy every table of each departmental schema into the aggregated xx_202207 schema,
+# using to_postgis for spatial tables and to_sql for the rest.
+for s in lst_sch:
+    lst_tab = con_cad.dialect.get_table_names(con_cad,s)
+    for t in lst_tab:
+        lst_col = con_cad.dialect.get_columns(con_cad,t,s)
+        lst_col = [x['name'] for x in lst_col ]
+
+        if 'geom' in lst_col:
+            df = gpd.read_postgis('SELECT * FROM "{sch}".{tab}'.format(sch=s,tab=t),con_cad)
+            if df.empty:
+                continue
+            df.to_postgis(t,con_cad,'xx_202207',if_exists='append',index=False)
+        else :
+            df = pd.read_sql_table(t,con_cad,s)
+            if df.empty:
+                continue
+            df.to_sql(t,con_cad,'xx_202207',if_exists='append',index=False)
+
+        print("{sch}.{tab}".format(sch=s,tab=t))

diff --git a/0_FONCIER/TdB_FEDE/__init__.py b/0_FONCIER/TdB_FEDE/__init__.py
new file mode 100644
index 0000000..1ec2b2c
--- /dev/null
+++ b/0_FONCIER/TdB_FEDE/__init__.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+from os import getlogin
+import pandas as pd
+import geopandas as gpd
+from sqlalchemy.engine import URL
+from sqlalchemy import create_engine
+from pycen import con_bdcen as con, con_fon
+
+
+annee = 2023
+path_source = '/media/{log}/SRV/FICHIERS/OUTILS/BASES DE DONNEES/BILAN_FEDE_CEN/{an}/Docs_SIG_joints{an}/Docs_SIG_joints{an}/'.format(log=getlogin(),an=annee)
+dico_data = 'Dico_DATA_sites_CEN_v%i_v1.xlsx' % annee
+sheet_par = 'parcelles_cen_XX_%i' % annee
+sheet_sit = 'sites_cen_XX_%i' % annee
+
+
+###### Retrieve the CEN (conservatoire) data
+def get_sites_cen():
+    sqlsit = '''SELECT * FROM %s.%s WHERE date_fin is NULL OR date_fin >= '%i-01-01' ;'''%('sites','c_sites_zonages',annee)
+    dfsite = gpd.read_postgis(sql=sqlsit,geom_col='geom',con=con)
+
+    # Isolate all CEN sites (management/assistance/military)
+    return dfsite[
+        ~dfsite.code_site.duplicated()|\
+        (dfsite.code_site.duplicated() & (dfsite.type_zonage == 'ZI'))  # parentheses required: & binds tighter than ==
+    ]
+
+def get_parcelles_cen():
+    return  # stub, not implemented yet
\ No newline at end of file

diff --git a/0_FONCIER/TdB_FEDE/parcelles_TdB_fede .py b/0_FONCIER/TdB_FEDE/parcelles_TdB_fede .py
new file mode 100644
index 0000000..501d24d
--- /dev/null
+++ b/0_FONCIER/TdB_FEDE/parcelles_TdB_fede .py
@@ -0,0 +1,732 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# libraries: pandas geopandas sqlalchemy psycopg2-binary openpyxl
+
+from calendar import c  # appears unused (auto-import artifact)
+from os import getlogin
+import pandas as pd
+import geopandas as gpd
+from sqlalchemy.sql import text
+from sqlalchemy.engine import URL
+from sqlalchemy import create_engine
+from pycen import con_bdcen as con, con_fon
+from functools import reduce
+
+annee = 2022
+
+path_source = '/media/{login}/SRV/FICHIERS/OUTILS/BASES DE DONNEES/BILAN_FEDE_CEN/{an}/Docs_SIG_joints{an}/Docs_SIG_joints{an}/'.format(login=getlogin(),an=annee)
+dico_data = 'Dico_DATA_sites_CEN_v%i_v1.xlsx' % annee
+sheet_par = 'parcelles_cen_XX_%i' % annee
+sheet_sit = 'sites_cen_XX_%i' % annee
+
+lv_cen = pd.read_excel(path_source+dico_data,sheet_name='listes_valeurs (copy&modif)',header=1, usecols='A:D',nrows=25)
+lv_typmaitr = pd.read_excel(path_source+dico_data,sheet_name='listes_valeurs (copy&modif)',header=1, usecols='F:K',nrows=23)
+lv_typmilie = pd.read_excel(path_source+dico_data,sheet_name='listes_valeurs (copy&modif)',header=1, usecols='M:N',nrows=16)
+lv_echelnum = pd.read_excel(path_source+dico_data,sheet_name='listes_valeurs (copy&modif)',header=1, usecols='P:Q',nrows=8)
+lv_typhab = pd.read_excel(path_source+dico_data,sheet_name='listes_valeurs (copy&modif)',header=13, usecols='P:Q',nrows=5)
+lv_sourcegeom = pd.read_excel(path_source+dico_data,sheet_name='listes_valeurs (copy&modif)',header=1, usecols='S:U',nrows=12)
+lv_lienres = pd.read_excel(path_source+dico_data,sheet_name='listes_valeurs (copy&modif)',header=20, usecols='M:N',nrows=4)
+lv_typprop = pd.read_excel(path_source+dico_data,sheet_name='listes_valeurs (copy&modif)',header=45, usecols='A:H',nrows=12)
+lv_codgeol = pd.read_excel(path_source+dico_data,sheet_name='listes_valeurs (copy&modif)',header=36, usecols='M:T',nrows=19)
+lst_cols_parc = pd.read_excel(path_source+dico_data,sheet_name=sheet_par,header=0, usecols='F',nrows=26)\
+    ['nom du champ'].tolist()
+
+
+user = 'postgres'
+pwd = 'postgres'
+adr = '172.17.0.2'
+base = 'postgres'
+url = URL.create('postgresql+psycopg2',
+    username=user,
+    password=pwd,
+    host=adr,
+    database=base,
+)
+con_vm = create_engine(url)
+fadd = '/home/colas/Documents/tmp/FONCIER_FEDE/add_parc.gpkg'
+add = gpd.read_file(fadd)
+
+sql0 = '''SELECT
+t1.geo_parcelle, substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 12) par_id,
+t1.annee, t1.update_dat, t4.ccopre, t4.ccosec, t4.dcntpa, t1.geom, STRING_AGG(t3.gtoper::text,',') gtoper, STRING_AGG(t3.ccogrm::text,',') ccogrm,
+STRING_AGG(CONCAT(t3.gtoper::text||COALESCE('_'||t3.ccogrm::text,'')),',') type_prop, STRING_AGG(t3.ddenom::text,',') ddenom
+FROM "{sch}"."{t1}" t1
+LEFT JOIN ("{sch}".parcelle_info t2
+    LEFT JOIN "{sch}".proprietaire t3 USING (comptecommunal))
+USING (geo_parcelle)
+LEFT JOIN "{sch}".parcelle t4 ON t1.geo_parcelle = t4.parcelle
+'''.format(
+    sch='38_202207',
+    t1='v_geo_parcelle')
+sql1 = '''WHERE substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 12) IN ('%s')
+GROUP BY 1,2,3,4,5,6,7,8'''%"','".join(add.par_id)
+sql = sql0 + sql1
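+# The two substring() calls above rebuild the 14-char cadastral id (par_id)
+# from the 15-char geo_parcelle by dropping its 3rd character (apparently a
+# one-digit "direction" code); e.g. geo_parcelle '380517000AW0032' ->
+# par_id '38517000AW0032' = dept(2)+commune(3)+prefix(3)+section(2)+number(4).
+# (Layout inferred from the queries in this repo, not from upstream docs.)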
+parcx = gpd.read_postgis(text(sql),con=con_vm)
+cols_parcx = parcx.columns
+for col in ['ccopre','ccosec','type_prop','ddenom'] :
+    parcx[col] = parcx[col].str.strip()
+    parcx[col] = parcx[col].replace('',None)
+parcx.loc[parcx.ccopre.isna(),'ccopre'] = parcx.loc[parcx.ccopre.isna(),'par_id'].str[5:8]
+parcx.loc[parcx.ccosec.isna(),'ccosec'] = parcx.loc[parcx.ccosec.isna(),'par_id'].str[8:10]
+parcx.loc[parcx.ccosec.str.len()==1,'ccosec'] = '0'+parcx.loc[parcx.ccosec.str.len()==1,'ccosec']
+parcx.loc[parcx.dcntpa.isna(),'dcntpa'] = parcx.loc[parcx.dcntpa.isna(),'geom'].area.round().astype(int)
+parcx.loc[parcx.gtoper.isna(),['gtoper','ccogrm','type_prop']] = ['2','0','2_0']
+parcx.loc[parcx.par_id=='380740000B0705','ddenom'] = "CONSERVATOIRE D'ESPACES NATURELS ISERE"
+parcx.dcntpa = parcx.dcntpa.astype(int)
+
+###### Retrieve the parcels of the Île de la Platière nature reserve (RN)
+f_plaRN = '/home/colas/Documents/tmp/FONCIER_FEDE/PLATIERE/Maitrise_Foncier_RN.shp'
+f_plaENS = '/home/colas/Documents/tmp/FONCIER_FEDE/PLATIERE/Maitrise_Foncier_ENS.shp'
+RNNP_rn = gpd.read_file(f_plaRN)
+RNNP_ens = gpd.read_file(f_plaENS)
+RNNP_rn.rename_geometry('geom',inplace=True)
+RNNP_ens.rename_geometry('geom',inplace=True)
+RNNP_rn.rename(columns={'Date_sign':'annee_acq','surf_acqui':'surface_ac','surf_parce':'superficie'},inplace=True)
+RNNP_ens.rename(columns={'code_commu':'code_comm','année_acq':'annee_acq','année':'anne'},inplace=True)
+RNNP_rn = RNNP_rn[RNNP_rn.code_comm.str.len()==5]
+RNNP_ens = RNNP_ens[RNNP_ens.code_comm.str.len()==5]
+RNNP_rn['section'] = RNNP_rn.parcelle.str[:2]
+RNNP_ens['section'] = RNNP_ens.parcelle.str[:2]
+RNNP_rn['num_parc'] = RNNP_rn.parcelle.str[2:]
+RNNP_ens['num_parc'] = RNNP_ens.parcelle.str[2:]
+RNNP_rn['num_parc'] = RNNP_rn['num_parc'].str.strip()
+RNNP_ens['num_parc'] = RNNP_ens['num_parc'].str.strip()
+RNNP_rn.loc[RNNP_rn.num_parc.str.len()==1,'num_parc'] = '000' + RNNP_rn.loc[RNNP_rn.num_parc.str.len()==1,'num_parc']
+RNNP_rn.loc[RNNP_rn.num_parc.str.len()==2,'num_parc'] = '00' + RNNP_rn.loc[RNNP_rn.num_parc.str.len()==2,'num_parc']
+RNNP_rn.loc[RNNP_rn.num_parc.str.len()==3,'num_parc'] = '0' + RNNP_rn.loc[RNNP_rn.num_parc.str.len()==3,'num_parc']
+RNNP_ens.loc[RNNP_ens.num_parc.str.len()==1,'num_parc'] = '000' + RNNP_ens.loc[RNNP_ens.num_parc.str.len()==1,'num_parc']
+RNNP_ens.loc[RNNP_ens.num_parc.str.len()==2,'num_parc'] = '00' + RNNP_ens.loc[RNNP_ens.num_parc.str.len()==2,'num_parc']
+RNNP_ens.loc[RNNP_ens.num_parc.str.len()==3,'num_parc'] = '0' + RNNP_ens.loc[RNNP_ens.num_parc.str.len()==3,'num_parc']
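+# The six .str.len() assignments above left-pad num_parc to 4 digits; an
+# equivalent one-liner per frame would be (sketch, same behaviour for
+# lengths 1-3, and a no-op for longer values):
+#   RNNP_rn['num_parc'] = RNNP_rn['num_parc'].str.zfill(4)
+#   RNNP_ens['num_parc'] = RNNP_ens['num_parc'].str.zfill(4)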
+RNNP_rn.loc[RNNP_rn.code_comm.str.startswith('39'),'code_comm'] = '39' + RNNP_rn.loc[RNNP_rn.code_comm.str.startswith('39'),'code_comm'].str[2:]
+RNNP_ens.loc[RNNP_ens.code_comm.str.startswith('39'),'code_comm'] = '39' + RNNP_ens.loc[RNNP_ens.code_comm.str.startswith('39'),'code_comm'].str[2:]
+RNNP_rn['par_id'] = RNNP_rn.code_comm+'000'+RNNP_rn.section+RNNP_rn.num_parc
+RNNP_ens['par_id'] = RNNP_ens.code_comm+'000'+RNNP_ens.section+RNNP_ens.num_parc
+
+lst_par_id_rnnp = [*RNNP_rn.par_id,*RNNP_ens.par_id]
+RNNP = pd.concat([RNNP_rn,RNNP_ens])
+RNNP['surf_parc_maitrise_m2'] = RNNP.surface_ac
+RNNP['pour_part'] = 0
+RNNP.loc[(~RNNP.proportion.isna())&(RNNP.proportion!=100),'pour_part'] = 1 # filled in later, cf. dfgere
+
+frnnp_tofede = '/home/colas/Documents/tmp/FONCIER_FEDE/PLATIERE/parcelles_RNNP_tofede.gpkg'
+rnnp_tofede = gpd.read_file(frnnp_tofede,layer='parcelles_RNNP_tofede')
+rnnp_tofede.rename_geometry('geom', inplace=True)
+rnnp_tofede['id_site_cen_parc'] = 0
+rnnp_tofede['id_site_fcen_parc'] = None # OK
+rnnp_tofede['insee_dep'] = rnnp_tofede.par_id.str[:2]
+rnnp_tofede['insee_com'] = rnnp_tofede.par_id.str[2:5]
+rnnp_tofede['num_section'] = rnnp_tofede.ccosec
+rnnp_tofede['num_parcelle'] = rnnp_tofede.par_id
+# rnnp_tofede['code_mfu1'] = None # filled in later, cf. dfgere
+# rnnp_tofede['code_mfu2'] = None # filled in later, cf. dfgere
+# rnnp_tofede['type_prop'] = None # filled in later, cf. parcb
+# rnnp_tofede['mesure_compens'] = None # filled in later, cf. dfgere
+rnnp_tofede['surf_ore_m2'] = 0
+rnnp_tofede['date_debut_ore'] = None
+rnnp_tofede['date_fin_ore'] = None
+rnnp_tofede['doc_foncier'] = 0
+rnnp_tofede['source_doc_foncier'] = 0
+rnnp_tofede['parc_gestion_rnx'] = 1  # RNNP parcels are all managed within the nature reserve
+rnnp_tofede['surf_parc_maitrise_m2'] = rnnp_tofede.dcntpa
+rnnp_tofede['source_geom_parc_nature'] = 2
+rnnp_tofede['source_geom_parc_annee'] = None # filled in later, cf. parcb
+rnnp_tofede['echelle_num_parc'] = 0
+rnnp_tofede['source_surf_parc'] = 1 # OK
+rnnp_tofede['date_maj_parcelle'] = None # filled in later, cf. parcb
+rnnp_tofede['bnd'] = None # filled in later, cf. dfgere
+rnnp_tofede['pour_part'] = None # filled in later, cf. dfgere
+rnnp_tofede['domaine_public'] = 0
+rnnp_tofede['id_proprietaire'] = None # filled in later, cf. parcb
+
+
+# RNNP_ens[(~RNNP_ens.num_parcel.isna())&(RNNP_ens.num_parcel!=RNNP_ens.parcelle)]
+
+###### Retrieve the CEN data
+sqlsit = '''SELECT * FROM %s.%s WHERE date_fin is NULL OR date_fin >= '%i-01-01' ;'''%('sites','c_sites_zonages',annee)
+dfsite = gpd.read_postgis(sql=sqlsit,geom_col='geom',con=con)
+
+# Isolate all CEN sites (management/assistance/military)
+dfsite[
+    ~dfsite.code_site.duplicated()|\
+    (dfsite.code_site.duplicated() & (dfsite.type_zonage == 'ZI'))  # NB: result is not assigned
+]
+
+
+rngl = dfsite[(dfsite.code_site=='RNGL')&(dfsite.type_zonage=='ZI')]
+
+sqlfon = 'SELECT * FROM %s.%s'%('foncier','c_animation_fonciere_sites')
+dffonc = gpd.read_postgis(sql=sqlfon,geom_col='geom',con=con)
+
+##### Data identified for the federation (FCEN) reporting
+ffede = '/home/colas/Documents/tmp/FONCIER_FEDE/extract_parc_for_fede.gpkg'
+tofede = gpd.read_file(ffede)
+tofede.rename_geometry('geom', inplace=True)
+cols_tofede = tofede.columns
+cols_join = [*cols_parcx[cols_parcx.isin(cols_tofede)]]
+tofede = pd.concat([tofede,parcx[cols_join]],ignore_index=True)
+tofede.reset_index(drop=True,inplace=True)
+if 'type_prop' in tofede.columns:
+    tofede.drop(columns=['type_prop'], inplace=True)
+# tofede['id_site_cen_parc'] = 0
+tofede['id_site_fcen_parc'] = None # OK
+tofede['insee_dep'] = tofede.par_id.str[:2]
+tofede['insee_com'] = tofede.par_id.str[2:5]
+tofede['num_section'] = tofede.ccosec
+tofede['num_parcelle'] = tofede.par_id
+# tofede['code_mfu1'] = None # filled in later, cf. dfgere
+# tofede['code_mfu2'] = None # filled in later, cf. dfgere
+# tofede['type_prop'] = None # filled in later, cf. parcb
+# tofede['mesure_compens'] = None # filled in later, cf. dfgere
+tofede['surf_ore_m2'] = 0
+tofede['date_debut_ore'] = None
+tofede['date_fin_ore'] = None
+tofede['doc_foncier'] = 0
+tofede['source_doc_foncier'] = 0
+tofede['parc_gestion_rnx'] = 0
+tofede.loc[tofede.geom.intersects(rngl.unary_union),'parc_gestion_rnx'] = 1
+tofede['surf_parc_maitrise_m2'] = tofede.dcntpa
+tofede['source_geom_parc_nature'] = 2
+# tofede['source_geom_parc_annee'] = None # filled in later, cf. parcc
+tofede['echelle_num_parc'] = 0
+tofede['source_surf_parc'] = 1 # OK
+# tofede['date_maj_parcelle'] = None # filled in later, cf. parcc
+# tofede['bnd'] = None # filled in later, cf. dfgere
+# tofede['pour_part'] = None # filled in later, cf. dfgere
+tofede['domaine_public'] = 0
+# tofede['id_proprietaire'] = None # filled in later, cf. parcb
+
+
+
+# c_fede = tofede[['par_id','geom']].copy()
+# c_fede.geom = tofede.representative_point()
+# pb_parcel = dffonc[~dffonc.intersects(c_fede.unary_union)]
+# c_fede.to_file('/home/colas/Documents/tmp/FONCIER_FEDE/centroid_parcelle_for_fede.gpkg', driver="GPKG")
+# pb_parcel.to_file('/home/colas/Documents/tmp/FONCIER_FEDE/polys_NoIntersect_parcelles.gpkg', driver="GPKG")
+
+###### Retrieve the CEN data
+# sqlsit = 'SELECT * FROM %s.%s'%('sites','c_sites_zonages')
+# dfsite = gpd.read_postgis(sql=sqlsit,geom_col='geom',con=con)
+
+sqlfon = 'SELECT * FROM %s.%s'%('foncier','c_animation_fonciere_sites')
+dffonc = gpd.read_postgis(sql=sqlfon,geom_col='geom',con=con)
+# dffonc.drop(columns=[], inplace=True)
+
+#### one geometry becomes two
+# id 7037
+dffonc.loc[dffonc.id==7037, ['code_dep','code_com','section','numero','geom']] = tofede.loc[tofede.par_id=='381000000A2233',['insee_dep','insee_com','num_section','dnupla','geom']].values[0]
+add_p = gpd.GeoDataFrame(
+    [tofede.loc[tofede.par_id=='381000000A2234',['insee_dep','insee_com','num_section','dnupla','geom']].values[0]],
+    columns=['code_dep','code_com','section','numero','geom']
+    )
+oth_cols = dffonc.columns[~dffonc.columns.isin(add_p.columns)].drop('id')
+add_p[[*oth_cols]] = dffonc.loc[dffonc.id==7037, [*oth_cols]].values[0]
+add_p['id'] = 0
+add_p.set_geometry('geom', crs=2154, inplace=True)
+dffonc = pd.concat([dffonc,add_p])
+# id 1932
+dffonc.loc[dffonc.id==1932, ['code_dep','code_com','section','numero','geom']] = tofede.loc[tofede.par_id=='38140000AM0190',['insee_dep','insee_com','num_section','dnupla','geom']].values[0]
+add_p = gpd.GeoDataFrame(
+    [tofede.loc[tofede.par_id=='38140000AM0189',['insee_dep','insee_com','num_section','dnupla','geom']].values[0]],
+    columns=['code_dep','code_com','section','numero','geom']
+    )
+oth_cols = dffonc.columns[~dffonc.columns.isin(add_p.columns)].drop('id')
+add_p[[*oth_cols]] = dffonc.loc[dffonc.id==1932, [*oth_cols]].values[0]
+add_p['id'] = 0
+add_p.set_geometry('geom', crs=2154, inplace=True)
+dffonc = pd.concat([dffonc,add_p])
+####
+
+dropline = dffonc[dffonc.id.isin([7144,7106,7107,7108,7109,7110,6666])].index.values # drop duplicated geometries
+dffonc.drop([*dropline], inplace=True)
+dffonc.loc[dffonc.numero.str.len()==1,'numero'] = '000'+dffonc.loc[dffonc.numero.str.len()==1,'numero']
+dffonc.loc[dffonc.numero.str.len()==2,'numero'] = '00'+dffonc.loc[dffonc.numero.str.len()==2,'numero']
+dffonc.loc[dffonc.numero.str.len()==3,'numero'] = '0'+dffonc.loc[dffonc.numero.str.len()==3,'numero']
+dffonc.loc[dffonc.section.str.len()==1,'section'] = '0'+dffonc.loc[dffonc.section.str.len()==1,'section']
+dffonc.loc[dffonc.id == 7101,['code_dep','code_com','section','numero']] = ['38','037','0D','0130']
+dffonc.loc[dffonc.id == 7102,['code_dep','code_com','section','numero']] = ['38','037','0D','0131']
+dffonc.loc[dffonc.id == 7103,['code_dep','code_com','section','numero']] = ['38','037','0D','0132']
+dffonc.loc[dffonc.id == 7104,['code_dep','code_com','section','numero']] = ['38','037','0D','0133']
+dffonc.loc[dffonc.id == 7105,['code_dep','code_com','section','numero']] = ['38','037','0D','0134']
+dffonc.loc[dffonc.id.isin([
+    61,62,70,69,68,72,67,66,63,64,60,57,56,59,6,7,8,9,65,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,
+    26,27,28,29,30,58,31,32,33,34,35,36,37,38,39,40,41,55,54,42,43,71,44,45,46,47,48,49,50,51,52,53,4,5,
+    1350,1349,1345,1346,1347,1352,1353,1348,1354,1351, # ccopre == 121
+    2433,2447,2449,2432,2448 # ccopre == 274
+    ]),'code_com'] = '479'
+dffonc.loc[dffonc.id.isin([
+    2604,2605,2535,2599,2603,2602,2601,2639,2534,2536,2537,2532,2538,2540,2539,2635,2641,2533,2563,2562,
+    2561,2556,2560,2555,2597,2607,2614,2591,2622,2623,2620,2621,2619,2593,2592,2550,2547,2544,2551,2553,
+    2552,2545,2541,2546,2548,2549,2543,2583,2566,2596,2608,2567,2559,2584,2568,2615,2629,2618,2631,2571,
+    2609,2572,2590,2632,2569,2570,2595,2558,2636,2633,2606,2574,2573,2564,2579,2580,2577,2578,2565,2557,
+    2576,2581,2613,2598,2626,2627,2625,2611,2624,2642,2554,2628,2582,2585,2610,2542,2594,2589,2575,2616,
+    2612,2600,2617,2630,2587,2588,2586,2638,2637,2640,2634
+    ]),'code_com'] = '292'
+dffonc.loc[dffonc.id.isin([7114,7115,7116,7117,7118,7119,7126,7127,7128,7129,7130,7131,7132,7133,7134,7135]),'code_com'] = '064'
+dffonc.loc[dffonc.id.isin([7120,7121,7122,7123,7124,7125]),'code_com'] = '401'
+dffonc.loc[dffonc.id.isin([7136,7137,7138,7139]),['code_com','section']] = ['341','0B']
+dffonc.loc[dffonc.id.isin([7142,7143]),'code_com'] = '052'
+dffonc.loc[dffonc.id.isin([1010288,1041363]),'code_com'] = '517'
+dffonc.loc[dffonc.id.isin([1182503,1182640]),'code_com'] = '147'
+dffonc.loc[dffonc.id.isin([1243657]),'code_com'] = '448'
+dffonc.loc[dffonc.id.isin([1629034,1629035]),'code_com'] = '257'
+dffonc.loc[dffonc.code_dep.isna(),'code_dep'] = '38'
+
+#####
+dffonc.loc[dffonc.id==113, 'numero'] = '0389'
+dffonc.drop(dffonc[dffonc.id==114].index[0],inplace=True)
+dffonc.loc[dffonc.id==151, 'numero'] = '0369'
+dffonc.loc[dffonc.id==591, 'numero'] = '0705' # numero == '0542' is an old label
+
+dffonc.sort_values(['code_dep','code_com','section','numero'],inplace=True)
+dffonc['par_id'] = dffonc.code_dep + dffonc.code_com + '___' + dffonc.section + dffonc.numero
+dffonc['parid_start'] = dffonc.code_dep + dffonc.code_com
+dffonc['parid_end'] = dffonc.section + dffonc.numero
+dffonc.loc[dffonc.id.isin([1634,1638,1606,1607,1608]),'par_id'] = dffonc.loc[dffonc.id.isin([1634,1638,1606,1607,1608]),'par_id'].replace('___','227',regex=True)
+dffonc.loc[dffonc.id.isin([1495,1673,1675]),'par_id'] = dffonc.loc[dffonc.id.isin([1495,1673,1675]),'par_id'].replace('___','000',regex=True)
+# dffonc.loc[dffonc.id.isin([1495,1675]),'par_id']
+
+# Join by parid_start & parid_end
+dfpar_id = tofede[['par_id']].copy()
+dfpar_id['parid_start'] = dfpar_id.par_id.str[:5]
+dfpar_id['parid_end'] = dfpar_id.par_id.str[-6:]
+lst_parid_ok = [1495,1634,1638,1673,1675,1606,1607,1608] # parcels whose full par_id was rebuilt above
+parid_ok = dffonc.loc[dffonc.id.isin(lst_parid_ok)]
+parid_join = pd.merge(
+    dffonc.loc[~dffonc.id.isin(lst_parid_ok)].drop(columns=['par_id']),
+    dfpar_id.loc[~dfpar_id.par_id.isin(parid_ok.par_id)],
+    on = ['parid_start','parid_end'],
+    how='left'
+    )
+parid_join.geom = parid_join.buffer(0)
+parid_ok = pd.concat([ parid_ok, parid_join[~parid_join.par_id.isna()] ])
+parid_ok.drop(columns=['parid_start','parid_end'], inplace=True)
+parid_na = parid_join[parid_join.par_id.isna()]
+
+# Join by centroid
+dfpar_id = tofede.loc[~tofede.par_id.isin(parid_ok.par_id),['par_id','geom']].copy()
+dfpar_id.geom = dfpar_id.representative_point()
+parid_join = gpd.sjoin(parid_na.drop(columns=['par_id']),dfpar_id,op='contains', how='left').drop(columns=['index_right'])
+parid_join_ok = parid_join[ [str(x).startswith(y) and z in str(x) for x,y,z in zip(parid_join['par_id'], parid_join['parid_start'],parid_join['section'])] ].copy()
+# parid_join[ [str(x).endswith(z) for x,y,z in zip(parid_join['par_id'], parid_join['parid_start'],parid_join['parid_end'])] ]
+parid_join_ok.drop(columns=['parid_start','parid_end'], inplace=True)
+
+# Final land-tenure (animation fonciere) layers
+parid_OK = pd.concat([parid_ok,parid_join_ok])
+parid_NA = dffonc[~dffonc.id.isin(parid_OK.id.unique())]
+
+# Retrieve the BND (undivided ownership) info
+sqlmod = 'SELECT * FROM %s'%('animation_fonciere_pour_fcen')
+dfmodf = gpd.read_postgis(sql=sqlmod,geom_col='geom',con=con)
+del dfmodf['id_0']
+
+dffon_OK = pd.merge(
+    parid_OK,
+    dfmodf[['id','Parc_MCE','docs_off_num','Nbr_lots_BND','partiel_conv_ou_acq']],
+    on='id',how='left')
+
+
+#
+lst_prop = ['Acquis par le CEN ou CREN','ACQUIS&CONVENTION', 'PARTIELLEMENT MAITRISE']
+lst_conv = ["CONVENTION D'USAGE",'CONVENTION']
+lst_gere = ['BAIL EMPHYTEOTIQUE', *lst_conv, *lst_prop]
+dfgere = dffon_OK[
+    # (
+    #     (dffon_OK.geom.intersects(dfsite[(dfsite.type_zonage=='ZI')|(dfsite.type_site=='MC')].unary_union))|
+    #     (~dffon_OK.Parc_MCE.isna())|(~dffon_OK.Nbr_lots_BND.isna())|
+    #     (~dffon_OK.partiel_conv_ou_acq.isna())|(~dffon_OK.docs_off_num.isna())
+    # )
+    # &
+    (dffon_OK.maitrise.isin(lst_gere))
+    ].copy()
+dfautr = dffon_OK[~dffon_OK.par_id.isin(dfgere.par_id)].copy()
+# id_site_cen_parc
+dfgere['id_site_cen_parc'] = dfgere.id
+# code_mfu1 & code_mfu2
+dfgere['code_mfu1'] = None
+dfgere['code_mfu2'] = None
+dfgere.loc[dfgere.maitrise.isin(lst_prop),'code_mfu1'] = 'P'
+dfgere.loc[dfgere.maitrise.isin(lst_conv),'code_mfu1'] = 'C'
+dfgere.loc[dfgere.maitrise == 'BAIL EMPHYTEOTIQUE','code_mfu1'] = 'L'
+dfgere.loc[dfgere.maitrise.isin(lst_prop),'code_mfu2'] = 'P1'
+dfgere.loc[dfgere.maitrise.isin(lst_conv),'code_mfu2'] = 'C7'
+dfgere.loc[dfgere.maitrise == 'BAIL EMPHYTEOTIQUE','code_mfu2'] = 'L1'
+# BND
+dfgere['bnd'] = 0
+dfgere.loc[dfgere.indivision=='BND','bnd'] = 1
+tmp = pd.DataFrame(columns=dfgere.columns)
+for i,row in dfgere.loc[~dfgere.Nbr_lots_BND.isna()].iterrows():
+    # if i==0 : tmp = row.to_list()
+    tmp = pd.concat([tmp, pd.DataFrame([row]*(int(row.Nbr_lots_BND)-1))])  # DataFrame.append was removed in pandas 2.x
+dfgere = pd.concat([dfgere,tmp]).sort_index()
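+# The loop above duplicates each BND (undivided ownership) parcel so that it
+# appears once per lot: a parcel with Nbr_lots_BND == n ends up with n rows
+# in total (the original row plus n-1 copies appended via pd.concat).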
+# MC (compensatory measures)
+dfgere['mesure_compens'] = 0
+dfgere.loc[dfgere.Parc_MCE=='OUI','mesure_compens'] = 1
+# pour_part
+dfgere['pour_part'] = 0
+dfgere.loc[dfgere.maitrise=='PARTIELLEMENT MAITRISE','pour_part'] = 1
+
+
+# Military parcels
+dic = {
+    'ccosec':'section',
+}
+dfmili = dfsite[dfsite.type_site=='MILI'].copy()
+sqlmil = "SELECT * FROM %s.%s WHERE ST_Contains('SRID=2154;%s',geom)" % ('cadastre', 'parcelles_38',dfmili.buffer(10).unary_union)
+miliparc = gpd.read_postgis(sqlmil,con=con_fon)
+miliparc['code_dep'] = miliparc.codcom.str[:2]
+miliparc['code_com'] = miliparc.codcom.str[2:]
+dfmili = miliparc[['par_id','geom','ccosec','code_dep','code_com']].copy()
+dfmili.rename(columns=dic, inplace=True)
+dfmili['id_site_cen_parc'] = 0
+dfmili['code_mfu1'] = 'C'
+dfmili['code_mfu2'] = 'C12'
+dfmili['bnd'] = 0
+dfmili['mesure_compens'] = 0
+dfmili['pour_part'] = 0
+
+dfgere = pd.concat([dfgere,dfmili])
+
+
+# from pycen import update_to_sql
+# tmpdffonc = dffonc[['id','nom_propri']]
+# tmpdffonc["nom_propri"] = tmpdffonc["nom_propri"].str.strip()
+# tmpdffonc.loc[tmpdffonc.nom_propri== '',"nom_propri"] = None
+# update_to_sql(df=tmpdffonc, con=con, table_name='c_animation_fonciere_sites', schema_name='foncier', key_name='id', geom_col=None)
+
+#####################
+##### Parcels ##### 2017380474000AN
+# Platière and Grand Lemps are not handled here
+#####################
+
+
+
+# Retrieve info from the bd_cen database (#Foncier)
+# RNNP
+sql = '''SELECT DISTINCT ON (t1.par_id,t1.geom, t1.dcntpa,t1.typprop_id,t1.ccopre,t1.ccosec,t1.dnupla)
+    t1.par_id,t1.geom, t1.dcntpa,t1.typprop_id,t1.ccopre,
+    t1.ccosec,t1.dnupla::text, STRING_AGG(t6.gtoper::text,',') gtoper, STRING_AGG(t6.ccogrm::text,',') ccogrm,
+    STRING_AGG(CONCAT(t6.gtoper::text||COALESCE('_'||t6.ccogrm::text,'')),',') type_prop, STRING_AGG(t6.ddenom::text,',') ddenom
+FROM {sch}."{t1}" t1
+JOIN ({sch}."lots_38" t2
+    JOIN ({sch}."cadastre_38" t3
+        JOIN ({sch}."cptprop_38" t4
+            JOIN ({sch}."r_prop_cptprop_38" t5
+                JOIN {sch}."proprios_38" t6 USING (dnuper))
+            USING (dnupro))
+        USING (dnupro))
+    USING (lot_id))
+USING (par_id)
+WHERE par_id IN ('{par_ids}') GROUP BY 1,2,3,4,5,6'''.format(
+    sch='cadastre',
+    t1='parcelles_38',
+    par_ids="','".join(lst_par_id_rnnp))
+parca1 = gpd.read_postgis(text(sql),con=con_fon)
+parca1.to_file('/home/colas/Documents/tmp/FONCIER_FEDE/PLATIERE/parcelles_RNNP_tofede.gpkg', driver='GPKG')
+
+
+# Retrieve info from the bd_cen database (#Foncier)
+sql = '''SELECT DISTINCT ON (t1.par_id,t1.geom, t1.dcntpa,t1.typprop_id,t1.ccopre,t1.ccosec,t1.dnupla)
+t1.par_id,t1.geom, t1.dcntpa,t1.typprop_id,t1.ccopre,
+t1.ccosec,t1.dnupla::text, STRING_AGG(t6.gtoper::text,',') gtoper, STRING_AGG(t6.ccogrm::text,',') ccogrm,
+STRING_AGG(CONCAT(t6.gtoper::text||COALESCE('_'||t6.ccogrm::text,'')),',') type_prop, STRING_AGG(t6.ddenom::text,',') ddenom
+FROM {sch}."{t1}" t1
+JOIN ({sch}."lots_38" t2
+    JOIN ({sch}."cadastre_38" t3
+        JOIN ({sch}."cptprop_38" t4
+            JOIN ({sch}."r_prop_cptprop_38" t5
+                JOIN {sch}."proprios_38" t6 USING (dnuper))
+            USING (dnupro))
+        USING (dnupro))
+    USING (lot_id))
+USING (par_id)
+WHERE par_id IN ('{par_ids}') GROUP BY 1,2,3,4,5,6'''.format(
+    sch='cadastre',
+    t1='parcelles_38',
+    par_ids="','".join(tofede.par_id))
+parca2 = gpd.read_postgis(text(sql),con=con_fon)
+
+cols_parca2 = parca2.columns
+cols_join = [*cols_parcx[cols_parcx.isin(cols_parca2)]]
+parcb = pd.concat([
+    parca1,
+    parca2[~parca2.par_id.isin(parcx.par_id)],
+    parcx[cols_join]
+])
+# n = 10
+# while n!= 0:
+#     parcb.type_prop = parcb.type_prop.replace(',,',',',regex=True)
+#     n = n-1
+parcb.loc[parcb.type_prop.str.contains('2_0'),['type_prop','gtoper','ccogrm']] = ['2_0_MIX','2','0']
+parcb.loc[parcb.type_prop.isin(['2_7,2_3,','2_7,2_3,2_3,']),['type_prop','gtoper','ccogrm']] = ['2_0_MIX','2','0']
+parcb.loc[parcb.type_prop.str.contains('2_9'),['type_prop','gtoper','ccogrm']] = ['2_9_PUB','2','9']
+parcb.loc[parcb.type_prop.str.contains('2_8'),['type_prop','gtoper','ccogrm']] = ['2_8_PRI','2','8']
+parcb.loc[parcb.type_prop.str.contains('2_7'),['type_prop','gtoper','ccogrm']] = ['2_7_PRI','2','7']
+parcb.loc[parcb.type_prop.str.contains('2_6'),['type_prop','gtoper','ccogrm']] = ['2_6_PRI','2','6']
+parcb.loc[parcb.type_prop.str.contains('2_5'),['type_prop','gtoper','ccogrm']] = ['2_5_PUB','2','5']
+parcb.loc[parcb.type_prop.str.contains('2_4'),['type_prop','gtoper','ccogrm']] = ['2_4_COM','2','4']
+parcb.loc[parcb.type_prop.str.contains('2_3'),['type_prop','gtoper','ccogrm']] = ['2_3_PUB','2','3']
+parcb.loc[parcb.type_prop.str.contains('2_2'),['type_prop','gtoper','ccogrm']] = ['2_2_PUB','2','2']
+parcb.loc[parcb.type_prop.str.contains('2_1'),['type_prop','gtoper','ccogrm']] = ['2_1_PUB','2','1']
+parcb.loc[parcb.type_prop.str.contains('1,1'),['type_prop','gtoper','ccogrm']] = ['1_PRI','1',None]
+parcb.loc[parcb.type_prop == '1',['type_prop','gtoper','ccogrm']] = ['1_PRI','1',None]
+lstterm_cen38 = ["CONSERVATOIRE D'ESPACES NATURELS ISERE","AVENIR CONSERVATOIRE DES ESPACES NATURELS DE L'ISERE","CONSERVATOIRE D'ESPACES NATURELS ISERE AVENIR DIT CEN ISERE"]
+lstterm_cenra = ["CONSERVATOIRE RHONE ALPES DES ESPACES NATURELS","CONSERVATOIRE RH ALP ESPACES NATURELS"]
+parcb.loc[parcb.ddenom.isin([*lstterm_cen38,*lstterm_cenra]),['type_prop','gtoper','ccogrm']] = ['3_CEN','3',None]
+# parcb.loc[parcb.type_prop.str.len()==3,'gtoper'] = parcb[parcb.type_prop.str.len()==3].type_prop.str[0]
+# parcb.loc[parcb.type_prop.str.len()==3,'ccogrm'] = parcb[parcb.type_prop.str.len()==3].type_prop.str[-1]
+parcb.loc[parcb.dnupla.str.len()==1,'dnupla'] = '000' + parcb.loc[parcb.dnupla.str.len()==1,'dnupla']
+parcb.loc[parcb.dnupla.str.len()==2,'dnupla'] = '00' + parcb.loc[parcb.dnupla.str.len()==2,'dnupla']
+parcb.loc[parcb.dnupla.str.len()==3,'dnupla'] = '0' + parcb.loc[parcb.dnupla.str.len()==3,'dnupla']
+parcb['id_proprietaire'] = 0
+parcb.loc[parcb.ddenom.str.contains('|'.join(lstterm_cenra),na=False),'id_proprietaire'] = '28'
+
+
+# Retrieve info from the cadastre VM (#Foncier)
+# sql = '''SELECT geo_parcelle, substring(geo_parcelle from 1 for 2)||substring(geo_parcelle from 4 for 12) par_id, geom
+# FROM "38_202207".geo_parcelle'''
+# 38517000AW0032  # par_id
+# 380517000AW0032 # geo_parcelle
+sql = '''SELECT
+substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 12) par_id,
+t1.annee, t1.update_dat, t1.geom
+FROM "{sch}"."{t1}" t1
+WHERE substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 12) IN ('{par_ids}')
+'''.format(
+    sch='38_202207',
+    t1='v_geo_parcelle',
+    par_ids="','".join(tofede[~tofede.par_id.isin(parcx.par_id)].par_id))
+parcc0 = gpd.read_postgis(text(sql),con=con_vm)
+sql = '''SELECT
+substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 12) par_id,
+t1.annee, t1.update_dat, t1.geom
+FROM "{sch}"."{t1}" t1
+WHERE substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 12) IN ('{par_ids}')
+'''.format(
+    sch='26_202007',
+    t1='v_geo_parcelle',
+    par_ids="','".join(tofede[~tofede.par_id.isin(parcx.par_id)].par_id))
+parcc1 = gpd.read_postgis(text(sql),con=con_vm)
+
+
+
+cols_parcc = parcc0.columns
+cols_join = [*cols_parcx[cols_parcx.isin(cols_parcc)]]
+parcc = pd.concat([parcc0,parcc1,parcx[cols_join]])
+parcc['source_geom_parc_annee'] = parcc.annee
+parcc['date_maj_parcelle'] = parcc.update_dat
+
+l1 = dfgere.columns[dfgere.columns.isin(lst_cols_parc)]
+l2 = tofede.columns[tofede.columns.isin(lst_cols_parc)]
+l3 = parcb.columns[parcb.columns.isin(lst_cols_parc)]
+l4 = parcc.columns[parcc.columns.isin(lst_cols_parc)]
+p1 = dfgere[['par_id',*l1]].copy()
+p2 = tofede[['par_id',*l2,'geom']].copy()
+p3 = parcb[['par_id',*l3]].copy()
+p4 = parcc[['par_id',*l4]].copy()
+dfs = [p1,p2,p3,p4]
+
+df_parcs = reduce(lambda left,right: pd.merge(left,right,on=['par_id'],
+    how='left'), dfs)
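+# reduce() chains left-merges on par_id: p1 (tenure) <- p2 (fede attributes
+# and geometry) <- p3 (owners) <- p4 (cadastre VM dates). Any duplicated
+# par_id on a right-hand frame fans out into duplicated rows, which is why
+# the loop below splits surf_parc_maitrise_m2 evenly across the duplicates.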
+for par_id in df_parcs[df_parcs.par_id.duplicated(keep=False)].par_id.unique():
+    x = df_parcs[df_parcs.par_id==par_id].shape[0]
+    df_parcs.loc[df_parcs.par_id==par_id,'surf_parc_maitrise_m2'] = round(df_parcs.loc[df_parcs.par_id==par_id,'surf_parc_maitrise_m2']/x).astype(int)
+df_parcs.date_maj_parcelle = df_parcs.date_maj_parcelle.astype(str)
+
+gdf_tofede = df_parcs[['id_site_fcen_parc','num_parcelle','date_maj_parcelle','geom']].copy()
+gdf_tofede = gdf_tofede.set_geometry('geom', crs=2154)
+gdf_tofede.rename(columns={
+    'id_site_fcen_parc':'id_fcen',
+    'num_parcelle':'num_parc',
+    'date_maj_parcelle':'date_maj_p'
+},inplace=True)
+gdf_tofede.to_file('/home/colas/Documents/tmp/FONCIER_FEDE/SEND/Parcelles_CEN_38_%i.shp' % annee)
+
+df_tofede = df_parcs[lst_cols_parc].copy()
+df_tofede.loc[df_tofede.date_debut_ore.isna(),'date_debut_ore']  # no-op as written (selection only, result discarded)
+df_tofede.to_csv('/home/colas/Documents/tmp/FONCIER_FEDE/SEND/Parcelles_CEN_38_%i.csv' % annee)
+
+
+
+
+
+
+
+# parcb.loc[(parcb.ddenom.str.contains("AVENIR CONSERVATOIRE DES ESPACES NATURELS DE L'ISERE",case=False))].ddenom.unique()
+# parcb.loc[(parcb.ddenom.str.contains('CONSERVATOIRE|CEN',case=False))&(parcb.ddenom.str.contains('ISERE',case=False))].ddenom.unique()
+# parcb.loc[parcb.gtoper.isin(['1,1','2,1','1,2','1,1,1']),'gtoper'] = '1'
+# parcb.loc[parcb.gtoper.isin(['1,1,1,1','2,1,1,1,1,1,1','2,1,1,1,1,1,1,1,1,1,1,1,1,1,1','2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1']),'gtoper'] = '1'
+# parcb.loc[parcb.gtoper.isin(['2,2']),'gtoper'] = '2'
+# parcb.loc[parcb.gtoper=='1','ccogrm'] = None
+
+parcb[parcb.par_id.duplicated(keep=False)]
+
+dic_col = {
+    'code_dep' : 'insee_dep',
+    'section' : 'num_section',
+    'num_unique_v2' : 'num_parcelle', # ????
+}
+
+dic_parc = {
+    'id':'id_site_cen_parc',
+}
+lst_prop = ['Acquis par le CEN ou CREN','ACQUIS&CONVENTION', 'PARTIELLEMENT MAITRISE']
+lst_conv = ["CONVENTION D'USAGE",'CONVENTION']
+lst_gere = ['BAIL EMPHYTEOTIQUE', *lst_conv, *lst_prop]
+
+# ST_Within variant, immediately overwritten by the ST_Intersects variant below
+sql = '''SELECT DISTINCT
+t1.par_id,t1.geom, t1.dcntpa,t1.typprop_id,t1.ccopre,
+t1.ccosec,t1.dnupla::text, t6.gtoper::text, t6.ccogrm::text
+FROM {sch}."{t1}" t1
+JOIN ({sch}."lots_38" t2
+    JOIN ({sch}."cadastre_38" t3
+        JOIN ({sch}."cptprop_38" t4
+            JOIN ({sch}."r_prop_cptprop_38" t5
+                JOIN {sch}."proprios_38" t6 USING (dnuper))
+            USING (dnupro))
+        USING (dnupro))
+    USING (lot_id))
+USING (par_id)
+WHERE ST_Within(t1.geom, 'SRID=2154;{geom}') '''.format(
+    sch='cadastre',
+    t1='parcelles_38',
+    geom=dffonc.unary_union)
+
+sql = '''SELECT DISTINCT
+t1.par_id,t1.geom, t1.dcntpa,t1.typprop_id,t1.ccopre,
+t1.ccosec,t1.dnupla::text, t6.gtoper::text, t6.ccogrm::text
+FROM {sch}."{t1}" t1
+JOIN ({sch}."lots_38" t2
+    JOIN ({sch}."cadastre_38" t3
+        JOIN ({sch}."cptprop_38" t4
+            JOIN ({sch}."r_prop_cptprop_38" t5
+                JOIN {sch}."proprios_38" t6 USING (dnuper))
+            USING (dnupro))
+        USING (dnupro))
+    USING (lot_id))
+USING (par_id)
+WHERE ST_Intersects(t1.geom, 'SRID=2154;{geom}') '''.format(
+    sch='cadastre',
+    t1='parcelles_38',
+    geom=dffonc.unary_union)
+sql = sql + " AND t1.par_id LIKE ANY (array['%s']);"% "','".join(dffonc[~dffonc.par_id.isna()].par_id)
+parc = gpd.read_postgis(sql=text(sql),geom_col='geom',con=con_fon)
+parc.loc[parc.dnupla.str.len()==1,'dnupla'] = '000' + parc.loc[parc.dnupla.str.len()==1,'dnupla']
+parc.loc[parc.dnupla.str.len()==2,'dnupla'] = '00' + parc.loc[parc.dnupla.str.len()==2,'dnupla']
+parc.loc[parc.dnupla.str.len()==3,'dnupla'] = '0' + parc.loc[parc.dnupla.str.len()==3,'dnupla']
+# parc = gpd.read_file('/home/colas/Documents/tmp/FONCIER_FEDE/extract_parc_for_fede.gpkg')
+# parc.rename_geometry('geom',inplace=True)
+# parc.to_file('/home/colas/Documents/tmp/FONCIER_FEDE/extract_parc_for_fede.gpkg',driver='GPKG')
+
+parc['semi_par_id'] = parc.par_id.str[:5] + parc.ccosec + parc.dnupla
+
+
+######################
+# HARMONIZATION of the FONCIER layer
+# One .copy() per name: a single chained assignment would alias all four
+# frames to the same object, and each buffer() below would overwrite the rest.
+dffonc_b1 = dffonc.copy()
+dffonc_b5 = dffonc.copy()
+dffonc_b20 = dffonc.copy()
+dffonc_b50 = dffonc.copy()
+dffonc_b1.geom = dffonc_b1.buffer(1)
+dffonc_b20.geom = dffonc_b20.buffer(20)
+dffonc_b50.geom = dffonc_b50.buffer(50)
+dffonc_b5.geom = dffonc_b5.buffer(5)
+tmp = gpd.sjoin(dffonc_b50[~dffonc_b50.numero.isna()],parc,op='contains',how='left')
+tmp2 = tmp[tmp.numero == tmp.dnupla]
+crsp = tmp2.id.tolist()
+tmp[~tmp.id.isin(crsp)]  # inspection only, result discarded
+
+
+tmp[tmp.id.duplicated(keep=False)]  # inspection only, result discarded
+##############
+##############
+
+
+
+dfparc = parc[['par_id','gtoper','ccogrm','geom']]
+
+dfgere = dffonc[dffonc.maitrise.isin(lst_gere)].copy() # managed parcels
+dfgere['code_mfu1'] = None
+dfgere['code_mfu2'] = None
+dfgere.loc[dfgere.maitrise.isin(lst_prop),'code_mfu1'] = 'P'
+dfgere.loc[dfgere.maitrise.isin(lst_conv),'code_mfu1'] = 'C'
+dfgere.loc[dfgere.maitrise == 'BAIL EMPHYTEOTIQUE','code_mfu1'] = 'L'
+dfgere.loc[dfgere.maitrise.isin(lst_prop),'code_mfu2'] = 'P1'
+dfgere.loc[dfgere.maitrise.isin(lst_conv),'code_mfu2'] = 'C7'
+dfgere.loc[dfgere.maitrise == 'BAIL EMPHYTEOTIQUE','code_mfu2'] = 'L1'
+
+dfgere.loc[dfgere.maitrise == 'PARTIELLEMENT MAITRISE','bnd'] = 1
+dfgere.loc[dfgere.maitrise != 'PARTIELLEMENT MAITRISE','bnd'] = 0
+
+# Intervention parcels
+dftmp = dffonc[~dffonc.maitrise.isin(lst_gere)].copy() # temp layer of non-managed parcels
+sittmp = dfsite[(dfsite.assist_cnv)&(dfsite.assist_col)] # sites under assistance
+intersect = dftmp.intersects(sittmp.unary_union) # intersect [temp layer] vs [assistance sites]
+dfintr = dftmp[intersect] # intervention parcels
+
+# Military parcels
+dic = {
+    'ccosec':'num_section',
+    'par_id':'num_parcelle'
+}
+dfmili = dfsite[dfsite.type_site=='MILI'].copy()
+sqlmil = "SELECT * FROM %s.%s WHERE ST_Contains('SRID=2154;%s',geom)" % ('cadastre', 'parcelles_38',dfmili.buffer(10).unary_union)
+miliparc = gpd.read_postgis(sqlmil,con=con_fon)
+miliparc['code_dep'] = miliparc.codcom.str[:2]
+miliparc['code_com'] = miliparc.codcom.str[2:]
+dfmili = miliparc[['par_id','geom','ccosec','code_dep','code_com']].copy()
+dfmili.rename(columns=dic, inplace=True)
+
+
+
+# All parcels
+df = pd.concat([dfgere,dfintr,dfmili], ignore_index=True)
+keep_col = df.columns[df.columns.isin(lst_cols_parc)]
+df = df[keep_col]
+df.reset_index(drop=True, inplace=True)
+
+
+sql = '''SELECT par_id,geom, dcbtpa,typprop_id FROM %s."%s" WHERE ST_Intersects(geom, 'SRID=2154;%s') ''' % ('cadastre', 'parcelles_38',df.representative_point().unary_union)
+parc = gpd.read_postgis(sql=sql,geom_col='geom',con=con_fon)
+
+sql = '''SELECT geo_parcelle,geo_section,idu,proprietaire,proprietaire_info,geom FROM %s."%s" WHERE ST_Intersects(geom, 'SRID=2154;%s') ''' % ('ref_cadastre', 'parcelle_info',df.representative_point().unary_union)
+prop = gpd.read_postgis(sql=sql,geom_col='geom',con=con)
+
+
+
+df['insee_com'] = df.code_dep + df.code_com
+tmp = df[['id','geom']]
+gpd.sjoin(df,parc[['par_id','geom']],how='left',op='intersects')  # result not assigned
+# df['num_parcelle'] =
+
+df['surf_ore_m2'] = 0
+df['date_debut_ore'] = None
+df['date_fin_ore'] = None
+lst_doc = []  # left empty in source
+df.loc[df.id.isin(lst_doc),'doc_foncier'] = 1
+df.loc[~df.id.isin(lst_doc),'doc_foncier'] = 0
+df.loc[df.doc_foncier == 1,'source_doc_foncier'] = 'CEN Isère'
+df['pour_part'] = 0
+df['domaine_public'] = 0
+df['id_proprietaire'] = 0
+
+
+df['parc_gestion_rnx'] = None # ?????????????????
+df['source_geom_parc_nature'] = 2
+df['source_geom_parc_annee'] = 2017
+df['source_surf_parc'] = 1
+
+
+
+
+
+
+# 3800010000A0001
+# 38566AK00981000
+# 596060000A0012
+# 073490000C0103
+###########
+### Other #
+###########
+df = pd.DataFrame()
+df['id_cen'] = 14  # NB: scalar assignment on an empty frame yields an empty column
+df['id_site_cen'] = ''

diff --git a/0_FONCIER/TdB_FEDE/parcelles_TdB_fede2.py b/0_FONCIER/TdB_FEDE/parcelles_TdB_fede2.py
new file mode 100644
index 0000000..4d0c4bf
--- /dev/null
+++ b/0_FONCIER/TdB_FEDE/parcelles_TdB_fede2.py
@@ -0,0 +1,373 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+from pycen import con_bdcen, con_cad
+from os import getlogin
+from os.path import join
+from sqlalchemy.engine import URL
+from sqlalchemy import create_engine
+import geopandas as gpd
+import pandas as pd
+
+user = 'postgres'
+pwd = 'postgres'
+adr = '172.17.0.2'
+base = 'postgres'
+url = URL.create('postgresql+psycopg2',
+    username=user,
+    password=pwd,
+    host=adr,
+    database=base,
+)
+con_vm = create_engine(url)
+
+annee = 2023
+
+PATH = '/media/{login}/SRV/FICHIERS/OUTILS/BASES DE DONNEES/BILAN_FEDE_CEN/{an}/'
+path_source = join(PATH,'Docs_SIG_joints{an}/').format(login=getlogin(),an=annee)
+dico_data = 'Dico_DATA_sites_CEN_v%i_v1.xlsx' % annee
+sheet_par = 'parcelles_cen_xx_%i' % annee
+sheet_sit = 'sites_cen_xx_%i' % annee
+
+# lv_cen = pd.read_excel(path_source+dico_data,sheet_name='listes_valeurs (copy&modif)',header=1, usecols='A:D',nrows=25)
+# lv_typmaitr = pd.read_excel(path_source+dico_data,sheet_name='listes_valeurs (copy&modif)',header=1, usecols='F:K',nrows=23)
+# lv_typmilie = pd.read_excel(path_source+dico_data,sheet_name='listes_valeurs (copy&modif)',header=1, usecols='M:N',nrows=16)
+# lv_echelnum = pd.read_excel(path_source+dico_data,sheet_name='listes_valeurs (copy&modif)',header=1, usecols='P:Q',nrows=8)
+# lv_typhab = pd.read_excel(path_source+dico_data,sheet_name='listes_valeurs (copy&modif)',header=13, usecols='P:Q',nrows=5)
+# lv_sourcegeom = pd.read_excel(path_source+dico_data,sheet_name='listes_valeurs (copy&modif)',header=1, usecols='S:U',nrows=12)
+# lv_lienres = pd.read_excel(path_source+dico_data,sheet_name='listes_valeurs (copy&modif)',header=20, usecols='M:N',nrows=4)
+# lv_typprop = pd.read_excel(path_source+dico_data,sheet_name='listes_valeurs (copy&modif)',header=45, usecols='A:H',nrows=12)
+# lv_codgeol = pd.read_excel(path_source+dico_data,sheet_name='listes_valeurs (copy&modif)',header=36, usecols='M:T',nrows=19)
+lst_cols_parc = pd.read_excel(path_source+dico_data,sheet_name=sheet_par,header=0, usecols='F',nrows=26)\
+    ['nom du champ'].tolist()
+
+lst_prop = ['Acquis par le CEN ou CREN','ACQUIS&CONVENTION', 'PARTIELLEMENT MAITRISE','ACQUIS PAR LE CEN EN BND','ACQUIS PAR LE CEN ISERE']
+lst_conv = ["CONVENTION D'USAGE",'CONVENTION']
+lst_gere = ['BAIL EMPHYTEOTIQUE', *lst_conv, *lst_prop]
+
+DICT_COLS = {
+    'id' : 'id_site_cen_parc',
+    'par_id' : 'num_parcelle',
+    'section': 'num_section',
+    'code_dep':'insee_dep',
+    'code_com':'insee_com',
+
+}
+
+def update_idproprietaire(df):
+    if 'id_proprietaire' in df.columns :
+        df['id_proprietaire'] = 0
+
+    df.loc[df.classif_prop.str.contains('CEN RA'),'id_proprietaire'] = 28
+    df.loc[df.classif_prop.str.contains('CEN ISERE'),'id_proprietaire'] = 14
+    df.id_proprietaire.fillna(0,inplace=True)
+
+    return df
+
+
+def update_typeprop(df):
+
+    DICT = {
+        'Etat':'2_1_PUB',
+        'CEN RA - BND':'3_CEN',
+        'CEN RA':'3_CEN',
+        'CEN ISERE - BND':'3_CEN',
+        'Privé et entreprises':'1_PRI',
+        'Communes et comcom':'2_4_COM',
+        'Autres public et assimiliés (syndicats)':'2_0_MIX',
+        'Département':'2_3_PUB',
+        'Autres asso et fédérations':'2_6_PRI',
+        'FRUP':'3_CEN',
+    }
+
+    df['type_prop'] = df['classif_prop'].copy()
+    df['type_prop'].replace(DICT, inplace=True)
+
+    df.loc[
+        (df.classif_prop=='Privé et entreprises') &
+        (df.gtoper.str.contains('2')),
+        'type_prop'] = '2_0_MIX'
+    df.loc[
+        df.ccogrm == 9,
+        'type_prop'] = '2_9_PUB'
+
+    return df
+
+
+def update_codemfu(df):
+
+    df.loc[df.maitrise.isin(lst_prop),'code_mfu1'] = 'P'
+    df.loc[df.maitrise.isin(lst_conv),'code_mfu1'] = 'C'
+    df.loc[df.maitrise == 'BAIL EMPHYTEOTIQUE','code_mfu1'] = 'L'
+    df.loc[df.maitrise.isin(lst_prop),'code_mfu2'] = 'P1'
+    df.loc[df.maitrise.isin(lst_conv),'code_mfu2'] = 'C7'
+    df.loc[df.maitrise == 'BAIL EMPHYTEOTIQUE','code_mfu2'] = 'L1'
+
+    df.loc[df.maitrise == 'ACCORD VERBAL','code_mfu1'] = 'C'
+    df.loc[df.maitrise == 'ACCORD VERBAL','code_mfu2'] = 'C17'
+
+    df.loc[df.maitrise == 'CONVENTION','code_mfu1'] = 'C'
+    df.loc[df.maitrise == 'CONVENTION','code_mfu2'] = 'C7'
+
+    df.loc[df.indivision=='USUFRUIT','code_mfu2'] = 'P2'
+
+    return df
+
+
+def update_bnd(df):
+
+    df.loc[df.maitrise == 'PARTIELLEMENT MAITRISE','bnd'] = 1
+    df.loc[df.maitrise != 'PARTIELLEMENT MAITRISE','bnd'] = 0
+    df.loc[df.maitrise == 'ACQUIS&CONVENTION','bnd'] = 1
+
+    return df
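+# update_codemfu() translates the local 'maitrise' wording into the federation
+# MFU codes (P* appears to denote ownership, C* agreements, L* leases). Since
+# each maitrise value maps to exactly one code pair, a dict-driven sketch
+# (hypothetical helper; the USUFRUIT override must still be applied last) is:
+#   MFU = {**{m: ('P','P1') for m in lst_prop}, **{m: ('C','C7') for m in lst_conv},
+#          'BAIL EMPHYTEOTIQUE': ('L','L1'), 'ACCORD VERBAL': ('C','C17')}
+#   df[['code_mfu1','code_mfu2']] = df.maitrise.map(MFU).apply(pd.Series)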
+
+
+def get_parc_milli():
+
+    sqlsit = '''
+    SELECT * FROM %s.%s
+    WHERE (date_fin is NULL OR date_fin >= '%i-01-01') AND type_site='MILI' ;'''%('sites','c_sites_zonages',annee)
+    df1 = gpd.read_postgis(sql=sqlsit,geom_col='geom',con=con_bdcen)
+
+    sql0 = '''SELECT
+    t1.geo_parcelle, substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 12) par_id,
+    t1.annee, t1.update_dat, t4.ccopre, t4.ccosec, t4.dcntpa, t1.geom, STRING_AGG(t3.gtoper::text,',') gtoper, STRING_AGG(t3.ccogrm::text,',') ccogrm,
+    STRING_AGG(CONCAT(t3.gtoper::text||COALESCE('_'||t3.ccogrm::text,'')),',') type_prop, STRING_AGG(t3.ddenom::text,',') ddenom
+    FROM "{sch}"."{t1}" t1
+    LEFT JOIN ("{sch}".parcelle_info t2
+        LEFT JOIN "{sch}".proprietaire t3 USING (comptecommunal))
+    USING (geo_parcelle)
+    LEFT JOIN "{sch}".parcelle t4 ON t1.geo_parcelle = t4.parcelle
+    '''.format(
+        sch='38_%i07'%(annee-1),
+        t1='v_geo_parcelle')
+    sql1 = '''
+    WHERE ST_INTERSECTS(t1.geom,'SRID={epsg};{poly}')
+    GROUP BY 1,2,3,4,5,6,7,8
+    '''.format(epsg=2154,poly=df1.unary_union.buffer(-1))
+    sql = sql0 + sql1
+    df2 = gpd.read_postgis(sql,con_cad)
+    df2['code_dep'] = df2.par_id.str[:2]
+    df2['code_com'] = df2.par_id.str[2:5]
+    df2['code_mfu1'] = 'C'
+    df2['code_mfu2'] = 'C12'
+    df2['section'] = df2.ccosec.copy()
+    df2['id_site_cen_parc'] = 0
+    df2['bnd'] = 0
+    df2['mesure_compens'] = 0
+    df2['pour_part'] = 0
+
+    return df2
+
+
+def update_mc(df):
+
+    sqlsit = '''
+    SELECT * FROM %s.%s
+    WHERE (date_fin is NULL OR date_fin >= '%i-01-01') AND type_site='MC' ;'''%('sites','c_sites_zonages',annee)
+    df1 = gpd.read_postgis(sql=sqlsit,geom_col='geom',con=con_bdcen)
+
+    is_mc = df.intersects(df1.unary_union)
+    df['mesure_compens'] = 0
+    df.loc[
+        (is_mc & df.maitrise.isin(lst_conv)
+        )|(df.Parc_MCE=='OUI'),
+        'mesure_compens'
+    ] = 1
+
+    return df
+
+
+def update_pourpart(df):
+
+    df['pour_part'] = 0
+    df.loc[
+        df.maitrise=='PARTIELLEMENT MAITRISE',
+        'pour_part'
+    ] = 1
+
+    return df
+
+
+def is_rnx(df):
+
+    sqlsit = '''
+    SELECT * FROM %s.%s
+    WHERE (date_fin is NULL OR date_fin >= '%i-01-01') AND type_site='RNN' ;'''%('sites','c_sites_zonages',annee)
+    df1 = gpd.read_postgis(sql=sqlsit,geom_col='geom',con=con_bdcen)
+
+    isrnx = df.intersects(df1.unary_union.buffer(-1))
+    df['parc_gestion_rnx'] = 0
+    df.loc[isrnx,'parc_gestion_rnx'] = 1
+
+    return df
+
+
+def update_datemajparcelle(df,cad):
+
+    tmp = df.merge(
+        cad[['par_id','update_dat']],  # cad: the cadastre extract passed in as argument
+        how='left',
+        left_on='num_parcelle',
+        right_on='par_id')
+    tmp.rename(columns={'update_dat':'date_maj_parcelle'},inplace=True)
+
+    return tmp
+
+
+def modif_FRUP2023(df):
+
+    df = df[
+        ~df.commentair.str.contains(
+            # 'sign&2023|acq&2023',
+            r'sign.*2023|acq.*2023',
+            na=False,
+            case=False,
+            regex=True)]
+    is_frup = df.maitrise=='FRUP'
+    df.loc[is_frup,'classif_prop'] = 'CEN ISERE'
+    df.loc[is_frup,'maitrise'] = 'ACQUIS PAR LE CEN ISERE'
+
+    return df
+
+
+def complete_tab(df):
+
+    df['id_site_cen_parc'] = df['id_site_cen_parc'].astype(int)
+    df['id_site_fcen_parc'] = None
+    df['bnd'] = df['bnd'].astype(int)
+    df.loc[df.type_prop.str.contains(','),'type_prop'] = '2_0'
+
+    df['surf_ore_m2'] = 0
+    df['date_debut_ore'] = None
+    df['date_fin_ore'] = None
+    df['doc_foncier'] = 1
+    df['source_doc_foncier'] = 0
+    df['source_geom_parc_nature'] = 2
+    df['source_geom_parc_annee'] = annee - 1
+    df['echelle_num_parc'] = 0
+    df['source_surf_parc'] = 1
+    df = update_datemajparcelle(df,parcx)  # parcx is built in the __main__ block below
+    df['domaine_public'] = 0
+
+    return df
LEFT JOIN "{sch}".proprietaire t3 USING (comptecommunal)) + USING (geo_parcelle) + LEFT JOIN "{sch}".parcelle t4 ON t1.geo_parcelle = t4.parcelle + '''.format( + sch='38_202207', + t1='v_geo_parcelle') + sql1 = '''WHERE substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 12) IN ('%s') + GROUP BY 1,2,3,4,5,6,7,8'''%"','".join(dfparc.par_id.tolist()) + sql = sql0 + sql1 + + parcx = gpd.read_postgis(sql,con=con_cad) + noparcx = dfparc[~dfparc.par_id.isin(parcx.par_id)] + + newparc = parc[~parc.num_parcelle.isin(lastsend.num_parcelle)].copy() + shp_newparc = shp_parc[~shp_parc.num_parcelle.isin(lastsend.num_parcelle)].copy() + newparcx = parcx[parcx.par_id.isin(shp_newparc.num_parcelle)].copy() + shp_newparc = shp_newparc.merge(newparcx[['par_id','update_dat']],how='left',left_on='num_parcelle',right_on='par_id')\ + .drop(columns=['par_id'])\ + .rename(columns={ + 'num_parcelle':'num_parc', + 'update_dat':'date_maj_p' + })\ + .rename_geometry('geometry') + + + newparc = complete_tab(newparc) + parc = complete_tab(parc) + # mili = get_parc_milli() + + PARC = pd.concat([lastsend,newparc]) + PARC.id_site_cen_parc = PARC.id_site_cen_parc.astype(int) + + + # Mise au format des dates + date_cols = PARC.columns[PARC.columns.str.contains('date')] + for c in date_cols: + PARC[c] = pd.to_datetime(PARC[c],yearfirst=True).dt.strftime('%Y/%m/%d') + # site2023[c] = site2023[c].dt.strftime('%Y/%m/%d') + + + + + SHP_PARC = gpd.GeoDataFrame( + pd.concat([shplastsend,shp_newparc]), + geometry='geometry', + crs=2154 + ) + SHP_PARC['date_maj_p'] = pd.to_datetime(SHP_PARC['date_maj_p'],yearfirst=True).dt.strftime('%Y/%m/%d') + SHP_PARC.geometry = SHP_PARC.buffer(0) + + PARC.to_csv( + join( + PATH.format(login=getlogin(),an=annee), + 'Recueil_data', + 'parcelles_cen_38_2023.csv'), + index=False) + SHP_PARC.to_file( + join( + PATH.format(login=getlogin(),an=annee), + 'Recueil_data', + 'parcelles_cen_38_2023.shp'), + index=False) + + diff --git a/0_FONCIER/TdB_FEDE/sites_TdB_fede.py b/0_FONCIER/TdB_FEDE/sites_TdB_fede.py new file mode 100644 index 0000000..b8f2ab7 --- /dev/null +++ b/0_FONCIER/TdB_FEDE/sites_TdB_fede.py @@ -0,0 +1,389 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +from os import getlogin,path,chdir +import sys +import pandas as pd +import geopandas as gpd +from sqlalchemy.engine import URL +from sqlalchemy import create_engine +from pycen import con_bdcen as con, con_fon, wfs + +# NOT WORK +# SCRIPT_DIR = path.dirname(path.abspath(__file__)) +# sys.path.append(path.dirname(SCRIPT_DIR)) +# from . 
import get_siteCen_fede + + + +annee = 2023 +term_parcelle_conv = ['CONVENTION',"CONVENTION D'USAGE",'ACCORD VERBAL'] +path_agri = '/media/colas/SRV/FICHIERS/OUTILS/CARTOGRAPHIE/ESPACE DE TRAVAIL/TRAVAUX/Couches de référence/' +agri_partenaire_surf = gpd.read_file(path_agri+'AGRIS_partenaires_surface exploitées.shp') +agri_partenaire = gpd.read_file(path_agri+'AGRIS_partenaires.shp') +id_rnx = ['RNGL','RNIP'] + + +MAJ_COLNAME = { + 'doc_gestion_annee_ini':'doc_gestion_date_ini', + 'doc_gestion_annne_maj':'doc_gestion_date_maj', + 'source_geom_site_annee':'source_geom_site_date', + '0':'surf_carto_habitat_m2', + 'F':'geol_site_inpn', +} + + +def drop_specialchar(obj): + return obj.replace({ + r'[’]':"'", + r'[àáâãäå]':'a', + r'[èéêë]':'e', + r'[ìíîï]':'i', + r'[òóôõö]':'o', + r'[ùúûü]':'u', + # r'[ ]':"", + r'[–]':"-" + },regex=True) + +def get_site(): + sqlsit = '''SELECT * FROM %s.%s'''%('sites','c_sites_zonages') + return gpd.read_postgis(sql=sqlsit,geom_col='geom',con=con) + + + +def get_siteCen_fede(an=None): + sqlsit = '''SELECT * FROM %s.%s'''%('sites','c_sites_zonages') + if an : + sqlsit += ''' WHERE date_fin is NULL OR date_fin >= '%i-01-01' '''%an + + sqlsit += ' order by code_site' + dfsite = gpd.read_postgis(sql=sqlsit,geom_col='geom',con=con) + + # Isolation de tous les sites CEN (gestion/assist/mili) + dup = dfsite.code_site.duplicated(keep=False) + typzon = dfsite.type_zonage.isin(['ZI','DO','DH']) + return dfsite[ + ~dup | (dup & typzon) + ] + + +def get_parcelleCen_fede(an=None): + sqlsit = '''SELECT * FROM %s.%s'''%('foncier','c_anim_foncier_v2') + if an : + sqlsit += ''' + WHERE maitrise IN ( + 'ACCORD VERBAL','ACQUI PAR LE CEN EN BND','ACQUI PAR LE CEN ISERE', + 'ACQUIS PAR LE CEN ISERE','ACQUIS&CONVENTION','Acquis par le CEN ou CREN', + 'BAIL EMPHYTEOTIQUE','CONVENTION','CONVENTION D''USAGE','FRUP') + AND (commentair not ilike '%%{an}%%' OR commentair IS NULL)'''.format(an=an) + + return gpd.read_postgis(sql=sqlsit,geom_col='geom',con=con) + + +def update_date_site2022(df): + df['date_crea_site'].replace({ + '01/012005': '01/01/2005', + '01/01/98': '01/01/1998', + '01/01/03': '01/01/2003', + '09.05.2001': '09/05/2001', + '03/1986': '01/03/1986', + '01/01/94': '01/01/1994', + }, inplace=True) + df['date_crea_site'] = pd.to_datetime( + df['date_crea_site'], + # format='%Y/%m/%d', + yearfirst=True + ) + return df + + +def update_date(serie): + DICT = { + '024/04/2019':'24/04/2019' + } + return pd.to_datetime( + serie.replace(DICT), + # format='%Y/%m/%d', + yearfirst=True + ) + + +if __name__ == "__main__": + + + path_source = '/media/{login}/SRV/FICHIERS/OUTILS/BASES DE DONNEES/BILAN_FEDE_CEN/{an}/'.format(login=getlogin(),an=annee) + path_an_precedent = '/media/{login}/SRV/FICHIERS/OUTILS/BASES DE DONNEES/BILAN_FEDE_CEN/{an}/ENVOIE/'.format(login=getlogin(),an=annee-1) + tdb_sites_csv = 'Sites_CEN_38_%i.csv' + receuil_path = 'Recueil_data/' + receuil_datasite = 'TBD - receuil data SITES.xlsx' + shp_cnpe = 'primtresitecnpe/perimetre_site_CNPE_2019.shp' + + # path_source = '/media/{login}/SRV/FICHIERS/OUTILS/BASES DE DONNEES/BILAN_FEDE_CEN/{an}/Docs_SIG_joints{an}/'.format(login=getlogin(),an=annee) + dico_data = 'Docs_SIG_joints{an}/Dico_DATA_sites_CEN_v{an}_v1.xlsx'.format(an=annee) + lv_typmilie = pd.read_excel(path_source+dico_data,sheet_name='listes_valeurs',header=1, usecols='M:N',nrows=16) + lv_typmilie.columns = ['code_type_milieu','libelle_type_milieu'] + + dfsite = get_siteCen_fede(annee-1)\ + .sort_values('code_site')\ + .reset_index() + 
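# NB: 'N2IP' comes out once per Natura 2000 zonage; the two lines below suffix its code with the zonage type (DO/DH) so each row keeps a unique code_site + 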
dfsite.loc[(dfsite.code_site=='N2IP') & (dfsite.type_zonage=='DO'),'code_site'] = 'N2IP_DO' + dfsite.loc[(dfsite.code_site=='N2IP') & (dfsite.type_zonage=='DH'),'code_site'] = 'N2IP_DH' + dfparc = get_parcelleCen_fede(annee) + + sitefcen = dfsite[dfsite.intersects(dfparc.unary_union)] + + parconv = dfparc.maitrise.isin(term_parcelle_conv) + site_rnx = dfsite[dfsite.code_site.isin(id_rnx)].copy() + + n2000_zps = wfs.get_wfs( + url = "https://ws.carmencarto.fr/WMS/119/fxx_inpn?", + layer = "Zones_de_protection_speciale", + bbox = dfsite.unary_union ) + # n2000_zps = wfs.get_wfs(url="https://ws.carmencarto.fr/WMS/119/fxx_inpn?",layer="Zones_de_protection_speciale") + n2000_zsc = wfs.get_wfs( + url="https://ws.carmencarto.fr/WMS/119/fxx_inpn?", + layer="Sites_d_importance_communautaire_JOUE__ZSC_SIC_", + bbox=dfsite.unary_union ) + + + site2022 = pd.read_csv(path_an_precedent+tdb_sites_csv % (annee-1))\ + .rename(columns=MAJ_COLNAME)\ + .replace({'id_site_cen':{'COMB':'COCO'}}) + + site2022 = update_date_site2022(site2022) + site2023 = site2022[site2022.id_site_cen.isin(dfsite.code_site)]\ + .reset_index(drop=True) + recueil_site = pd.read_excel(path_source+receuil_path+receuil_datasite,header=0,skiprows=[1])\ + .replace({'Null':None}) + recueil_site['doc_gestion_date_ini'] = update_date(recueil_site['doc_gestion_date_ini']) + recueil_site['doc_gestion_date_maj'] = update_date(recueil_site['doc_gestion_date_maj']) + recueil_site['doc_gestion_date_fin'] = update_date(recueil_site['doc_gestion_date_fin']) + + + test_add = ~recueil_site.id_site.isin(site2023.id_site_cen) + add_site = recueil_site[test_add].rename(columns={ + 'id_site':'id_site_cen' + }) + site2023 = pd.concat([site2023,add_site]) + + # site_exploit_agri = dfsite.intersects(agri_partenaire.unary_union) + site_exploit_agri_surf = dfsite.intersects(agri_partenaire_surf.unary_union) + + + # Test d'identification des nouveaux sites + sitenew = ~dfsite.code_site.isin(site2022.id_site_cen) + if sitenew.any(): + print('WARNING : de nouveaux sites sont apparus cette année :\n\t%s'%str(tuple(dfsite[sitenew].code_site))) + intersect_acqu = dfsite[sitenew].intersects(dfparc[~parconv].unary_union) + intersect_conv = dfsite[sitenew].intersects(dfparc[parconv].unary_union) + if intersect_acqu.any(): + print(' Le(s) nouveau(x) site(s) intersect(ent) des parcelles acquises : \n\t%s'%str(tuple(dfsite[sitenew&intersect_acqu].code_site))) + + else: + print(' Le(s) nouveau(x) site(s) ne contien(nen)t pas de parcelles acquises ...') + if intersect_conv.any(): + print(' Le(s) nouveau(x) site(s) intersect(ent) des parcelles conventionnées : \n\t%s'%str(tuple(dfsite[sitenew&intersect_conv].code_site))) + else: + print(' Le(s) nouveau(x) site(s) ne contien(nen)t pas de parcelles conventionnées ...') + dfsite = dfsite[~sitenew].copy()\ + .reset_index() + else : + print("Pas de nouveaux sites pour l'année %i."%annee) + + site2023.sort_values('id_site_cen',inplace=True) + recueil_site.sort_values('id_site',inplace=True) + dfsite.sort_values('code_site',inplace=True) + + + # nom_site + site2023['nom_site'] = [ + dfsite[dfsite.code_site==x].nom_site.values[0] + if x in [*dfsite.code_site] else None + for x in site2023.id_site_cen + ] + site2023['nom_site'] = drop_specialchar(site2023['nom_site'].str.lower()) + site2023['nom_site'] = site2023['nom_site'].str.upper() + # site_rnx_surface_m2 + site2023.site_rnx_surface_m2 = dfsite\ + .intersection(site_rnx.unary_union)\ + .area.astype(int) + # ens (A FAIRE) + site2023['ens'] = 
site2023['ens'].fillna(1).astype(int)
+    # site_cdl
+    site2023.site_cdl = 0
+    # n2000_directive
+    site2023 = site2023.merge(dfsite[['code_site','geom']],how='left',left_on='id_site_cen',right_on='code_site')\
+        .drop(columns=['code_site'])
+    cnpe = gpd.read_file(path_source+receuil_path+shp_cnpe)
+    # site2023.loc[site2023.id_site_cen=='CNPE','geom'] = cnpe.iloc[0].geometry
+    site2023 = gpd.GeoDataFrame(site2023,geometry='geom',crs=2154)
+
+    site2023['is_zps'] = site2023.intersects(n2000_zps.unary_union)
+    site2023['is_zsc'] = site2023.intersects(n2000_zsc.unary_union)
+    site2023['n2000'] = site2023[['is_zps','is_zsc']].sum(axis=1)
+    site2023.loc[site2023.n2000==2,'n2000_directive'] = 'ZPS_ZSC'
+    site2023.loc[site2023.n2000==0,'n2000_directive'] = '0'
+    site2023.loc[site2023.is_zps & (site2023.n2000!=2),'n2000_directive'] = 'ZPS'
+    site2023.loc[site2023.is_zsc & (site2023.n2000!=2),'n2000_directive'] = 'ZSC'
+    site2023.drop(columns=['is_zps','is_zsc','n2000'],inplace=True)
+    # n2000_surface_m2
+    site2023['n2000_surface_m2'] = site2023\
+        .intersection(pd.concat([n2000_zps,n2000_zsc]).unary_union)\
+        .area.astype(int)
+    # terrain_militaire
+    site2023['terrain_militaire'] = (dfsite.type_site == 'MILI').astype(int)
+    # site_marin
+    site2023['site_marin'] = 0
+    # nb_contrat_agri
+    site2023['nb_contrat_agri'] = site2023['nb_contrat_agri'].fillna(0)
+    # nb_agri
+    site2023['nb_agri'] = site2023['nb_agri']\
+        .replace({'?':0})\
+        .fillna(0)\
+        .astype(int)
+    # surf_contra_m2
+    site2023['surf_contra_m2'] = site2023['surf_contra_m2'].fillna(0)
+
+    # code_milieu_princ
+    dict_milieu = dict(zip(lv_typmilie.libelle_type_milieu,lv_typmilie.code_type_milieu))
+    dfsite['milieux'].replace({'Gîtes à chiroptères et milieux souterrains':'Gîtes à chiroptères'}, inplace=True)
+    dfsite['code_milieu'] = dfsite.milieux.str.lower().replace(dict_milieu)
+    site2023['code_milieu_princ'] = [
+        dfsite[dfsite.code_site==x].code_milieu.values[0] if x in [*dfsite.code_site] else None for x in site2023.id_site_cen
+    ]
+
+    # nature_site_inpn
+    site2023['nature_site_inpn'] = 'N'
+    # geol_site_inpn
+    site2023['geol_site_inpn'] = 'N'
+    # code_geol
+    site2023['code_geol'] = 21
+    # carto_habitats
+    site2023['carto_habitats'] = site2023['carto_habitats'].fillna(0)
+    # typo_carto_habitat
+    site2023['typo_carto_habitat'] = site2023['typo_carto_habitat'].fillna(0)
+    # surf_carto_habitat_m2
+    site2023['surf_carto_habitat_m2'] = site2023['surf_carto_habitat_m2'].fillna(0)
+    # date_crea_site
+    datemiss = site2023['date_crea_site'].isnull()
+    for s in site2023[datemiss].id_site_cen:
+        if s not in dfsite.code_site.values : continue
+
+        site2023.loc[site2023.id_site_cen==s,'date_crea_site'] = dfsite[dfsite.code_site==s].date_ajout.values[0]
+    site2023['date_crea_site'] = pd.to_datetime(site2023['date_crea_site'],yearfirst=True, format='%Y/%m/%d')
+    # date_maj_site
+    site2023['date_maj_site'] = dfsite.date_maj.copy()
+    # nature_perimetre
+    site2023['nature_perimetre'] = site2023['nature_perimetre'].fillna(0)
+    # source_geom_site_nature
+    site2023['source_geom_site_nature'] = site2023['source_geom_site_nature'].fillna(0)
+    # source_geom_site_date
+    site2023['source_geom_site_date']
+    # echelle_num_site
+    site2023['echelle_num_site'] = site2023['echelle_num_site'].fillna(0)
+    # precision_num_site
+    site2023['precision_num_site'] = site2023['precision_num_site'].fillna('NE')
+    # gestionnaire_site
+    gestmiss = site2023['gestionnaire_site'].isna()
+    site2023.loc[gestmiss,'gestionnaire_site'] = 'CEN Isère'
+    # operateur
+    opmiss = site2023['operateur'].isna()
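+    # operateur defaults to 'FCEN' below, mirroring the 'CEN Isère' default applied to gestionnaire_site above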
+    site2023.loc[opmiss,'operateur'] = 'FCEN'
+    # surf_libre_evolution_m2
+    site2023['surf_libre_evolution_m2'] = site2023['surf_libre_evolution_m2'].fillna(0)
+    # doc_gestion_presence
+    site2023['doc_gestion_presence'] = site2023['doc_gestion_presence'].fillna(0)
+    # doc_gestion_nom
+    site2023['doc_gestion_nom']
+    # doc_gestion_evaluation
+    site2023['doc_gestion_evaluation']
+    # doc_gestion_date_ini
+    # site2023['doc_gestion_date_ini'] = pd.to_datetime(site2023['doc_gestion_date_ini'],yearfirst=True)
+    site2023['doc_gestion_date_ini'] = update_date(site2023['doc_gestion_date_ini'])
+    # doc_gestion_date_maj
+    # site2023['doc_gestion_date_maj'] = pd.to_datetime(site2023['doc_gestion_date_maj'],yearfirst=True)
+    site2023['doc_gestion_date_maj'] = update_date(site2023['doc_gestion_date_maj'])
+    # doc_gestion_date_fin
+    # site2023['doc_gestion_date_fin'] = pd.to_datetime(site2023['doc_gestion_date_fin'],yearfirst=True)
+    site2023['doc_gestion_date_fin'] = update_date(site2023['doc_gestion_date_fin'])
+    # surf_doc_gestion_m2
+    site2023['surf_doc_gestion_m2'] = site2023['surf_doc_gestion_m2'].fillna(0)
+    # url_fiche_inpn
+    site2023['url_fiche_inpn']
+    # url_fiche_cen
+    site2023['url_fiche_cen']
+    # doc_justif_admin
+    site2023['doc_justif_admin']
+    # ouverture_public
+    site2023['ouverture_public']
+    # description_site
+    site2023['description_site']
+    # url_site_photo
+    site2023['url_site_photo']
+    # sensibilite
+    site2023['sensibilite']
+    # remq_sensibilite
+    site2023['remq_sensibilite']
+
+    site2023.sort_values('id_site_cen',inplace=True)
+    recueil_site.sort_values('id_site',inplace=True)
+
+    for c in recueil_site.columns:
+        if c not in site2023.columns: continue
+
+        notna = recueil_site[c].notna()
+        lstsite = recueil_site[notna].id_site.tolist()
+        s_2023 = site2023.id_site_cen.isin(lstsite)
+        r_2023 = recueil_site.id_site.isin(lstsite)
+        site2023.loc[s_2023,c] = recueil_site[r_2023][c]
+
+    # test_add = ~recueil_site.id_site.isin(site2023.id_site_cen)
+    # add_site = recueil_site[test_add].rename(columns={
+    #     'id_site':'id_site_cen'
+    # })
+    # SITE2023 = pd.concat([site2023,recueil_site[test_add]])
+
+    for c in site2023.columns:
+        if site2023[c].dtype==float:
+            try:
+                site2023[c] = site2023[c].astype(int)
+            except:
+                pass
+
+    # Normalize the date columns
+    date_cols = site2023.columns[site2023.columns.str.contains('date')]
+    for c in date_cols:
+        site2023[c] = pd.to_datetime(site2023[c],yearfirst=True).dt.strftime('%Y/%m/%d')
+        # site2023[c] = site2023[c].dt.strftime('%Y/%m/%d')
+
+
+
+    shp2023 = site2023[['id_site_cen','id_site_fcen','date_crea_site','date_maj_site','geom']].copy()
+    shp2023['date_maj_s'] = [
+        shp2023.date_maj_site[i] if pd.notna(shp2023.date_maj_site[i]) else shp2023.date_crea_site[i] for i in shp2023.index
+    ]
+    shp2023.drop(columns=['date_crea_site','date_maj_site'],inplace=True)
+    shp2023.rename(columns={'id_site_fcen':'id_fcen'},inplace=True)
+
+    # Write the final files
+    # errors='ignore': 'id_site' and 'gestionnaire' are only present when they come in with the recueil
+    site2023.drop(columns=['geom','id_site','gestionnaire'],errors='ignore').to_csv(path_source+receuil_path+tdb_sites_csv%annee,index=False)
+    shp2023.to_file(
+        (path_source+receuil_path+tdb_sites_csv[:-3]+'shp')%annee,
+        index=False)
+# from pycen import update_to_sql,
+# site = get_site()
+# darp = site.code_site.isin(['DARN','PEYR'])
+# zi = site.type_zonage=='ZI'
+# zo = site.type_zonage=='ZO'
+# DARP = gpd.GeoDataFrame({
+#     'code_site':['DARP','DARP'],
+#     'type_zonage': ['ZI','ZO'],
+#     },
+#     geometry=[
+#         site[darp&zi].unary_union,site[darp&zo].unary_union
+#     ],crs=2154)\
+#     .rename_geometry('geom')
+
+
+# 
DARP['surface_ha'] = round(DARP.area / 10000,2) +# update_to_sql(cnpe,con,'c_sites_zonages','sites','code_site') \ No newline at end of file diff --git a/0_FONCIER/add_c_anim_foncier_v2.py b/0_FONCIER/add_c_anim_foncier_v2.py new file mode 100644 index 0000000..98082e4 --- /dev/null +++ b/0_FONCIER/add_c_anim_foncier_v2.py @@ -0,0 +1,91 @@ +from pycen import con_bdcen,con_fon,update_to_sql +import geopandas as gpd +from datetime import datetime as dt + +# GET parcelles gérées à jour +sql = "SELECT * FROM sites.c_sites_zonages WHERE code_site = 'LEZE'" +sql = "SELECT * FROM foncier.c_anim_foncier_v2" +parc_site = gpd.read_postgis(sql,con_bdcen) +parc_site[(~parc_site.classif_prop.str.contains('cen',case=False))&(parc_site.maitrise.str.contains('acqui',case=False))].to_csv('/media/colas/SRV/FICHIERS/TRANSFERTS-EQUIPE/AB/verif_anim_foncier_v2.csv',index=False) +# parc_site.loc[parc_site.maitrise=='ACQUI PAR LE CEN ISERE','maitrise'] = 'ACQUIS PAR LE CEN ISERE' +# update_to_sql( +# parc_site[['par_id','maitrise']],con_bdcen,'c_anim_foncier_v2','foncier','par_id' +# ) + +# GET sites +sql = "SELECT * FROM sites.sites" +site_fon = gpd.read_postgis(sql,con_fon) +site_zi = site_fon[site_fon.site_id.str.endswith('ZI')] +site_zo = site_fon[site_fon.site_id.str.endswith('ZO')] + + +# GET cadastre from bd_cen +sql = ''' +WITH prop AS ( + SELECT + dnuper,gtoper,ccogrm,ddenom + FROM {sch}."proprios" + ORDER BY 2,3,4 +) +SELECT --DISTINCT ON (t1.par_id,t1.geom, t1.dcntpa,t1.typprop_id,t1.ccopre,t1.ccosec,t1.dnupla) + t1.par_id, + t1.geom, + t1.dcntpa, + t1.typprop_id, + t1.ccopre, + t1.ccosec "section", + t1.dnupla::text, + c.nom nom_com, + substring(t1.par_id from 12 for 4) "numero", + substring(t1.codcom from 1 for 2) code_dep, + substring(t1.codcom from 3 for 3) code_com, + STRING_AGG(distinct t6.gtoper::text,',') gtoper, + STRING_AGG(distinct t6.ccogrm::text,',') ccogrm, + STRING_AGG(distinct CONCAT(TRIM(t6.gtoper::text)||COALESCE('_'||TRIM(t6.ccogrm::text),'')),',') type_prop, + --t6.dnuper, + STRING_AGG(distinct t6.ddenom::text,', ') nom_proprio +FROM {sch}."{t1}" t1 +JOIN ({sch}."lots" t2 + JOIN ({sch}."cadastre" t3 + JOIN ({sch}."cptprop" t4 + JOIN ({sch}."r_prop_cptprop" t5 + JOIN prop t6 USING (dnuper)) + USING (dnupro)) + USING (dnupro)) + USING (lot_id)) +USING (par_id) +JOIN administratif.communes c on t1.codcom = c.code_insee '''.format( + sch='cadastre', + t1='parcelles' +) + +sql_grp = '''GROUP BY 1,2,3,4,5,6,8''' + + +# Parcelles gestion +csv_parc = gpd.pd.read_csv('/home/colas/Documents/9_PROJETS/0_FONCIER/CEN38/LEZE_parcelles_a_rajouter_CSV.csv') +csv_parc['par_id'] = csv_parc.geo_parcel.str[:2]+csv_parc.geo_parcel.str[3:] +lst_parc = csv_parc.par_id.tolist() +sql_whr = "WHERE t1.par_id IN ('{par_ids}') ".format(par_ids="','".join(lst_parc)) +sql_gst = sql + sql_whr + sql_grp +parc = gpd.read_postgis(sql_gst,con_fon) +parc['last_upd_parc'] = dt.now().date().isoformat() +parc.loc[parc.type_prop.str.startswith(','),'type_prop'] = parc[parc.type_prop.str.startswith(',')].type_prop.str[1:] +parc['classif_prop'] = 'Privé et entreprises' +parc.loc[parc.type_prop=='1','classif_prop'] = 'Privé et entreprises' + +parc2 = parc.sjoin(site_fon[['site_id','geom']],how='left')\ + .replace({'site_id':{'_ZI|_ZO':''}},regex=True)\ + .drop(columns=['index_right'])\ + .drop_duplicates()\ + .reset_index(drop=True) + + +# Envoie bd-cen-38.foncier.c_anim_foncier_v2 +parc2.to_postgis( + name = 'c_anim_foncier_v2', + con = con_bdcen, + schema = 'foncier', + if_exists = 'append', + index=False +) diff --git 
a/0_FONCIER/config_insert_cadastre.py b/0_FONCIER/config_insert_cadastre.py
new file mode 100644
index 0000000..3cefd7f
--- /dev/null
+++ b/0_FONCIER/config_insert_cadastre.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+#Nom : config_insert_cadastre.py
+#Description : Load the cadastral data into the database once it has been created.
+#Copyright : 2021, CEN38
+#Auteur : Colas Geier
+#Version : 1.0
+
+# NB: dpt_nom_tab (department suffix appended to the output table names) is assumed here;
+# '_38' matches the parcelles_38 / lots_38 / cptprop_38 table names used elsewhere in this repo.
+dpt_nom_tab = '_38'
+
+DICT_TAB = [{
+    'table_in' : 'proprietaire', # Source table, as produced by the QGIS cadastre plugin
+    'index_tab': 'proprietaire', # Primary key of the source table
+    'columns_in': ['ccodep', 'dnuper', 'ccoqua', 'ddenom', 'jdatnss', 'dldnss', 'dsglpm', 'dlign3', 'dlign4', 'dlign5', 'dlign6', 'dnatpr', 'gtoper', 'ccogrm',
+        'ccocom', 'dnupro', 'dnomlp', 'dprnlp', 'epxnee', 'dnomcp', 'dprncp', 'ccodro', 'ccodem'],
+    'table_out': [{
+        'name': 'cptprop{}'.format(dpt_nom_tab),
+        'geom': None,
+        'drop_escape': False, # Strip empty fields inside character strings
+        'columns_in': ['ccodep', 'ccocom', 'dnupro'], # Columns to read from the input.
+        'columns_add': {'dnupro': ['ccodep', 'ccocom', 'dnupro']}, # Composite fields to build and add
+        'unique': {'cols': ['dnupro'], 'keep': 'first'}, # Fields that must be unique within the output table
+        'dict': None, # Mapping to rename fields {'old_name1': 'new_name1', 'old_name2': 'new_name2', ...}
+        'join': False
+    },{
+        'name': 'proprios{}'.format(dpt_nom_tab),
+        'geom': None,
+        'drop_escape': True,
+        'columns_in': ['ccodep', 'dnuper', 'ccoqua', 'ddenom', 'jdatnss', 'dldnss', 'dsglpm', 'dlign3', 'dlign4', 'dlign5', 'dlign6', 'dnatpr', 'gtoper', 'ccogrm'],
+        'columns_add': {'dnuper': ['ccodep', 'dnuper']},
+        'unique': {'cols': ['dnuper'], 'keep': 'first'},
+        'dict': None,
+        'join': False
+    },{
+        'name': 'r_prop_cptprop{}'.format(dpt_nom_tab),
+        'geom': None,
+        'drop_escape': False,
+        'columns_in': ['ccodep', 'dnuper', 'ccocom', 'dnupro', 'dnomlp', 'dprnlp', 'epxnee', 'dnomcp', 'dprncp', 'ccodro', 'ccodem'],
+        'columns_add': {
+            'dnuper': ['ccodep', 'dnuper'],
+            'dnupro': ['ccodep', 'ccocom', 'dnupro']},
+        'unique': {'cols': ['dnupro', 'dnuper'], 'keep': 'first'},
+        'dict': None,
+        'join': False
+    },]
+},{
+    'table_in' : 'parcelle',
+    'index_tab': 'parcelle',
+    'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'ccovoi', 'dparpi', 'dcntpa', 'ccocomm', 'ccoprem', 'ccosecm', 'dnuplam', 'dvoilib', 'type_filiation'],
+    'table_out': [{
+        'name': 'vl{}'.format(dpt_nom_tab),
+        'geom': None,
+        'drop_escape': True,
+        'columns_in' : ['ccodep', 'ccocom', 'ccovoi', 'dvoilib'],
+        'columns_add': {
+            'vl_id': ['ccodep', 'ccocom', 'ccovoi'],
+            'geom': None},
+        'unique': {'cols': ['vl_id'], 'keep': 'first'},
+        'dict': None,
+        'join': False
+    },{
+        'name': 'parcelles{}'.format(dpt_nom_tab),
+        'geom': {
+            'table_geom_in': 'geo_parcelle',
+            'index_geom': 'geo_parcelle'
+        },
+        'drop_escape': True,
+        'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'ccovoi', 'dparpi', 'dcntpa', 'ccocomm', 'ccoprem', 'ccosecm', 'dnuplam', 'type_filiation'],
+        'columns_add': {
+            'par_id': ['ccodep', 'ccocom', 'ccopre','ccosec', 'dnupla'],
+            'codcom': ['ccodep', 'ccocom'],
+            'vl_id': ['ccodep', 'ccocom', 'ccovoi'],
+            'typprop_id': None },
+        'unique': False,
+        'dict': {'type_filiation': 'type'},
+        'join': False
+    },{
+        'name': 'lots{}'.format(dpt_nom_tab), # !!!!!! 
Ne trouve pas de parcelles sans lots (ex: 38357000AE0526) + 'geom': None, + 'drop_escape': True, + 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dcntpa'], + 'columns_add': { + 'lot_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla'], + 'par_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla'], + 'dnulot': None, }, + 'unique': False, + 'dict': {'dcntpa': 'dcntlo'}, + 'join': [{'bdd': 'out', 'table': 'parcelles{}'.format(dpt_nom_tab), 'on': ['par_id'], 'type': 'distinct'}] + },] +},{ + 'table_in' : 'lots', + 'index_tab': 'lots', + 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot', 'dnupdl', 'dcntlo'], + 'table_out': [{ + 'name': 'lots{}'.format(dpt_nom_tab), # !!!!!! parcelles avec lots: existe par_id NOT IN parcelles_73 + 'geom': None, + 'drop_escape': True, + 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot', 'dnupdl', 'dcntlo'], + 'columns_add': { + 'lot_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot'], + 'par_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla'],}, + 'unique': {'cols': ['lot_id'], 'keep': 'first'}, + 'dict': None, + 'join': False + },{ + 'name': 'lots_natcult{}'.format(dpt_nom_tab), + 'geom': None, + 'drop_escape': True, + 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot'], + 'columns_add': { + 'lot_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot'],}, + 'unique': {'cols': ['lot_id'], 'keep': 'first'}, + 'dict': None, + 'join': [{ # ERROR ! 2 dclssf pour 1 lot_id + 'bdd': 'in', 'table': 'suf', 'on': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot'], 'type': 'merge', + 'select_cols' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot','dsgrpf','cnatsp','dclssf','ccosub','dcntsf'], + }] + },] +}] diff --git a/0_FONCIER/create_c_anim_foncier_v2.py b/0_FONCIER/create_c_anim_foncier_v2.py new file mode 100644 index 0000000..a819e87 --- /dev/null +++ b/0_FONCIER/create_c_anim_foncier_v2.py @@ -0,0 +1,187 @@ +from pycen import con_bdcen,con_fon +import geopandas as gpd +from datetime import datetime as dt + +# GET parcelles gérées à jour +sql = "SELECT * FROM parcelles_foncier_tofede" +parc_site = gpd.read_postgis(sql,con_bdcen) +parc_smrg = parc_site[[ + 'par_id','id','Parc_MCE','docs_off_num', + 'Nbr_lots_BND','partiel_conv_ou_acq', + 'code_mfu1','code_mfu2','bnd','mesure_compens', + 'pour_part' +]] + +# GET couche animation fonciere +sql = """SELECT + c.*, + substring(c.num_unique_v2 from 1 for 5) || substring(c.num_unique_v2 from 13 for 3) || substring(c.num_unique_v2 from 6 for 6) par_id_v2 +FROM foncier.c_animation_fonciere_sites c""" +anim_fon = gpd.read_postgis(sql,con_bdcen) +anim_mrg = anim_fon[['par_id_v2', + 'id','maitrise','date_der_c','indivision', + 'contact','avis_propr','commentair','nom_propri' +]].rename( + columns={ + 'nom_propri':'nom_proprio_old' + } +) + +# GET sites +sql = "SELECT * FROM sites.sites" +site_fon = gpd.read_postgis(sql,con_fon) +site_zi = site_fon[site_fon.site_id.str.endswith('ZI')] +site_zo = site_fon[site_fon.site_id.str.endswith('ZO')] + +lst_pmili = parc_site[parc_site.id.isna()].par_id.tolist() + +# 3880,3884,3885,3883,3886,3859,3860,3856,3858,3854 +# par_id / lot_id +# '38222000ZB0013','38222000ZB0015','38222000ZB0016','38222000ZB0017','38222000ZB0023','38222000ZB0041','38222000ZB0042','38222000ZB0043','38222000ZB0058','38222000ZB0059' +# vl_id +# '3822200101','3822200126','3822200126','3822200126','3822200108','3822200106','3822200106','3822200106','3822200089','3822200089' + +# 
GET cadastre from bd_cen +sql = ''' +WITH prop AS ( + SELECT + dnuper,gtoper,ccogrm,ddenom + FROM {sch}."proprios" + ORDER BY 2,3,4 +) +SELECT --DISTINCT ON (t1.par_id,t1.geom, t1.dcntpa,t1.typprop_id,t1.ccopre,t1.ccosec,t1.dnupla) + t1.par_id, + t1.geom, + t1.dcntpa, + t1.typprop_id, + t1.ccopre, + t1.ccosec "section", + t1.dnupla::text, + c.nom nom_com, + substring(t1.par_id from 12 for 4) "numero", + substring(t1.codcom from 1 for 2) code_dep, + substring(t1.codcom from 3 for 3) code_com, + STRING_AGG(distinct t6.gtoper::text,',') gtoper, + STRING_AGG(distinct t6.ccogrm::text,',') ccogrm, + STRING_AGG(distinct CONCAT(TRIM(t6.gtoper::text)||COALESCE('_'||TRIM(t6.ccogrm::text),'')),',') type_prop, + --t6.dnuper, + STRING_AGG(distinct t6.ddenom::text,', ') nom_proprio +FROM {sch}."{t1}" t1 +JOIN ({sch}."lots" t2 + JOIN ({sch}."cadastre" t3 + JOIN ({sch}."cptprop" t4 + JOIN ({sch}."r_prop_cptprop" t5 + JOIN prop t6 USING (dnuper)) + USING (dnupro)) + USING (dnupro)) + USING (lot_id)) +USING (par_id) +JOIN administratif.communes c on t1.codcom = c.code_insee '''.format( + sch='cadastre', + t1='parcelles' +) + +sql_grp = '''GROUP BY 1,2,3,4,5,6,8''' + + +# Parcelles gestion +lst_parc = parc_site.par_id.tolist() +sql_whr = "WHERE t1.par_id IN ('{par_ids}') ".format(par_ids="','".join(lst_parc)) +sql_gst = sql + sql_whr + sql_grp +parc_gst = gpd.read_postgis(sql_gst,con_fon) +parc_gst_tmp = parc_gst.merge(parc_smrg,on='par_id') +parc_gst_2 = parc_gst_tmp.merge(anim_mrg,on='id') + + +# parcelles sites, NON PRESENTES dans parcelles gérées +geo = site_fon.unary_union +sql_whr = """ +WHERE ST_Intersects (t1.geom, 'SRID={epsg};{poly}') +AND t1.par_id NOT IN ('{par_ids}')""".format( + epsg = 2154, + poly = geo, + par_ids="','".join(lst_parc) +) +sql_veil = sql + sql_whr + sql_grp +parc_veil = gpd.read_postgis(sql_veil,con_fon) +parc_veil2 = parc_veil.merge(anim_mrg,left_on='par_id',right_on='par_id_v2',how='left') + + +# Parcelles Hors site +lst_id = [ + *parc_veil2.id.dropna().astype(int).tolist(), + *parc_gst_2.id.dropna().astype(int).tolist() +] +lst_parid_HS = anim_fon[~anim_fon.id.isin(lst_id)].par_id_v2.dropna().tolist() +sql_whr = "WHERE t1.par_id IN ('{par_ids}') ".format(par_ids="','".join(lst_parid_HS)) +sql_hs = sql + sql_whr + sql_grp +parc_hs = gpd.read_postgis(sql_hs,con_fon) +# parc_hs2 = parc_hs.merge(anim_mrg,left_on='par_id',right_on='par_id_v2') +parc_hs2 = parc_hs[parc_hs.is_valid].merge(anim_mrg,left_on='par_id',right_on='par_id_v2') +INVALID_parc_hs = parc_hs[~parc_hs.is_valid].merge(anim_mrg,left_on='par_id',right_on='par_id_v2') + +# parc_gst_2[parc_gst_2.type_prop.str.startswith(',')].iloc[:,:15] +# parc_veil2[parc_veil2.type_prop.str.startswith(',')].iloc[:,:15] +# parc_hs2[parc_hs2.type_prop.str.startswith(',')] + +# Merge parcelles +parc = gpd.pd.concat([parc_gst_2,parc_veil2,parc_hs2])\ + .reset_index(drop=True) +parc.loc[parc.type_prop.str.startswith(','),'type_prop'] = parc[parc.type_prop.str.startswith(',')].type_prop.str[1:] + +# Jointure id_site +parc2 = parc.sjoin(site_fon[['site_id','geom']],how='left')\ + .replace({'site_id':{'_ZI|_ZO':''}},regex=True)\ + .drop(columns=['index_right'])\ + .drop_duplicates()\ + .reset_index(drop=True) + + +# Drop manuel des doublons +parc_CRAS = ['381370000B0249','381370000B0241','381370000B0248','381370000B0250','381370000B0582'] +parc_MONA = ['383100000A0693'] +id_drop1 = parc2[parc2.par_id.isin(parc_CRAS) & (parc2.site_id != 'CRAS')].index +id_drop2 = parc2[parc2.par_id.isin(parc_MONA) & (parc2.site_id != 'MONA')].index 
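+# NB: the sjoin above can attach one parcel to several sites; the indexes gathered here pin these par_id to their expected site (CRAS / MONA) before the others are dropped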
+lst_id_drop = [*id_drop1,*id_drop2] +parc2.drop(lst_id_drop,inplace=True) +parc2.loc[parc2.par_id == '381370000B0566','site_id'] = 'CRAS' + + +# Jointure type_zone +parc2['zi'] = parc2.intersects(site_zi.unary_union) +parc2['zo'] = parc2.intersects(site_zo.unary_union) +parc2.loc[parc2.zi,'type_zone'] = 'ZI' +parc2.loc[parc2.zo & (parc2.type_zone != 'ZI'),'type_zone'] = 'ZO' +parc2.drop(columns = ['zi','zo'],inplace=True) + + +# PARC = gpd.pd.concat([parc2,parc_hs2]) + + +parc2['last_upd_parc'] = dt.now().date().isoformat() +# Envoie bd-cen-38.foncier.c_anim_foncier_v2 +parc2.to_postgis( + name = 'c_anim_foncier_v2', + con = con_bdcen, + schema = 'foncier', + if_exists = 'replace', + index=False +) + +INVALID_parc_hs.drop(columns='geom').to_sql( + name = 'c_anim_foncier_v2', + con = con_bdcen, + schema = 'foncier', + if_exists = 'append', + index=False +) + +sql = """ +ALTER TABLE foncier.c_anim_foncier_v2 ADD PRIMARY KEY (par_id); +GRANT ALL ON TABLE foncier.c_anim_foncier_v2 TO abavarot; +GRANT ALL ON TABLE foncier.c_anim_foncier_v2 TO grp_admin; +GRANT ALL ON TABLE foncier.c_anim_foncier_v2 TO cen_admin; +GRANT SELECT ON TABLE foncier.c_anim_foncier_v2 TO grp_consult; +""" +with con_bdcen.begin() as cnx: + cnx.execute(sql) diff --git a/0_FONCIER/create_foreign_zh.sql b/0_FONCIER/create_foreign_zh.sql new file mode 100644 index 0000000..e73275f --- /dev/null +++ b/0_FONCIER/create_foreign_zh.sql @@ -0,0 +1,126 @@ +from pycen import con_fon + +foreign_server = """ + CREATE SERVER fdw_azalee + FOREIGN DATA WRAPPER postgres_fdw + OPTIONS (dbname 'azalee', host '91.134.194.221', port '5432') +""" +user_mapping = """ + CREATE USER MAPPING + FOR cen_admin + SERVER fdw_azalee + OPTIONS (user 'cen_admin', password '#CEN38@venir') +""" +foreign_table = """ + -- DROP FOREIGN TABLE inventaires.inventaire_zh; + CREATE FOREIGN TABLE inventaires.inventaire_zh ( + site_code varchar(10) NULL, + nom varchar NULL, + auteur_site varchar NULL, + auteur_geom varchar NULL, + auteur_last_maj varchar NULL, + date_site date NULL, + date_geom date NULL, + date_last_maj date NULL, + type_milieu varchar NULL, + type_site varchar NULL, + typo_sdage varchar NULL, + rmq_site text NULL, + rmq_fct_majeur text NULL, + rmq_interet_patri text NULL, + rmq_bilan_menace text NULL, + rmq_orient_act text NULL, + rmq_usage_process text NULL, + code_cb varchar NULL, + lib_cb text NULL, + activ_hum varchar NULL, + impact varchar NULL, + "position" varchar NULL, + rmq_activ_hum text NULL, + connexion varchar NULL, + subm_orig varchar NULL, + subm_freq varchar NULL, + subm_etend varchar NULL, + fct_bio varchar NULL, + fct_hydro varchar NULL, + int_patri varchar NULL, + "val_socioEco" varchar NULL, + crit_delim varchar NULL, + crit_def_esp varchar NULL, + entree_eau_reg varchar NULL, + entree_eau_perm varchar NULL, + entree_eau_topo varchar NULL, + sortie_eau_reg varchar NULL, + sortie_eau_perm varchar NULL, + sortie_eau_topo varchar NULL, + geom public.geometry(geometry, 2154) NULL + ) + SERVER fdw_azalee + OPTIONS (schema_name 'zones_humides', table_name 'v_zoneshumides'); + + -- Permissions + ALTER TABLE inventaires.inventaire_zh OWNER TO cen_admin; + GRANT ALL ON TABLE inventaires.inventaire_zh TO cen_admin; +""" + +view_v_zoneshumides = """ + -- inventaires.v_zoneshumides source + CREATE OR REPLACE VIEW inventaires.v_zoneshumides + AS SELECT inventaire_zh.site_code, + inventaire_zh.nom, + inventaire_zh.auteur_site, + inventaire_zh.auteur_geom, + inventaire_zh.auteur_last_maj, + inventaire_zh.date_site, + 
inventaire_zh.date_geom, + inventaire_zh.date_last_maj, + inventaire_zh.type_milieu, + inventaire_zh.type_site, + inventaire_zh.typo_sdage, + inventaire_zh.rmq_site, + inventaire_zh.rmq_fct_majeur, + inventaire_zh.rmq_interet_patri, + inventaire_zh.rmq_bilan_menace, + inventaire_zh.rmq_orient_act, + inventaire_zh.rmq_usage_process, + inventaire_zh.code_cb, + inventaire_zh.lib_cb, + inventaire_zh.activ_hum, + inventaire_zh.impact, + inventaire_zh."position", + inventaire_zh.rmq_activ_hum, + CASE + WHEN inventaire_zh.rmq_activ_hum ~~ '%remblai%'::text THEN 't'::text + ELSE 'f'::text + END AS remblais, + inventaire_zh.connexion, + inventaire_zh.subm_orig, + inventaire_zh.subm_freq, + inventaire_zh.subm_etend, + inventaire_zh.fct_bio, + inventaire_zh.fct_hydro, + inventaire_zh.int_patri, + inventaire_zh."val_socioEco", + inventaire_zh.crit_delim, + inventaire_zh.crit_def_esp, + inventaire_zh.entree_eau_reg, + inventaire_zh.entree_eau_perm, + inventaire_zh.entree_eau_topo, + inventaire_zh.sortie_eau_reg, + inventaire_zh.sortie_eau_perm, + inventaire_zh.sortie_eau_topo, + inventaire_zh.geom + FROM inventaires.inventaire_zh; + + -- Permissions + ALTER TABLE inventaires.v_zoneshumides OWNER TO cen_admin; + GRANT ALL ON TABLE inventaires.v_zoneshumides TO cen_admin; + GRANT SELECT ON TABLE inventaires.v_zoneshumides TO grp_sig; + GRANT SELECT ON TABLE inventaires.v_zoneshumides TO cen_user; +""" + +with con_fon.begin() as cnx: + cnx.execute(foreign_server) + cnx.execute(user_mapping) + cnx.execute(foreign_table) + cnx.execute(view_v_zoneshumides) \ No newline at end of file diff --git a/0_FONCIER/cutfantoir_bydep.py b/0_FONCIER/cutfantoir_bydep.py new file mode 100644 index 0000000..0ae75fe --- /dev/null +++ b/0_FONCIER/cutfantoir_bydep.py @@ -0,0 +1,28 @@ + +import pandas as pd +import csv + +path = '/media/colas/SRV/FICHIERS/TRANSFERTS-EQUIPE/CG/FONCIER/CADASTRE/2023/Fichier national FANTOIR (situation avril 2023)/' +fan = path+'FANTOIR0423' +lst_dep = ['07','26','38','42'] + +if __name__ == '__main__': + + df = pd.read_table(fan,chunksize=500000) + d07 = pd.DataFrame() + d26 = pd.DataFrame() + d38 = pd.DataFrame() + d42 = pd.DataFrame() + + for d in df: + d.columns=['tmp'] + d07 = pd.concat([d07,d[d.tmp.str.startswith('070')]]) + d26 = pd.concat([d26,d[d.tmp.str.startswith('260')]]) + d38 = pd.concat([d38,d[d.tmp.str.startswith('380')]]) + d42 = pd.concat([d42,d[d.tmp.str.startswith('420')]]) + + + d07.to_csv(path+'070.txt', header=None, index=None, quoting=csv.QUOTE_NONE) + d26.to_csv(path+'260.txt', header=None, index=None, quoting=csv.QUOTE_NONE) + d38.to_csv(path+'380.txt', header=None, index=None, quoting=csv.QUOTE_NONE) + d42.to_csv(path+'420.txt', header=None, index=None, quoting=csv.QUOTE_NONE) diff --git a/0_FONCIER/delete_foncier_acte.py b/0_FONCIER/delete_foncier_acte.py new file mode 100644 index 0000000..b12afaa --- /dev/null +++ b/0_FONCIER/delete_foncier_acte.py @@ -0,0 +1,15 @@ +from pycen import con_fon + +id_acte = 'MU_CHAR_ZI2' + +if id_acte.startswith('MF'): + lst_tab = ['r_mfstatut','r_frais_mf','r_cad_site_mf','mf_acquisitions'] + key = 'mf_id' +elif id_acte.startswith('MU'): + lst_tab = ['r_mustatut','r_frais_mu','r_cad_site_mu','mu_conventions'] + key = 'mu_id' + +sql = "DELETE FROM foncier.{tab} WHERE {key}='{acte_id}';" +for t in lst_tab: + with con_fon.begin() as cnx: + cnx.execute(sql.format(tab=t,key=key,acte_id=id_acte)) diff --git a/0_FONCIER/extract_cadastre.py b/0_FONCIER/extract_cadastre.py new file mode 100644 index 0000000..06890ef --- /dev/null +++ 
b/0_FONCIER/extract_cadastre.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +#Nom : : extract_cadastre.py +#Description : Extraction du cadastre en fonction du des coordonnées NO,NE,SO,SE +# ou d'un polygon (.shp). +#Copyright : 2021, CEN38 +#Auteur : Colas Geier +#Version : 1.0 + +import pandas as pd +import geopandas as gpd +from sqlalchemy import create_engine +from sqlalchemy.engine import URL +from geoalchemy2 import Geometry +# from shapely.geometry.multipolygon import MultiPolygon +from shapely.geometry.polygon import Polygon +from pyproj import Transformer + + +# import contextily as ctx +# import matplotlib.pyplot as plt +def geom_map(geom): + map1 = ctx.providers.OpenTopoMap + map2 = ctx.providers.Esri.WorldImagery + maps = geom.to_crs(epsg=3857).copy() + ax = maps.plot(alpha=0.5, edgecolor='k') + ctx.add_basemap( + ax, + attribution_size=5, + reset_extent=False, + source=map1, + zoom='auto' + # zoom=14 + ) + ax.set_axis_off() + +output = '/home/colas/Documents/tmp/AUDE/cadastre_for_Aude.gpkg' +# Shape : chemin/nom.shp -- None +shp = None +# Coordonnées [[NO],[NE],[SE],[SO]] -- None +lat_point_list = [45.414249,45.414611,45.407813,45.407282] #,45.414249] +lon_point_list = [ 5.472428, 5.486879, 5.487019, 5.472701] #, 5.472428] +# bdd +bdd = True +code_site = 'CRAS' +type_zone = None + +# Parametres bdd +user = 'cgeier' +pwd = 'adm1n*bdCen' +adr = '91.134.194.221' +base = 'bd_cen' +epsg = 2154 +crs = 'EPSG:%s'%epsg + +# Connexion bdd +url = URL.create('postgresql+psycopg2', + username=user, + password=pwd, + host=adr, + database=base, +) +con = create_engine(url) +con_open = con.connect() + +# Shape : chemin/nom.shp -- None +# shp = None +# Coordonnées [[NO],[NE],[SE],[SO]] -- None +# lat_point_list = [] +# lon_point_list = [] +if shp: + print('IMPORT du mask : %s'%shp) + polygon = gpd.read_file(shp) + if polygon.crs.srs != crs.lower(): + polygon.to_crs(epsg, inplace=True) +elif bdd: + from pycen import con_bdcen + gdf = gpd.read_postgis("SELECT * FROM sites.c_sites_zonages WHERE code_site = '%s' --and type_zonage = '%s'"%(code_site,type_zone),con_bdcen) + polygon = gdf[['geom']] + polygon.rename_geometry('geometry',inplace=True) +else: + if epsg != 2154: + from pyproj import Transformer + transformer = Transformer.from_crs(crs, "EPSG:2154", always_xy=True) + lon_point_list,lat_point_list = transformer.transform(lon_point_list,lat_point_list) + epsg = 2154 + crs = 'EPSG:%s'%epsg + polygon_geom = Polygon(zip(lon_point_list, lat_point_list)) + polygon = gpd.GeoDataFrame(index=[0], crs=crs, geometry=[polygon_geom]) + +sql = '''SELECT t1.par_id, t1.geom, t1.codcom, t4.libelle vl, + t1.ccopre, t1.ccosec, t1.dnupla, t1.dparpi, t5.typprop_lib,t5.typprop, t1.ccocomm, + t1.ccoprem, t1.ccosecm, t1.dnuplam, t1.type, + t8.ccodem, + --substring(t8.dnupro from 3) dnupro, + --substring(t9.dnuper from 3) dnuper, + t9.ddenom, t9.jdatnss, t9.dldnss, t9.dsglpm, t9.dlign3, t9.dlign4, + t9.dlign5, t9.dlign6, t9.dnatpr, t9.gtoper, t9.ccogrm + FROM cadastre.parcelles_38 t1 + LEFT JOIN cadastre.vl_38 t4 ON (t1.vl_id = t4.vl_id) + LEFT JOIN cadastre.d_typprop t5 ON (t1.typprop_id = t5.typprop_id) + LEFT JOIN cadastre.lots_38 t2 ON (t1.par_id = t2.par_id) + LEFT JOIN cadastre.lots_natcult_38 t3 ON (t2.lot_id = t3.lot_id) + LEFT JOIN cadastre.cadastre_38 t6 ON (t2.lot_id = t6.lot_id) + LEFT JOIN cadastre.cptprop_38 t7 ON (t6.dnupro = t7.dnupro) + LEFT JOIN cadastre.r_prop_cptprop_38 t8 ON (t7.dnupro = t8.dnupro) + LEFT JOIN cadastre.proprios_38 t9 ON (t8.dnuper = t9.dnuper) + WHERE 
ST_Intersects (t1.geom, 'SRID={epsg};{poly}');'''.format(epsg=epsg,poly=polygon.geometry[0]) +df = gpd.read_postgis( + sql=sql, + con=con, +) +for col in df.columns: + if (df[col].isna()).all(): + del df[col] + +df.to_file(output,driver='GPKG') diff --git a/0_FONCIER/foncier_call_fonctions.py b/0_FONCIER/foncier_call_fonctions.py new file mode 100755 index 0000000..bd4fede --- /dev/null +++ b/0_FONCIER/foncier_call_fonctions.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +#Nom : : foncier_insert_table.py +#Description : Correspondance entre les sites et les parcelles cadastrales. +#Copyright : 2021, CEN38 +#Auteur : Colas Geier +#Version : 1.0 + +import pandas as pd +import geopandas as gpd +from sqlalchemy import create_engine #, func +# from sqlalchemy.orm import sessionmaker +import psycopg2 +import datetime as dt +import sys +import gc +from pycen import con_fon + + +# Parametres généraux +epsg = '2154' +crs = 'EPSG:%s'%epsg +chunk = None +pci_annee = '2020' +matrice_annee = '2020' +start_time = dt.datetime.today() +def time_exec (init_time): + time = dt.datetime.today() - init_time + return str(time) + +def get_data(table,schema,engine=con_fon,chunk=chunk): + sql = "SELECT * FROM {0}.{1}".format(schema, table) + tmp = gpd.read_postgis( + sql = sql, + con = con_fon, + geom_col = 'geom', + crs = crs, + chunksize = chunk, ) + + # Mise en forme des données + # start_time = dt.datetime.today() + if chunk: + df = pd.concat(tmp, ignore_index=True) + else: + df = tmp.copy() + + return df + + +############### +# Get sites +tab_site = 'sites' +sch_site = 'sites' +tab_fon = 'cadastre_site' +sch_fon = 'foncier' +print('''IMPORT data from schema: '%s' , table : '%s' '''%(sch_site,tab_site)) +# sql = "SELECT * FROM {0}.{1}".format(sch_site, tab_site) +sql = ''' + SELECT * FROM {0}.{1} + WHERE site_id = 'GDMA' + --WHERE site_id NOT IN (SELECT DISTINCT site_id FROM {2}.{3}) + '''.format(sch_site, tab_site, sch_fon, tab_fon) +site = gpd.read_postgis( + # table_name = table_in, + sql = sql, + con = con_fon, + geom_col = 'geom', + # schema = schema_in, + crs = crs, + chunksize = chunk, ) +# union_site = gpd.GeoSeries(site.geom.cascaded_union) +if site.empty: + print('Pas de nouveaux sites à lier au cadastre ====> EXIT') + sys.exit() + +################# +# Get parcelles +tab_parc = 'cadastre' +sch_parc = 'parcelles' +print('''IMPORT data from schema: '%s' , table : '%s' '''%(sch_parc,tab_parc)) +sql = """SELECT * FROM {sch}.{tab} WHERE ST_Intersects (geom, 'SRID={epsg};{poly}') +AND par_id NOT IN (SELECT par_id FROM {sch}.parcelles_cen)""".format( + sch=tab_parc, tab=sch_parc, epsg=epsg, poly=site.unary_union +) +parc = gpd.read_postgis( + sql = sql, + con = con_fon, + geom_col = 'geom', + crs = crs, + chunksize = chunk, ) + +parc_cent = parc.copy() +# parc_cent.geom = parc_cent.representative_point() +# parc_cent.geom = parc_cent.centroid +res = gpd.sjoin(site, parc_cent, predicate='intersects') + + +print('RUN fonction "import_parcelles_cen" pour {} lignes '.format(res.shape[0])) +# lst_site = res.site_id.unique() +start_time = dt.datetime.today() +res['sql'] = "SELECT cadastre.import_parcelles_cen('"+res.par_id+"','"+res.site_id+"',"+str(start_time.year)+");" + +with con_fon.begin() as cnx: + res['sql'].map(lambda x: cnx.execute(x)) + + +############### +# Get cptprop +lst_parid = "','".join(res.par_id.unique()) +sql = ''' + SELECT * FROM cadastre.cadastre + JOIN cadastre.lots USING (lot_id) + WHERE lots.par_id IN ('{lst_parid}')'''.format(lst_parid=lst_parid) +cptprop = 
pd.read_sql( + sql = sql, + con = con_fon,) +cptprop.drop_duplicates('dnupro', inplace=True) + +start_time = dt.datetime.today() +print('RUN fonction "import_cptprop_cen" pour {} lignes '.format(cptprop.shape[0])) + + +cptprop['sql'] = "SELECT cadastre.import_cptprop_cen('"+cptprop.dnupro+"',"+str(start_time.year)+");" + +with con_fon.begin() as cnx: + cptprop['sql'].map(lambda x: cnx.execute(x)) +time_exec(start_time) +print('END fonction : import_cptprop_cen .......... %s'%time_exec(start_time)) + +site_id = 'GDMA' +with con_fon.begin() as cnx: + cnx.execute("SELECT admin_sig.refresh_mview_foncier('{site_id}')"\ + .format( + site_id=site_id + ) + ) \ No newline at end of file diff --git a/0_FONCIER/foncier_insert_administratif.py b/0_FONCIER/foncier_insert_administratif.py new file mode 100644 index 0000000..d6df402 --- /dev/null +++ b/0_FONCIER/foncier_insert_administratif.py @@ -0,0 +1,437 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +#Nom : : foncier_insert_table.py +#Description : Insertion/MAJ des données administratives et territoriales à la base lors de sa création. +#Copyright : 2021, CEN38 +#Auteur : Colas Geier +#Version : 1.0 + +import pandas as pd +import geopandas as gpd +from sqlalchemy import create_engine +from geoalchemy2 import Geometry +from pycen import bdd +from shapely.geometry.multipolygon import MultiPolygon +from pydate import cdate +import sys +import os + + +#################################### +#################################### +#################################### +# PARAMETRES + +# Liste des tables à mettre à jour. ATTENTION : le respect des nomenclatures est importante +# Liste dispo : ['com', 'dpt', 'ter', 'histo_com', 'ter_com'] Cette liste doit être identique aux index de l'object : Table +# Correspondance tables BD_FONCIER : ['communes', 'departements', 'territoires', 'r_histo_com', 'r_ter_com'] +run = ['com', 'dpt', 'ter', 'histo_com', 'ter_com'] +user = 'colas_geier' +pwd = 'adm1n*fOncier' + +# Parametres bdd +# user = 'postgres' +# pwd = 'tutu' +# adr = '192.168.60.9' +adr = '91.134.194.221' +port = '5432' +base = 'bd_cen' + +# Connexion bdd +# bd = bdd.CEN( +# user = user, +# pwd = pwd, +# adr = adr, +# base = base +# # schema = schema +# ) +crs = 'EPSG:2154' + +con = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'.format(user,pwd,adr,port,base), echo=False) +con_open = con.connect() +# create_engine('postgresql+psycopg2://cen_admin:#CEN38@venir@172.17.0.2:5432/bd_cen', echo=True) + +# PATH +PATHIN = '/home/colas/' +LIST_DEPT = ['07', '26', '42', '38'] +# +MAIN_FOLDER = 'Documents' +SUB_FOLDERS = os.path.join('5_BDD','1_QGIS') +FILE_COM = ['COMMUNE.shp'] +FILE_DPT = ['DEPARTEMENT.shp'] +FILE_TER = [ + 'EPCI.shp', + 'PARC_OU_RESERVE.shp', + 'BASSIN_VERSANT_TOPOGRAPHIQUE.shp', + 'Contours_GRPT_AURA.shp' ] + + +# Dict table +IN_COM = [{ + 'id': 'id', + 'insee_com': 'code_insee', + 'nom': 'nom', + None: 'prec_plani', + None: 'statut', + None: 'canton', + 'insee_arr': 'arrondisst', + 'insee_dep': 'depart', + 'insee_reg': 'region', + 'population': 'popul', + None: 'multican', + 'actif': 'actif', + None: 'epfl', + # 'geometry': 'geom', +}] + +IN_DPT = [{ + 'id': 'id', + 'nom': 'nom', + 'insee_dep': 'insee_dep', + 'insee_reg': 'insee_reg', + 'date_creat': 'date_creat', + 'date_maj': 'date_maj', + 'actif': 'actif', + # 'geometry': 'geom', +}] + +IN_TER = [{ # DICT epci + '': 'territoire_id', + 'code_siren': 'siren', + 'nom': 'territoire_lib', + '': 'territoire_sigle', + 'nature': 'typterritoire_id', + '': 'administratif', + '': 'prefixe', 
+},{ # DICT parc_ou_reserve + '': 'territoire_id', + 'id': 'siren', # absence de code siren ==> récup des 10 derniers charactères du champs ID + 'toponyme': 'territoire_lib', + '': 'territoire_sigle', + 'nature': 'typterritoire_id', + '': 'administratif', + '': 'prefixe', +},{ # DICT bassin_versant + '': 'territoire_id', + 'code_hydro': 'siren', + 'toponyme': 'territoire_lib', + '': 'territoire_sigle', + 'id': 'typterritoire_id', + '': 'administratif', + '': 'prefixe', +},{ # DICT epci + '': 'territoire_id', + 'sirengrpt': 'siren', + 'grpt': 'territoire_lib', + '': 'territoire_sigle', + 'nature': 'typterritoire_id', + '': 'administratif', + '': 'prefixe', +},] + +# liste des couches administratives listé dans PATH_TER +administratif = ['epci'] +# DICT typterritoire_id lorsqu'une correspondance +# n'est pas possible avec un champs de la table attributaire. +# Relation typterritoire_id / ID (de la table attributaire). +typterid = { + 'bassvers' : 'bv' +} +mask_path = 'Documents/5_BDD/1_QGIS/' +mask_file = 'mask_parcelles_cadastre.shp' + + + +#################################### +#################################### +#################################### +# FONCTIONS +def join_typterritoire(df, join): + tab = join['table'] + sch = join['schema'] + ind = join['id'] + on = join['on'] + + tmp = pd.read_sql_table( + table_name = tab, + con = con, + schema = sch, + index_col = ind, + ) + + df[on['x']] = df[on['x']].str.lower() + df[on['x']] = df[on['x']].replace(tmp[on['y']].str.lower().to_list(),tmp.index.to_list()) + df = df[ df[on['x']].isin(tmp.index.to_list()) ] + + return df + +def find_files(File, main_path='Documents', sub_path=None): + ''' + @File : list + @main_path : str + @sub_path : str + ''' + sub = '' + if sub_path: + sub = sub_path + path = os.path.join(main_path, sub) + list_path = [] + for F in File : + matches = [str(path) for path in Path(path).rglob(F)] + list_path += matches + return list_path + + + +#################################### +#################################### +#################################### +# MAIN +if os.path.exists(PATHIN + mask_path + mask_file): + mask = gpd.read_file(PATHIN + mask_path + mask_file, crs=crs) +else : + sys.exit() + +Table = { + 'dpt': {'schema':'administratif', 'table':'departements', 'file': FILE_DPT, 'geom': True, 'dict':IN_DPT}, + 'com' : {'schema':'administratif', 'table':'communes', 'file': FILE_COM, 'geom': True, 'dict':IN_COM}, + # 'histo_com': {'schema':'administratif', 'table':'r_histo_com', 'file':None, 'geom': False, 'dict':None}, + 'ter': {'schema':'territoires', 'table':'territoires', 'file': FILE_TER, 'geom': True, 'dict':IN_TER, 'join': { + 'schema':'territoires', 'table': 'd_typterritoire', 'id': 'typterritoire_id', 'on': {'x': 'typterritoire_id', 'y': 'typterritoire_lib'}}}, + 'ter_com': {'schema':'territoires', 'table':'r_ter_com', 'file':None, 'geom': False, 'dict':None} +} + +for d, dep in enumerate(LIST_DEPT): + + PATH = 'Documents/5_BDD/1_QGIS/202103_BDTOPO/bdtopo_dep{0}_202103_shapefile/BDT_3-0_SHP_LAMB93_D0{0}-ED2021-03-15/'.format(dep) + PATH_COM = [PATH + 'ADMINISTRATIF/COMMUNE.shp'] + PATH_DPT = [PATH + 'ADMINISTRATIF/DEPARTEMENT.shp'] + PATH_TER = [ + PATH + 'ADMINISTRATIF/EPCI.shp', + PATH + 'ZONES_REGLEMENTEES/PARC_OU_RESERVE.shp', + PATH + 'HYDROGRAPHIE/BASSIN_VERSANT_TOPOGRAPHIQUE.shp', + '' ] + + Table = { + 'dpt': {'schema':'administratif', 'table':'departements', 'file': [PATHIN + path for path in PATH_DPT], 'geom': True, 'dict':IN_DPT}, + 'com' : {'schema':'administratif', 'table':'communes', 'file': 
[PATHIN + path for path in PATH_COM], 'geom': True, 'dict':IN_COM},
+        # 'histo_com': {'schema':'administratif', 'table':'r_histo_com', 'file':None, 'geom': False, 'dict':None},
+        'ter': {'schema':'territoires', 'table':'territoires', 'file': [PATHIN + path for path in PATH_TER], 'geom': True, 'dict':IN_TER, 'join': {
+            'schema':'territoires', 'table': 'd_typterritoire', 'id': 'typterritoire_id', 'on': {'x': 'typterritoire_id', 'y': 'typterritoire_lib'}}},
+        'ter_com': {'schema':'territoires', 'table':'r_ter_com', 'file':None, 'geom': False, 'dict':None}
+    }
+
+    not_run = [k for k in Table.keys() if k not in run]
+    for r in not_run:
+        del Table[r]
+
+    # NB: the 'file' entries above already hold explicit per-department paths, and Table has no
+    # top-level 'file' key, so this name-based lookup cannot run as written; kept disabled for reference.
+    # for tab in Table:
+    #     Table[tab]['file'] = find_files(File=Table[tab]['file'], sub_path=SUB_FOLDERS, main_path=MAIN_FOLDER)
+
+    if d == 0:
+        for tab in reversed(Table):
+            # continue
+            sql = "TRUNCATE TABLE {0}.{1} CASCADE".format(Table[tab]['schema'], Table[tab]['table'])
+            print(sql)
+            con_open.execute(sql)
+
+    no_r_tab = lambda x: x not in ['ter_com']
+    for key in filter(no_r_tab, Table.keys()):
+
+        # Check that the table exists in the database
+        lst_tab = con.dialect.get_table_names(con, schema=Table[key]['schema'])
+        test = Table[key]['table'] in lst_tab
+
+        # If the table exists
+        if test:
+
+            DICT = Table[key]['dict']
+            # Check for a 'geom' or 'geometry' field in the target table
+            geom = False
+            col_tab = con.dialect.get_columns(con, Table[key]['table'], schema=Table[key]['schema'])
+            for o, obj in enumerate(col_tab):
+                if 'geom' in obj['name']:
+                    geom = True
+                    geom_name = obj['name']
+                    geom_type = obj['type'].geometry_type
+                    if DICT:
+                        for D, tmp in enumerate(DICT):
+                            DICT[D]['geometry'] = geom_name
+
+            # Drop unneeded fields
+            if DICT:
+                for D, tmp in enumerate(DICT):
+                    if DICT[D] and None in DICT[D].keys():
+                        del DICT[D][None]
+
+            if Table[key]['file']:
+                for f, i_file in enumerate(Table[key]['file']):
+                    # If the table to insert carries a geometry
+                    if geom:
+                        # if Table[key]['geom']:
+                        # Read new table
+                        print('IMPORT shape for table {0}'.format(Table[key]['table']))
+                        df = gpd.read_file(filename=i_file)
+                        df = gpd.sjoin(df, mask, how='inner', op='intersects', rsuffix='right')
+                        del_cols = [col for col in df.columns if col.endswith('right')] + ['FID']
+                        df.drop(columns=del_cols, inplace=True, errors='ignore') # 'FID' may be absent depending on the source
+                        df['actif'] = True
+                        # if 'ID' in df.columns:
+                        #     df.set_index('ID', inplace=True)
+
+                        # typ_geom_out = con.dialect.get_columns(con, Table[key]['table'], schema=Table[key]['schema'])
+
+                        # Harmonize geometries
+                        # Convert POLYGON geometries to MULTIPOLYGON
+                        geom_df = df.geometry.geom_type.unique().tolist()
+                        geom_df = [x.upper() for x in geom_df]
+
+                        if [geom_type] != geom_df:
+                            if geom_type == 'MULTIPOLYGON' and 'POLYGON' in geom_df:
+                                print('CORRECTION des géometries POLYGON ==> MULTIPOLYGON')
+                                lst = []
+                                for o, obj in enumerate(df.geometry):
+                                    if obj.geom_type == 'Polygon':
+                                        obj = MultiPolygon([obj])
+                                    lst.append(obj)
+                                df['geometry'] = lst
+                            # elif geom_type == 'POLYGON' and 'MULTIPOLYGON' in geom_df:
+                            #     df[df.geom.geom_type == 'MultiPolygon']
+                            #     pass
+                            else:
+                                print('ERROR : conflit entre la géometrie du df {0} et de la table postgis {1}'.format(geom_df,geom_type))
+
+                    # Keep only the rows belonging to the department
+                    print('CONSERVATION des données départementales')
+                    df.columns = df.columns.str.lower()
+                    if 'insee_dep' in df.columns:
+                        df = df.loc[df.insee_dep == dep]
+
+                    # Format the fields for insertion
+                    print('FORMATAGE des données')
+                    df.rename(columns=DICT[f], inplace=True)
+                    rm_col 
= df.columns.difference(DICT[f].values()) + df.drop(columns=rm_col, inplace=True) + # break + + # Identification du champs 'administratif' pour la table territoire + couche = i_file.split('/') + couche = couche[len(couche)-1] + couche = couche.split('.')[0].lower() + if 'ter' == key and couche in administratif: + df['administratif'] = True + elif 'ter' == key and couche not in administratif: + df['administratif'] = False + df['siren'] = [siren[-10:] for siren in df['siren']] + + if 'typterritoire_id' in DICT[f].values(): + key_typterr = [k for (k, v) in DICT[f].items() if v == 'typterritoire_id'][0] + if 'join' in Table[key].keys() and key_typterr != 'id': + df = join_typterritoire(df, Table[key]['join']) + if key == 'ter' and key_typterr == 'id': + # df['typterritoire_id'] = df.index.to_list() + df['typterritoire_id'] = [typter[:8] for typter in df['typterritoire_id']] + df['typterritoire_id'] = df['typterritoire_id'].str.lower() + df['typterritoire_id'] = df['typterritoire_id'].replace(typterid) + # df[on['x']].replace(tmp[on['y']].str.lower().to_list(),tmp.index.to_list()) + + + # Si présence d'une géometrie dans la table à insérer + if geom: + if not isinstance(df, gpd.GeoDataFrame): + df = df.set_geometry('geom', drop=True, crs=crs) + df.rename(columns={'geometry': 'geom'}, inplace=True) + # if Table[key]['geom']: + df.to_postgis( + name = Table[key]['table'], + con = con, + schema = Table[key]['schema'], + index = False, + if_exists = 'append', + geom_col = geom_name, + # dtype={'geom': Geometry(geometry_type='MULTIPOLYGON', srid=df.crs.to_epsg())} + ) + print('''INSERT TABLE OK for DEPT {} + '''.format(dep)) + else: + None + # Si la table existe pas + else: + print('ERROR : La table {0} n\'existe pas dans le schéma {1} !'.format( + Table[key]['table'].upper(), + Table[key]['schema'].upper() + )) + + if 'ter_com' in Table.keys() and d == 0: + tab = Table['ter_com'] + print('IMPORT tables for table {0}'.format(tab['table'])) + ter_sql = 'SELECT * FROM {sch}.{tab}'.format(sch='territoires', tab='territoires' ) + ter = gpd.read_postgis( + sql = ter_sql, + con = con, + geom_col = 'geom', + crs = crs, + ) + com_sql = 'SELECT * FROM {sch}.{tab} WHERE actif = true'.format(sch='administratif', tab='communes' ) + com = gpd.read_postgis( + sql = com_sql, + con = con, + geom_col = 'geom', + crs = crs, + ) + col_id = ['territoire_id', 'code_insee', 'geom'] + for df in [ter, com]: + rm_col = [ col for col in df.columns[~df.columns.isin(col_id)] ] + df.drop(columns=rm_col, inplace=True) + + print('JOIN tables "territoires" & "communes"') + df = gpd.sjoin(ter, com, op='intersects') + rm_col = [ col for col in df.columns[~df.columns.isin(col_id)] ] + rm_col.append('geom') + df.drop(columns=rm_col, inplace=True) + df = pd.DataFrame(df) + + r_sql = 'SELECT code_insee, territoire_id FROM {sch}.{tab}'.format(sch=tab['schema'], tab=tab['table'] ) + r_tab = pd.read_sql( + sql = r_sql, + con = con, + ) + + if not r_tab.empty: + print('DROP lignes présentes dans la table {}'.format(tab['table'])) + df = pd.concat([df,r_tab]).drop_duplicates(keep=False) + + if not df.empty: + users = pd.read_sql_table( + table_name = 'utilisateurs', + con = con, + schema = 'admin_sig' + ) + + date_now = cdate.today() + df['actif'] = True + df['date_maj'] = date_now + df['utilisateur_id'] = users[users.utilisateur_id == user].iloc[0]['individu_id'] + df.to_sql( + name = tab['table'], + con = con, + schema = tab['schema'], + index = False, + if_exists = 'append' + ) + else: + print(''' + TOUTES les relations "communes" / 
"territoires" existent déjà ! + ''') + + +# gdf.set_index('id', drop=True, inplace=True) +# for key in Table.keys(): +# # schema = Table[key]['schema'] +# # table = Table[key]['table'] +# # query = 'SELECT * FROM {0}.{1}'.format(schema,table) +# # gdf = gpd.read_postgis(sql=query, con=con) +# df = bd.get_table( +# schema=Table[key]['schema'], +# table=Table[key]['table']) +# print(df) diff --git a/0_FONCIER/foncier_insert_administratif_V2.py b/0_FONCIER/foncier_insert_administratif_V2.py new file mode 100755 index 0000000..f35f453 --- /dev/null +++ b/0_FONCIER/foncier_insert_administratif_V2.py @@ -0,0 +1,573 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +#Nom : : foncier_insert_table.py +#Description : Insertion/MAJ des données administratives et territoriales à la base lors de sa création. +#Copyright : Mai 2021, CEN38 +#Auteur : Colas Geier +#Version : 2.0 + +import pandas as pd +import geopandas as gpd +from sqlalchemy import create_engine, text +from geoalchemy2 import Geometry +from shapely.geometry.multipolygon import MultiPolygon +from pydate import cdate +from pathlib import Path +from pycen import con_fon as con +import sys +import os +# from pycen import bdd +# import psycopg2 + + +#################################### +#################################### +#################################### +# PARAMETRES + +# Liste des tables à mettre à jour. ATTENTION : le respect des nomenclatures est importante +# Liste dispo : ['com', 'dpt', 'ter', 'ter_com'] Cette liste doit être identique aux index de l'object : Table +# Correspondance tables BD_FONCIER : ['communes', 'departements', 'territoires', 'r_histo_com', 'r_ter_com'] +run = ['com'] +# INSERT, TRUNCATE, UPDATE +action = 'UPDATE' + +# Parametres bdd +# user = 'remi_clement' +# pwd = 'adm1n*CEN' +# adr = '192.168.60.9' +user = 'colas_geier' +pwd = 'adm1n*fOncier' +adr = '91.134.194.221' +port = '5432' +base = 'bd_cen' +epsg = '2154' +crs = 'EPSG:%s'%epsg + +# PATH +INDEX_FOLDER = '/home/colas' # "/home/USER" (Unix), "C:" ou autre (Windows) +MAIN_FOLDER = os.path.join(INDEX_FOLDER,'Documents') # Dossier principale +SUB_FOLDERS = os.path.join('5_BDD','1_QGIS') # Sous dossier (facultatif: os.path.join('subFolder1','subFolder2'), None si non précisé) +FILE_COM = ['COMMUNE.shp'] +FILE_DPT = ['DEPARTEMENT.shp'] +FILE_TER = [ + 'EPCI.shp', + 'PARC_OU_RESERVE.shp', + 'BASSIN_VERSANT_TOPOGRAPHIQUE.shp', + 'Contours_GRPT_AURA.shp' ] +administratif = ['epci'] # liste des couches administratives listé dans PATH_TER + +mask_Mfolder = os.path.join(INDEX_FOLDER,'Documents') +mask_Sfolder = os.path.join('5_BDD','1_QGIS') +mask_file = 'mask_parcelles_cadastre.shp' + +# DICT FILE / TABLE +IN_COM = [{ + 'id': 'id', + 'insee_com': 'code_insee', + 'nom': 'nom', + None: 'prec_plani', # Plus fournis par la bd_topo + None: 'statut', # Plus fournis par la bd_topo + None: 'canton', # Plus fournis par la bd_topo + 'insee_arr': 'arrondisst', + 'insee_dep': 'depart', + 'insee_reg': 'region', + 'population': 'popul', + None: 'multican', # Plus fournis par la bd_topo + 'actif': 'actif', + None: 'epfl', # Plus fournis par la bd_topo +}] + +IN_DPT = [{ + 'id': 'id', + 'nom': 'nom', + 'insee_dep': 'insee_dep', + 'insee_reg': 'insee_reg', + 'date_creat': 'date_creat', + 'date_maj': 'date_maj', + 'actif': 'actif', +}] + +IN_TER = [{ # DICT epci + '': 'territoire_id', + 'code_siren': 'siren', + 'nom': 'territoire_lib', + '': 'territoire_sigle', + 'nature': 'typterritoire_id', + '': 'administratif', + '': 'prefixe', +},{ # DICT parc_ou_reserve + '': 
'territoire_id',
+    'id': 'siren', # no SIREN code ==> take the last 10 characters of the ID field
+    'toponyme': 'territoire_lib',
+    '': 'territoire_sigle',
+    'nature': 'typterritoire_id',
+    '': 'administratif',
+    '': 'prefixe',
+},{ # DICT bassin_versant
+    '': 'territoire_id',
+    'code_hydro': 'siren', # no SIREN code ==> take the last 10 characters of the ID field
+    'toponyme': 'territoire_lib',
+    '': 'territoire_sigle',
+    'id': 'typterritoire_id',
+    '': 'administratif',
+    '': 'prefixe',
+},{ # DICT contours_grpt_aura
+    '': 'territoire_id',
+    'sirengrpt': 'siren',
+    'grpt': 'territoire_lib',
+    '': 'territoire_sigle',
+    'nature': 'typterritoire_id',
+    '': 'administratif',
+    '': 'prefixe',
+},]
+# typterritoire_id mapping used when no direct match with a field
+# of the attribute table is possible.
+# Relation typterritoire_id / ID (from the attribute table).
+typterid = {
+    'bassvers' : 'bv'
+}
+
+Table = {
+    'dpt': {'schema':'administratif', 'table':'departements', 'name_file': FILE_DPT, 'geom': True, 'dict':IN_DPT, 'unique': 'insee_dep'},
+    'com' : {'schema':'administratif', 'table':'communes', 'name_file': FILE_COM, 'geom': True, 'dict':IN_COM, 'unique': 'code_insee'},
+    # 'histo_com': {'schema':'administratif', 'table':'r_histo_com', 'name_file':None, 'geom': False, 'dict':None},
+    'ter': {'schema':'territoires', 'table':'territoires', 'name_file': FILE_TER, 'geom': True, 'dict':IN_TER, 'unique': 'siren', 'join': {
+        'schema':'territoires', 'table': 'd_typterritoire', 'id': 'typterritoire_id', 'on': {'x': 'typterritoire_id', 'y': 'typterritoire_lib'}}},
+    'ter_com': {'schema':'territoires', 'table':'r_ter_com', 'name_file':None, 'geom': False, 'dict':None, 'unique': ['code_insee', 'territoire_id']}
+}
+
+####################################
+####################################
+####################################
+# FONCTIONS
+def join_typterritoire(df, join):
+    '''
+    @df : dataframe
+    @join : dict
+    '''
+    tab = join['table']
+    sch = join['schema']
+    ind = join['id']
+    on = join['on']
+
+    tmp = pd.read_sql_table(
+        table_name = tab,
+        con = con,
+        schema = sch,
+        index_col = ind,
+    )
+
+    df[on['x']] = df[on['x']].str.lower()
+    df[on['x']] = df[on['x']].replace(tmp[on['y']].str.lower().to_list(),tmp.index.to_list())
+    df = df[ df[on['x']].isin(tmp.index.to_list()) ]
+
+    return df
+
+def find_files(File, main_path='Documents', sub_path=None):
+    '''
+    @File : list
+    @main_path : str
+    @sub_path : str
+    '''
+    sub = ''
+    if sub_path:
+        sub = sub_path
+    path = os.path.join(main_path, sub)
+    list_path = []
+    nb_path = []
+    for F in File :
+        matches = [str(p) for p in Path(path).rglob(F)]
+        list_path += matches
+        # Append one count per pattern; list += str(n) would splice in the
+        # individual digits and break for counts above 9
+        nb_path.append(str(len(matches)))
+    return list_path, nb_path
+
+def tab_has_data(con, schema, table):
+    '''
+    @con : connection sqlalchemy create_engine
+    @schema : str
+    @table : str
+    '''
+    has_sch = con.dialect.has_schema(con, schema=schema)
+    if has_sch :
+        has_table = con.dialect.has_table(con, table_name=table, schema=schema)
+        if has_table:
+            sql = 'SELECT * FROM {sch}.{tab} LIMIT 1'.format(sch=schema,tab=table)
+            df = pd.read_sql_query(
+                sql = sql,
+                con = con
+            )
+            return not df.empty
+        else:
+            return '''TABLE %s doesn't exist in SCHEMA %s''' %(table,schema)
+    else :
+        return '''SCHEMA %s doesn't exist'''%schema
+
+def update_data(df, con, sch, tab, epsg=None):
+    columns = df.columns.to_list()
+    frame = df.copy()
+    frame.replace("'","''", regex=True, inplace=True)
+    pkey = con.dialect.get_pk_constraint(con, table_name=tab, schema=sch)['constrained_columns']
+
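+    # Illustrative sketch (hypothetical table and values, not executed): for a
+    # target table administratif.communes with primary key "id" and columns
+    # (id, nom), the loop below assembles one multi-row upsert of the form:
+    #   INSERT INTO administratif.communes (id, nom)
+    #   VALUES ('38001','Commune A'),('38002','Commune B')
+    #   ON CONFLICT (id) DO UPDATE SET id=EXCLUDED.id, nom=EXCLUDED.nom ;
+    # A geometry column, when present, is rendered as EWKT,
+    # e.g. 'SRID=2154;MULTIPOLYGON(...)'.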
+ + if 'geom' in columns or 'geometry' in columns: + if epsg or df.crs: + if not epsg: + epsg = df.crs.to_epsg() + name_geom = df.geometry.name + frame[name_geom] = 'SRID={epsg};'.format(epsg=epsg) + df[name_geom].map(str) + # else: return 'No crs define in update_data or in gdf' + + for c, col in enumerate(columns): + if c == 0: + frame['insert'] = "('" + frame[col].map(str) + # break + else: + frame['insert'] = frame['insert'] + "','" + frame[col].map(str) + if c == len(columns)-1: + frame['insert'] = frame['insert'] + "')" + # if c == 0: + # frame['insert'] = '("' + frame[col].map(str) + # # break + # else: + # frame['insert'] = frame['insert'] + '","' + frame[col].map(str) + # if c == len(columns)-1: + # frame['insert'] = frame['insert'] + '")' + + lst_cols = ', '.join(columns) + lst_vals = ','.join(frame['insert']) + lst_dupKey = ', '.join([col + '=EXCLUDED.' + col for col in columns]) + lst_pkey = ','.join(pkey) + + sql = '''INSERT INTO {sch}.{tab} ({lst_cols}) VALUES {lst_vals} ON CONFLICT ({lst_pkey}) DO UPDATE SET {lst_dupKey} ;'''.format( + sch=sch, tab=tab, lst_cols=lst_cols, lst_vals=lst_vals, lst_dupKey=lst_dupKey, lst_pkey=lst_pkey) + # sql = '''INSERT INTO {sch}.{tab} ({lst_cols}) + # VALUES {lst_vals} + # ON CONFLICT DO NOTHING; + # '''.format(sch=sch, tab=tab, lst_cols=lst_cols, lst_vals=lst_vals) + try: + con.execute(sql) + # con.execute(text(sql)) + print(''' +Update OK !''') + except Exception as exept: + print(exept) + + +# con = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'.format(user,pwd,adr,port,base), echo=False) +con_open = con.connect() +# conn = psycopg2.connect( +# user=user, +# password=pwd, +# host=adr, +# port=port, +# database=base) +#################################### +#################################### +#################################### +# MAIN +if __name__ == "__main__": + + mask_f = find_files(File=[mask_file], sub_path=mask_Sfolder, main_path=mask_Mfolder)[0] + if mask_f: + mask = gpd.read_file(mask_f[0], crs=crs) + else : + sys.exit('NO MASK FILE') + + not_run = [k for k in Table.keys() if k not in run] + for r in not_run: + del Table[r] + + for tab in Table : + if Table[tab]['name_file']: + Table[tab]['file'], Table[tab]['nb_typ_file'] = find_files(File=Table[tab]['name_file'], sub_path=SUB_FOLDERS, main_path=MAIN_FOLDER) + d = [] + for i, nb in enumerate(Table[tab]['nb_typ_file']): + d += [ Table[tab]['dict'][i] ] * int(nb) + Table[tab]['dict'] = d + else: + Table[tab]['file'] = None + + if action == 'TRUNCATE': + for tab in reversed(Table): + # continue + sql = "TRUNCATE TABLE {0}.{1} CASCADE".format(Table[tab]['schema'], Table[tab]['table']) + print(sql) + con_open.execute(sql) + sys.exit('END TRUNCATE') + + else: + # filter1 = lambda x: x not in ['ter_com'] + # for key in filter(filter1, Table.keys()): + for key in Table.keys(): + # Test existance de données dans la table en bdd + has_data = tab_has_data(con, schema=Table[key]['schema'], table=Table[key]['table']) + + # Si la table existe + if type(has_data) == bool : + # récupération des données présentent en bdd + if type(Table[key]['unique']) == str: + Table[key]['unique'] = [ Table[key]['unique'] ] + + df_exist = False + if has_data : + df_exist = pd.read_sql_table( + con = con, + schema = Table[key]['schema'], + table_name = Table[key]['table'], + columns = Table[key]['unique'] + ) + + DICT = Table[key]['dict'] + # Test présence d'un champ 'geom' ou 'geometry' dans la table d'export + # Si géometrie, ajout du champs au dictionnaire + geom = False + col_tab = 
con.dialect.get_columns(con, Table[key]['table'], schema=Table[key]['schema']) + for o, obj in enumerate(col_tab): + if 'geom' in obj['name']: + geom = True + geom_name = obj['name'] + geom_type = obj['type'].geometry_type + if DICT: + for D, tmp in enumerate(DICT): + DICT[D]['geometry'] = geom_name + + # Récupération des données existantes dans la base de données + + # Suppression des champs non utiles + if DICT: + for D, tmp in enumerate(DICT): + if DICT[D] and None in DICT[D].keys(): + del DICT[D][None] + + if Table[key]['file']: + for f, i_file in enumerate(Table[key]['file']): + # Test existance de données dans la table en bdd + has_data = tab_has_data(con, schema=Table[key]['schema'], table=Table[key]['table']) + + # Si la table existe + if type(has_data) == bool : + # récupération des données présentent en bdd + if type(Table[key]['unique']) == str: + Table[key]['unique'] = [ Table[key]['unique'] ] + + df_exist = False + if has_data : + pkey = con.dialect.get_pk_constraint(con, table_name=Table[key]['table'], schema=Table[key]['schema'])['constrained_columns'] + df_exist = pd.read_sql_table( + con = con, + schema = Table[key]['schema'], + table_name = Table[key]['table'], + columns = Table[key]['unique'], + index_col = pkey + ) + if df_exist.shape[1] == 0 : + df_exist[Table[key]['unique']] = df_exist.index + + print(''' + IMPORT {1} for table {0}'''.format(Table[key]['table'], i_file)) + # Si présence d'une géometrie dans la table à insérer + if geom: + # if Table[key]['geom']: + # Read new table + df = gpd.read_file(filename=i_file) + if not df.crs: + df.set_crs(crs=crs, inplace=True) + df = gpd.sjoin(df, mask, how='inner', op='intersects', rsuffix='right') + del_cols = [col for col in df.columns if col.endswith('right')] + ['FID'] + df.drop(columns=del_cols, inplace=True) + df['actif'] = True + + geom_df = df.geometry.geom_type.unique().tolist() + geom_df = [x.upper() for x in geom_df] + if [geom_type] != geom_df: + if geom_type == 'MULTIPOLYGON' and 'POLYGON' in geom_df: + print('CORRECTION des géometries POLYGON ==> MULTIPOLYGON') + lst = [] + for o, obj in enumerate(df.geometry): + if obj.geom_type == 'Polygon': + obj = MultiPolygon([obj]) + lst.append(obj) + df['geometry'] = lst + # elif geom_type == 'POLYGON' and 'MULTIPOLYGON' in geom_df: + # df[df.geom.geom_type == 'MultiPolygon'] + # pass + else: + print('ERROR : conflit entre la géometrie du df {0} et de la table postgis {1}'.format(geom_df,geom_type)) + else: + # Si le fichier à importer ne possède pas de géometrie + print('NO geom !') + print('IMPORT data without geom : No-config !') + + # Conservation des lignes appartenant au département + print('CONSERVATION des données départementales') + df.columns = df.columns.str.lower() + # if 'insee_dep' in df.columns: + # df = df.loc[df.insee_dep == dep] + + # Formatage des champs pour insertion + print('FORMATAGE des données') + df.rename(columns=DICT[f], inplace=True) + rm_col = df.columns.difference(DICT[f].values()) + df.drop(columns=rm_col, inplace=True) + + # Identification du champs 'administratif' pour la table territoire + couche = i_file.split('/') + couche = couche[len(couche)-1] + couche = couche.split('.')[0].lower() + if 'ter' == key and couche in administratif: + df['administratif'] = True + elif 'ter' == key and couche not in administratif: + df['administratif'] = False + if df['siren'].dtypes == float: + df['siren'] = df['siren'].astype(int) + if df['siren'].dtypes == int: + df['siren'] = df['siren'].astype(str) + df['siren'] = [siren[-10:] for siren in 
df['siren']]
+
+                    if 'typterritoire_id' in DICT[f].values():
+                        key_typterr = [k for (k, v) in DICT[f].items() if v == 'typterritoire_id'][0]
+                        if 'join' in Table[key].keys() and key_typterr != 'id':
+                            df = join_typterritoire(df, Table[key]['join'])
+                        if key == 'ter' and key_typterr == 'id':
+                            df['typterritoire_id'] = [typter[:8] for typter in df['typterritoire_id']]
+                            df['typterritoire_id'] = df['typterritoire_id'].str.lower()
+                            df['typterritoire_id'] = df['typterritoire_id'].replace(typterid)
+
+                    # Drop the rows already present in the database
+                    if action == 'INSERT' and has_data:
+                        unique_cols = df_exist.columns.to_list()
+                        for d in [df, df_exist]:
+                            d['exist'] = ''
+                            for col in unique_cols:
+                                d['exist'] += d[col].astype(str)
+                        df = df[~df['exist'].isin(df_exist['exist'])]
+                        df.drop(columns='exist', inplace=True)
+                    if action == 'UPDATE':
+                        unique_cols = df_exist.columns.to_list()
+                        for d in [df, df_exist]:
+                            d['exist'] = ''
+                            for col in unique_cols:
+                                d['exist'] += d[col].astype(str)
+                        df = df[df['exist'].isin(df_exist['exist'])].sort_values(unique_cols)
+                        ind = df_exist[df_exist['exist'].isin(df['exist'])].sort_values(unique_cols).index
+                        df.set_index(ind, inplace=True)
+                        df.drop(columns='exist', inplace=True)
+
+                    # If the table to insert carries a geometry
+                    if geom and not df.empty and action == 'INSERT' :
+                        if not isinstance(df, gpd.GeoDataFrame):
+                            df = df.set_geometry('geom', drop=False, crs=crs)
+                        df.to_postgis(
+                            name = Table[key]['table'],
+                            con = con,
+                            schema = Table[key]['schema'],
+                            index = False,
+                            if_exists = 'append',
+                            geom_col = geom_name,
+                        )
+                    elif geom and df.empty and action == 'INSERT' :
+                        print('NO NEWS data insert !')
+                    # If the table to update carries a geometry;
+                    # update_data() reports its own result
+                    elif geom and not df.empty and action == 'UPDATE' :
+                        if not isinstance(df, gpd.GeoDataFrame):
+                            df = df.set_geometry('geom', drop=False, crs=crs)
+                        df.reset_index(inplace=True)
+                        update_data(df, con, sch=Table[key]['schema'], tab=Table[key]['table'])
+                    elif geom and df.empty and action == 'UPDATE' :
+                        print('NO NEWS data update !')
+                    else:
+                        # Data to import comes from a file without geometry
+                        print('FILE WITHOUT GEOM !')
+            else:
+                # No data file to import:
+                # build the relation links Communes/Territoires.
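+                # Minimal sketch of the spatial join used below, on toy frames
+                # (same op= keyword as the geopandas version used in this script):
+                #   import geopandas as gpd
+                #   from shapely.geometry import Point
+                #   ter = gpd.GeoDataFrame({'territoire_id': ['t1']}, geometry=[Point(0, 0).buffer(2)])
+                #   com = gpd.GeoDataFrame({'code_insee': ['38001']}, geometry=[Point(1, 0).buffer(2)])
+                #   rel = gpd.sjoin(ter, com, op='intersects')
+                #   # rel holds one row per intersecting (territoire_id, code_insee)
+                #   # pair, i.e. the rows inserted into territoires.r_ter_com.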
+ print('NO IMPORT FILE !') + tab = Table[key] + print('IMPORT tables for table {0}'.format(tab['table'])) + # SELECT data territoires + ter_sql = 'SELECT * FROM {sch}.{tab}'.format(sch='territoires', tab='territoires' ) + ter = gpd.read_postgis( + sql = ter_sql, + con = con, + geom_col = 'geom', + crs = crs, + ) + # SELECT DATA communes + com_sql = 'SELECT * FROM {sch}.{tab} WHERE actif = true'.format(sch='administratif', tab='communes' ) + com = gpd.read_postgis( + sql = com_sql, + con = con, + geom_col = 'geom', + crs = crs, + ) + + # Conservation des données utiles + tab['unique'] += ['geom'] + for df in [ter, com]: + rm_col = [ col for col in df.columns[~df.columns.isin(tab['unique'])] ] + df.drop(columns=rm_col, inplace=True) + + # Jointure territoires VS communes + if not ter.empty and not com.empty: + print('JOIN tables "territoires" & "communes"') + df = gpd.sjoin(ter, com, op='intersects') + rm_col = [ col for col in df.columns[~df.columns.isin(tab['unique'])] ] + rm_col.append('geom') + df.drop(columns=rm_col, inplace=True) + df = pd.DataFrame(df) + + # Récupération des données déjà présentent en bdd + # r_sql = 'SELECT code_insee, territoire_id FROM {sch}.{tab}'.format(sch=tab['schema'], tab=tab['table'] ) + # r_tab = pd.read_sql( + # sql = r_sql, + # con = con, + # ) + + # if not r_tab.empty: + # print('DROP lignes présentes dans la table {}'.format(tab['table'])) + # df = pd.concat([df,r_tab]).drop_duplicates(keep=False) + + if action == 'INSERT' and has_data: + df = pd.concat([df,df_exist]).drop_duplicates(keep=False) + + if action == 'INSERT' and not df.empty: + users = pd.read_sql_table( + table_name = 'utilisateurs', + con = con, + schema = 'admin_sig' + ) + + date_now = cdate.today() + df['actif'] = True + df['date_maj'] = date_now + df['utilisateur_id'] = users[users.utilisateur_id == user].iloc[0]['individu_id'] + df.to_sql( + name = tab['table'], + con = con, + schema = tab['schema'], + index = False, + if_exists = 'append' + ) + else: + print(''' + TOUTES les relations "communes" / "territoires" existent déjà ! + ''') + elif ter.empty and not com.empty: + print(''' + NO DATA dans la table "territoires" ! + ''') + elif not ter.empty and com.empty: + print(''' + NO DATA dans la table "communes" ! + ''') + else: + print(''' + NO DATA dans la table "communes" et dans la table "territoires" ! + ''') +sys.exit() diff --git a/0_FONCIER/foncier_insert_cadastre.py b/0_FONCIER/foncier_insert_cadastre.py new file mode 100755 index 0000000..563e465 --- /dev/null +++ b/0_FONCIER/foncier_insert_cadastre.py @@ -0,0 +1,770 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +#Nom : : foncier_insert_table.py +#Description : Insertion des données cadastrales à la base après de sa création. 
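+# Illustrative sketch (toy values) of the composite identifiers assembled by the
+# 'columns_add' entries configured below: keys such as dnupro or par_id are the
+# concatenation of several cadastre fields.
+#   import pandas as pd
+#   df = pd.DataFrame({'ccodep': ['38'], 'ccocom': ['357'], 'dnupro': ['001234']})
+#   df['dnupro'] = df['ccodep'] + df['ccocom'] + df['dnupro']   # -> '38357001234'
+# The script below does the same column by column, with a fillna('') guard.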
+#Copyright : 2021, CEN38 +#Auteur : Colas Geier +#Version : 1.0 + + +import pandas as pd +# import numpy as np +from sqlalchemy import create_engine, text +from geoalchemy2 import Geometry +import gc +import sys +# import time +import datetime as dt +# from pycen import bdd +# from shapely.geometry.multipolygon import MultiPolygon + + +# Parametrage geopandas +import geopandas as gpd +import warnings; warnings.filterwarnings('ignore', 'GeoSeries.isna', UserWarning) +# import shapely +# shapely.speedups.disable() +# gpd.options.use_pygeos = True + + +# start_time = dt.datetime.today() +# tmp = dt.datetime.today() - start_time + + +check_duplicates = False +# Parametres bdd CADASTRE (in) +# Données de sortie du plugin qgis "Cadastre" +user_cad = 'postgres' +pwd_cad = 'foncier_test1' +adr_cad = '172.17.0.2' +port_cad = '5432' +base_cad = 'postgres' +schema_cad = '202007' + + +# Parametres bdd FONCIER (out) +user_fon = 'postgres' +pwd_fon = 'tutu' +adr_fon = '192.168.60.9' +port_fon = '5432' +base_fon = 'bd_cen' +schema_fon = 'cadastre' + + +# Correspondance entre les tables +crs = 'EPSG:2154' +dpt_nom_tab = '_73' +chunk = 100000 +list_dep = ['07', '26', '42', '38'] + +FIND_DOUBLON = [{ + 'tab_in': 'proprietaire', + 'on_col': ['ddenom', 'dprnlp', 'dldnss','jdatnss','ccogrm','dsglpm','dnatpr'] } + ] + +DICT_TAB = [{ + 'table_in' : 'proprietaire', # Table source qui provient de la sortie du plugin cadastre de qgis + 'index_tab': 'proprietaire', # Pkey de la table source + 'columns_in': ['ccodep', 'ccocom', 'dnupro', + 'dnuper', 'ccoqua', 'ddenom', 'jdatnss', 'dldnss', 'dsglpm', 'dlign3', 'dlign4', 'dlign5', 'dlign6', 'dnatpr', 'gtoper', 'ccogrm'], + 'table_out': [{ + 'name': 'cptprop{}'.format(dpt_nom_tab), + 'geom': None, + 'drop_escape': False, # Supprime les champs vides à l'intérieure des chaines de carractères + 'columns_in': ['ccodep', 'ccocom', 'dnupro'], # Liste des columns à récupérer en entrée. 
+ 'columns_add': {'dnupro': ['ccodep', 'ccocom', 'dnupro']}, # Définition des champs composés devant être ajoutés + 'unique': {'cols': ['dnupro'], 'keep': 'first'}, # Champs devant être uniques à l'intérieur de la table en sortie + 'dict': None, # Dictionnaire pour renommer les champs {'ancien_nom1': 'nouveau_nom1', 'ancien_nom2': 'nouveau_nom2', ...} + 'join': [{ + 'bdd': 'in', 'table': 'suf', 'on': ['ccodep', 'ccocom', 'dnupro'], 'type': 'concat', + 'select_cols' : ['ccodep', 'ccocom', 'dnupro']},{ + 'bdd': 'in', 'table': 'lots', 'on': ['ccodep', 'ccocom', 'dnupro'], 'type': 'concat', + 'select_cols' : ['ccodep', 'ccocom', 'dnuprol'],'dict': {'dnuprol': 'dnupro'}},{ + 'bdd': 'in', 'table': 'parcelle', 'on': ['ccodep', 'ccocom', 'dnupro'], 'type': 'concat', + 'select_cols' : ['ccodep', 'ccocom', 'dnupro']},] + },{ + 'name': 'proprios{}'.format(dpt_nom_tab), + 'geom': None, + 'drop_escape': True, + 'columns_in': ['ccodep', 'dnuper', 'ccoqua', 'ddenom', 'jdatnss', 'dldnss', 'dsglpm', 'dlign3', 'dlign4', 'dlign5', 'dlign6', 'dnatpr', 'gtoper', 'ccogrm'], + 'columns_add': {'dnuper': ['ccodep', 'dnuper']}, + 'unique': {'cols': ['dnuper'], 'keep': 'first'}, + 'dict': None, + 'join': False + },{ + 'name': 'r_prop_cptprop{}'.format(dpt_nom_tab), + 'geom': None, + 'drop_escape': True, + 'columns_in': ['ccodep', 'dnuper', 'ccocom', 'dnupro', 'dnomlp', 'dprnlp', 'epxnee', 'dnomcp', 'dprncp', 'ccodro', 'ccodem'], + 'columns_add': { + 'dnuper': ['ccodep', 'dnuper'], + 'dnupro': ['ccodep', 'ccocom', 'dnupro']}, + 'unique': {'cols': ['dnupro', 'dnuper'], 'keep': 'first'}, + 'dict': None, + 'join': False + },] +},{ + 'table_in' : 'parcelle', + 'index_tab': 'parcelle', + 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'ccovoi', 'dparpi', 'dcntpa', 'ccocomm', 'ccoprem', 'ccosecm', 'dnuplam', 'dvoilib', 'type_filiation', 'dnupro'], + 'table_out': [{ + 'name': 'vl{}'.format(dpt_nom_tab), + 'geom': None, + 'drop_escape': False, + 'columns_in' : ['ccodep', 'ccocom', 'ccovoi', 'dvoilib'], + 'columns_add': { + 'vl_id': ['ccodep', 'ccocom', 'ccovoi'], + 'geom': None}, + 'unique': {'cols': ['vl_id'], 'keep': 'first'}, + 'dict': {'dvoilib': 'libelle'}, + 'join': [{ # ERROR ! 2 dclssf pour 1 lot_id + 'bdd': 'in', 'table': 'voie', 'on': ['ccodep', 'ccocom', 'ccovoi'], 'type': 'concat', + 'select_cols' : ['ccodep', 'ccocom', 'codvoi', 'libvoi'], + 'dict': {'libvoi': 'libelle', 'codvoi': 'ccovoi'}, + }] + },{ + 'name': 'parcelles{}'.format(dpt_nom_tab), + 'geom': { + 'table_geom_in': 'geo_parcelle', + 'index_geom': 'geo_parcelle' + }, + 'drop_escape': True, + 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'ccovoi', 'dparpi', 'dcntpa', 'ccocomm', 'ccoprem', 'ccosecm', 'dnuplam', 'type_filiation'], + 'columns_add': { + 'par_id': ['ccodep', 'ccocom', 'ccopre','ccosec', 'dnupla'], + 'codcom': ['ccodep', 'ccocom'], + 'vl_id': ['ccodep', 'ccocom', 'ccovoi'], + 'typprop_id': None }, + 'unique': False, + 'dict': {'type_filiation': 'type'}, + 'join': False + },{ + 'name': 'lots{}'.format(dpt_nom_tab), # !!!!!! 
Ne trouve pas de parcelles sans lots (ex: 38357000AE0526) + 'geom': None, + 'drop_escape': True, + 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dcntpa'], + 'columns_add': { + 'lot_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla'], + 'par_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla'], + 'dnulot': None, }, + 'unique': False, + 'dict': {'dcntpa': 'dcntlo'}, + 'join': [{'bdd': 'out', 'table': 'parcelles{}'.format(dpt_nom_tab), 'on': ['par_id'], 'type': 'isin', + 'select_cols' :['par_id'] }] + # },{ + # 'name': 'cptprop{}'.format(dpt_nom_tab), # !!!!!! Ne trouve pas de parcelles sans lots (ex: 38357000AE0526) + # 'geom': None, + # 'drop_escape': True, + # 'columns_in' : ['ccodep', 'ccocom', 'dnupro'], + # 'columns_add': { + # 'dnupro': ['ccodep', 'ccocom', 'dnupro'], + # }, + # 'unique': {'cols': ['dnupro'], 'keep': 'first'}, + # 'dict': None, + # 'join': [{'bdd': 'out', 'table': 'cptprop{}'.format(dpt_nom_tab), 'on': ['dnupro'], 'type': 'notin', + # 'select_cols' :['dnupro'] }] + },] +# },{ +# 'table_in' : 'suf', +# 'index_tab': 'suf', +# 'columns_in' : ['ccodep', 'ccocom', 'dnupro'], +# 'table_out': [{ +# 'name': 'cptprop{}'.format(dpt_nom_tab), # !!!!!! Ne trouve pas de parcelles sans lots (ex: 38357000AE0526) +# 'geom': None, +# 'drop_escape': True, +# 'columns_in' : ['ccodep', 'ccocom', 'dnupro'], +# 'columns_add': { +# 'dnupro': ['ccodep', 'ccocom', 'dnupro'], +# }, +# 'unique': {'cols': ['dnupro'], 'keep': 'first'}, +# 'dict': None, +# 'join': [{'bdd': 'out', 'table': 'cptprop{}'.format(dpt_nom_tab), 'on': ['dnupro'], 'type': 'notin', +# 'select_cols' :['dnupro'] }] +# },] +},{ + 'table_in' : 'lots', + 'index_tab': 'lots', + 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot', 'dnupdl', 'dcntlo', 'dnuprol'], + 'table_out': [{ + # 'name': 'cptprop{}'.format(dpt_nom_tab), # !!!!!! Ne trouve pas de parcelles sans lots (ex: 38357000AE0526) + # 'geom': None, + # 'drop_escape': True, + # 'columns_in' : ['ccodep', 'ccocom', 'dnuprol'], + # 'columns_add': { + # 'dnupro': ['ccodep', 'ccocom', 'dnuprol'], + # }, + # 'unique': {'cols': ['dnupro'], 'keep': 'first'}, + # 'dict': None, + # 'join': [{'bdd': 'out', 'table': 'cptprop{}'.format(dpt_nom_tab), 'on': ['dnupro'], 'type': 'notin', + # 'select_cols' :['dnupro'] }] + # },{ + 'name': 'lots{}'.format(dpt_nom_tab), # !!!!!! parcelles avec lots: existe par_id NOT IN parcelles_73 + 'geom': None, + 'drop_escape': True, + 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot', 'dnupdl', 'dcntlo'], + 'columns_add': { + 'lot_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot'], + 'par_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla'],}, + 'unique': {'cols': ['lot_id'], 'keep': 'first'}, + 'dict': None, + 'join': [{'bdd': 'out', 'table': 'parcelles{}'.format(dpt_nom_tab), 'on': ['par_id'], 'type': 'isin', + 'select_cols' :['par_id'] }] + },{ + 'name': 'lots_natcult{}'.format(dpt_nom_tab), + 'geom': None, + 'drop_escape': True, + 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot'], + 'columns_add': { + 'lot_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot'],}, + 'unique': {'cols': ['lot_id'], 'keep': 'first'}, + 'dict': None, + 'join': [{ # ERROR ! 
2 dclssf pour 1 lot_id + 'bdd': 'in', 'table': 'suf', 'on': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot'], 'type': 'merge', + 'select_cols' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot','dsgrpf','cnatsp','dclssf','ccosub','dcntsf'], + },{ + 'bdd': 'out', 'table': 'lots{}'.format(dpt_nom_tab), 'on': ['lot_id'], 'type': 'isin', + 'select_cols' :['lot_id'] }] + },{ + 'name': 'cadastre{}'.format(dpt_nom_tab), + 'geom': None, + 'drop_escape': True, + 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot', 'dnuprol'], + 'columns_add': { + 'lot_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot'], + 'dnupro': ['ccodep', 'ccocom', 'dnuprol'],}, + 'unique': {'cols': ['lot_id', 'dnupro'], 'keep': 'first'}, + 'dict': None, + 'join': [{ # ERROR ! 2 dclssf pour 1 lot_id + 'bdd': 'in', 'table': 'suf', 'on': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot', 'dnuprol'], 'type': 'concat', + 'select_cols' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot', 'dnupro'], 'dict': {'dnupro': 'dnuprol'} + },{ + 'bdd': 'in', 'table': 'parcelle', 'on': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnuprol'], 'type': 'concat', + 'select_cols' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnupro'], 'dict': {'dnupro': 'dnuprol'} + },{ + 'bdd': 'out', 'table': 'lots{}'.format(dpt_nom_tab), 'on': ['lot_id'], 'type': 'isin', + 'select_cols' :['lot_id'] },{ + 'bdd': 'out', 'table': 'cptprop{}'.format(dpt_nom_tab), 'on': ['dnupro'], 'type': 'isin', + 'select_cols' :['dnupro'] },] + },] +# },{ +# 'table_in' : 'proprietaire', # Table source qui provient de la sortie du plugin cadastre de qgis +# 'index_tab': 'proprietaire', # Pkey de la table source +# 'columns_in': ['ccodep', 'dnuper', 'ccoqua', 'ddenom', 'jdatnss', 'dldnss', 'dsglpm', 'dlign3', 'dlign4', 'dlign5', 'dlign6', 'dnatpr', 'gtoper', 'ccogrm', +# 'ccocom', 'dnupro', 'dnomlp', 'dprnlp', 'epxnee', 'dnomcp', 'dprncp', 'ccodro', 'ccodem'], +# 'table_out': [] +# },{ +# 'table_in' : 'parcelle', +# 'index_tab': 'parcelle', +# 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnupro'], +# 'table_out': [{ +# 'name': 'cadastre{}'.format(dpt_nom_tab), +# 'geom': None, +# 'drop_escape': True, +# 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnupro'], +# 'columns_add': { +# 'lot_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla'], +# 'dnupro': ['ccodep', 'ccocom', 'dnupro'],}, +# 'unique': {'cols': ['lot_id', 'dnupro'], 'keep': 'first'}, +# 'dict': None, +# 'join': [{ +# 'bdd': 'out', 'table': 'lots{}'.format(dpt_nom_tab), 'on': ['lot_id'], 'type': 'isin', +# 'select_cols' :['lot_id'], 'where': {'dnulot': None} },{ +# 'bdd': 'out', 'table': 'cptprop{}'.format(dpt_nom_tab), 'on': ['dnupro'], 'type': 'isin', +# 'select_cols' :['dnupro'] },] +# },] +# },{ +# 'table_in' : 'parcelle', +# 'index_tab': 'parcelle', +# 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnupro'], +# 'table_out': [{ +# 'name': 'cadastre{}'.format(dpt_nom_tab), +# 'geom': None, +# 'drop_escape': True, +# 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnupro'], +# 'columns_add': { +# 'lot_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla'], +# 'dnupro': ['ccodep', 'ccocom', 'dnupro'],}, +# 'unique': {'cols': ['lot_id', 'dnupro'], 'keep': 'first'}, +# 'dict': None, +# 'join': [{ +# 'bdd': 'out', 'table': 'lots{}'.format(dpt_nom_tab), 'on': ['lot_id'], 'type': 'isin', +# 'select_cols' :['lot_id'], },{ +# 'bdd': 'out', 'table': 
'cptprop{}'.format(dpt_nom_tab), 'on': ['dnupro'], 'type': 'isin', +# 'select_cols' :['dnupro'] },] +# },] +}] + + + +# # Connexion bdd +# bd_cad = bdd.CEN( +# user = user_cad, +# pwd = pwd_cad, +# adr = adr_cad, +# base = base_cad +# # schema = schema +# ) + +################################ +########## Fonctions ########## +################################ +start_time = dt.datetime.today() +def time_exec (init_time): + time = dt.datetime.today() - init_time + return str(time) + + +def replace_escape_by_0 (df): + # Remplacement des espaces dans les chaines de caractères par des 0 + # if 'ccopre' in df.columns: + # df['ccopre'].replace([None, '', ' '], '000', inplace=True) + + cols = ['ccopre', 'ccosec', 'dnupla', 'dparpi', 'dnuplam', 'dclssf', 'ccovoi'] + for col in cols: + if col in df.columns: + df[col].replace([' '], '0', regex=True, inplace=True) + + return df + + +def join_data (df, join, schema_in): + # Jointure des données avec une autre table + table = join['table'] + bdd = join['bdd'] + typ = join['type'] + on = join['on'] + if bdd == 'out': + con = engine_fon + sch = schema_fon + if bdd == 'in': + con = engine_cad + sch = schema_in + select_col = [] + if 'select_cols' in join.keys(): + select_col.extend(join['select_cols']) + if 'where' in join.keys(): + select_col.extend(join['where'].keys()) + + tmp = pd.read_sql_table( + table_name = table, + con = con, + schema = sch, + columns = select_col + ) + tmp = replace_escape_by_0(tmp) + if 'dict' in join.keys(): + tmp.rename(columns=join['dict'], inplace=True) + if 'where' in join.keys(): + where = join['where'] + for key in where.keys(): + tmp = tmp[tmp[key] == where[key] ] + + if typ in ['isin', 'notin']: + # on = on[0] + for d in [df, tmp]: + d['on'] = '' + for col in on: + d['on'] += d[col].astype(str) + if typ == 'isin': + df = df[df['on'].isin(tmp['on'])] + if typ == 'notin': + df = df[~df['on'].isin(tmp['on'])] + df.drop(columns='on', inplace=True) + # if typ == 'notin': + # on = on[0] + # df = df[~df[on].isin(tmp[on])] + # df = pd.concat([df,tmp]).drop_duplicates(on, keep=False) + if typ == 'merge': + df = df.merge(tmp, on = on, how='left') + if typ == 'concat': + df = pd.concat([df,tmp], ignore_index=True).drop_duplicates() + + return df + +def get_geom_parcelle (df,get_geo,schema): + print('INIT import geodata ........... %s sec'%( time_exec(start_time) )) + + # Définition des variables géometriques + ind_geo = get_geo['index_geom'] + tab_geo = get_geo['table_geom_in'] + + sql = """select distinct on (t2.{0}) + t2.{0}, + t1.geom, + t1.supf::integer as dcntpa -- récupération de la contenance cadastrale associée car présence de géometrie non référencées dans la table "parcelles" + FROM "{1}".{2} t1 + INNER JOIN (select distinct on ({0}) {0}, max(creat_date) creat_date, max(update_dat) update_dat FROM "{1}".{2} GROUP BY ({0})) t2 + USING ({0}, creat_date, update_dat)""".format(ind_geo, schema, tab_geo) + tmp = gpd.read_postgis( + sql = sql, + con = engine_cad, + geom_col = 'geom', + crs = crs, + chunksize = chunk, + ) + + if chunk: + gdf = gpd.GeoDataFrame(pd.concat(tmp, ignore_index=True)) + else: + gdf = tmp.copy() + # del tmp; gc.collect() + # gdf = tmp.copy() + del tmp + gdf.set_index(ind_geo, inplace=True) + gdf.index.name = ind_in + print('END import geodata ........... %s sec'%( time_exec(start_time) )) + + + print('INIT merge data - geodata ........... 
%s sec'%( time_exec(start_time) ))
+    if not gdf[gdf.dcntpa.isna()].empty:
+        gdf.dcntpa.fillna(0, inplace=True)
+        gdf['dcntpa'] = gdf['dcntpa'].astype(df.dtypes['dcntpa'].type)
+    tmp = gdf.merge(df, on = [ind_in, 'dcntpa'], how='right')
+    tmp = tmp.set_geometry('geom', drop=True, crs=crs)
+    tmp.rename(columns={'geometry': 'geom'}, inplace=True)
+
+    # Fill the geometries still missing after the merge from gdf
+    if not tmp[tmp.geom.isna()].empty:
+        lst_ind_df = tmp[tmp.geom.isna()].index.tolist()
+        lst_ind_gdf = gdf.loc[gdf.index.isin(lst_ind_df)].index.tolist()
+        tmp.loc[tmp.index.isin(lst_ind_gdf), 'geom'] = gdf.loc[gdf.index.isin(lst_ind_gdf), 'geom']
+
+    del [gdf, df]
+    gdf = tmp.copy()
+    del tmp
+    export_data(gdf)
+
+
+def export_data(df):
+    print('INIT export data TO {0}, {1} ........... {2} sec'.format(tab_out, df.shape[0], time_exec(start_time) ))
+    rang = [e for e in range(0, df.shape[0], chunk*5)]
+    for i, j in enumerate(rang):
+        if j == max(rang) :
+            jj = df.shape[0]
+        else:
+            jj = rang[i+1]
+
+        df_imp = df[j:jj].copy()
+
+        print('INIT export data TO {0} ..... {1}/{2} ...... {3} sec'.format(tab_out, jj, df.shape[0], time_exec(start_time) ))
+        if 'geom' in df.columns and not df[~df['geom'].isna()].empty :
+            df_imp = df_imp.set_geometry('geom', drop=True, crs=crs)
+            df_imp.rename(columns={'geometry': 'geom'}, inplace=True)
+            df_imp.to_postgis(
+                name = tab_out,
+                con = engine_fon,
+                schema = schema_fon,
+                index = False,
+                if_exists = 'append',
+                geom_col = 'geom',
+                chunksize = chunk,
+            )
+        else:
+            df_imp.to_sql(
+                name = tab_out,
+                con = engine_fon,
+                schema = schema_fon,
+                index = False,
+                if_exists = 'append',
+                chunksize = chunk,
+                method = 'multi',
+            )
+    print('END export data TO {0} ........... {1} sec'.format(tab_out, time_exec(start_time) ))
+
+def optimize_data_frame(df):
+    columns = df.columns
+    for col in columns:
+        dtype = df[col].dtypes
+        # if dtype == 'int64' or dtype == 'int32':
+        len_col = len(df[col].unique())
+        if len_col <= df.shape[0]*0.8:
+            df[col] = df[col].astype('category')
+
+    return df
+
+
+# Open the database connections
+engine_cad = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'.format(user_cad,pwd_cad,adr_cad,port_cad,base_cad), echo=False)
+engine_fon = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'.format(user_fon,pwd_fon,adr_fon,port_fon,base_fon), echo=False)
+con_cad = engine_cad.connect()
+con_fon = engine_fon.connect()
+
+
+################################
+##########   Main     ##########
+################################
+if __name__ == "__main__":
+    ################
+    # CORRECTION DUPLICATES TABLE_IN
+    if check_duplicates:
+        for DOUBLON in FIND_DOUBLON:
+            tab = DOUBLON['tab_in']
+            on_col = DOUBLON['on_col']
+            for col in on_col:
+                for dep in list_dep:
+                    schema_in = dep + '_' + schema_cad
+                    sql = '''
+                    -- il existe des doublons en raison d'orthographes voisines :
+                    -- recherche de ces doublons
+                    SELECT DISTINCT '{0}' as insee_dep, dnuper, string_agg(DISTINCT {1},' / ') as orthographes_voisines
+                    FROM "{2}".{3} GROUP BY dnuper HAVING count(DISTINCT {1}) > 1'''.format(dep, col, schema_in, tab)
+                    df = pd.read_sql(
+                        sql = sql,
+                        con = engine_cad,
+                    )
+                    if df.empty:
+                        print('No duplicate value dep {0} table {1} column {2} ====> next request'.format(dep, tab, col))
+                        continue
+
+                    for i, row in df.iterrows():
+                        dnuper = row.dnuper
+                        choix = row.orthographes_voisines.split(' / ')
+                        choix = [x.strip() for x in choix]
+                        Question = input("""Des orthographes voisines existent pour l'identifiant : {0}
+                        dans la colonne : {1}. 
+ Les valeurs voisines sont : {2} + Ecrire la mise à jour du champs {1} à enregistrer (c cancel) :""".format(dnuper,col, choix)) + if Question.lower() == 'c' or Question.lower() == 'cancel': + continue + + update = '''UPDATE "{0}".{1} + SET {2} = '{3}' + WHERE {2} like '{4}%' + AND dnuper = '{5}';'''.format(schema_in, tab, col, Question, "%' OR {} like '".format(col).join(map(str,choix)), dnuper) + try: + con_cad.execute(text(update)) + print(''' + Update OK !''') + except Exception as exept: + print('ERROR : {0}'.format(update)) + print(exept) + sys.exit() + + + + ################ + # TRUNCATE TABLE OUT + for i, DICT in enumerate(DICT_TAB): + # continue + # Définition des variables + # i = 1 + # if i != 2: + # continue + tab_in = DICT_TAB[i]['table_in'] + col_in = DICT_TAB[i]['columns_in'] + ind_in = DICT_TAB[i]['index_tab'] + tabs_out = DICT_TAB[i]['table_out'] + + for tab_out in reversed(tabs_out): + # continue + sql = "TRUNCATE TABLE {0}.{1} CASCADE".format(schema_fon, tab_out['name']) + print(sql) + con_fon.execute(sql) + + + + for dep in list_dep: + schema_in = dep + '_' + schema_cad + print(''' + + INIT import data FROM {} + + '''.format(schema_in)) + ################ + # IMPORT IN TABLE OUT + for i, DICT in enumerate(DICT_TAB): + # Définition des variables + # i = 1 + # if i != 1: + # continue + tab_in = DICT_TAB[i]['table_in'] + col_in = DICT_TAB[i]['columns_in'] + ind_in = DICT_TAB[i]['index_tab'] + tabs_out = DICT_TAB[i]['table_out'] + + # Import data + print(''' + INIT import data FROM {0}........... {1} sec'''.format(tab_in, time_exec(start_time) )) + tmp = pd.read_sql_table( + table_name = tab_in, + con = engine_cad, + schema = schema_in, + columns = col_in + [ind_in], + chunksize = chunk, + ) + + # Mise en forme des données + # start_time = dt.datetime.today() + if chunk: + DF = pd.concat(tmp, ignore_index=True) + else: + DF = tmp.copy() + + DF.drop_duplicates(inplace=True) + del tmp + # DF = optimize_data_frame(DF) + DF.set_index(ind_in, inplace=True) + print('END import data ........... %s sec'%( time_exec(start_time) )) + + for tab in tabs_out: + tab_out = tab['name'] + dictio = tab['dict'] + col_df = tab['columns_in'] + col_ad = tab['columns_add'] + get_geo = tab['geom'] + drp_esc = tab['drop_escape'] + unique = tab['unique'] + join = tab['join'] + # if tab_out == 'parcelles_73': + # break + # continue + + print('INIT TABLE {0} ........... {1} sec'.format(tab_out, time_exec(start_time) )) + df = DF[DF.columns.intersection(col_df)].copy() + # df = optimize_data_frame(df) + # del DF; gc.collect() + + + # Remplacement des espaces dans les chaines de caractères par des 0 + df = replace_escape_by_0(df) + if drp_esc: + df_obj = df.select_dtypes(['object']) + df[df_obj.columns] = df_obj.apply(lambda x: x.str.strip()) + # df.replace([' '], '', regex=True, inplace=True) + + if dictio : + df.rename(columns=dictio, inplace=True) + + if join : + for j in join: + if j['bdd'] == 'in' : + # sys.exit() + df = join_data(df, j, schema_in) + if df.empty: + print('df EMPTY ====> next table') + # pass + continue + + # Ajout des champs additionnels + if col_ad : + print('INIT addition columns ........... 
%s sec'%( time_exec(start_time) ))
+                for key in col_ad.keys():
+                    if key in df.columns:
+                        df[key + '_tmp'] = df[key].copy()
+                        col_ad[key] = [x if x != key else key+'_tmp' for x in col_ad[key]]
+
+                    aggreg = col_ad[key]
+                    if aggreg :
+                        # Composite key: concatenate the source fields, empty string for NULLs
+                        df[key] = ''
+                        for col in aggreg:
+                            df[key] += df[col].fillna('')
+                    else:
+                        df[key] = aggreg
+
+                    print('ADD column {0} : {1} ........... {2} sec'.format(key,aggreg, time_exec(start_time) ))
+
+            # JOIN
+            if join :
+                for j in join:
+                    if j['bdd'] == 'out' :
+                        df = join_data(df, j, schema_in)
+                        if df.empty:
+                            print('df EMPTY ====> next table')
+                            continue
+
+            if unique:
+                df.drop_duplicates(unique['cols'], keep=unique['keep'], inplace=True)
+
+            # Keep only the fields needed for the database insert
+            name_col_out = engine_fon.dialect.get_columns(engine_fon, tab_out, schema=schema_fon)
+            name_col_out = [ sub['name'] for sub in name_col_out ]
+            if 'geom' in name_col_out and 'geom' not in df.columns:
+                name_col_out.remove('geom')
+            df = df[df.columns.intersection(name_col_out)]
+
+            ####################
+            # Read geodataframe
+            # Only when a geometry field is required.
+            if get_geo:
+                get_geom_parcelle(df=df, get_geo=get_geo, schema=schema_in)
+                # list of geometries whose id is not present in the "parcelles" table
+                # lst = gdf[gdf.par_id.isna()].index.tolist()
+                # # Recompose the main fields
+                # par_id = [l.replace('0','',1) for l in lst]
+                # gdf.loc[gdf.index.isin(lst), 'par_id'] = par_id
+                # gdf.loc[gdf.index.isin(lst), 'codcom'] = [l[:5] for l in par_id]
+                # gdf.loc[gdf.index.isin(lst), 'ccopre'] = [l[5:8] for l in par_id]
+                # gdf.loc[gdf.index.isin(lst), 'ccosec'] = [l[8:10] for l in par_id]
+                # gdf.loc[gdf.index.isin(lst), 'dnupla'] = [l[10:14] for l in par_id]
+                # # gdf.loc[gdf.index.isin(lst), 'vl_id'] = [l[:8] for l in par_id]
+                # # gdf = gdf[gdf.vl_id.str.len() == 10]
+            else:
+                export_data(df)
+            del df
+        del DF #; gc.collect()
+        print('END transfert data FROM département {0} ........... {1} sec'.format(dep, time_exec(start_time) ))
+    print('END SCRIPT')
+    sys.exit()
diff --git a/0_FONCIER/foncier_insert_cadastre_V2.py b/0_FONCIER/foncier_insert_cadastre_V2.py
new file mode 100755
index 0000000..77380b6
--- /dev/null
+++ b/0_FONCIER/foncier_insert_cadastre_V2.py
@@ -0,0 +1,613 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+#Nom : foncier_insert_cadastre_V2.py
+#Description : Insertion des données cadastrales à la base.
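+# Illustrative sketch (toy frames) of the four 'join' types documented in the
+# DICT_TAB configuration below, as implemented by join_data():
+#   import pandas as pd
+#   a = pd.DataFrame({'k': ['x', 'y']}); b = pd.DataFrame({'k': ['y', 'z']})
+#   pd.concat([a, b], ignore_index=True).drop_duplicates()  # 'concat' -> x, y, z
+#   a.merge(b, on='k', how='left')                          # 'merge'  -> left join on k
+#   a[a['k'].isin(b['k'])]                                  # 'isin'   -> keeps y
+#   a[~a['k'].isin(b['k'])]                                 # 'notin'  -> keeps x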
+#Copyright : 2021, CEN38 +#Auteur : Colas Geier +#Version : 2.0 + + +from sqlalchemy import create_engine, text +from geoalchemy2 import Geometry # need for pandas +import datetime as dt +import pandas as pd +import sys + + +# Parametrage geopandas +import geopandas as gpd +import warnings; warnings.filterwarnings('ignore', 'GeoSeries.isna', UserWarning) + + +check_duplicates = True +debug = True +# Parametres bdd CADASTRE (in) +# Données de sortie du plugin qgis "Cadastre" +# Les schéma ont une nomenclarture numérique du type "dep_nom" +user_cad = 'postgres' # utilisateur de connexion à la bdd +pwd_cad = 'foncier_test1' # mot de passe de la bdd +adr_cad = '172.17.0.2' # adresse ip de la bdd +port_cad = '5432' # port de la bdd +base_cad = 'postgres' # nom de la bdd +schema_cad = '202007' # nom unique qui compose le nom des schémas cibles de l'année à récupérer +list_dep = ['07', '26', '42', '38'] # liste des départements + + +# Parametres bdd FONCIER (out) +user_fon = 'colas_geier' +pwd_fon = 'adm1n*fOncier' +adr_fon = '91.134.194.221' +# user_fon = 'postgres' # utilisateur de connexion à la bdd +# pwd_fon = 'tutu' # mot de passe de la bdd +# adr_fon = '192.168.60.9' # adresse ip de la bdd +port_fon = '5432' # port de la bdd +base_fon = 'bd_cen' # nom de la bdd +schema_fon = 'cadastre' # nom du schéma cible dans la bdd +sufx_nom_tab = '_38' # sufixe du nom des tables où insérer la données dans le schéma cadastre (ex: _dep) + + +# Correspondance entre les tables +epsg = '2154' # code EPSG de la projection géographique en entrée. +crs = 'EPSG:%s'%epsg +chunk = 100000 + +# Il existe parfois des doublons en raison d'orthographes voisinent (entre autres) +# dans les tables de sortie du du plugin qgis "Cadastre" +FIND_DOUBLON = [{ + 'tab_in': 'proprietaire', # nom de la table où des doublons se trouvent + 'on_col': ['ddenom', 'dprnlp', 'dldnss','jdatnss','ccogrm','dsglpm','dnatpr'] } # nom des champs où des corrections sont nécessaires + ] + +DICT_TAB = [{ + 'table_in' : 'proprietaire', # Table source qui provient de la sortie du plugin cadastre de qgis + 'index_tab': 'proprietaire', # Pkey de la table source + 'columns_in': ['ccodep', 'ccocom', 'dnupro', # Liste exaustive des champs à récupérer dans la table source + 'dnuper', 'ccoqua', 'ddenom', 'jdatnss', 'dldnss', 'dsglpm', 'dlign3', 'dlign4', 'dlign5', 'dlign6', 'dnatpr', 'gtoper', 'ccogrm'], + 'table_out': [{ + 'name': 'cptprop{}'.format(sufx_nom_tab), # Nom de la table cible + 'geom': None, # Existance d'une géometrie à récupérer pour insertion (Non: None, Oui: cf tab_out parcelle_xx ci-dessous) + 'drop_escape': False, # Supprime les champs vides à l'intérieure des chaines de carractères + 'columns_in': ['ccodep', 'ccocom', 'dnupro'], # Liste des columns à récupérer en entrée. + 'columns_add': {'dnupro': ['ccodep', 'ccocom', 'dnupro']}, # Définition des champs composés devant être ajoutés + 'unique': {'cols': ['dnupro'], 'keep': 'first'}, # Champs devant être uniques à l'intérieur de la table en sortie, keep: ['first', 'last', False] + 'dict': None, # Dictionnaire pour renommer les champs {'ancien_nom1': 'nouveau_nom1', 'ancien_nom2': 'nouveau_nom2', ...} + 'join': [{ # Jointure de tables à réaliser. 
+ 'bdd': 'in', # ['in', 'out']: jointure des tables à l'import / avant export + 'table': 'suf', # Nom de la table à joindre + 'select_cols' : ['ccodep', 'ccocom', 'dnupro'], # Nom des champs à récupérer + 'on': ['ccodep', 'ccocom', 'dnupro'], # Nom des champs sur lesquelles faire la jointure + 'type': 'concat' # Type de jointure à réaliser ['concat', 'merge', 'isin', 'notin'] + # 'concat': concatène les 2 tables; 'merge': merge les 2 tables en fonction du paramètre 'on' + # 'isin': Conservation des données présentent dans les 2 tables en fonction du paramètre 'on' + # 'notin': Conservation des données NON présentent dans la table 'join' en fonction du paramètre 'on' + },{ + 'bdd': 'in', 'table': 'lots', 'on': ['ccodep', 'ccocom', 'dnupro'], 'type': 'concat', + 'select_cols' : ['ccodep', 'ccocom', 'dnuprol'],'dict': {'dnuprol': 'dnupro'}},{ + 'bdd': 'in', 'table': 'parcelle', 'on': ['ccodep', 'ccocom', 'dnupro'], 'type': 'concat', + 'select_cols' : ['ccodep', 'ccocom', 'dnupro']},] + },{ + 'name': 'proprios{}'.format(sufx_nom_tab), + 'geom': None, + 'drop_escape': True, + 'columns_in': ['ccodep', 'dnuper', 'ccoqua', 'ddenom', 'jdatnss', 'dldnss', 'dsglpm', 'dlign3', 'dlign4', 'dlign5', 'dlign6', 'dnatpr', 'gtoper', 'ccogrm'], + 'columns_add': {'dnuper': ['ccodep', 'dnuper']}, + 'unique': {'cols': ['dnuper'], 'keep': 'first'}, + 'dict': None, + 'join': False + },{ + 'name': 'r_prop_cptprop{}'.format(sufx_nom_tab), + 'geom': None, + 'drop_escape': True, + 'columns_in': ['ccodep', 'dnuper', 'ccocom', 'dnupro', 'dnomlp', 'dprnlp', 'epxnee', 'dnomcp', 'dprncp', 'ccodro', 'ccodem'], + 'columns_add': { + 'dnuper': ['ccodep', 'dnuper'], + 'dnupro': ['ccodep', 'ccocom', 'dnupro']}, + 'unique': {'cols': ['dnupro', 'dnuper'], 'keep': 'first'}, + 'dict': None, + 'join': False + },] +},{ + 'table_in' : 'parcelle', + 'index_tab': 'parcelle', + 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'ccovoi', 'dparpi', 'dcntpa', 'ccocomm', 'ccoprem', 'ccosecm', 'dnuplam', 'dvoilib', 'type_filiation'], + 'table_out': [{ + 'name': 'vl{}'.format(sufx_nom_tab), + 'geom': None, + 'drop_escape': False, + 'columns_in' : ['ccodep', 'ccocom', 'ccovoi', 'dvoilib'], + 'columns_add': { + 'vl_id': ['ccodep', 'ccocom', 'ccovoi'], + 'geom': None}, + 'unique': {'cols': ['vl_id'], 'keep': 'first'}, + 'dict': {'dvoilib': 'libelle'}, + 'join': [{ + 'bdd': 'in', 'table': 'voie', 'on': ['ccodep', 'ccocom', 'ccovoi'], 'type': 'concat', + 'select_cols' : ['ccodep', 'ccocom', 'codvoi', 'libvoi'], + 'dict': {'libvoi': 'libelle', 'codvoi': 'ccovoi'}, + }] + },{ + 'name': 'parcelles{}'.format(sufx_nom_tab), + 'geom': { + 'table_geom_in': 'geo_parcelle', + 'index_geom': 'geo_parcelle' + }, + 'drop_escape': True, + 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'ccovoi', 'dparpi', 'dcntpa', 'ccocomm', 'ccoprem', 'ccosecm', 'dnuplam', 'type_filiation'], + 'columns_add': { + 'par_id': ['ccodep', 'ccocom', 'ccopre','ccosec', 'dnupla'], + 'codcom': ['ccodep', 'ccocom'], + 'vl_id': ['ccodep', 'ccocom', 'ccovoi'], + 'typprop_id': None }, + 'unique': False, + 'dict': {'type_filiation': 'type'}, + 'join': False + },{ + 'name': 'lots{}'.format(sufx_nom_tab), + 'geom': None, + 'drop_escape': True, + 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dcntpa'], + 'columns_add': { + 'lot_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla'], + 'par_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla'], + 'dnulot': None, }, + 'unique': False, + 'dict': {'dcntpa': 'dcntlo'}, + 'join': [{'bdd': 'out', 
'table': 'parcelles{}'.format(sufx_nom_tab), 'on': ['par_id'], 'type': 'isin', + 'select_cols' :['par_id'] }] + },] +},{ + 'table_in' : 'lots', + 'index_tab': 'lots', + 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot', 'dnupdl', 'dcntlo', 'dnuprol'], + 'table_out': [{ + 'name': 'lots{}'.format(sufx_nom_tab), + 'geom': None, + 'drop_escape': True, + 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot', 'dnupdl', 'dcntlo'], + 'columns_add': { + 'lot_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot'], + 'par_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla'],}, + 'unique': {'cols': ['lot_id'], 'keep': 'first'}, + 'dict': None, + 'join': [{'bdd': 'out', 'table': 'parcelles{}'.format(sufx_nom_tab), 'on': ['par_id'], 'type': 'isin', + 'select_cols' :['par_id'] }] + },{ + 'name': 'lots_natcult{}'.format(sufx_nom_tab), + 'geom': None, + 'drop_escape': True, + 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot'], + 'columns_add': { + 'lot_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot'],}, + 'unique': {'cols': ['lot_id'], 'keep': 'first'}, + 'dict': None, + 'join': [{ + 'bdd': 'in', 'table': 'suf', 'on': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot'], 'type': 'merge', + 'select_cols' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot','dsgrpf','cnatsp','dclssf','ccosub','dcntsf'], + },{ + 'bdd': 'out', 'table': 'lots{}'.format(sufx_nom_tab), 'on': ['lot_id'], 'type': 'isin', + 'select_cols' :['lot_id'] }] + },{ + 'name': 'cadastre{}'.format(sufx_nom_tab), + 'geom': None, + 'drop_escape': True, + 'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot', 'dnuprol'], + 'columns_add': { + 'lot_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot'], + 'dnupro': ['ccodep', 'ccocom', 'dnuprol'],}, + 'unique': {'cols': ['lot_id', 'dnupro'], 'keep': 'first'}, + 'dict': None, + 'join': [{ + 'bdd': 'in', 'table': 'suf', 'on': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot', 'dnuprol'], 'type': 'concat', + 'select_cols' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot', 'dnupro'], 'dict': {'dnupro': 'dnuprol'} + },{ + 'bdd': 'in', 'table': 'parcelle', 'on': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnuprol'], 'type': 'concat', + 'select_cols' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnupro'], 'dict': {'dnupro': 'dnuprol'} + },{ + 'bdd': 'out', 'table': 'lots{}'.format(sufx_nom_tab), 'on': ['lot_id'], 'type': 'isin', + 'select_cols' :['lot_id'] },{ + 'bdd': 'out', 'table': 'cptprop{}'.format(sufx_nom_tab), 'on': ['dnupro'], 'type': 'isin', + 'select_cols' :['dnupro'] },] + },] +}] + + + +################################ +########## Fonctions ########## +################################ +start_time = dt.datetime.today() +def time_exec (init_time): + time = dt.datetime.today() - init_time + return str(time) + + +def replace_escape_by_0 (df): + # Remplacement des espaces dans les chaines de caractères par des 0 + # if 'ccopre' in df.columns: + # df['ccopre'].replace([None, '', ' '], '000', inplace=True) + + cols = ['ccopre', 'ccosec', 'dnupla', 'dparpi', 'dnuplam', 'dclssf', 'ccovoi'] + for col in cols: + if col in df.columns: + df[col].replace([' '], '0', regex=True, inplace=True) + + return df + + +def join_data (df, join, schema_in): + # Jointure des données avec une autre table + table = join['table'] + bdd = join['bdd'] + typ = join['type'] + on = join['on'] + if bdd == 'out': + con = engine_fon + sch = 
schema_fon
+    if bdd == 'in':
+        con = engine_cad
+        sch = schema_in
+    select_col = []
+    if 'select_cols' in join.keys():
+        select_col.extend(join['select_cols'])
+    if 'where' in join.keys():
+        select_col.extend(join['where'].keys())
+
+    tmp = pd.read_sql_table(
+        table_name = table,
+        con = con,
+        schema = sch,
+        columns = select_col
+    )
+    tmp = replace_escape_by_0(tmp)
+    if 'dict' in join.keys():
+        tmp.rename(columns=join['dict'], inplace=True)
+    if 'where' in join.keys():
+        where = join['where']
+        for key in where.keys():
+            tmp = tmp[tmp[key] == where[key] ]
+
+    if typ in ['isin', 'notin']:
+        for d in [df, tmp]:
+            d['on'] = ''
+            for col in on:
+                d['on'] += d[col].astype(str)
+        if typ == 'isin':
+            df = df[df['on'].isin(tmp['on'])]
+        if typ == 'notin':
+            df = df[~df['on'].isin(tmp['on'])]
+        df.drop(columns='on', inplace=True)
+    if typ == 'merge':
+        df = df.merge(tmp, on = on, how='left')
+    if typ == 'concat':
+        df = pd.concat([df,tmp], ignore_index=True).drop_duplicates()
+
+    return df
+
+def get_geom_parcelle(df, get_geo, schema):
+    print('INIT import geodata ........... %s sec'%( time_exec(start_time) ))
+
+    # Define the geometry variables
+    ind_geo = get_geo['index_geom']
+    tab_geo = get_geo['table_geom_in']
+
+    sql = """select distinct on (t2.{0})
+        t2.{0},
+        t1.geom,
+        t1.supf::integer as dcntpa -- récupération de la contenance cadastrale associée car présence de géometrie non référencées dans la table "parcelles"
+        FROM "{1}".{2} t1
+        INNER JOIN (select distinct on ({0}) {0}, max(creat_date) creat_date, max(update_dat) update_dat FROM "{1}".{2} GROUP BY ({0})) t2
+        USING ({0}, creat_date, update_dat)""".format(ind_geo, schema, tab_geo)
+    tmp = gpd.read_postgis(
+        sql = sql,
+        con = engine_cad,
+        geom_col = 'geom',
+        crs = crs,
+        chunksize = chunk,
+    )
+
+    if chunk:
+        gdf = gpd.GeoDataFrame(pd.concat(tmp, ignore_index=True))
+    else:
+        gdf = tmp.copy()
+    del tmp
+    gdf.set_index(ind_geo, inplace=True)
+    gdf.index.name = ind_in
+    print('END import geodata ........... %s sec'%( time_exec(start_time) ))
+
+
+    print('INIT merge data - geodata ........... %s sec'%( time_exec(start_time) ))
+    if not gdf[gdf.dcntpa.isna()].empty:
+        gdf.dcntpa.fillna(0, inplace=True)
+        gdf['dcntpa'] = gdf['dcntpa'].astype(df.dtypes['dcntpa'].type)
+    tmp = gdf.merge(df, on = [ind_in, 'dcntpa'], how='right')
+    tmp = tmp.set_geometry('geom', drop=True, crs=crs)
+    tmp.rename(columns={'geometry': 'geom'}, inplace=True)
+
+    # Fill the geometries still missing after the merge from gdf
+    if not tmp[tmp.geom.isna()].empty:
+        lst_ind_df = tmp[tmp.geom.isna()].index.tolist()
+        lst_ind_gdf = gdf.loc[gdf.index.isin(lst_ind_df)].index.tolist()
+        tmp.loc[tmp.index.isin(lst_ind_gdf), 'geom'] = gdf.loc[gdf.index.isin(lst_ind_gdf), 'geom']
+
+    del [gdf, df]
+    gdf = tmp.copy()
+    del tmp
+    export_data(gdf)
+
+
+def export_data(df):
+    print('INIT export data TO {0}, {1} ........... {2} sec'.format(tab_out, df.shape[0], time_exec(start_time) ))
+    rang = [e for e in range(0, df.shape[0], chunk*5)]
+
+    # Normalise jdatnss to ISO yyyy-mm-dd when it comes in as dd/mm/yyyy
+    cd1 = 'jdatnss' in df.columns
+    if cd1 :
+        tmp = df.jdatnss.str.split(r'/',expand=True)
+        if tmp.shape[1] == 3 and tmp[2].str.len().max() == 4 and tmp[1].astype(int).max() <= 12:
+            df.jdatnss = ['%s-%s-%s'%(i[2],i[1],i[0]) for i in df.jdatnss.str.split(r'/').sort_index()]
+
+    for i, j in enumerate(rang):
+        if j == max(rang) :
+            jj = df.shape[0]
+        else:
+            jj = rang[i+1]
+
+        df_imp = df[j:jj].copy()
+
+        print('INIT export data TO {0} ..... {1}/{2} ...... 
+def export_data(df):
+    print('INIT export data TO {0}, {1} ........... {2} sec'.format(tab_out, df.shape[0], time_exec(start_time) ))
+    rang = [e for e in range(0, df.shape[0], chunk*5)]
+
+    cd1 = 'jdatnss' in df.columns
+    if cd1 :
+        # reformat birth dates from DD/MM/YYYY to YYYY-MM-DD when the column matches that pattern
+        tmp = df.jdatnss.str.split(r'/',expand=True)
+        if tmp.shape[1] == 3 and tmp[2].str.len().max() == 4 and tmp[1].astype(int).max() <= 12:
+            df.jdatnss = ['%s-%s-%s'%(i[2],i[1],i[0]) for i in df.jdatnss.str.split(r'/').sort_index()]
+
+    for i, j in enumerate(rang):
+        if j == max(rang) :
+            jj = df.shape[0]
+        else:
+            jj = rang[i+1]
+
+        df_imp = df[j:jj].copy()
+
+        print('INIT export data TO {0} ..... {1}/{2} ...... {3} sec'.format(tab_out, jj, df.shape[0], time_exec(start_time) ))
+        if 'geom' in df.columns and not df[~df['geom'].isna()].empty :
+            df_imp = df_imp.set_geometry('geom', drop=True, crs=crs)
+            df_imp.rename(columns={'geometry': 'geom'}, inplace=True)
+            df_imp.to_postgis(
+                name = tab_out,
+                con = engine_fon,
+                schema = schema_fon,
+                index = False,
+                if_exists = 'append',
+                geom_col = 'geom',
+                chunksize = chunk,
+            )
+        else:
+            df_imp.to_sql(
+                name = tab_out,
+                con = engine_fon,
+                schema = schema_fon,
+                index = False,
+                if_exists = 'append',
+                chunksize = chunk,
+                method = 'multi',
+            )
+    print('END export data TO {0} ........... {1} sec'.format(tab_out, time_exec(start_time) ))
+
+def optimize_data_frame(df):
+    # Cast low-cardinality columns to 'category' to reduce memory usage
+    columns = df.columns
+    for col in columns:
+        # dtype = df[col].dtypes
+        # if dtype == 'int64' or dtype == 'int32':
+        len_col = len(df[col].unique())
+        if len_col <= df.shape[0]*0.8:
+            df[col] = df[col].astype('category')
+
+    return df
+
+
+# Initialise the database connections
+engine_cad = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'.format(user_cad,pwd_cad,adr_cad,port_cad,base_cad), echo=False)
+engine_fon = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'.format(user_fon,pwd_fon,adr_fon,port_fon,base_fon), echo=False)
+con_cad = engine_cad.connect()
+con_fon = engine_fon.connect()
+
+
+################################
+##########    Main    ##########
+################################
+if __name__ == "__main__":
+    ################
+    # FIX DUPLICATES IN TABLE_IN
+    ################
+    if check_duplicates:
+        for DOUBLON in FIND_DOUBLON:
+            tab = DOUBLON['tab_in']
+            on_col = DOUBLON['on_col']
+            for col in on_col:
+                for dep in list_dep:
+                    schema_in = dep + '_' + schema_cad
+                    sql = '''
+                    -- duplicates exist because of near-identical spellings:
+                    -- look them up
+                    SELECT DISTINCT '{0}' as insee_dep, dnuper, string_agg(DISTINCT {1},' / ') as orthographes_voisines
+                    FROM "{2}".{3} GROUP BY dnuper HAVING count(DISTINCT {1}) > 1'''.format(dep, col, schema_in, tab)
+                    df = pd.read_sql(
+                        sql = sql,
+                        con = engine_cad,
+                    )
+                    if df.empty:
+                        print('No duplicate value dep {0} table {1} column {2} ====> next request'.format(dep, tab, col))
+                        continue
+
+                    for i, row in df.iterrows():
+                        dnuper = row.dnuper
+                        choix = row.orthographes_voisines.split(' / ')
+                        choix = [i.strip() for i in choix]
+                        Question = input("""Des orthographes voisines existent pour l'identifiant : {0}
+                            dans la colonne : {1}.
+                            Les valeurs voisines sont : {2}
+                            Ecrire la mise à jour du champs {1} à enregistrer (c cancel) :""".format(dnuper,col, choix))
+                        if Question.lower() == 'c' or Question.lower() == 'cancel':
+                            continue
+
+                        update = '''UPDATE "{0}".{1}
+                            SET {2} = '{3}'
+                            WHERE {2} like '{4}%'
+                            AND dnuper = '{5}';'''.format(schema_in, tab, col, Question, "%' OR {} like '".format(col).join(map(str,choix)), dnuper)
+                        try:
+                            con_cad.execute(text(update))
+                            print('''
+                            Update OK !''')
+                        except Exception as exept:
+                            print('ERROR : {0}'.format(update))
+                            print(exept)
+                            sys.exit()
+
+
+
+    ################
+    # TRUNCATE TABLE OUT
+    ################
+    if not debug:
+        for i, DICT in enumerate(DICT_TAB):
+            tab_in = DICT_TAB[i]['table_in']
+            col_in = DICT_TAB[i]['columns_in']
+            ind_in = DICT_TAB[i]['index_tab']
+            tabs_out = DICT_TAB[i]['table_out']
+
+            for tab_out in reversed(tabs_out):
+                sql = "TRUNCATE TABLE {0}.{1} CASCADE".format(schema_fon, tab_out['name'])
+                print(sql)
+                con_fon.execute(sql)
+
+
+
+    ################
+    # IMPORT INTO TABLE OUT
+    ################
+    for dep in list_dep:
+        schema_in = dep + '_' + schema_cad
+        print('''
+
+        INIT import data FROM {}
+
+        '''.format(schema_in))
+        for i, DICT in enumerate(DICT_TAB):
+            tab_in = DICT_TAB[i]['table_in']
+            col_in = DICT_TAB[i]['columns_in']
+            ind_in = DICT_TAB[i]['index_tab']
+            tabs_out = DICT_TAB[i]['table_out']
+
+            # Import data
+            print('''
+            INIT import data FROM {0}........... {1} sec'''.format(tab_in, time_exec(start_time) ))
+            tmp = pd.read_sql_table(
+                table_name = tab_in,
+                con = engine_cad,
+                schema = schema_in,
+                columns = col_in + [ind_in],
+                chunksize = chunk,
+            )
+
+            # Tidy the data
+            if chunk:
+                DF = pd.concat(tmp, ignore_index=True)
+            else:
+                DF = tmp.copy()
+
+            DF.drop_duplicates(inplace=True)
+            del tmp
+            DF.set_index(ind_in, inplace=True)
+            print('END import data ........... %s sec'%( time_exec(start_time) ))
+
+            for tab in tabs_out:
+                tab_out = tab['name']
+                dictio = tab['dict']
+                col_df = tab['columns_in']
+                col_ad = tab['columns_add']
+                get_geo = tab['geom']
+                drp_esc = tab['drop_escape']
+                unique = tab['unique']
+                join = tab['join']
+
+                print('INIT TABLE {0} ........... {1} sec'.format(tab_out, time_exec(start_time) ))
+                df = DF[DF.columns.intersection(col_df)].copy()
+
+
+                # Replace spaces in string values with zeros
+                df = replace_escape_by_0(df)
+                if drp_esc:
+                    df_obj = df.select_dtypes(['object'])
+                    df[df_obj.columns] = df_obj.apply(lambda x: x.str.strip())
+
+                if dictio :
+                    df.rename(columns=dictio, inplace=True)
+
+                if join :
+                    for j in join:
+                        if j['bdd'] == 'in' :
+                            df = join_data(df, j, schema_in)
+                if df.empty:
+                    print('df EMPTY ====> next table')
+                    continue
+
+                # Add the derived columns
+                if col_ad :
+                    print('INIT addition columns ........... %s sec'%( time_exec(start_time) ))
+                    for key in col_ad.keys():
+                        if key in df.columns:
+                            df[key + '_tmp'] = df[key].copy()
+                            col_ad[key] = [x if x != key else key+'_tmp' for x in col_ad[key]]
+
+                        aggreg = col_ad[key]
+                        if aggreg :
+                            df[key] = ''
+                            for col in aggreg:
+                                df[key] += df[col].fillna('')
+                        else:
+                            df[key] = aggreg
+
+                        print('ADD column {0} : {1} ........... 
{2} sec'.format(key,aggreg, time_exec(start_time) )) + + if tab_out == 'proprios%s'%sufx_nom_tab: + add = pd.DataFrame(data={'dnuper': ['%sY99999'%dep], 'ddenom': ['PROPRIETAIRES NON RENSEIGNES']}) + df = pd.concat([df,add]) + + # JOINTURE + if join : + for j in join: + if j['bdd'] == 'out' : + df = join_data(df, j, schema_in) + if df.empty: + print('df EMPTY ====> next table') + continue + + if unique: + df.drop_duplicates(unique['cols'], keep=unique['keep'], inplace=True) + + # Conservation des champs utiles à l'insertion en bdd + name_col_out = engine_fon.dialect.get_columns(engine_fon, tab_out, schema=schema_fon) + name_col_out = [ sub['name'] for sub in name_col_out ] + if 'geom' in name_col_out and 'geom' not in df.columns: + name_col_out.remove('geom') + df = df[df.columns.intersection(name_col_out)] + + #################### + # Read geodataframe + # Dans le cas où un champs géometrique est nécessaire. + if get_geo: + get_geom_parcelle(df=df, get_geo=get_geo, schema=schema_in) + else: + export_data(df) + # del df + + # Si insertion de données dans r_prop_cptprop, + # attribution d'un compte 'PROPRIETAIRES NON RENSEIGNES' au cptprop sans proprio + if tab_out == 'r_prop_cptprop%s'%sufx_nom_tab: + print('''ATTRIBUTION d'un compte 'PROPRIETAIRES NON RENSEIGNES' au cptprop sans proprio''') + df = pd.read_sql_table( + con = engine_fon, + table_name = 'cptprop%s'%sufx_nom_tab, + schema = schema_fon, + columns = ['dnupro'] ) + tmp = pd.read_sql_table( + con = engine_fon, + table_name = 'r_prop_cptprop%s'%sufx_nom_tab, + schema = schema_fon, + columns = ['dnupro'] ) + df = df[~df.dnupro.isin(tmp.dnupro)] + if df.empty: + print('TOUS les propriétaires sont renseignés ===> NEXT !') + continue + df['dnuper'] = '%sY99999'%dep + export_data(df) + del df + del DF + + + print('END transfert data FROM département {0} ........... 
{1} sec'.format(dep, time_exec(start_time) )) + print('END SCRIPT') diff --git a/0_FONCIER/foncier_insert_cadastre_V3.py b/0_FONCIER/foncier_insert_cadastre_V3.py new file mode 100644 index 0000000..d506c1a --- /dev/null +++ b/0_FONCIER/foncier_insert_cadastre_V3.py @@ -0,0 +1,540 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- + +from pycen import con_fon +from sqlalchemy.sql import text +from sqlalchemy.engine import URL +from sqlalchemy import create_engine +import pandas as pd +import geopandas as gpd + +# user = 'cgeier' +# pwd = 'adm1n*bdCen' +# adr = '91.134.194.221' +# base = 'cadastre' +user = 'postgres' +pwd = 'foncier_test1' +adr = '172.17.0.2' +base = 'postgres' +url = URL.create('postgresql+psycopg2', + username=user, + password=pwd, + host=adr, + database=base, +) +con_cad = create_engine(url) +# from pycen import con_cad + + +# sql = '''SELECT * FROM "38_202207".suf WHERE parcelle = '3800740000B0705';''' +# df = pd.read_sql_query(sql,con_cad) +# df.drop_duplicates(inplace=True) +# df.annee = '2020' +# df.to_sql('suf',con_cad,"38_202207",if_exists='append',index=False) + + +# fadd = '/home/colas/Documents/tmp/FONCIER_FEDE/add_parc.gpkg' +# add = gpd.read_file(fadd) + + +def recup_cols_table(table,con,schema='38_202207'): + lst_cols = con.dialect.get_columns(con,table,schema) + return [x['name'] for x in lst_cols] + + +def __get_pkey__(engine,table_name,schema): + pk = engine.dialect.get_pk_constraint(engine,table_name=table_name,schema=schema) + return pk + + +def _where_parcelle(sql0,schema,list_parid): + + if list_parid is not None: + chunk = None + if isinstance(list_parid,str): + list_parid = [list_parid] + + LIST_ID = str(tuple(list_parid)).replace(',)',')') + + sql1 = ''' + WHERE p.parcelle IN {list_id} + ;'''.format(sch=schema,list_id=LIST_ID) + else : + chunk = 200000 + sql1 = ';' + + sql = sql0 + sql1 + df = pd.read_sql_query(text(sql),con=con_cad,chunksize=chunk) + + # if chunk is not None: + # for d in df: + # print(d.shape[0]) + # d.drop_duplicates(inplace=True) + # print(d.drop_duplicates().shape[0]) + # else : + # df.drop_duplicates(inplace=True) + + return df + + +def _get_chunk(df1,df2): + # cptp1 = pd.DataFrame() + list_DF1 = [] + for d1 in df1: + list_DF1.append(d1) + + DF1 = pd.concat(list_DF1) + + # cptp2 = pd.DataFrame() + list_DF2 = [] + for d2 in df2: + list_DF2.append(d2) + + DF2 = pd.concat(list_DF2) + + return pd.concat([DF1,DF2]).drop_duplicates() + + +def __get_parcelles__(sql0,list_parid): + + if list_parid is not None: + chunk = None + if isinstance(list_parid,str): + list_parid = [list_parid] + + LIST_ID = str(tuple(list_parid)).replace(',)',')') + + sql1 = ''' + WHERE t1.geo_parcelle IN {list_id} + ;'''.format(list_id=LIST_ID) + else : + chunk = None + sql1 = ';' + + sql = sql0 + sql1 + # print(text(sql)) + return gpd.read_postgis(sql=sql,con=con_cad,chunksize=chunk) + + +def _get_parcelles1(schema='38_202207',list_parid=None): + sql0 = '''SELECT DISTINCT ON (t1.geo_parcelle) + t1.geo_parcelle, + case when t1.geom is null then t2.geom + else t1.geom + end geom, + substring(p.parcelle from 1 for 2)||substring(p.parcelle from 4 for 12) par_id, + substring(p.parcelle from 1 for 2)||substring(p.parcelle from 4 for 3) codcom, + substring(p.parcelle from 1 for 2) ccodep, + substring(p.parcelle from 4 for 3) ccocom, + substring(p.parcelle from 7 for 3) ccopre, + substring(p.parcelle from 10 for 2) ccosec, + substring(p.parcelle from 12 for 4) dnupla, + p.annee annee_pci, + t1.update_dat, + p.dparpi, + p.dcntpa, + p.ccocomm, + p.ccoprem, + 
p.ccosecm, + p.dnuplam, + p.ccovoi, + p.ccoriv, + p.type_filiation "type", + substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 3)||p.ccovoi vl_id, + (SELECT STRING_AGG(DISTINCT gtoper::text,',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) gtoper, + (SELECT STRING_AGG(DISTINCT ccogrm::text,',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) ccogrm, + (SELECT STRING_AGG(DISTINCT CONCAT(gtoper::text||COALESCE('_'||ccogrm::text,'')),',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) ccogrm, + (SELECT STRING_AGG(DISTINCT TRIM(ddenom)::text,',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) ddenom + FROM "{sch}".{t1} p + LEFT JOIN "{sch}".parcelle_info t2 ON t2.geo_parcelle = p.parcelle + LEFT JOIN "{sch}"."geo_parcelle" t1 ON t1.geo_parcelle = p.parcelle + LEFT JOIN "{sch}".proprietaire t3 USING (ccodep,ccocom,dnupro) + '''.format( + sch=schema, + t1='parcelle') + + return __get_parcelles__(sql0,list_parid) + + +def _get_parcelles2(schema='38_202207',list_parid=None): + + sql0 = '''SELECT DISTINCT ON (t1.geo_parcelle) + t1.geo_parcelle, + substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 12) par_id, + substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 3) codcom, + substring(t1.geo_parcelle from 1 for 2) ccodep, + substring(t1.geo_parcelle from 4 for 3) ccocom, + substring(t1.geo_parcelle from 7 for 3) ccopre, + substring(t1.geo_parcelle from 10 for 2) ccosec, + substring(t1.geo_parcelle from 12 for 4) dnupla, + t1.annee annee_pci, + t1.update_dat, + p.dparpi, + p.dcntpa, + p.ccocomm, + p.ccoprem, + p.ccosecm, + p.dnuplam, + p.ccovoi, + p.ccoriv, + p.type_filiation "type", + t1.geom, + substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 3)||p.ccovoi vl_id, + (SELECT STRING_AGG(DISTINCT gtoper::text,',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) gtoper, + (SELECT STRING_AGG(DISTINCT ccogrm::text,',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) ccogrm, + (SELECT STRING_AGG(DISTINCT CONCAT(gtoper::text||COALESCE('_'||ccogrm::text,'')),',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) ccogrm, + (SELECT STRING_AGG(DISTINCT TRIM(ddenom)::text,',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) ddenom + FROM "{sch}"."{t1}" t1 + LEFT JOIN ("{sch}".parcelle_info t2 + LEFT JOIN "{sch}".proprietaire t3 USING (comptecommunal)) + USING (geo_parcelle) + LEFT JOIN "{sch}".parcelle p ON t1.geo_parcelle = p.parcelle + '''.format( + sch=schema, + t1='geo_parcelle') + + return __get_parcelles__(sql0,list_parid) + + +def _get_parcelles(schema='38_202207',list_parid=None): + + p1 = _get_parcelles1(schema,list_parid) + print('parcelles from parcelle .......... OK') + p2 = _get_parcelles2(schema,list_parid) + print('parcelles from geo_parcelle ...... 
OK') + + return _get_chunk(p1,p2) + + +def _get_voie2(schema='38_202207',list_parid=None): + sql0 = ''' + SELECT + t1.ccodep||t1.ccocom||p.ccovoi vl_id, + t1.libvoi libelle + FROM "{sch}"."{t1}" t1 + JOIN "{sch}".parcelle p USING (voie) + '''.format( + sch=schema, + t1='voie') + + return _where_parcelle(sql0,schema,list_parid) + + +def _get_lots_natcult2(schema='38_202207',list_parid=None): + sql0 = ''' + SELECT + CASE WHEN TRIM(dnulot) = '' + THEN substring(parcelle from 1 for 2)||substring(parcelle from 4 for 12)||'0000000' + ELSE substring(parcelle from 1 for 2)||substring(parcelle from 4 for 12)||TRIM(dnulot) + END lot_id, + parcelle, + substring(parcelle from 1 for 2)||substring(parcelle from 4 for 12) par_id, + dsgrpf, + cnatsp, + dclssf, + ccosub, + dcntsf + FROM "{sch}"."{t1}" + JOIN "{sch}".parcelle p USING(parcelle) + '''.format( + sch=schema, + t1='suf') + + return _where_parcelle(sql0,schema,list_parid) + + +def _get_lots2(schema='38_202207',list_parid=None): + sql0 = ''' + SELECT DISTINCT + CASE WHEN TRIM(t.dnulot) = '' + THEN substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||'0000000' + ELSE substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||TRIM(t.dnulot) + END lot_id, + t.parcelle, + substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12) par_id, + CASE WHEN TRIM(t.dnulot) = '' OR TRIM(t.dnulot) IS NULL + THEN TRIM(l.dnulot) + ELSE TRIM(t.dnulot) + END dnulot, + CASE WHEN l.dcntlo IS NULL + THEN 0 + ELSE l.dcntlo + END dcntlo, + geo_sub.geom + FROM "{sch}"."{t1}" t + JOIN "{sch}".parcelle p USING (parcelle) + LEFT JOIN "{sch}".lots l USING (parcelle) + LEFT JOIN "{sch}".lotslocaux ll USING (lots) + LEFT JOIN "{sch}".geo_subdfisc_parcelle geo_sub_p ON p.parcelle = geo_sub_p.geo_parcelle + LEFT JOIN "{sch}".geo_subdfisc geo_sub USING (geo_subdfisc) + '''.format( + sch=schema, + t1='suf') + + return _where_parcelle(sql0,schema,list_parid) + + +def _get_cadastre2(schema='38_202207',list_parid=None): + sql0 = ''' + SELECT DISTINCT + CASE WHEN TRIM(t.dnulot) = '' + THEN substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||'0000000' + ELSE substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||TRIM(t.dnulot) + END lot_id, + substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 3)||t.dnupro dnupro + FROM "{sch}"."{t1}" t + JOIN "{sch}".parcelle p + LEFT JOIN "{sch}".lots l USING (parcelle) + USING (parcelle) + '''.format( + sch=schema, + t1='suf') + + return _where_parcelle(sql0,schema,list_parid) + + +def _get_cptprop1(schema='38_202207',list_parid=None): + sql0 = ''' + WITH lot as ( + SELECT ccodep,ccocom,dnuprol dnupro + FROM "{sch}".lots + ) + SELECT DISTINCT + t.ccodep||t.ccocom||t.dnupro dnupro, + t."annee" annee_matrice + FROM "{sch}"."{t1}" t + --JOIN "{sch}".suf s USING (ccodep,ccocom,dnupro) + --JOIN lot l USING (ccodep,ccocom,dnupro) + JOIN "{sch}".parcelle p USING (ccodep,ccocom,dnupro) + '''.format( + sch=schema, + t1='proprietaire') + + return _where_parcelle(sql0,schema,list_parid) + + +def _get_cptprop2(schema='38_202207',list_parid=None): + sql0 = ''' + SELECT DISTINCT + substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 3)||t.dnupro dnupro, + t.annee annee_matrice + FROM "{sch}"."{t1}" t + JOIN "{sch}".parcelle p + LEFT JOIN "{sch}".lots l USING (parcelle) + USING (parcelle) + '''.format( + sch=schema, + t1='suf') + + return _where_parcelle(sql0,schema,list_parid) + + +def _get_cptprop(schema='38_202207',list_parid=None): + cptprop1 = 
_get_cptprop1(schema=schema, list_parid=list_parid) + print('cptprop from proprietaire ... OK') + cptprop2 = _get_cptprop2(schema=schema, list_parid=list_parid) + print('cptprop from suf ............ OK') + + return _get_chunk(cptprop1,cptprop2) + + +def _get_r_prop_cptprop1(schema='38_202207',list_parid=None): + sql0 = ''' + SELECT DISTINCT + substring(p.parcelle from 1 for 2)||substring(p.parcelle from 4 for 3)||t.dnupro dnupro, + substring(p.parcelle from 1 for 2)||t.dnuper dnuper, + ccodro, + ccodem + FROM "{sch}"."{t1}" t + JOIN "{sch}".parcelle p USING (ccodep,ccocom, dnupro) + '''.format( + sch=schema, + t1='proprietaire') + + return _where_parcelle(sql0,schema,list_parid) + + +def _get_proprios1(schema='38_202207',list_parid=None): + sql0 = ''' + SELECT DISTINCT + t.ccodep||t.dnuper dnuper, + TRIM(ccoqua)::int ccoqua, + TRIM(ddenom) ddenom, + TRIM(dqualp) dqualp, + TRIM(dnomlp) dnomlp, + TRIM(dnomus) dnomus, + TRIM(dprnus) dprnus, + TRIM(dprnlp) dprnlp, + TRIM(epxnee) epxnee, + TRIM(dnomcp) dnomcp, + TRIM(dprncp) dprncp, + TRIM(jdatnss) jdatnss, + TRIM(dldnss) dldnss, + TRIM(dlign3) dlign3, + TRIM(dlign4) dlign4, + TRIM(dlign5) dlign5, + TRIM(dlign6) dlign6, + TRIM(gtoper)::int gtoper, + TRIM(ccogrm)::int ccogrm, + TRIM(dnatpr) dnatpr, + TRIM(dsglpm) dsglpm, + t."annee" annee_matrice + FROM "{sch}"."{t1}" t + JOIN "{sch}".parcelle p USING (ccodep,ccocom, dnupro) + '''.format( + sch=schema, + t1='proprietaire') + + return _where_parcelle(sql0,schema,list_parid) + + +def _to_cadaste_table(df,con,pkey,table,schema): + + # Si la clé-primaire de la table est > 1 colonne + if len(pkey) > 1 : + lstid = str( + tuple( + df[pkey]\ + .drop_duplicates()\ + .itertuples(index=False, name=None) + ) + ).replace(',)',')') + pk = str(tuple(pkey)).replace("'",'"') + # Si la clé-primaire de la table est 1 colonne + else: + pk = pkey[0] + lstid = str(tuple(df[pk].drop_duplicates()))\ + .replace(',)',')') + + # Récupération des données déjà en BDD + sql = ''' + SELECT * FROM {sch}.{tab} + WHERE {id} IN {lst} + ;'''.format( + sch = schema, + tab = table, + id = pk, + lst = lstid + ) + indb = pd.read_sql_query(sql,con,index_col=pkey) + + # Exclusion des données déjà en base + df.set_index(pkey,inplace=True) + todb = df[ + ~df.index.isin(indb.index) + ].copy() + df.reset_index(drop=False, inplace=True) + todb.reset_index(drop=False, inplace=True) + indb.reset_index(drop=False, inplace=True) + + # Envoie du tableau dans la bdd si non vide + if not todb.empty: + # Adaptation des types des champs si nécessaire + dtyp = {} + if 'geom' in todb.columns: + from geoalchemy2 import Geometry + todb = todb.to_wkt() + dtyp = {'geom':Geometry(geometry_type='MULTIPOLYGON',srid=2154)} + + cols = todb.columns[todb.columns.isin(indb.columns)] + todb[cols]\ + .to_sql( + table, + con, + schema, + if_exists='append', + index=False, + dtype = dtyp + ) + print('INSERT %s news data ! 
OK'%todb.shape[0]) + else : + print('NO news data to insert !') + + +def to_vl(df,con): + table = 'vl' + schema = 'cadastre' + pkey = __get_pkey__(con,table,schema)['constrained_columns'] + _to_cadaste_table(df,con,pkey,table,schema) + + +def to_parcelles(df,con): + table = 'parcelles' + schema = 'cadastre' + pkey = __get_pkey__(con,table,schema)['constrained_columns'] + _to_cadaste_table(df,con,pkey,table,schema) + + +def to_lots(df,con): + table = 'lots' + schema = 'cadastre' + pkey = __get_pkey__(con,table,schema)['constrained_columns'] + _to_cadaste_table(df,con,pkey,table,schema) + + +def to_lots_natcult(df,con): + table = 'lots_natcult' + schema = 'cadastre' + # pkey = __get_pkey__(con,table,schema)['constrained_columns'] + pkey = ['lot_id'] + _to_cadaste_table(df,con,pkey,table,schema) + + +def to_cadastre(df,con): + table = 'cadastre' + schema = 'cadastre' + # pkey = __get_pkey__(con,table,schema)['constrained_columns'] + pkey = ['lot_id','dnupro'] + _to_cadaste_table(df,con,pkey,table,schema) + + +def to_r_prop_cptprop(df,con): + table = 'r_prop_cptprop' + schema = 'cadastre' + pkey = __get_pkey__(con,table,schema)['constrained_columns'] + _to_cadaste_table(df,con,pkey,table,schema) + + +def to_proprios(df,con): + table = 'proprios' + schema = 'cadastre' + pkey = __get_pkey__(con,table,schema)['constrained_columns'] + _to_cadaste_table(df,con,pkey,table,schema) + + +def to_cptprop(df,con): + table = 'cptprop' + schema = 'cadastre' + pkey = __get_pkey__(con,table,schema)['constrained_columns'] + _to_cadaste_table(df,con,pkey,table,schema) + + + + + +if __name__ == "__main__": + + from pycen import update_to_sql + # par = '3805050000E0523' + par = None + sch = '38_202007' + # cptprop1 = _get_cptprop1(schema=sch, list_parid=par) + # cptprop2 = _get_cptprop2(schema=sch, list_parid=par) + cptprop = _get_cptprop(schema=sch, list_parid=par) + proprios = _get_proprios1(schema=sch, list_parid=par) + r_prop_cptprop = _get_r_prop_cptprop1(schema=sch, list_parid=par) + voie = _get_voie2(schema=sch, list_parid=par) + # parcelles = _get_parcelles2(schema=sch, list_parid=par) + parcelles = _get_parcelles(schema=sch, list_parid=par) + lots = _get_lots2(schema=sch, list_parid=par) + lots_natcult = _get_lots_natcult2(schema=sch, list_parid=par) + cadastre = _get_cadastre2(schema=sch, list_parid=par) + + to_cptprop(cptprop,con_fon) + to_proprios(proprios,con_fon) + to_r_prop_cptprop(r_prop_cptprop,con_fon) + to_vl(voie,con_fon) + to_parcelles(parcelles,con_fon) + to_lots(lots,con_fon) + to_lots_natcult(lots_natcult,con_fon) + to_cadastre(cadastre,con_fon) \ No newline at end of file diff --git a/0_FONCIER/foncier_insert_site.py b/0_FONCIER/foncier_insert_site.py new file mode 100755 index 0000000..1da7b83 --- /dev/null +++ b/0_FONCIER/foncier_insert_site.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +#Nom : : foncier_insert_table.py +#Description : Insertion des données cadastrales à la base après de sa création. 
+#Copyright : 2021, CEN38 +#Auteur : Colas Geier +#Version : 1.0 + + +import pandas as pd +import geopandas as gpd +from sqlalchemy import create_engine +from geoalchemy2 import Geometry, shape +from shapely import wkb +from shapely.geometry.multipolygon import MultiPolygon +from shapely.ops import unary_union +import datetime as dt +import sys +import gc + + +# Parametres bdd cen_38 #bart (in) +# user_in = 'cen_admin' +# pwd_in = '#CEN38@venir' +# adr_in = '192.168.0.3' +# port_in = '5432' +# base_in = 'bd_cen38' + + +from pycen import con_bdcen +schema_in = 'sites' +table_in = 'c_sites_zonages' + + + +# Parametres bdd FONCIER NEW (out) +# user_out = 'postgres' +# pwd_out = 'tutu' +# adr_out = '192.168.60.9' +# port_out = '5432' +# base_out = 'bd_cen' +from pycen import con_fon +schema_out = 'sites' + + + +# Parametres généraux +crs = 'EPSG:2154' +chunk = None + + + +# Définition des fonctions +start_time = dt.datetime.today() +def time_exec (init_time): + time = dt.datetime.today() - init_time + return str(time) + + +def drop_exist(df, con, table, schema, chunk=None): + exist = pd.read_sql_table( + table_name = table, + con = con, + schema = schema, + chunksize = chunk, ) + if chunk: + exist = pd.concat(exist, ignore_index=True) + + return df + + +def give_z(x): + if x.type == 'Polygon': + x = [x] + zlist = [] + for polygon in x: + zlist.extend([c[-1] for c in polygon.exterior.coords[:-1]]) + for inner_ring in polygon.interiors: + zlist.extend([c[-1] for c in inner_ring.coords[:-1]]) + return zlist + #return sum(zlist)/len(zlist) #In your case to get mean. Or just return zlist[0] if they are all the same + +def get_data(table,schema,engine=con_bdcen,chunk=chunk): + tmp = pd.read_sql_table( + table_name = table, + # sql = sql, + con = engine, + # geom_col = 'geom', + schema = schema, + # crs = crs, + chunksize = chunk, ) + + # Mise en forme des données + # start_time = dt.datetime.today() + if chunk: + df = pd.concat(tmp, ignore_index=True) + else: + df = tmp.copy() + if 'geom' in df.columns and not df[~df['geom'].isna()].empty: + df = gpd.GeoDataFrame(df) + df['geom'] = df.geom.apply(lambda x: shape.to_shape(x)) + df.set_geometry('geom', inplace=True, crs=crs) + return df + + +# Initiation des connexions +# con_bdcen = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'.format(user_in,pwd_in,adr_in,port_in,base_in), echo=False) +# con_in = engine_in.connect() +# engine_out = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'.format(user_out,pwd_out,adr_out,port_out,base_out), echo=False) +# con_fon = engine_out.connect() + + + +######################## +###### MAIN ###### +######################## +# get table milieu +tmi = get_data(table = 'd_milieux', schema='sites', engine=con_fon) +# Import data +print('INIT import data ........... 
%s sec'%( time_exec(start_time) )) +sql = "SELECT * FROM {0}.{1}".format(schema_in, table_in) +df = get_data(table = table_in, schema=schema_in, engine=con_bdcen) +df.loc[(df.code_site=='RNIP')&(df.type_zonage=='ZO'),'nom_site'] = df.loc[(df.code_site=='RNIP')&(df.type_zonage=='ZI'),'nom_site'].values[0] +df = df[df.date_fin.isna()] +df.sort_values('code_site',inplace=True) + +# Table sites +tab_out = 'sites' +name_col_out = con_fon.dialect.get_columns(con_fon, tab_out, schema=schema_out) +name_col_out = [ sub['name'] for sub in name_col_out ] +dictio = { + 'code_site': 'site_id', + 'nom_site' : 'site_nom', + # 'id_mnhn' : 'mnhn_id', + # 'idfcen' : 'fcen_id', + 'milieux': 'milieu_id', + 'date_ajout': 'annee_creation', + 'surface_ha': 'surf_m2', +} +df_sites = gpd.GeoDataFrame(df[dictio.keys()], geometry=df.geom, crs=crs) +df_sites.rename(columns= dictio, inplace=True) +df_sites.rename(columns= {'geometry': 'geom'}, inplace=True) +df_sites.sort_values(['site_id','surf_m2'],inplace=True) +df_sites.drop_duplicates(subset=['site_id','site_nom','surf_m2'],inplace=True) +df_sites.loc[df_sites.site_nom.duplicated(keep='first'),'site_id'] = df_sites.loc[df_sites.site_nom.duplicated(keep='first'),'site_id'] + '_ZO' +df_sites.loc[df_sites.site_nom.duplicated(keep='last'),'site_id'] = df_sites.loc[df_sites.site_nom.duplicated(keep='last'),'site_id'] + '_ZI' +df_sites.loc[df_sites.site_id.str.endswith('ZO'),'site_nom'] = df_sites.loc[df_sites.site_id.str.endswith('ZO'),'site_nom'] + ' - ZO' +df_sites.loc[df_sites.site_id.str.endswith('ZI'),'site_nom'] = df_sites.loc[df_sites.site_id.str.endswith('ZI'),'site_nom'] + ' - ZI' + + +# Correspondance site / type_site +site_gere = '|'.join(df[df.gestion].code_site.unique().tolist()) +site_asst = '|'.join(df[df.assist_cnv|df.assist_col].code_site.unique().tolist()) +df_sites['typsite_id'] = '0' +df_sites.loc[df_sites.site_id.str.contains(site_asst), 'typsite_id'] = '2-1' +df_sites.loc[df_sites.site_id.str.contains(site_gere), 'typsite_id'] = '1-1' + + +# Correspondance site / milieu_lib_simpl +df_sites.loc[df_sites['milieu_id'] == 'Tourbières et marais', 'milieu_id'] = 'Milieux humides' +df_sites.loc[df_sites['milieu_id'] == 'Gîte à chiroptères', 'milieu_id'] = 'Gîtes à chiroptères' +df_sites.loc[df_sites['milieu_id'] == 'Ecosystèmes alluviaux', 'milieu_id'] = 'Milieux alluviaux' +df_sites.loc[df_sites['milieu_id'] == 'Ecosystèmes aquatiques', 'milieu_id'] = 'Milieux aquatiques' +df_sites.loc[df_sites['milieu_id'] == 'Pelouses sèches', 'milieu_id'] = 'Pelouses sèches' +df_sites.loc[df_sites['milieu_id'].isna(), 'milieu_id'] = 'N.P.' + +for m in df_sites['milieu_id'].unique(): + df_sites.loc[df_sites['milieu_id'] == m, 'milieu_id'] = tmi[ + (tmi['milieu_lib_simpl'] == m) | + (tmi['milieu_lib'] == m ) + ]['milieu_id'].values[0] + +# Complétion des champs pour test... 
+
+# df_sites['annee_creation'] = '2021'
+df_sites.loc[~df_sites.annee_creation.isna(),'annee_creation'] = df_sites.loc[
+    ~df_sites.annee_creation.isna()
+    ,'annee_creation'].astype(str).str[:4]
+df_sites['annee_creation'].fillna(9999,inplace=True)
+
+if all(df_sites['geom'].has_z):
+    # Drop the Z dimension
+    geom_type = df_sites['geom'].geom_type
+    df_sites['geom'] = [wkb.loads(wkb.dumps(geom, output_dimension=2)) for geom in df_sites['geom']]
+    df_sites.set_geometry('geom', drop=False, inplace=True, crs=crs)
+    # df_sites.rename(columns= {'geometry': 'geom'}, inplace=True)
+
+df_sites.to_postgis(
+    name = tab_out,
+    con = con_fon,
+    schema = schema_out,
+    index = False,
+    if_exists = 'append',
+    chunksize = chunk,
+    geom_col = 'geom',
+    )
+
+
+
+# Table d_typsite
+
+# Table d_milieux
+# print('INIT construction data for d_milieux table ...... %s sec'%( time_exec(start_time) ))
+# df = pd.DataFrame(DF['tmilieux'])
+# df['milieu_lib_simpl'] = None
+# df['milieu_descrip'] = None
+# df.drop_duplicates(inplace=True)
+# df.dropna(inplace=True, how='all',axis=0)
+# df = drop_exist(df, con=con_fon, table='d_milieux', schema=schema_out)
+
+
+# sql = "UPDATE {0}.{1} SET tmilieux = 'Ecosystèmes alluviaux' WHERE tmilieux = 'Ecosystemes alluviaux'".format(schema_in, table_in)
+# print(sql)
+# con_in.execute(sql)
+
+# for index, d in df.iterrows():
+#     tmp = df[df.id != d.id].copy()
+#     tmp['intersect'] = tmp.geometry.intersects(d.geom)
+#     if not tmp[tmp.intersect].empty:
+#         print(tmp[tmp.intersect])
+#     else :
+#         print('No intersection for {}'.format(d.id))
\ No newline at end of file
diff --git a/0_FONCIER/get_infos_parcelle.py b/0_FONCIER/get_infos_parcelle.py
new file mode 100644
index 0000000..7eb89e2
--- /dev/null
+++ b/0_FONCIER/get_infos_parcelle.py
@@ -0,0 +1,18 @@
+import pycen
+import geopandas as gpd
+
+par_id = '38412000ZD0136'
+PATH_OUT = '/home/colas/Documents/tmp/'+par_id
+
+sql = "SELECT par_id, geom FROM cadastre.parcelles_38 WHERE par_id = '%s'"%par_id
+parc = gpd.read_postgis(sql,pycen.con_fon)
+parc.to_crs(4326).to_file(PATH_OUT+'.geojson', driver='GeoJSON')
+parc.to_crs(4326).to_file(PATH_OUT+'.gpkg', driver='GPKG')
+
+sql = "SELECT * FROM zones_humides.v_zoneshumides WHERE ST_INTERSECTS(geom,'SRID=2154;%s')"%parc.geometry[0]
+zh = gpd.read_postgis(sql,pycen.con)
+
+
+sql = "SELECT * FROM saisie.vm_synthese_observations WHERE ST_INTERSECTS(geom,'SRID=2154;%s')"%parc.geometry[0]
+obs_sicen = gpd.read_postgis(sql,pycen.con_sicen)
+
diff --git a/0_FONCIER/grant_table.py b/0_FONCIER/grant_table.py
new file mode 100644
index 0000000..9acac6a
--- /dev/null
+++ b/0_FONCIER/grant_table.py
@@ -0,0 +1,59 @@
+from pycen import con_bdcen,con_fon as con
+import geopandas as gpd
+from sqlalchemy import create_engine, text
+
+
+user_cad = 'cgeier'             # database user
+pwd_cad = 'adm1n*bdCen'         # database password
+adr_cad = '91.134.194.221'      # database host address
+port_cad = '5432'               # database port
+base_cad = 'cadastre'           # database name
+schema_cad = '07_202307'
+con_cad = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'.format(user_cad,pwd_cad,adr_cad,port_cad,base_cad), echo=False)
+
+
+def get_list_table(con,sch):
+    return con.dialect.get_table_names(con,schema=sch)
+
+def get_list_view(con,sch):
+    return con.dialect.get_view_names(con,schema=sch)
+
+def get_table_pkey(con,sch,tab):
+    # reflect the primary-key columns of the table
+    return con.dialect.get_pk_constraint(con,tab,sch)['constrained_columns']
+
+def grant_all_table(con,sch):
+    # sch = 'cadastre'
+    sql = 'GRANT USAGE ON SCHEMA "%s" TO grp_consult;'%sch
+    with con.begin() as cnx:
+        cnx.execute(sql)
+    print('GRANT USAGE TO grp_consult FOR SCHEMA "%s"'%(sch))
+    for tab in get_list_table(con,sch):
+        sql = 'GRANT SELECT ON TABLE "%s".%s TO grp_consult;'%(sch,tab)
+        with con.begin() as cnx:
+            cnx.execute(sql)
+        print('GRANT SELECT TO grp_consult FOR "%s".%s'%(sch,tab))
+    for tab in get_list_view(con,sch):
+        sql = 'GRANT SELECT ON TABLE "%s".%s TO grp_consult;'%(sch,tab)
+        with con.begin() as cnx:
+            cnx.execute(sql)
+        print('GRANT SELECT TO grp_consult FOR "%s".%s'%(sch,tab))
+
+
+
+def revoke_all_table(con,sch):
+    for tab in get_list_table(con,sch):
+        sql = 'REVOKE SELECT ON "%s".%s FROM grp_consult;'%(sch,tab)
+        with con.begin() as cnx:
+            cnx.execute(sql)
+        print('REVOKE SELECT grp_consult FOR "%s".%s'%(sch,tab))
+
+    for tab in get_list_view(con,sch):
+        sql = 'REVOKE SELECT ON "%s".%s FROM grp_consult;'%(sch,tab)
+        with con.begin() as cnx:
+            cnx.execute(sql)
+        print('REVOKE SELECT grp_consult FOR "%s".%s'%(sch,tab))
+
+
+if __name__ == "__main__":
+    grant_all_table(con_cad,schema_cad)
+    # revoke_all_table(con,base_cad)
\ No newline at end of file
diff --git a/0_FONCIER/remove_parcelles_site.py b/0_FONCIER/remove_parcelles_site.py
new file mode 100644
index 0000000..5295cf2
--- /dev/null
+++ b/0_FONCIER/remove_parcelles_site.py
@@ -0,0 +1,39 @@
+from pycen import con_fon
+import geopandas as gpd
+import sys
+
+
+site_id = 'CRAS'
+lst_parcelle = None
+
+site = gpd.read_postgis(
+    "SELECT * FROM sites.v_sites WHERE site_id = '%s'"%site_id,
+    con_fon)
+parc = gpd.read_postgis(
+    "SELECT * FROM sites.v_sites_parcelles WHERE site_id = '%s'"%site_id,
+    con_fon)
+
+# lst_parcelle = parc[~parc.intersects(site.unary_union)].par_id.unique().tolist()
+lst_parcelle = parc.par_id.unique().tolist()
+
+for par_id in lst_parcelle:
+    try:
+        with con_fon.begin() as cnx:
+            cnx.execute("SELECT cadastre.suppr_parcelles_cen('{par_id}','{site_id}')"\
+                .format(
+                    par_id=par_id,site_id=site_id
+                )
+            )
+    except Exception as e:
+        print(e)
+        print('\n')
+        print('La parcelle %s ne peut pas être supprimée'%par_id)
+
+
+with con_fon.begin() as cnx:
+    r = cnx.execute("SELECT admin_sig.refresh_mview_foncier('{site_id}')"\
+        .format(
+            site_id=site_id
+        )
+    )
+    res = r.fetchall()
diff --git a/11_CALC_RHOMEO/insert_bdtopo3.py b/11_CALC_RHOMEO/insert_bdtopo3.py
new file mode 100644
index 0000000..b292745
--- /dev/null
+++ b/11_CALC_RHOMEO/insert_bdtopo3.py
@@ -0,0 +1,172 @@
+from sqlalchemy import create_engine, text,PrimaryKeyConstraint
+from geoalchemy2 import Geometry
+import geopandas as gpd
+from os import listdir,path
+
+user_cad = 'cgeier'             # database user
+pwd_cad = 'adm1n*bdCen'         # database password
+adr_cad = '91.134.194.221'      # database host address
+port_cad = '5432'               # database port
+base_cad = 'rhomeo'             # database name
+schema_cad = 'bdtopo3'
+con = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'.format(user_cad,pwd_cad,adr_cad,port_cad,base_cad), echo=False)
+
+
+def get_list_table(con,sch="public"):
+    with con.begin() as cnx:
+        return cnx.dialect.get_table_names(cnx,schema=sch)
+
+def get_columns(con,sch,tab):
+    with con.begin() as cnx:
+        return cnx.dialect.get_columns(cnx,tab,sch)
+
+def get_table_pkey(con,sch,tab):
+    # NB: despite its name, this reflects the table's foreign keys
+    with con.begin() as cnx:
+        return cnx.dialect.get_foreign_keys(cnx,tab,sch)
+
+
+# for tab in ['spatial_ref_sys',#'info_metadonnees','metadonnees_lot','metadonnees_theme',
+#             'layer_styles']:
+#     sql = """ALTER TABLE bdtopo3.%s
+#     SET SCHEMA public;"""%tab
+#     with con.begin() as cnx:
+#         
cnx.execute(sql) +# print('Table %s SET TO SCHEMA bdtopo3'%(tab)) + +lst_zip = [ + '/vsizip//home/colas/Craig/CEN_38/BD Vectorielles/IGN - BDTOPO - 202309/bdtopo_dep07_202309_geopackage.zip/BDT_3-3_GPKG_LAMB93_D007-ED2023-09-15/BDT_3-3_GPKG_LAMB93_D007-ED2023-09-15.gpkg', + '/vsizip//home/colas/Craig/CEN_38/BD Vectorielles/IGN - BDTOPO - 202309/bdtopo_dep26_202309_geopackage.zip/BDT_3-3_GPKG_LAMB93_D026-ED2023-09-15/BDT_3-3_GPKG_LAMB93_D026-ED2023-09-15.gpkg', + '/vsizip//home/colas/Craig/CEN_38/BD Vectorielles/IGN - BDTOPO - 202309/bdtopo_dep42_202309_geopackage.zip/BDT_3-3_GPKG_LAMB93_D042-ED2023-09-15/BDT_3-3_GPKG_LAMB93_D042-ED2023-09-15.gpkg' +] + +lst_tab = [ + 'cimetiere', # BATI + 'construction_surfacique', # BATI + 'reservoir', + 'batiment', + 'troncon_de_route', + 'piste_d_aerodrome', + 'troncon_de_voie_ferree', + 'terrain_de_sport', +] +# cimetiere +# construction_surfacique +# reservoir +# batiment +# terrain_de_sport + + +dict_cols = { + 'id':'cleabs' +} + + +PATH = "/home/colas/Documents/5_BDD/6_IGN/BDTOPO/BDTOPO_3-3_TOUSTHEMES_SHP_LAMB93_D074_2023-09-15/BDTOPO/1_DONNEES_LIVRAISON_2023-09-00196/BDT_3-3_SHP_LAMB93_D074-ED2023-09-15/BATI" +PATH2 = '../TRANSPORT' +for tab in lst_tab: + file = tab.upper()+'.shp' + if file in listdir(path.join(PATH)): + df = gpd.read_file(path.join(PATH,file)) + df.columns = df.columns.str.lower() + df.set_index('id',inplace=True) + # df.rename(columns=dict_cols,inplace=True) + print('Load %s'%tab) + else: continue + + # columns_type = get_columns(con,schema_cad,tab) + # geom_type = [x for x in columns_type if x['name']=='geometrie'] + # print('Ident geom_type OK') + + # if geom_type: + # geom_type = geom_type[0] + + # if df.geometry.name != geom_type['name']: + # df.rename_geometry(geom_type['name'],inplace=True) + if df.geometry.name != 'geom': + df.rename_geometry('geom',inplace=True) + + has_z = True if df.has_z.all() else False + unique_geom = df.geom_type.unique() + geom_type = df.geom_type[0].upper() if len(unique_geom) == 1 else 'GEOMETRY' + geom_type = geom_type+'Z'if has_z else geom_type + + (df + .to_wkt() + .to_sql( + tab, + con, + schema_cad, + if_exists='append', + index=True, + dtype={ + 'geom':Geometry(geometry_type=geom_type,srid=2154) + } + ) + ) + print(tab) + + +# get_table_pkey(con,schema_cad,tab) +# get_columns(con,schema_cad,tab) + +## 38 +# Load troncon_de_voie_ferree +# Ident geom_type OK +# troncon_de_voie_ferree +# Load terrain_de_sport +# Ident geom_type OK +# terrain_de_sport +## 26 +# Load cimetiere +# Ident geom_type OK +# cimetiere +# Load construction_surfacique +# Ident geom_type OK +# construction_surfacique +# Load reservoir +# Ident geom_type OK +# reservoir +# Load batiment +# Ident geom_type OK +# batiment + +## NO INSERT +# Load troncon_de_route +# Ident geom_type OK + + +for Zip in lst_zip: + print('\n'+Zip+'\n') + for tab in get_list_table(con,schema_cad): + if tab not in lst_tab: + continue + if Zip in lst_zip[:2] or ( + Zip==lst_zip[2] and tab in [ + 'cimetiere', + 'construction_surfacique', + 'reservoir', + 'batiment', + ]): + continue + df = gpd.read_file(Zip,layer=tab) + print('Load %s'%tab) + + columns_type = get_columns(con,schema_cad,tab) + geom_type = [x for x in columns_type if x['name']=='geometrie'] + print('Ident geom_type OK') + + if geom_type: + geom_type = geom_type[0] + + if df.geometry.name != geom_type['name']: + df.rename_geometry(geom_type['name'],inplace=True) + + df.to_wkt().to_sql( + tab, + con, + schema_cad, + if_exists='append', + index=False, + 
dtype={geom_type['name']:geom_type['type']} if geom_type else None + ) + print(tab) \ No newline at end of file diff --git a/11_CALC_RHOMEO/insert_bvmdo.py b/11_CALC_RHOMEO/insert_bvmdo.py new file mode 100644 index 0000000..724b73f --- /dev/null +++ b/11_CALC_RHOMEO/insert_bvmdo.py @@ -0,0 +1,40 @@ +from geoalchemy2 import Geometry +import geopandas as gpd +from sqlalchemy import create_engine + +user_cad = 'cgeier' # utilisateur de connexion à la bdd +pwd_cad = 'adm1n*bdCen' # mot de passe de la bdd +adr_cad = '91.134.194.221' # adresse ip de la bdd +port_cad = '5432' # port de la bdd +base_cad = 'rhomeo' # nom de la bdd +schema_cad = 'refgeo' +con = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'.format(user_cad,pwd_cad,adr_cad,port_cad,base_cad), echo=False) + + +file = '/home/colas/Documents/5_BDD/BASSIN_VERSANT/AGENCE EAU/bvmdo/bvmdo.shp' +df = gpd.read_file(file) +df.rename_geometry('geom', inplace=True) + +has_z = True if df.has_z.all() else False +unique_geom = df.geom_type.unique() +geom_type = df.geom_type[0].upper() if len(unique_geom) == 1 else 'GEOMETRY' +geom_type = geom_type+'Z'if has_z else geom_type + +(df + .to_wkt() + .to_sql( + 'zh', + con, + schema_cad, + if_exists='append', + index=False, + dtype={ + 'geom':Geometry(geometry_type=geom_type,srid=2154) + } + ) +) + + +sql = 'SELECT * FROM tachenew.tache_urbaine' +sql = 'SELECT * FROM arvfnew.arvf_global_buffer' +df = gpd.read_postgis(sql,con) diff --git a/11_CALC_RHOMEO/run_indicRhomeo.py b/11_CALC_RHOMEO/run_indicRhomeo.py new file mode 100644 index 0000000..e84b7cd --- /dev/null +++ b/11_CALC_RHOMEO/run_indicRhomeo.py @@ -0,0 +1,366 @@ +from pycen import con_rho + +def indicI12_zh(con): + sql = """ + DROP TABLE IF EXISTS indicnew.indic_sig_zh; + CREATE TABLE IF NOT EXISTS indicnew.indic_sig_zh AS ( + SELECT DISTINCT + zh.id_bdd, + st_area2d(zh.geom)::double precision AS area + FROM refgeo.zh + ); + ALTER TABLE indicnew.indic_sig_zh + ADD CONSTRAINT indicnew_sig_zh_pkey PRIMARY KEY (id_bdd); + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS isole double precision; + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS diffus double precision; + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS groupe double precision; + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS peudense double precision; + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS dense double precision; + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS tresdense double precision; + + --DROP TABLE IF EXISTS indicnew.indic_sig_zh_temp; + CREATE TABLE IF NOT EXISTS indicnew.indic_sig_zh_temp AS ( + SELECT + zh.id_bdd, + tache_urbaine.reclasse AS reclasse, + Sum(st_area2d(st_intersection(tache_urbaine.geom, zh.geom)))::double precision AS reclasse_surf + FROM tachenew.tache_urbaine + JOIN refgeo.zh ON (st_intersects(tache_urbaine.geom, zh.geom)) + GROUP BY zh.id_bdd, tache_urbaine.reclasse, zh.geom + ); + UPDATE indicnew.indic_sig_zh SET isole = (SELECT indic_sig_zh_temp.reclasse_surf FROM indicnew.indic_sig_zh_temp WHERE indic_sig_zh.id_bdd = indic_sig_zh_temp.id_bdd AND indic_sig_zh_temp.reclasse = '1- isolé'); + UPDATE indicnew.indic_sig_zh SET diffus = (SELECT indic_sig_zh_temp.reclasse_surf FROM indicnew.indic_sig_zh_temp WHERE indic_sig_zh.id_bdd = indic_sig_zh_temp.id_bdd AND indic_sig_zh_temp.reclasse = '2- diffus'); + UPDATE indicnew.indic_sig_zh SET groupe = (SELECT indic_sig_zh_temp.reclasse_surf FROM indicnew.indic_sig_zh_temp WHERE indic_sig_zh.id_bdd = 
indic_sig_zh_temp.id_bdd AND indic_sig_zh_temp.reclasse = '3- groupé'); + UPDATE indicnew.indic_sig_zh SET peudense = (SELECT indic_sig_zh_temp.reclasse_surf FROM indicnew.indic_sig_zh_temp WHERE indic_sig_zh.id_bdd = indic_sig_zh_temp.id_bdd AND indic_sig_zh_temp.reclasse = '4- urbain peu dense'); + UPDATE indicnew.indic_sig_zh SET dense = (SELECT indic_sig_zh_temp.reclasse_surf FROM indicnew.indic_sig_zh_temp WHERE indic_sig_zh.id_bdd = indic_sig_zh_temp.id_bdd AND indic_sig_zh_temp.reclasse = '5- urbain dense'); + UPDATE indicnew.indic_sig_zh SET tresdense = (SELECT indic_sig_zh_temp.reclasse_surf FROM indicnew.indic_sig_zh_temp WHERE indic_sig_zh.id_bdd = indic_sig_zh_temp.id_bdd AND indic_sig_zh_temp.reclasse = '6- urbain très dense'); + DROP TABLE IF EXISTS indicnew.indic_sig_zh_temp; + + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS bati_area double precision; + WITH t2 AS ( + SELECT + t1.id_bdd, + t1.surf + FROM refgeo.zh + JOIN ( + SELECT + zh.id_bdd, + Sum(st_area2d(st_intersection(zh.geom, tache_urbaine.geom))) AS surf + FROM refgeo.zh + JOIN tachenew.tache_urbaine ON (st_intersects(zh.geom, tache_urbaine.geom)) + GROUP BY id_bdd + ) AS t1 USING (id_bdd) + ) + UPDATE indicnew.indic_sig_zh SET bati_area = t2.surf FROM t2 WHERE indic_sig_zh.id_bdd = t2.id_bdd; + + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS arvf_area double precision; + WITH t2 AS ( + SELECT + t1.id_bdd, + t1.surf + FROM refgeo.zh + JOIN ( + SELECT + zh.id_bdd, + Sum(st_area2d(st_intersection(zh.geom, arvf_global_buffer.geom))) AS surf + FROM refgeo.zh + JOIN arvfnew.arvf_global_buffer ON (st_intersects(zh.geom, arvf_global_buffer.geom)) + GROUP BY id_bdd + ) AS t1 USING (id_bdd) + ) + UPDATE indicnew.indic_sig_zh SET arvf_area = t2.surf FROM t2 WHERE indic_sig_zh.id_bdd = t2.id_bdd; + + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS artif_area double precision; + WITH t2 AS ( + SELECT + t1.id_bdd, + t1.surf + FROM refgeo.zh + JOIN ( + SELECT + zh.id_bdd, + Sum(st_area2d(st_intersection(zh.geom, tache_artif.geom))) AS surf + FROM refgeo.zh + JOIN tachenew.tache_artif ON (st_intersects(zh.geom, tache_artif.geom)) + GROUP BY id_bdd + ) AS t1 USING (id_bdd) + ) + UPDATE indicnew.indic_sig_zh SET artif_area = t2.surf FROM t2 WHERE indic_sig_zh.id_bdd = t2.id_bdd; + """ + with con.begin() as cnx: + cnx.execute(sql) + print('indicI12_zh created') + + +def indicI12_zh_buff(con): + sql = ''' + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS area_buff double precision; + UPDATE indicnew.indic_sig_zh SET area_buff = st_area2d(zh.geom_buff) FROM refgeo.zh WHERE indic_sig_zh.id_bdd = zh.id_bdd; + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS isole_buff double precision; + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS diffus_buff double precision; + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS groupe_buff double precision; + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS peudense_buff double precision; + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS dense_buff double precision; + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS tresdense_buff double precision; + + --DROP TABLE IF EXISTS indicnew.indic_sig_zh_temp; + CREATE TABLE IF NOT EXISTS indicnew.indic_sig_zh_temp AS ( + SELECT + zh.id_bdd, + tache_urbaine.reclasse AS reclasse, + Sum(st_area2d(st_Intersection(tache_urbaine.geom, zh.geom_buff)))::double precision AS reclasse_surf + FROM tachenew.tache_urbaine + JOIN refgeo.zh ON 
(st_intersects(tache_urbaine.geom, zh.geom_buff)) + GROUP BY zh.id_bdd, tache_urbaine.reclasse, zh.geom_buff + ); + UPDATE indicnew.indic_sig_zh SET isole_buff = (SELECT indic_sig_zh_temp.reclasse_surf FROM indicnew.indic_sig_zh_temp WHERE indic_sig_zh.id_bdd = indic_sig_zh_temp.id_bdd AND indic_sig_zh_temp.reclasse = '1- isolé'); + UPDATE indicnew.indic_sig_zh SET diffus_buff = (SELECT indic_sig_zh_temp.reclasse_surf FROM indicnew.indic_sig_zh_temp WHERE indic_sig_zh.id_bdd = indic_sig_zh_temp.id_bdd AND indic_sig_zh_temp.reclasse = '2- diffus'); + UPDATE indicnew.indic_sig_zh SET groupe_buff = (SELECT indic_sig_zh_temp.reclasse_surf FROM indicnew.indic_sig_zh_temp WHERE indic_sig_zh.id_bdd = indic_sig_zh_temp.id_bdd AND indic_sig_zh_temp.reclasse = '3- groupé'); + UPDATE indicnew.indic_sig_zh SET peudense_buff = (SELECT indic_sig_zh_temp.reclasse_surf FROM indicnew.indic_sig_zh_temp WHERE indic_sig_zh.id_bdd = indic_sig_zh_temp.id_bdd AND indic_sig_zh_temp.reclasse = '4- urbain peu dense'); + UPDATE indicnew.indic_sig_zh SET dense_buff = (SELECT indic_sig_zh_temp.reclasse_surf FROM indicnew.indic_sig_zh_temp WHERE indic_sig_zh.id_bdd = indic_sig_zh_temp.id_bdd AND indic_sig_zh_temp.reclasse = '5- urbain dense'); + UPDATE indicnew.indic_sig_zh SET tresdense_buff = (SELECT indic_sig_zh_temp.reclasse_surf FROM indicnew.indic_sig_zh_temp WHERE indic_sig_zh.id_bdd = indic_sig_zh_temp.id_bdd AND indic_sig_zh_temp.reclasse = '6- urbain très dense'); + DROP TABLE IF EXISTS indicnew.indic_sig_zh_temp; + + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS bati_area_buff double precision; + WITH t2 AS ( + SELECT + t1.id_bdd, + t1.surf + FROM refgeo.zh + JOIN ( + SELECT + zh.id_bdd, + Sum(st_area2d(st_Intersection(zh.geom_buff, tache_urbaine.geom))) AS surf + FROM refgeo.zh + JOIN tachenew.tache_urbaine ON (st_intersects(zh.geom_buff, tache_urbaine.geom)) + GROUP BY id_bdd + ) AS t1 USING (id_bdd) + ) + UPDATE indicnew.indic_sig_zh SET bati_area_buff = t2.surf FROM t2 WHERE indic_sig_zh.id_bdd = t2.id_bdd; + + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS arvf_area_buff double precision; + WITH t2 AS ( + SELECT + t1.id_bdd, + t1.surf + FROM refgeo.zh + JOIN ( + SELECT + zh.id_bdd, + Sum(st_area2d(st_intersection(zh.geom_buff, arvf_global_buffer.geom))) AS surf + FROM refgeo.zh + JOIN arvfnew.arvf_global_buffer ON (st_intersects(zh.geom_buff, arvf_global_buffer.geom)) + GROUP BY id_bdd + ) AS t1 USING (id_bdd) + ) + UPDATE indicnew.indic_sig_zh SET arvf_area_buff = t2.surf FROM t2 WHERE indic_sig_zh.id_bdd = t2.id_bdd; + + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS artif_area_buff double precision; + WITH t2 AS ( + SELECT + t1.id_bdd, + t1.surf + FROM refgeo.zh + JOIN ( + SELECT + zh.id_bdd, + Sum(st_area2d(st_intersection(zh.geom_buff, tache_artif.geom))) AS surf + FROM refgeo.zh + JOIN tachenew.tache_artif ON (st_intersects(zh.geom_buff, tache_artif.geom)) + GROUP BY id_bdd + ) AS t1 USING (id_bdd) + ) + UPDATE indicnew.indic_sig_zh SET artif_area_buff = t2.surf FROM t2 WHERE indic_sig_zh.id_bdd = t2.id_bdd; + ''' + with con.begin() as cnx: + cnx.execute(sql) + print('indicI12_zh_buff created') + + +def indicI13_zh(con): + sql = """ + -- Pression agricole sur sites + ALTER TABLE IF EXISTS indicnew.indic_sig_zh + DROP COLUMN IF EXISTS agri_ign_impact_area; + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN IF NOT EXISTS agri_ign_impact_area double precision; + WITH calcul_impact_rpg AS ( + SELECT + ilots_sites.id_bdd, + Sum(ilots_sites.surf_impact) AS 
surf_impact, + Sum(ilots_sites.surf_impact) * 100 / ilots_sites.surf_site AS pourc_impact + FROM ( + SELECT + t1.num_ilot, + id_bdd, + st_area(t2.geom) AS surf_site, + st_area(st_intersection(t1.geom, t2.geom)) AS surf_inters, + (st_area(st_intersection(t1.geom, t2.geom))) * t1.pourc_impact AS surf_impact, + st_intersection(t1.geom, t2.geom) AS geom + FROM rpgnew.rpg_global t1 + JOIN refgeo.zh t2 ON (st_intersects(t1.geom, t2.geom)) + ) AS ilots_sites + GROUP BY ilots_sites.id_bdd, ilots_sites.surf_site + ) + UPDATE indicnew.indic_sig_zh SET agri_ign_impact_area = calcul_impact_rpg.surf_impact FROM calcul_impact_rpg WHERE indic_sig_zh.id_bdd = calcul_impact_rpg.id_bdd; + -- Requête exécutée avec succès en 2 s 116 msec. + """ + with con.begin() as cnx: + cnx.execute(sql) + print('indicI13_zh created') + + +def indicI13_zh_buff(con): + sql = """ + -- Pression agricole sur sites_buffer + ALTER TABLE IF EXISTS indicnew.indic_sig_zh + DROP COLUMN IF EXISTS agri_ign_impact_area_buff; + ALTER TABLE indicnew.indic_sig_zh + ADD COLUMN agri_ign_impact_area_buff double precision; + WITH calcul_impact_rpg_buff AS ( + SELECT + ilots_sites.id_bdd, + Sum(ilots_sites.surf_impact_buff) AS surf_impact_buff, + Sum(ilots_sites.surf_impact_buff) * 100 / ilots_sites.surf_site_buff AS pourc_impact_buff + FROM ( + SELECT + t1.num_ilot, + t2.id_bdd, + st_area(t2.geom_buff) AS surf_site_buff, + st_area(st_intersection(t1.geom, t2.geom_buff)) AS surf_inters, + (st_area(st_intersection(t1.geom, t2.geom_buff))) * t1.pourc_impact AS surf_impact_buff, + st_intersection(t1.geom, t2.geom_buff) AS geom + FROM rpgnew.rpg_global t1 + JOIN refgeo.zh t2 ON (st_intersects(t1.geom, t2.geom_buff)) + ) AS ilots_sites + GROUP BY ilots_sites.id_bdd, ilots_sites.surf_site_buff + ) + UPDATE indicnew.indic_sig_zh SET agri_ign_impact_area_buff = calcul_impact_rpg_buff.surf_impact_buff FROM calcul_impact_rpg_buff WHERE indic_sig_zh.id_bdd = calcul_impact_rpg_buff.id_bdd; + -- Requête exécutée avec succès en 4 s 843 msec. + """ + with con.begin() as cnx: + cnx.execute(sql) + print('indicI13_zh_buff created') + +def indicI13_bv(con): + sql = """ + -- Pression agricole sur BVMDO + ALTER TABLE IF EXISTS indicnew.indic_sig_bvmdo + DROP COLUMN IF EXISTS agri_ign_impact_area; + ALTER TABLE indicnew.indic_sig_bvmdo + ADD COLUMN agri_ign_impact_area double precision; + WITH calcul_impact_rpg_bvmdo AS ( + SELECT + ilots_bvmdo.cmdo, + Sum(ilots_bvmdo.surf_impact) AS surf_impact, + Sum(ilots_bvmdo.surf_impact) * 100 / ilots_bvmdo.surf_bvmdo AS pourc_impact + FROM ( + SELECT + t1.num_ilot, + t2.cmdo, + st_area(t2.geom) AS surf_bvmdo, + st_area(st_intersection(t1.geom, t2.geom)) AS surf_inters, + (st_area(st_intersection(t1.geom, t2.geom))) * t1.pourc_impact AS surf_impact, + st_intersection(t1.geom, t2.geom) AS geom + FROM rpgnew.rpg_global t1 + JOIN refgeo.bvmdo t2 ON (st_intersects(t1.geom, t2.geom)) + ) AS ilots_bvmdo + GROUP BY ilots_bvmdo.cmdo, ilots_bvmdo.surf_bvmdo + ) + UPDATE indicnew.indic_sig_bvmdo SET agri_ign_impact_area = calcul_impact_rpg_bvmdo.surf_impact FROM calcul_impact_rpg_bvmdo WHERE indic_sig_bvmdo.cmdo = calcul_impact_rpg_bvmdo.cmdo; + -- Requête exécutée avec succès en 6 s 414 msec. 
+ """ + with con.begin() as cnx: + cnx.execute(sql) + print('indicI13_bv created') + + +def create_zhGeom_buff(con): + sql = """ + ALTER TABLE refgeo.zh ADD COLUMN IF NOT EXISTS geom_buff geometry(MultiPolygon,2154); + UPDATE refgeo.zh + SET geom_buff = st_multi(st_difference(st_buffer(zh.geom, ( |/ (st_area(zh.geom)*2/pi()) ) - ( |/ (st_area(zh.geom)/pi()) ), 40), zh.geom)); + """ + with con.begin() as cnx: + cnx.execute(sql) + print('geom_buff created') + +def indicBilan(con): + sql = """ + DROP TABLE IF EXISTS indicnew.indic_bilan_zh; + CREATE TABLE indicnew.indic_bilan_zh AS + WITH + t1 AS ( + SELECT zh.id_bdd as id_bdd, Count(*) as nb_zone_hydro, Sum(indic_sig_bvmdo.area) as surf_zone_hydro, Sum(indic_sig_bvmdo.isole) as isole_zone_hydro, Sum(indic_sig_bvmdo.diffus) as diffus_zone_hydro, Sum(indic_sig_bvmdo.groupe) as groupe_zone_hydro, Sum(indic_sig_bvmdo.peudense) as peudense_zone_hydro, Sum(indic_sig_bvmdo.dense) as dense_zone_hydro, Sum(indic_sig_bvmdo.tresdense) as tresdense_zone_hydro, Sum(indic_sig_bvmdo.bati_area) as surf_zone_hydro_bati, Sum(indic_sig_bvmdo.arvf_area) as surf_zone_hydro_arvf, Sum(indic_sig_bvmdo.artif_area) as surf_zone_hydro_artif, Sum(indic_sig_bvmdo.agri_ign_impact_area) as surf_zone_hydro_agri + FROM refgeo.zh + JOIN refgeo.bvmdo ON (st_intersects(zh.geom, bvmdo.geom)) + JOIN indicnew.indic_sig_bvmdo USING (cmdo) + GROUP BY zh.id_bdd) + SELECT + id_bdd, + coalesce(indic_sig_zh.area, 0) as surf_zh, + coalesce(indic_sig_zh.area, 0) + + coalesce(indic_sig_zh.area_buff, 0) as surf_rhomeo, + CASE WHEN (coalesce(indic_sig_zh.bati_area, 0) + + coalesce(indic_sig_zh.bati_area_buff, 0)) > 0 THEN + (coalesce(indic_sig_zh.peudense, 0) + + coalesce(indic_sig_zh.dense, 0) + + coalesce(indic_sig_zh.tresdense, 0) + + coalesce(indic_sig_zh.peudense_buff, 0) + + coalesce(indic_sig_zh.dense_buff, 0) + + coalesce(indic_sig_zh.tresdense_buff, 0)) / (coalesce(indic_sig_zh.bati_area, 0) + coalesce(indic_sig_zh.bati_area_buff, 0)) * 100 + ELSE + 0 + END as presdirect_urba, + (coalesce(indic_sig_zh.artif_area, 0) + coalesce(indic_sig_zh.artif_area_buff, 0)) / (coalesce(indic_sig_zh.area, 0) + coalesce(indic_sig_zh.area_buff, 0)) * 100 as presdirect_artif, + (coalesce(indic_sig_zh.agri_ign_impact_area, 0) + coalesce(indic_sig_zh.agri_ign_impact_area_buff, 0)) / (indic_sig_zh.area + indic_sig_zh.area_buff) * 100 as presdirect_agri, + CASE WHEN t1.surf_zone_hydro_bati > 0 THEN + (coalesce(t1.peudense_zone_hydro, 0) + coalesce(t1.dense_zone_hydro, 0) + coalesce(t1.tresdense_zone_hydro, 0)) / t1.surf_zone_hydro_bati * 100 + ELSE + 0 + END as presindir_urba, + coalesce(t1.surf_zone_hydro_artif, 0) / t1.surf_zone_hydro * 100 as presindir_artif, + coalesce(t1.surf_zone_hydro_agri, 0) / t1.surf_zone_hydro * 100 as presindir_agri, + LEFT(current_timestamp(0)::text, 10) as date_maj + FROM refgeo.zh + INNER JOIN indicnew.indic_sig_zh USING (id_bdd) + INNER JOIN t1 USING (id_bdd); + ALTER TABLE indicnew.indic_bilan_zh DROP CONSTRAINT IF EXISTS indic_bilan_zh_pkey; + ALTER TABLE indicnew.indic_bilan_zh + ADD CONSTRAINT indic_bilan_zh_pkey PRIMARY KEY(id_bdd); + """ + with con.begin() as cnx: + cnx.execute(sql) + print('indicBilan created') + + + + +if __name__ == '__main__': + from pycen import con_rho + + create_zhGeom_buff(con_rho) + indicI12_zh(con_rho) + indicI12_zh_buff(con_rho) + indicI13_zh(con_rho) + indicI13_zh_buff(con_rho) + indicBilan(con_rho) \ No newline at end of file diff --git a/1_SICEN/MIGRATION GEONATURE/classification_group.py b/1_SICEN/MIGRATION 
GEONATURE/classification_group.py new file mode 100644 index 0000000..43c853a --- /dev/null +++ b/1_SICEN/MIGRATION GEONATURE/classification_group.py @@ -0,0 +1,322 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- + +from pycen import con_sicen +import geopandas as gpd + +DICT_TAXREF_OLDNAME = { + 116744 : 521658, # Quercus petraea + # 9999056 : 0, # Micromammalia sp. + 9999077 : 183820, # Arachnida sp. + 9999078 : 187496, # Fungi sp. + # 9999081 : 0, # Heterocera sp. + 9999058 : 195005, # Myotis sp. +} + +DICT_TAXREF = { + # 116744:{'cd_nom':521658,'lb_nom':'Quercus petraea (Matt.) Liebl., 1784'}, # Quercus petraea + # 9999077:{'cd_nom':183820,'lb_nom':'Arachnida Cuvier, 1812'}, # Arachnida sp. + + # Espèce domestique + 9999031 : {'cd_nom':441709,'lb_nom':'Cairina moschata (Linnaeus, 1758)'}, # Cairina moschata f. domestica + + # GROUPE + 9999005: {'cd_nom':194069,'lb_nom':'Leucanthemum Mill., 1754'}, # Leucanthemum vulgare (#groupe) (#ref) + 9999014: {'cd_nom':197281,'lb_nom':'Rubus L., 1753 [nom. et typ. cons.]'}, # Rubus fruticosus (#groupe) (#ref) + 9999017: {'cd_nom':198226,'lb_nom':'Taraxacum F.H.Wigg., 1780'}, # Taraxacum officinale (#groupe) (#ref) + 9999019: {'cd_nom':198449,'lb_nom':'Thymus L., 1753'}, # Thymus serpyllum (#groupe) (#ref) + 9999020: {'cd_nom':198902,'lb_nom':'Vicia L., 1753'}, # Vicia sativa (#groupe) (#ref) + + # COMPLEXE + 9999075: {'cd_nom':196296,'lb_nom':'Pipistrellus Kaup, 1829'}, # Pipistrellus nathusii / kuhlii (#complexe) (#ref) + 9999074: {'cd_nom':197040,'lb_nom':'Rana Linnaeus, 1758'}, # Rana dalmatina / temporaria (#complexe) (#ref) + 9999057: {'cd_nom':195005,'lb_nom':'Myotis Kaup, 1829'}, # Myotis myotis / blythii (#complexe) (#ref) + 9999050: {'cd_nom':193993,'lb_nom':'Leptidea Billberg, 1820'}, # Leptidea sinapis / reali (#complexe) (#ref) + 9999066: {'cd_nom':196980,'lb_nom':'Pyrgus Hübner, 1819'}, # Pyrgus malvae / malvoides (#complexe) (#ref) + 9999063: {'cd_nom':444436,'lb_nom':'Pelophylax Fitzinger, 1843'}, # Pelophylax kl. esculentus / lessonae (#complex... + 9999054: {'cd_nom':194481,'lb_nom':'Martes Pinel, 1792'}, # Martes martes / foina (#complexe) (#ref) + 9999037: {'cd_nom':191029,'lb_nom':'Colias Fabricius, 1807'}, # Colias alfacariensis / hyale (#complexe) (#ref) + 9999023: {'cd_nom':188772,'lb_nom':'Acrocephalus J. A. Naumann & J. F. Naumann, 1811'}, # Acrocephalus palustris / scirpaceus (#complexe... + 9999064: {'cd_nom':4280,'lb_nom':'Phylloscopus collybita (Vieillot, 1817)'}, # Phylloscopus collybita tristis / "abietinus" ... + 9999046: {'cd_nom':192539,'lb_nom':'Felis Linnaeus, 1758'}, # Felis silvestris / catus (#complexe) (#ref) + 9999080: {'cd_nom':194357,'lb_nom':'Lysandra Hemming, 1933'},# Lysandra coridon / hispana (#complexe) + 9999033: {'cd_nom':886228,'lb_nom':'Acanthis Borkhausen, 1797'}, # Carduelis flammea flammea / cabaret / Carduelis hornemanni (#complexe) + 9999041: {'cd_nom':4503,'lb_nom':'Corvus corone Linnaeus, 1758'}, # Corvus corone corone / cornix (#complexe) + 9999042: {'cd_nom':186239,'lb_nom':'Vespertilionidae J. E. Gray, 1821'}, # Eptesicus / Nyctalus sp. 
(#complexe) (#ref) + 9999082: {'cd_nom':195005,'lb_nom':'Myotis Kaup, 1829'}, # Myotis daubentonii/Myotis mystacinus (#complexe) + 9999083: {'cd_nom':699094,'lb_nom':'Yangochiroptera Koopman, 1985'}, # Pipistrellus/Miniopterus (#complexe) +} + +def get_pheno_genre(esp_fx, df): + nom = esp_fx + return + + + +def _vm_synthese_observations_(where=''): + # Lecture des données + sql = 'SELECT * FROM saisie.vm_synthese_observations ' + df = gpd.read_postgis(sql+where,con_sicen) + + # drop Micromammalia sp. + drp = df.loc[df.cd_nom == 9999056].index + df.drop(drp,inplace=True) + + is_group = df.nom_complet.str.contains('#group') + is_complex = df.nom_complet.str.contains('#complex') + is_domestic = df.nom_complet.str.contains('domestic') + lst_group = [*df[is_group].nom_complet.unique()] + lst_complex = [*df[is_complex].nom_complet.unique()] + lst_domestic = [*df[is_domestic].nom_complet.unique()] + + # df['group'] = None + # df['complex'] = None + # df.loc[is_group,'group'] = df[is_group].nom_complet.copy() + # df.loc[is_complex,'complex'] = df[is_complex].nom_complet.copy() + df['rmq_taxonomie'] = None + df.loc[is_complex|is_group|is_domestic,'rmq_taxonomie'] = df[is_complex|is_group|is_domestic].nom_complet.copy() + + for cd in df[is_complex|is_group|is_domestic].cd_nom.unique(): + lb = df[df.cd_nom==cd].nom_complet.unique()[0] + cdn = {cd:DICT_TAXREF[cd]['cd_nom']} + lbn = {lb:DICT_TAXREF[cd]['lb_nom']} + + df.nom_complet.replace(lbn,inplace=True) + df.nom_latin.replace(lbn,inplace=True) + df.cd_nom.replace(cdn,inplace=True) + + # Transform geom TO WKT format + df['geom_wkt'] = df.geometry.to_wkt() + # Récup code insee département + df['insee_dep'] = df.insee_commune.str[:2] + # Mise en forme de la précision de la données + df.precision = format_precision(df.precision) + # Actualisation des cd_nom + df.cd_nom.replace(DICT_TAXREF_OLDNAME,inplace=True) + + # Mise en forme des effectifs + df.effectif = format_effectif(df.effectif.copy()) + df.effectif_min = format_effectif(df.effectif_min.copy()) + df.effectif_max = format_effectif(df.effectif_max.copy()) + df.loc[df.effectif.isna(),'effectif'] = df[df.effectif.isna()].effectif_min + df.effectif.fillna('1',inplace=True) + df.loc[df.effectif_max.isna(),'effectif_max'] = df[df.effectif_max.isna()].effectif + df.effectif = df.effectif.astype(int) + df.effectif_max = df.effectif_max.astype(int) + + check_effectif = df['effectif'].astype(float) > df['effectif_min'].astype(float) + if not df[check_effectif].empty: + print('WARNING : "effectif" > "effectif_min"') + print(' nrows : %s'%df[check_effectif].shape[0]) + + df.loc[check_effectif,'effectif_max'] = df[check_effectif].effectif.copy() + df.loc[check_effectif,'effectif'] = df[check_effectif].effectif_min.copy() + print( 'effectif TO effectif_max : OK !') + + df.effectif_min = df.effectif.copy() + + return df + + +def format_precision(lst): + return lst.copy().replace({ + 'GPS': '5', + '0 à 10m': '5', + '10 à 100m': '50', + '100 à 500m': '250', + '500 à 1000m': '750', + '> 1000m': '2000', + 'lieu-dit': '2500', + 'commune': '5000', + }) + +def format_effectif(lst): + lst.loc[lst.notna()] = lst[lst.notna()].astype(int).astype(str) + return lst + + +def additional_data(df,columns): + df['additional_data'] = df[columns].to_json(orient="records",force_ascii=False) + df.drop(columns=columns,inplace=True) + return df + + +class flore: + + def all(): + w = "WHERE regne = 'Plantae'" + res = _vm_synthese_observations_(w) + # return additional_data(res,['strate_flore','effectif_textuel']) + return res + + def 
vasculaire(): + w = "WHERE group1_inpn = 'Plantes vasculaires'" + res = _vm_synthese_observations_(w) + # return additional_data(res,['strate_flore','effectif_textuel']) + return res + + def bryophyte(): + w = "WHERE group1_inpn = 'Bryophytes'" + res = _vm_synthese_observations_(w) + # return additional_data(res,['strate_flore','effectif_textuel']) + return res + + def characee(): + w = "WHERE famille = 'Characeae'" + res = _vm_synthese_observations_(w) + # return additional_data(res,['strate_flore','effectif_textuel']) + return res + + +class faune: + def all(): + w = "WHERE regne = 'Animalia'" + return _vm_synthese_observations_(w) + + class invertebre: + def all(): + w = "WHERE regne = 'Animalia' AND phylum <> 'Chordata'" + return _vm_synthese_observations_(w) + + def odonate(): + w = "WHERE ordre = 'Odonata'" + return _vm_synthese_observations_(w) + def orthoptere(): + w = "WHERE ordre = 'Orthoptera'" + return _vm_synthese_observations_(w) + def lepidoptere(): + w = "WHERE ordre = 'Lepidoptera'" + return _vm_synthese_observations_(w) + def mollusque(): + w = "WHERE group1_inpn = 'Mollusques'" + return _vm_synthese_observations_(w) + def crustacees(): + w = "WHERE group2_inpn = 'Crustacés'" + return _vm_synthese_observations_(w) + def ascomycete(): + w = "WHERE ordre = 'Ascomycètes'" + return _vm_synthese_observations_(w) + def autre(): + w = """ + WHERE regne = 'Animalia' + AND phylum <> 'Chordata' + AND ordre <> 'Odonata' + AND ordre <> 'Orthoptera' + AND ordre <> 'Lepidoptera' + AND group1_inpn <> 'Mollusques' + AND group2_inpn <> 'Crustacés' + AND ordre <> 'Ascomycètes' + """ + return _vm_synthese_observations_(w) + + class vertebre: + def all(): + w = "WHERE regne = 'Animalia' AND phylum = 'Chordata'" + return _vm_synthese_observations_(w) + + def mammifere(): + w = "WHERE group2_inpn = 'Mammifères' AND ordre <> 'Chiroptera'" + return _vm_synthese_observations_(w) + def chiroptere(): + w = "WHERE ordre = 'Chiroptera'" + return _vm_synthese_observations_(w) + def oiseau(): + w = "WHERE group2_inpn = 'Oiseaux'" + return _vm_synthese_observations_(w) + def reptile(): + w = "WHERE group2_inpn = 'Reptiles'" + return _vm_synthese_observations_(w) + def amphibien(): + w = "WHERE group2_inpn = 'Amphibiens'" + return _vm_synthese_observations_(w) + def poisson(): + w = "WHERE group2_inpn = 'Poissons'" + return _vm_synthese_observations_(w) + + +def champignon(): + w = "WHERE regne IN ('Fungi','Chromista')" + return _vm_synthese_observations_(w) + + +def export_csv(df,path_name): + df.dropna(how='all',axis=1,inplace=True) + drop_cols = [ + 'regne','phylum','classe','ordre','famille','group1_inpn','group2_inpn','cd_ref', + 'altitude_z','longitude_x','latitude_y','geom','nom_commune', + 'etude','lot_donnee','observateurs','structures' + ] + d = df.columns[df.columns.isin(drop_cols)] + df.drop(columns=d,inplace=True) + if df.shape[0] > 50000: + from math import ceil + loop = ceil(df.shape[0]/50000) + i = 0 + for j in range(1,loop+1): + p = path_name.replace('.csv','_%s.csv'%j) + jj = j*50000 + df.iloc[i:jj].to_csv(p,index=False) + i = jj + else: + df.to_csv(path_name,index=False) + +if __name__ == "__main__": + + test = False + PATH = '/home/colas/Documents/9_PROJETS/4_SICEN/GN_MIGRATION/' + al = flore.all() + va = flore.vasculaire() + br = flore.bryophyte() + ch = flore.characee() + + export_csv(al,PATH+'FLORE/all.csv') + # export_csv(va[va.effectif< va.effectif_max].copy(),PATH+'FLORE/test_eff_vasculaire.csv') + export_csv(va,PATH+'FLORE/vasculaire.csv') + 
export_csv(br,PATH+'FLORE/bryophyte.csv')
+    export_csv(ch,PATH+'FLORE/characee.csv')
+    # Bare expressions below are interactive sanity checks: row-count differences
+    # inspected when the script is run in a REPL.
+    al.shape[0] - (va.shape[0]+br.shape[0]+ch.shape[0])
+
+    fa = faune.all()
+    fia = faune.invertebre.all()
+    fva = faune.vertebre.all()
+    fa.shape[0] - (fia.shape[0] + fva.shape[0])
+    if test:
+        fa[~fa.id_obs.isin([*fia.id_obs,*fva.id_obs])]
+
+    ca = champignon()
+
+    vm = _vm_synthese_observations_()
+    date_cols = vm.columns[vm.columns.str.contains('date')]
+    mtd = vm[['lot_donnee','protocole','etude']].drop_duplicates()
+
+    vm.shape[0] - (al.shape[0]+fa.shape[0]+ca.shape[0])
+    if test:
+        vm[~vm.id_obs.isin([*al.id_obs,*fa.id_obs,*ca.id_obs])]
+
+
+    # Sub verification
+    fiod = faune.invertebre.odonate()
+    fior = faune.invertebre.orthoptere()
+    file = faune.invertebre.lepidoptere()
+    fimo = faune.invertebre.mollusque()
+    ficr = faune.invertebre.crustacees()
+    fias = faune.invertebre.ascomycete()
+    fiau = faune.invertebre.autre()
+    fia.shape[0] - (
+        fiod.shape[0] + fior.shape[0] + file.shape[0] + fimo.shape[0] + ficr.shape[0] + fias.shape[0] + fiau.shape[0]
+    )
+    if test:
+        fia[~fia.id_obs.isin([*fiod.id_obs,*fior.id_obs,*file.id_obs,*fimo.id_obs,*ficr.id_obs,*fias.id_obs,*fiau.id_obs])]
+
+    fvma = faune.vertebre.mammifere()
+    fvch = faune.vertebre.chiroptere()
+    fvoi = faune.vertebre.oiseau()
+    fvre = faune.vertebre.reptile()
+    fvam = faune.vertebre.amphibien()
+    fvpo = faune.vertebre.poisson()
+    fva.shape[0] - (
+        fvma.shape[0] + fvch.shape[0] + fvoi.shape[0] + fvre.shape[0] + fvam.shape[0] + fvpo.shape[0]
+    )
+    if test:
+        fva[~fva.id_obs.isin([*fvma.id_obs,*fvch.id_obs,*fvoi.id_obs,*fvre.id_obs,*fvam.id_obs,*fvpo.id_obs])]
+
\ No newline at end of file
diff --git a/1_SICEN/MIGRATION GEONATURE/sicen_to_gn.py b/1_SICEN/MIGRATION GEONATURE/sicen_to_gn.py
new file mode 100644
index 0000000..9629f13
--- /dev/null
+++ b/1_SICEN/MIGRATION GEONATURE/sicen_to_gn.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+from pycen import update_to_sql, con_sicen
+import geopandas as gpd
+import os
+
+DIR = '/home/colas/Documents/9_PROJETS/4_SICEN/GN_MIGRATION'
+
+def export(path_name, data, format='csv'):
+    detect_date = data.columns[data.columns.str.startswith('date')]
+    data[detect_date] = data[detect_date].astype(str)
+    df = data.to_wkt().drop(columns='geom',errors='ignore')
+    if format=='xlsx':
+        df.to_excel(path_name+'.%s'%format)
+    elif format=='csv':
+        df.to_csv(path_name+'.%s'%format)
+    else:
+        raise ValueError('format non prévu')
+
+
+v_synthese_invertebre = 'v_synthese_invertebre'
+v_synthese_vertebre = 'v_synthese_vertebre'
+v_synthese_flore = 'v_synthese_flore'
+
+sql = 'SELECT * FROM saisie.%s'%v_synthese_invertebre
+v_inv = gpd.read_postgis(sql,con_sicen)
+sql = 'SELECT * FROM saisie.%s'%v_synthese_vertebre
+v_ver = gpd.read_postgis(sql,con_sicen)
+sql = 'SELECT * FROM saisie.%s'%v_synthese_flore
+v_flo = gpd.read_postgis(sql,con_sicen)
+
+export(os.path.join(DIR,v_synthese_invertebre),v_inv)
+export(os.path.join(DIR,v_synthese_vertebre),v_ver)
+export(os.path.join(DIR,v_synthese_flore),v_flo)
+
+
+# Quick interactive checks of the metadata fields:
+v_ver.etude.unique()
+v_ver.protocole.unique()
+v_ver.lot_donnee.unique()
+
diff --git a/1_SICEN/SERENA_siege/recovery_geo.py b/1_SICEN/SERENA_siege/recovery_geo.py
new file mode 100644
index 0000000..1eab2e3
--- /dev/null
+++ b/1_SICEN/SERENA_siege/recovery_geo.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+from pycen import con_sicen, update_to_sql
+import pandas as pd
+import geopandas as gpd
+from os import path
+
+def get_site_serena():
+    sql = '''
+    SELECT s.*,
+        t.*,
+        
c.choi_nom + FROM serenabase.rnf_site s + JOIN serenabase.tmp_sgll t ON t.sgll_site_id = s.site_id + JOIN serenabase.rnf_choi c ON c.choi_id = s.site_categ_choi_id + ''' + return pd.read_sql_query(sql,con_sicen).dropna(axis=1,how='all') + +if __name__ == "__main__": + + # Lecture des sites SIG + PATH = '/media/colas/SRV/FICHIERS/OUTILS/BASES DE DONNEES/RHOMEO/2018/RhoMéO_Données' + FILE = '38_sig_points_de_suivis_point.shp' + sig = gpd.read_file(path.join(PATH,FILE)) + + # Récupération des sites SERENA + site_serena = get_site_serena() + no_coord = site_serena.sgll_lat=='999' + is_rhomeo = site_serena.site_nom.str[-3:].str.isnumeric() + is_rhomeo2 = site_serena.choi_nom.str.contains('rhomeo',case=False) + # site_serena[no_coord] + # site_serena[is_rhomeo] + + # Isolation des sites RHOMEO + site_rhomeo_all = site_serena[is_rhomeo2&no_coord].copy() + site_rhomeo = site_rhomeo_all.merge(sig[['id_regio','geometry']],left_on='site_ref_sig',right_on='id_regio')\ + .set_geometry('geometry',crs=2154) + site_rhomeo.to_crs(4326,inplace=True) + site_rhomeo.sgll_lon = site_rhomeo.geometry.x.astype(str) + site_rhomeo.sgll_lat = site_rhomeo.geometry.y.astype(str) + updt_site = site_rhomeo[['sgll_site_id','sgll_lat','sgll_lon']].copy() + + # Update data + update_to_sql( + df=updt_site, + con=con_sicen, + table_name='tmp_sgll', + schema_name='serenabase', + key_name='sgll_site_id' + ) \ No newline at end of file diff --git a/1_SICEN/SERENA_siege/recovery_relv.py b/1_SICEN/SERENA_siege/recovery_relv.py new file mode 100644 index 0000000..afaf473 --- /dev/null +++ b/1_SICEN/SERENA_siege/recovery_relv.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +import pandas_access as mdb # pour lecture de la bd medwet +from os import path +from pycen import con_sicen, update_to_sql +import pandas as pd + +# access / postgis +SOURCE = 'postgis' +postgis_sch = 'serenabase' + +DICT_RLV_COLS = { + 'relv_id':'id_etude', + 'relv_nom':'nom_etude', + 'relv_prop_libel':'proprietaire', + 'relv_1date_c':'date_debut', + 'relv_2date_c':'date_fin', + 'relv_comment':'description', +} + +DICT_OBS_COLS = { + 'obse_id':'id_origine', + 'obse_relv_id':'id_etude', +} + + +def list_access_datatable(): + + PATH_IN = '/media/colas/SRV/FICHIERS/OUTILS/BASES DE DONNEES/ESPECES/backup' + BDD_IN = 'AVENIR.serena_V2.mdb' + + LST_SCHEMA = mdb.read_schema(path.join(PATH_IN,BDD_IN)) + LST_TABLE = [ *LST_SCHEMA.keys() ] + LST_TABLE = mdb.list_tables(path.join(PATH_IN,BDD_IN)) + TABLE_DATA= [] + + for TABLE in LST_TABLE: + DTYPE = {x : float for x in LST_SCHEMA[TABLE].keys() if LST_SCHEMA[TABLE][x]=='Long Integer'} + df = mdb.read_table(path.join(PATH_IN,BDD_IN),TABLE, dtype=DTYPE) + if df.shape[0] == 0: + print('%s IS EMPTY !'%TABLE) + else : + TABLE_DATA += [TABLE] + return TABLE_DATA + + +def list_releve(source='postgis'): + if source=='postgis': + + choi = pd.read_sql_table('rnf_choi',con_sicen,postgis_sch)\ + .dropna(axis=1,how='all') + relv = pd.read_sql_table('rnf_relv',con_sicen,postgis_sch)\ + .dropna(axis=1,how='all') + + # Correction coquille + relv.loc[relv.relv_categ_choi_id==100800,'relv_categ_choi_id'] = 100008 + + # Jointure relevé / catégorie + df = relv.merge(choi[['choi_id','choi_nom']],how='inner',left_on='relv_categ_choi_id',right_on='choi_id')\ + .drop(columns=['relv_categ_choi_id']) + + # Mise en forme des dates + lstcol_dat = df.columns[df.columns.str.contains('date')] + for lcd in lstcol_dat: + # df[lcd] = df[lcd].replace({'VIDE':None}) + df.loc[df[lcd]=='2000 à 2001',lcd] = '2000' if '1date' in lcd else 
'2001' + df[lcd].replace({'VIDE':None},inplace=True) + df[lcd] = pd.to_datetime(df[lcd]) + + return df + + +def get_serena_obs(source='postgis'): + if source=='postgis': + obs = pd.read_sql_table('rnf_obse',con_sicen,postgis_sch) + + return obs + + + +def get_sicen_obs(): + return pd.read_sql_table('saisie_observation',con_sicen,'saisie') + + + + +if __name__ == "__main__": + + + if SOURCE == 'access': + lst_tab = list_access_datatable() + elif SOURCE == 'postgis': + lstrelv_serena = list_releve() + obs_serena = get_serena_obs() + obs_serena = obs_serena.merge(lstrelv_serena[['relv_id','relv_nom']],left_on='obse_relv_id',right_on='relv_id')\ + .drop(columns='relv_id') + + # Isolation des relevés HORS RhoMéo + is_relv = [x for x in lstrelv_serena.relv_nom.str[:3].unique() if not x.isnumeric()] + relv_serena = lstrelv_serena[lstrelv_serena.relv_nom.str[:3].isin(is_relv)] + + # Formatage des relevés HORS RhoMéo + relv_2sicen = relv_serena.rename(columns=DICT_RLV_COLS).filter(DICT_RLV_COLS.values(),axis=1) + relv_2sicen.sort_values('id_etude',inplace=True) + relv_2sicen.reset_index(inplace=True,drop=True) + relv_2sicen.set_index(-relv_2sicen.index,inplace=True) + + + # # Intégration des relevés SERENA dans la table 'etude' + # relv_2sicen.drop(columns=['id_etude','proprietaire'])\ + # .rename_axis('id_etude')\ + # .to_sql('etude',con_sicen,'md',if_exists='append') + + + # Récupération des données SICEN2 + # id_lot = 'SERENA' + sicen_obs = get_sicen_obs() + is_serena = sicen_obs.id_lot==3 + is_horsetude = sicen_obs.id_etude==2 + lst_idorigine = sicen_obs[is_serena].id_origine.astype(int).tolist() + + # isolation des données "suposées" non présentes + obs_notinsicen = obs_serena[~obs_serena.obse_id.isin(lst_idorigine)].copy() + # rhomeo_notinsicen = obs_notinsicen.obse_nom.str.startswith('RhoMéO',na=False) + # obs_notinsicen[rhomeo_notinsicen] + # test = obs_notinsicen.obse_site_id == 101335 + # obs_notinsicen[test] + + sql1 = "SELECT * FROM {sch}.tmp_ogll WHERE ogll_obse_id IN {lst} AND ogll_lat <> '999'" + res1 = pd.read_sql_query(sql1.format(sch=postgis_sch,lst=tuple(obs_notinsicen.obse_id)),con_sicen) + sql2 = "SELECT * FROM {sch}.tmp_sgll WHERE sgll_site_id IN {lst} AND sgll_lat <> '999'" + res2 = pd.read_sql_query(sql2.format(sch=postgis_sch,lst=tuple(obs_notinsicen.obse_site_id.unique())),con_sicen) + + + miss_serena1 = obs_notinsicen.merge( + res1.rename(columns={ + 'ogll_obse_id':'obse_id', + 'ogll_lat':'lat', + 'ogll_lon':'lon' + }), + on='obse_id') + miss_serena2 = obs_notinsicen.merge( + res2.rename(columns={ + 'sgll_site_id':'obse_site_id', + 'sgll_lat':'lat', + 'sgll_lon':'lon' + }), + on='obse_site_id') + miss_serena = pd.concat([miss_serena1,miss_serena2]) + rhomeo_missserena = miss_serena.obse_nom.str.startswith('RhoMéO',na=False) + miss_serena[rhomeo_missserena] + + # Formatages des données OBS + obs_2sicen = obs_serena[obs_serena.obse_relv_id.isin(relv_serena.relv_id)].rename(columns=DICT_OBS_COLS).filter(DICT_OBS_COLS.values(),axis=1) + obs_2sicen['id_lot'] = 3 + obs_2sicen.id_origine = obs_2sicen.id_origine.astype(str) + + # Correspondance id_relv (SERENA) / id_etude (SICEN2) + DICT_ID_ETUDE = dict(zip(relv_2sicen.id_etude,relv_2sicen.index)) + obs_2sicen.id_etude.replace(DICT_ID_ETUDE,inplace=True) + + # Mise à jour du champs id_etude de la table saisie_observation + update_to_sql( + obs_2sicen[obs_2sicen.id_origine.astype(int).isin(lst_idorigine)], + con_sicen, + 'saisie_observation', + 'saisie', + ['id_lot','id_origine'] + ) + + + # identification des données restantes 
taguées "hors etude" + id_origine_he = sicen_obs[is_serena&is_horsetude].id_origine.astype(int) + is_horsetude2 = obs_serena.obse_id.isin(id_origine_he) + res_he = obs_serena[is_horsetude2].copy() + DICT2_OBS_COLS = {**DICT_OBS_COLS,'relv_nom':'id_waypoint'} + obs2_2sicen = res_he.rename(columns=DICT2_OBS_COLS).filter(DICT2_OBS_COLS.values(),axis=1) + obs2_2sicen['id_lot'] = 3 + obs2_2sicen['id_etude'] = -152 + obs2_2sicen.id_origine = obs2_2sicen.id_origine.astype(str) + + # Mise à jour du champs id_etude de la table saisie_observation + update_to_sql( + obs2_2sicen, + con_sicen, + 'saisie_observation', + 'saisie', + ['id_lot','id_origine'] + ) + diff --git a/1_SICEN/SERENA_siege/recovery_rhomeo.py b/1_SICEN/SERENA_siege/recovery_rhomeo.py new file mode 100644 index 0000000..9289668 --- /dev/null +++ b/1_SICEN/SERENA_siege/recovery_rhomeo.py @@ -0,0 +1,464 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +import pandas_access as mdb +from pycen import con_sicen, update_to_sql +import pandas as pd +import geopandas as gpd +from os import path + + +postgis_sch = 'serenabase' +ACCESS_PATH = '/media/colas/SRV/FICHIERS/OUTILS/BASES DE DONNEES/RHOMEO/2012/CEN-38_RHOMEO_BDD_2012' +ACCESS_FILE = 'BD_Rhomeo_data.mdb' +DICT_COLS = { + 'obse_id':'id_origine', +} + + +def get_columns_oftable(tab,sch,con): + res = con.dialect.get_columns(con,tab,sch) + return [x['name'] for x in res] + + +def get_columns_dtypes(tab,sch,con,df:pd.DataFrame=None): + res = con.dialect.get_columns(con,tab,sch) + if isinstance(df,pd.DataFrame): + return {x['name']:x['type'] for x in res if x['name'] in df.columns} + else: + return {x['name']:x['type'] for x in res} + + +def match_columns(df,tab,sch,con): + cols_saisie = get_columns_oftable(tab,sch,con) + obs_cols = df.columns + return obs_cols[obs_cols.isin(cols_saisie)] + + +def lower(df): + for c in df.columns : + df[c] = df[c].str.lower() if df[c].apply(type).eq(str).all() else df[c] + return df + + +def get_odat_obse(id_obse:gpd.np.ndarray|list|pd.Series|int=None): + sql = ''' + SELECT + o.odat_obse_id, + c.choi_nom, + l.list_nom, + o.odat_nom + FROM {sch}.rnf_odat o + LEFT JOIN {sch}.rnf_choi c ON c.choi_id = o.odat_choi_id + LEFT JOIN {sch}.rnf_list l ON l.list_id = o.odat_c_list_id + '''.format(sch=postgis_sch) + + if isinstance(id_obse,int) : + sql += ' WHERE odat_obse_id = %i'%id_obse + elif isinstance(id_obse,(list,gpd.pd.Series,gpd.np.ndarray)) : + sql += ' WHERE odat_obse_id IN {lst_id}'.format(lst_id=tuple(id_obse)) + + result = pd.read_sql_query(sql,con_sicen,index_col=['odat_obse_id','list_nom']).replace({r' \(RhoMeO\)':'',r'\?':'',r'[.]':''},regex=True) + + df = lower(result).pivot(columns='choi_nom',values='odat_nom').reset_index(drop=False) + + return format_comportement(df) + + +def format_comportement(oda:pd.DataFrame): + DICT = { + 'exuvie/émergent':'ODO_Exuvie/émergence', + 'mâles+femelles':'ODO_Mâles+Femelles', + 'autre':'ODO_Autre', + 'territoriale':'ODO_Territorial', + 'ponte':'ODO_Ponte', + 'accouplement':'ODO_Tandem', + 'tandem':'ODO_Tandem', + 'aucun':None, + } + return oda.replace(DICT) + + +def format_odat(df,odat): + DICT_COLS = { + 'comportement':'reprostatut', + 'strate':'type_effectif', + 'rmq_odat':'remarque_obs' + } + + + df_odat = df[['obse_id','obse_nombre','obse_nom','cd_nom','ordre']].merge( + odat, + left_on='obse_id',right_on='odat_obse_id' + ) + + nb_cols = odat.columns[odat.columns.str.startswith('nb')] + df_odat['rmq_odat'] = None + for c in nb_cols: + test1 = df_odat.obse_nombre!=df_odat[c] + test2 = df_odat[c].notna() + 
test3 = df_odat.rmq_odat.notna() + test = test1&test2&test3 + tes = test1&test2&~test3 + if any(tes) or any(test): + tt = '{rmq_o}; {c}:{data}'.format(rmq_o=df_odat[test].rmq_odat,c=c,data=df_odat[test][c]) + df_odat.loc[test,'rmq_odat'] = df_odat[test].rmq_odat+';'+c+':'+df_odat[test][c] + df_odat.loc[tes,'rmq_odat'] = c+':'+df_odat[tes][c] + + return df_odat.rename(columns=DICT_COLS)[['odat_obse_id',*DICT_COLS.values()]] + + + +def get_obs_serena(insicen:bool=None): + sql = ''' + SELECT o.*, + s.site_nom, + s.site_ref_sig, + CASE WHEN t.taxo_id = 203471 THEN (SELECT cd_nom FROM inpn.taxref WHERE cd_nom = '54213' ) + WHEN t.taxo_id = 203491 THEN (SELECT cd_nom FROM inpn.taxref WHERE cd_nom = '521494' ) + WHEN tax.cd_nom IS NULL THEN ta.cd_nom::text + ELSE tax.cd_nom + END cd_nom, + CASE WHEN t.taxo_id = 203471 THEN (SELECT phylum FROM inpn.taxref WHERE cd_nom = '54213' ) + WHEN t.taxo_id = 203491 THEN (SELECT phylum FROM inpn.taxref WHERE cd_nom = '521494' ) + WHEN tax.phylum IS NULL THEN ta.phylum + ELSE tax.phylum + END phylum, + CASE WHEN t.taxo_id = 203471 THEN (SELECT classe FROM inpn.taxref WHERE cd_nom = '54213' ) + WHEN t.taxo_id = 203491 THEN (SELECT classe FROM inpn.taxref WHERE cd_nom = '521494' ) + WHEN tax.classe IS NULL THEN ta.classe + ELSE tax.classe + END classe, + CASE WHEN t.taxo_id = 203471 THEN (SELECT ordre FROM inpn.taxref WHERE cd_nom = '54213' ) + WHEN t.taxo_id = 203491 THEN (SELECT ordre FROM inpn.taxref WHERE cd_nom = '521494' ) + WHEN tax.ordre IS NULL THEN ta.ordre + ELSE tax.ordre + END ordre, + CASE WHEN t.taxo_id = 203471 THEN (SELECT famille FROM inpn.taxref WHERE cd_nom = '54213' ) + WHEN t.taxo_id = 203491 THEN (SELECT famille FROM inpn.taxref WHERE cd_nom = '521494' ) + WHEN tax.famille IS NULL THEN ta.famille + ELSE tax.famille + END famille, + CASE WHEN t.taxo_id = 203471 THEN (SELECT nom_complet FROM inpn.taxref WHERE cd_nom = '54213' ) + WHEN t.taxo_id = 203491 THEN (SELECT nom_complet FROM inpn.taxref WHERE cd_nom = '521494' ) + WHEN tax.nom_complet IS NULL THEN ta.nom_complet + ELSE tax.nom_complet + END nom_complet, + CASE WHEN t.taxo_id = 203471 THEN (SELECT nom_vern FROM inpn.taxref WHERE cd_nom = '54213' ) + WHEN t.taxo_id = 203491 THEN (SELECT nom_vern FROM inpn.taxref WHERE cd_nom = '521494' ) + WHEN tax.nom_vern IS NULL THEN ta.nom_vern + ELSE tax.nom_vern + END nom_vern, + CASE WHEN tmpo.ogll_lon = '999' THEN tmps.sgll_lon::float + ELSE tmpo.ogll_lon::float + END lon, + CASE WHEN tmpo.ogll_lat = '999' THEN tmps.sgll_lat::float + ELSE tmpo.ogll_lat::float + END lat + FROM {sch}.rnf_obse o + LEFT JOIN {sch}.rnf_relv r ON r.relv_id = o.obse_relv_id + LEFT JOIN {sch}.rnf_site s ON s.site_id = o.obse_site_id + LEFT JOIN {sch}.tmp_sgll tmps ON tmps.sgll_site_id = o.obse_site_id + LEFT JOIN {sch}.tmp_ogll tmpo ON tmpo.ogll_obse_id = o.obse_id + LEFT JOIN serenarefe.rnf_taxo t ON t.taxo_id = o.obse_taxo_id + LEFT JOIN inpn.taxons_isere_absents_taxref ta ON ta.id_taxon = o.obse_taxo_id + LEFT JOIN inpn.taxref tax ON t.taxo_mnhn_id = tax.cd_nom::int + --LEFT JOIN {sch}.rnf_odat od ON od.odat_obse_id = o.obse_id + + '''.format(sch=postgis_sch) + + if insicen is not None : + sql_sicen = 'SELECT DISTINCT id_origine FROM saisie.saisie_observation WHERE id_lot=3' + sicen_obs = pd.read_sql_query(sql_sicen,con_sicen) + if insicen is True : + sql += 'WHERE obse_id IN {lst_id}'.format(lst_id=tuple(sicen_obs.id_origine.astype(int))) + if insicen is False : + sql += 'WHERE obse_id NOT IN {lst_id}'.format(lst_id=tuple(sicen_obs.id_origine.astype(int))) + + 
return pd.read_sql_query(sql,con_sicen)\ + .dropna(axis=1,how='all') + + +def list_releve(id_relv=None,source='postgis'): + if source=='postgis': + + df_choi = pd.read_sql_table('rnf_choi',con_sicen,postgis_sch)\ + .dropna(axis=1,how='all') + df_relv = pd.read_sql_table('rnf_relv',con_sicen,postgis_sch)\ + .dropna(axis=1,how='all') + + # Correction coquille + df_relv.loc[df_relv.relv_categ_choi_id==100800,'relv_categ_choi_id'] = 100008 + + # Jointure relevé / catégorie + df = df_relv.merge(df_choi[['choi_id','choi_nom']],how='inner',left_on='relv_categ_choi_id',right_on='choi_id')\ + .drop(columns=['relv_categ_choi_id']) + + # Mise en forme des dates + lstcol_dat = df.columns[df.columns.str.contains('date')] + for lcd in lstcol_dat: + # df[lcd] = df[lcd].replace({'VIDE':None}) + df.loc[df[lcd]=='2000 à 2001',lcd] = '2000' if '1date' in lcd else '2001' + df[lcd].replace({'VIDE':None},inplace=True) + df[lcd] = pd.to_datetime(df[lcd]) + + if id_relv is not None: + if isinstance(id_relv,int) : filtre = df.relv_id == id_relv + elif isinstance(id_relv,(list,gpd.pd.Series,gpd.np.ndarray)) : filtre = df.relv_id.isin([*id_relv]) + return df[filtre] + else: + return df + + +def get_serena_obs(srce_id=None,source:str='access'): + DICT_STR = { + 'avenir':'cen isère', + 'be':'personnel' + } + if source == 'postgis': + sql = 'SELECT user_id,user_srce_id FROM {sch}.rnf_user'.format(sch=postgis_sch) + if srce_id is not None: + if isinstance(srce_id,int) : + sql += ' WHERE user_srce_id = %i'%srce_id + elif isinstance(srce_id,(list,gpd.pd.Series,gpd.np.ndarray)) : + sql += ' WHERE user_srce_id IN {lst_id}'.format(lst_id=tuple(srce_id)) + return lower(pd.read_sql_query(sql,con_sicen)) + + elif source == 'access': + df = mdb.read_table(path.join(ACCESS_PATH,ACCESS_FILE),'rev_obs') + if srce_id is not None: + if isinstance(srce_id,int) : + df = df[df.rev_observateur==srce_id] + elif isinstance(srce_id,(list,gpd.pd.Series,gpd.np.ndarray)) : + df = df[df.rev_observateur.isin([*srce_id])] + df[['nom','prenom']] = df.nom_observateur.str.split(' ',1,expand=True) + + return lower(df).replace(DICT_STR) + + +def get_sicen_obs(personne_id=None): + sql = 'SELECT id_personne,prenom,nom,id_structure,nom_structure FROM md.personne JOIN md.structure USING (id_structure)'.format(sch=postgis_sch) + if personne_id is not None: + if isinstance(personne_id,int) : + sql += ' WHERE personne_id = %i'%personne_id + elif isinstance(personne_id,str) : + sql += ' WHERE personne_id = %i'%int(personne_id) + elif isinstance(personne_id,(list,gpd.pd.Series,gpd.np.ndarray)) : + sql += ' WHERE personne_id IN {lst_id}'.format(lst_id=tuple(personne_id)) + return lower(pd.read_sql_query(sql,con_sicen)) + +def get_sicen_etude(): + sql = 'SELECT * FROM md.etude' + return pd.read_sql_query(sql, con_sicen) + + +def get_serena_choi(choi_id:gpd.np.ndarray|list|pd.Series|int=None): + '''''' + sql = 'SELECT choi_id,choi_nom FROM {sch}.rnf_choi'.format(sch=postgis_sch) + + if isinstance(choi_id,int) : + sql += ' WHERE choi_id = %i'%choi_id + elif isinstance(choi_id,(list,gpd.pd.Series,gpd.np.ndarray)) : + sql += ' WHERE choi_id IN {lst_id}'.format(lst_id=tuple(choi_id)) + + return lower(pd.read_sql_query(sql,con_sicen)) + + +def get_serena_site(site_id:gpd.np.ndarray|list|pd.Series|int=None): + sql = 'SELECT site_id,site_nom FROM {sch}.rnf_site'.format(sch=postgis_sch) + + if isinstance(site_id,int) : + sql += ' WHERE site_id = %i'%site_id + elif isinstance(site_id,(list,gpd.pd.Series,gpd.np.ndarray)) : + sql += ' WHERE site_id IN 
{lst_id}'.format(lst_id=tuple(site_id)) + return pd.read_sql_query(sql,con_sicen) + + +def get_sicen_pcol(): + sql = '''SELECT * FROM "md"."protocole";''' + return lower(pd.read_sql_query(sql,con_sicen)) + + +def _crsp(left:pd.DataFrame,right:pd.DataFrame,lefton,righton,idleft,idright): + df = left.merge(right,left_on=lefton,right_on=righton) + return dict(zip(df[idleft],df[idright])) + + +def crsp_relv(relv:pd.Series): + seren_relv = lower(list_releve(relv.unique())) + sicen_relv = lower(get_sicen_etude()) + DICT_RELV_ID = _crsp(left=seren_relv,right=sicen_relv,lefton='relv_nom',righton='nom_etude',idleft='relv_id',idright='id_etude') + return relv.replace(DICT_RELV_ID) + + +def crsp_obs(obs:pd.Series): + obs_seren = get_serena_obs(obs.unique()) + obs_sicen = get_sicen_obs() + DICT_OBS_ID = _crsp(left=obs_seren,right=obs_sicen,lefton=['nom','structure_obs'],righton=['nom','nom_structure'],idleft='rev_observateur',idright='id_personne') + return obs.replace(DICT_OBS_ID) + + +def crsp_pcol(pcol:pd.Series): + pcol_seren = get_serena_choi(pcol.unique()) + pcol_sicen = get_sicen_pcol() + DICT_PCOL_ID = _crsp(left=pcol_seren,right=pcol_sicen,lefton='choi_nom',righton='libelle',idleft='choi_id',idright='id_protocole') + return pcol.replace(DICT_PCOL_ID) + + +def crsp_valid(valid:pd.Series): + pcol_seren = get_serena_choi(valid.unique()) + pcol_sicen = get_sicen_pcol() + DICT_PCOL_ID = _crsp(left=pcol_seren,right=pcol_sicen,lefton='choi_nom',righton='libelle',idleft='choi_id',idright='id_protocole') + return valid.replace(DICT_PCOL_ID) + + +def crsp_abond(abond:pd.Series): + ab_seren = get_serena_choi(abond.dropna().unique()) + DICT_ABOND_ID = dict(zip(ab_seren.choi_id,ab_seren.choi_nom)) + return abond.replace(DICT_ABOND_ID) + + +def crsp_site(site:pd.Series): + sit_seren = get_serena_site(site.dropna().unique()) + DICT_SITE_ID = dict(zip(sit_seren.site_id,sit_seren.site_nom)) + return site.replace(DICT_SITE_ID) + + +def get_structure_id(id_pers:gpd.np.ndarray|list|pd.Series|int=None): + sql = 'SELECT id_personne, id_structure FROM md.personne' + if isinstance(id_pers,int) : + sql += ' WHERE id_personne = %i'%id_pers + elif isinstance(id_pers,(list,gpd.pd.Series,gpd.np.ndarray)) : + sql += ' WHERE id_personne IN {lst_id}'.format(lst_id=tuple(id_pers)) + return pd.read_sql_query(sql,con_sicen) + + +def format_effectif(DF): + df = DF.copy() + isnum = df.effectif_min.str.isnumeric() + df.loc[isnum,'effectif'] = df[isnum].effectif_min.copy() + df.loc[isnum,'effectif_min'] = None + df.loc[~isnum,'effectif_textuel'] = df[~isnum].effectif_min.copy() + df.loc[~isnum,'effectif_min'] = df[~isnum].effectif_textuel.str.split(r'[-+]',expand=True)[0] + df.loc[~isnum,'effectif_max'] = df[~isnum].effectif_textuel.str.split(r'[-+]',expand=True)[1] + return df.replace({'':None}) + +def to_sicen(DF): + + CRSP_COLUMNS = { + 'obse_id':'id_origine', + 'obse_relv_id':'id_etude', + 'obse_obsv_id':'observateur', + 'obse_detm_id':'validateur', + 'obse_date':'date_obs', + # 'obse_site_id':'localisation', + 'obse_pcole_choi_id':'id_protocole', + 'obse_validat_choi_id':'statut_validation', + 'obse_confid_choi_id':'diffusable', + 'obse_abond_choi_id':'effectif_textuel', + 'obse_nombre':'effectif_min', + 'site_nom':'localisation', + 'site_ref_sig':'id_waypoint', + 'lat':'latitude', + 'lon':'longitude', + } + + df = DF.copy() + if df.geometry.name != 'geometrie' : + df.rename_geometry('geometrie',inplace=True) + + df.obse_relv_id = crsp_relv(df.obse_relv_id) + df.obse_obsv_id = crsp_obs(df.obse_obsv_id) + df.obse_detm_id = 
crsp_obs(df.obse_detm_id).astype(int) + df.obse_date = pd.to_datetime(df.obse_date) + # df.obse_site_id = crsp_site(df.obse_site_id) + df.obse_pcole_choi_id = crsp_pcol(df.obse_pcole_choi_id) + df.loc[df.obse_validat_choi_id==100373,'obse_validat_choi_id'] = 'validée' + df.loc[df.obse_confid_choi_id==100473,'obse_confid_choi_id'] = True + df.obse_abond_choi_id = crsp_abond(df.obse_abond_choi_id) + df.rename(columns=CRSP_COLUMNS,inplace=True) + + # Tag du lot SERENA + df['id_lot'] = 3 + + # Jointure des id_structure + struct = get_structure_id(df.observateur.unique()) + df = df.merge(struct,how='left',left_on='observateur', right_on='id_personne')\ + .rename(columns={'id_structure':'structure'}) + cols = match_columns(df,'saisie_observation','saisie',con_sicen) + + + # Formatage des effectifs + return format_effectif(df[cols]) + + +def drop_cdnom_missing(df): + isna = df.cd_nom.isna() + idx_na = df[isna].index + return df.drop(idx_na) + + + +if __name__ == "__main__": + + obs_serena = get_obs_serena(insicen=True) + isna = obs_serena.cd_nom.isna() + + # Recréation des géométries + gdf_obs = obs_serena.set_geometry(gpd.points_from_xy(obs_serena.lon,obs_serena.lat)) + gdf_obs.set_crs(4326,inplace=True) + gdf_obs.to_crs(2154,inplace=True) + + # Identification des données RhoMeo + is_rhomeo = gdf_obs.obse_nom.str.contains('rhoméo',na=False,case=False) + gdf_obs.loc[is_rhomeo,['obse_id','obse_nom','obse_date','cd_nom']] + obs_rhomeo = gdf_obs[is_rhomeo]\ + .dropna(axis=1,how='all')\ + .drop(columns=['obse_habi_id']) + rhoisna = obs_rhomeo.cd_nom.isna() + obs_rhomeo = drop_cdnom_missing(obs_rhomeo) + + odat = get_odat_obse(obs_rhomeo.obse_id) + + OBS_RHOMEO = obs_rhomeo.merge( + format_odat(obs_rhomeo,odat), + left_on='obse_id',right_on='odat_obse_id',how='left' + ).drop(columns='odat_obse_id') + + OBS_RHOMEO.to_file('/home/colas/Documents/9_PROJETS/4_SICEN/RECOVERY/rhomeo_data_notin_sicen.gpkg') + + RES_OBS = to_sicen(OBS_RHOMEO) + RES_OBS.to_postgis( + 'saisie_observation', + con_sicen, + 'saisie', + if_exists='append', + index=False, + dtype=get_columns_dtypes( + 'saisie_observation', + 'saisie', + con_sicen, + RES_OBS + ) + ) + # .to_file('/home/colas/Documents/9_PROJETS/4_SICEN/RECOVERY/rhomeo_data_notin_sicen.gpkg') + + + update_to_sql( + RES_OBS[['id_lot','id_origine','effectif','effectif_min']], + con_sicen, + 'saisie_observation', + 'saisie', + ['id_lot','id_origine'], + dtype=get_columns_dtypes( + 'saisie_observation', + 'saisie', + con_sicen, + RES_OBS[['id_lot','id_origine','effectif','effectif_min']] + ) + ) + + + # Récupération des relevés + id_relvrho = gdf_obs[is_rhomeo].obse_relv_id.unique() + relv = list_releve(id_relv=id_relvrho) + relv_rh = relv.relv_nom diff --git a/1_SICEN/extract_chiro.py b/1_SICEN/extract_chiro.py new file mode 100644 index 0000000..8923fae --- /dev/null +++ b/1_SICEN/extract_chiro.py @@ -0,0 +1,39 @@ +import pycen +import geopandas as gpd + +sql = 'SELECT * FROM saisie.saisie_observation' +df = gpd.read_postgis(sql,pycen.con_sicen,geom_col="geometrie") + +sql = 'SELECT * FROM saisie.v_synthese_invertebre' +v_inv = gpd.read_postgis(sql,pycen.con_sicen,geom_col="geom") +v_inv.date_obs = gpd.pd.to_datetime(v_inv.date_obs) + +v_inv2021 = v_inv[v_inv.date_obs > '2021-01-01'] + +sql = 'SELECT * FROM saisie.v_synthese_vertebre' +v_vert = gpd.read_postgis(sql,pycen.con_sicen,geom_col="geom") +v_vert.date_obs = gpd.pd.to_datetime(v_vert.date_obs) +chiro = v_vert[v_vert.ordre=='Chiroptera'] + +dict_obs_pu = [ + '{"JUTON Mathieu"}', '{"VEILLET Bruno"}', + '{"SUCHET 
Patrick","VEILLET Bruno"}', + '{"SUCHET Patrick"}', + '{"BIRON Nicolas"}', + '{"PASQUIER Guillaume"}', + '{"LUCAS Jérémie"}', + '{"JUTON Mathieu","PASQUIER Guillaume"}', + '{"BIRON Nicolas","BONHOMME Baptiste","GROSSI Jean-Luc","JUTON Mathieu"}', + '{"DELOCHE Denis","BEGUIN Lucile","JUTON Mathieu"}', + '{"BERENGER Myrtille"}','{"VINCENT Stéphane"}' +] +chiro[( + (chiro.diffusable=="oui") | + (chiro.observateurs.isin(dict_obs_pu)) | + chiro.rmq_localisation.str.contains('Camp de Chambaran',case=False))] + +# IDEM +# chiro[( +# (chiro.diffusable=="oui") | +# (chiro.observateurs.isin(dict_obs_pu)) | +# (chiro.rmq_localisation=='Camp de Chambaran'))] \ No newline at end of file diff --git a/1_SICEN/extract_obs.py b/1_SICEN/extract_obs.py new file mode 100644 index 0000000..c25a440 --- /dev/null +++ b/1_SICEN/extract_obs.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*-. + +from sqlalchemy import create_engine #, text +from sqlalchemy.engine import URL +import geopandas as gpd +from pycen.zh import zh +zh = zh() + +file = '/home/colas/Documents/9_PROJETS/1_ZH/SIMBY_zonage_Belledonne.txt' +# Parametres bdd +user = 'cen_admin' +pwd = '#CEN38@venir' +adr = '91.134.194.221' +port = '5432' +base = 'sicen2' + +url = URL.create('postgresql+psycopg2', + username=user, + password=pwd, + host=adr, + database=base, +) +con = create_engine(url) + +if file : + lst_site = gpd.pd.read_csv(file) + lst_site = [*lst_site.site] + +sql = """ + SELECT * FROM zones_humides.v_zoneshumides +""" +sql = sql + " WHERE site_code IN ('%s')" % "','".join(lst_site) +VZH = gpd.read_postgis(sql,zh.con,geom_col='geom') + +sql = """ + SELECT * FROM saisie.saisie_observation s + WHERE ST_Intersects(s.geometrie,'SRID=2154;%s') +""" % VZH.unary_union + +saisie = gpd.read_postgis(sql,con,geom_col='geometrie') + +tmp = gpd.sjoin(VZH[['site_code','geom']], saisie) +ss = tmp[['site_code', + 'phylum','regne','classe','ordre','famille', + 'nom_valide','nom_vern','nom_complet','cd_nom']].copy() +ss.drop_duplicates(inplace=True) +ss.nom_valide = ss.nom_valide.replace([' \(#ref\)',],[''],regex=True) +ss.nom_complet = ss.nom_complet.replace([' \(#ref\)',],[''],regex=True) +ss.sort_values('nom_valide', inplace=True) +sp_enjeux = [ + 'Carex brunnescens (Pers.) Poir., 1813', + 'Coenagrion mercuriale (Charpentier, 1840)', + 'Drosera rotundifolia L., 1753', + 'Juncus squarrosus L., 1753', + 'Lycopodium annotinum L., 1753', + 'Lycopodium clavatum L., 1753', + 'Neottia cordata (L.) Rich., 1817', + 'Pinguicula grandiflora subsp. rosea (Mutel) Casper, 1962', + # 'Pinguicula grandiflora Lam., 1789', + 'Pinguicula grandiflora subsp. 
grandiflora Lam., 1789',
+]
+ss.nom_valide.unique()
+
+ss[ss.nom_valide.isin(sp_enjeux)].to_excel('/home/colas/Documents/tmp/espèced_enjeux_belledonne.xlsx')
+ss[ss.nom_complet.isin(sp_enjeux)].to_excel('/home/colas/Documents/tmp/espèced_enjeux_belledonne.xlsx')
+
+
+
diff --git a/1_SICEN/extract_obs_bysites_toGeonature.py b/1_SICEN/extract_obs_bysites_toGeonature.py
new file mode 100644
index 0000000..ee283d3
--- /dev/null
+++ b/1_SICEN/extract_obs_bysites_toGeonature.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+# -*- coding:UTF-8 -*-
+
+
+from sqlalchemy import create_engine #, text
+from sqlalchemy.engine import URL
+import geopandas as gpd
+import pandas as pd
+
+# HOMER
+user = 'cen_admin'
+pwd = '#CEN38@venir'
+adr = '91.134.194.221'
+port = '5432'
+bd_si = 'sicen2'
+
+url = URL.create('postgresql+psycopg2',
+    username=user,
+    password=pwd,
+    host=adr,
+    database=bd_si,
+)
+con = create_engine(url)
+
+# BART
+bd_38 = 'bd-cen-38'
+user = 'cen_admin'
+pwd = '#CEN38@venir'
+adr = '192.168.0.189'
+port = '5432'
+
+url = URL.create('postgresql+psycopg2',
+    username=user,
+    password=pwd,
+    host=adr,
+    database=bd_38,
+)
+con38 = create_engine(url)
+
+sql = 'SELECT * FROM sites.c_sites_zonages'
+site = gpd.read_postgis(sql,con38,geom_col='geom')
+site.sort_values(['code_site','surface_ha'], inplace=True)
+site.reset_index(inplace=True,drop=True)
+site.drop_duplicates(subset=['code_site'],keep='last', inplace=True)
+
+
+# manual single-site test value; immediately overwritten by the loop below
+cod = 'ARSE'
+for cod in site.code_site:
+    print(cod)
+    sql = """
+    SELECT * FROM saisie.vm_synthese_observations
+    WHERE ST_Intersects(geom,'SRID=2154;%s')
+    """ % site[site.code_site==cod].geom.values[0]
+    obs = gpd.read_postgis(sql,con,geom_col='geom')
+
+    if obs.empty:
+        continue
+
+    sql = """
+    SELECT operation op, date_operation date_op, utilisateur user_op, id_obs FROM saisie.suivi_saisie_observation
+    WHERE operation = 'INSERT' AND id_obs IN (%s)
+    """ % ",".join(obs.id_obs.astype(str))
+    ins_bos = pd.read_sql_query(
+        sql,con
+    )
+    if not ins_bos.empty:
+        ins_bos['heure_op'] = ins_bos.date_op.dt.time
+        ins_bos.date_op = ins_bos.date_op.dt.date
+        ins_bos.date_op = ins_bos.date_op.astype(str)
+        ins_bos.heure_op = ins_bos.heure_op.astype(str)
+
+    obs = pd.merge(ins_bos,obs, on=['id_obs'], how='right')
+    obs.structures = [' & '.join(i) for i in obs.structures]
+    obs.observateurs = [' & '.join(i) for i in obs.observateurs]
+    obs.date_obs = obs.date_obs.astype(str)
+    obs.date_debut_obs = obs.date_debut_obs.astype(str)
+    obs.date_fin_obs = obs.date_fin_obs.astype(str)
+    obs.heure_obs = obs.heure_obs.astype(str)
+    obs.set_index('id_obs', inplace=True)
+
+    output = '/home/colas/Documents/tmp/obsbysite/%s.xlsx' % cod
+    with pd.ExcelWriter(output) as writer:
+        obs[obs.structures=='CEN Isère'].to_excel(writer,sheet_name='CEN Isère', index=True)
+        obs[obs.structures!='CEN Isère'].to_excel(writer,sheet_name='Partenaire', index=True)
+
+
diff --git a/1_SICEN/import_dataTOsicenIMPORT.py b/1_SICEN/import_dataTOsicenIMPORT.py
new file mode 100644
index 0000000..ac21331
--- /dev/null
+++ b/1_SICEN/import_dataTOsicenIMPORT.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-.
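+# Loads a raw observation spreadsheet into the staging table
+# import.import_data_cen38: column names are lowercased and stripped, rows
+# without date_obs are dropped, duplicates are removed, and explicit SQLAlchemy
+# dtypes (Date/Time/String) are passed to to_sql() so the columns arrive typed
+# in PostgreSQL rather than as text. The table is then granted to grp_admin.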
+ +from sqlalchemy import create_engine #, text +from sqlalchemy.engine import URL +from sqlalchemy.types import Date,Time,String +from shapely.geometry import Point +import pandas as pd +import datetime + +file = '/home/colas/Documents/9_PROJETS/4_SICEN/IMPORT/Tableau Saisie_FauneFlore_AG_30_03_22.xlsx' +# file = '/home/colas/Documents/9_PROJETS/4_SICEN/LPO/cdnom_idvisionature.csv' +to_table = 'import_data_cen38' +# to_table = 'taxons_fauneisere_biolovision_taxref' + +# Parametres bdd +user = 'cen_admin' +pwd = '#CEN38@venir' +adr = '91.134.194.221' +port = '5432' +base = 'sicen2' + +url = URL.create('postgresql+psycopg2', + username=user, + password=pwd, + host=adr, + database=base, +) +con = create_engine(url) + +df = pd.read_excel( + file, + skiprows=[1,2,3,4] +) +if 'nom de champ' in df.columns: + del df['nom de champ'] +# df = pd.read_csv(file,sep=',') +df.columns = df.columns.str.strip().str.lower() +df.dropna(subset=['date_obs'],inplace=True) +df.drop_duplicates(inplace=True) + + +date_default = datetime.datetime(1899, 12, 30, 0, 0) +# data_import.loc[data_import['time_start'] == date_default,'time_start'] = datetime.time(0,0) +# data_import.loc[data_import['time_stop'] == date_default,'time_stop'] = datetime.time(0,0) +# data_import.loc[data_import['time_start'] == '1899-12-30 00:00:00','time_start'] = '00:00:00' +# data_import.loc[data_import['time_stop'] == '1899-12-30 00:00:00','time_stop'] = '00:00:00' +# df.loc[df['horaire'] == date_default,'horaire'] = datetime.time(0,0) +df.date_obs = df.date_obs.dt.date + +if to_table == 'import_data_cen38': + dtype = { + 'date_obs':Date, + 'date_debut_obs':Date, + 'date_fin_obs':Date, + 'heure':Time, + 'strate':String, + 'phenologie':String, + 'cd_nom':String, + 'sexe':String, + 'precision':String} +else : dtype = None + + +df.to_sql( + name=to_table, + con = con, + schema='import', + index=False, + if_exists='replace', + method='multi', + dtype=dtype +) + +sql_grant = """ +GRANT ALL ON TABLE import.%s TO grp_admin +""" % to_table + +with con.begin() as cnx: + cnx.execute(sql_grant) diff --git a/1_SICEN/import_donneesaisieTOsicen.py b/1_SICEN/import_donneesaisieTOsicen.py new file mode 100644 index 0000000..09ecedc --- /dev/null +++ b/1_SICEN/import_donneesaisieTOsicen.py @@ -0,0 +1,332 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*-. 
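+# Moves the staged rows from import.import_data_cen38 into
+# saisie.saisie_observation: the digitiser (numerisateur) is looked up in
+# md.personne, geometries are sanity-checked through a temporary view, observer
+# columns are resolved against md.personne/md.structure, and the INSERT only
+# runs when the prepared row count matches the staging table.
+# Coordinates are normalised with a latitude-magnitude heuristic repeated in
+# the SQL below; a minimal Python sketch of that heuristic (illustrative only,
+# this guess_srid() helper is not part of the script):
+#
+#     def guess_srid(lat: float) -> int:
+#         if lat > 6_000_000:          # northing in metres -> Lambert-93
+#             return 2154
+#         if 50 < lat < 3_000_000:     # metres -> Lambert II étendu
+#             return 27572
+#         return 4326                  # otherwise assume WGS84 degrees
+#
+# Everything is reprojected to EPSG:2154 before insertion.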
+ +from sqlalchemy import create_engine, text +from sqlalchemy.engine import URL +from datetime import datetime as dt +import pandas as pd +import geopandas as gpd + +nom_numerateur = 'GEIER' +pnom_numerator = 'Colas' +today = dt.now().date().isoformat() +# Parametres bdd +user = 'cen_admin' +pwd = '#CEN38@venir' +adr = '91.134.194.221' +port = '5432' +base = 'sicen2' + +url = URL.create('postgresql+psycopg2', + username=user, + password=pwd, + host=adr, + database=base, +) +con = create_engine(url) + +nom_numerateur = nom_numerateur.upper() +pnom_numerator = pnom_numerator[0].upper() + pnom_numerator[1:] +query_get_numerator = """ + SELECT * FROM md.personne + WHERE nom = '%s' + AND prenom = '%s' + AND id_structure = 1 +""" % (nom_numerateur,pnom_numerator) +numerateur = pd.read_sql_query( + sql = query_get_numerator, + con = con +) + + +select_import = """ +SELECT * FROM import.import_data_cen38; +""" +data_import = pd.read_sql_query( + sql = select_import, + con = con) + + +check_datageom = """ +DROP VIEW IF EXISTS import.test_geom_prestation; +CREATE OR REPLACE VIEW import.test_geom_prestation as ( +SELECT ROW_NUMBER() OVER (ORDER BY date_obs ASC) AS gid, localisation, +CASE WHEN latitude::numeric > 6000000 + THEN ST_GeomFromText('POINT('||longitude::double precision||' '||latitude::double precision||')',2154) -- coordonnées en lambert93 + WHEN latitude::numeric > 50 AND latitude::numeric < 3000000 + THEN ST_transform(ST_GeomFromText('POINT('||longitude::double precision||' '||latitude::double precision||')',27572),2154) -- coordonnées en Lambert II étendu + --WHEN latitude is null THEN st_pointonsurface(b.geom) -- pas de coordonnées XY on récupère la geom du centroide du site CenRA si elle existe et si une partie du nom du site est renseignée dans la colonne LOCALISATION + ELSE ST_transform(ST_GeomFromText('POINT('||longitude::double precision||' '||latitude::double precision||')',4326),2154) -- coordonnées en WGS84 +END as geometrie +FROM import.import_data_cen38 + --LEFT JOIN import.sites_serena_cren_mu_2015 b ON lower(b.nom_site) like '%'||lower(trim(localisation,'s'))||'%' -- on joint la table des sites CenRA pour récupérer éventuellement le centroide d'un site si pas de XY +WHERE cd_nom is not null and (date_obs is not null or date_debut_obs is not null)) ; -- pour ignorer les lignes vides qui traineraient dans le csv +SELECT * FROM import.test_geom_prestation; +""" +df_check_datageom = gpd.read_postgis( + sql = text(check_datageom), + con = con, + geom_col='geometrie') +if df_check_datageom.geometry.is_valid.all() \ + and df_check_datageom.crs.srs == 'epsg:2154': + drop_datageom = """DROP VIEW IF EXISTS import.test_geom_prestation;""" + with con.begin() as cnx: + cnx.execute(drop_datageom) + + +create_table_pers = """ +DROP TABLE IF EXISTS md.temp_import_personne; +CREATE TABLE md.temp_import_personne AS + +WITH personnel AS ( + SELECT a.id_personne,a.nom,a.prenom,b.id_structure,b.nom_structure + FROM md.personne a + JOIN md.structure b USING (id_structure) +) +SELECT DISTINCT + -- personne + CASE WHEN observateur2 is null THEN (c.nom||' '||c.prenom)::text + WHEN observateur2 is not null AND observateur3 is null THEN (c.nom||' '||c.prenom)||'&'||(d.nom||' '||d.prenom) + WHEN observateur3 is not null AND observateur4 is null THEN (c.nom||' '||c.prenom)||'&'||(d.nom||' '||d.prenom)||'&'||(e.nom||' '||e.prenom) + WHEN observateur4 is not null AND observateur5 is null THEN (c.nom||' '||c.prenom)||'&'||(d.nom||' '||d.prenom)||'&'||(e.nom||' '||e.prenom)||'&'||(f.nom||' '||f.prenom) + 
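+    -- Each WHEN arm above appends one more non-null observateurN
+    -- ("NOM Prénom" pairs joined with '&', up to five observers);
+    -- the ELSE below is the five-observer fallback.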
+ ELSE c.nom_structure||'&'||d.nom_structure||'&'||(e.nom||' '||e.prenom)||'&'||(f.nom||' '||f.prenom)||'&'||(g.nom||' '||g.prenom) + END AS personne, + + -- nom_structure + CASE WHEN observateur2 is null THEN c.nom_structure::text + WHEN observateur2 is not null AND observateur3 is null AND c.nom_structure <> d.nom_structure + THEN c.nom_structure||'&'||d.nom_structure + WHEN observateur2 is not null AND observateur3 is null AND c.nom_structure = d.nom_structure + THEN c.nom_structure + + WHEN observateur3 is not null AND observateur4 is null AND c.nom_structure <> d.nom_structure AND d.nom_structure <> e.nom_structure + AND c.nom_structure <> e.nom_structure + THEN c.nom_structure||'&'||d.nom_structure||'&'||e.nom_structure + WHEN observateur3 is not null AND observateur4 is null AND c.nom_structure = d.nom_structure AND d.nom_structure <> e.nom_structure + THEN c.nom_structure||'&'||e.nom_structure + WHEN observateur3 is not null AND observateur4 is null AND c.nom_structure <> d.nom_structure AND d.nom_structure = e.nom_structure + THEN c.nom_structure||'&'||d.nom_structure + + WHEN observateur4 is not null AND observateur5 is null AND c.nom_structure <> d.nom_structure + THEN c.nom_structure||'&'||d.nom_structure||'&'||e.nom_structure||'&'||f.nom_structure + + ELSE c.nom_structure||'&'||d.nom_structure||'&'||e.nom_structure||'&'||f.nom_structure||'&'||g.nom_structure + END AS nom_structure, + + -- observateur + CASE WHEN observateur2 is null THEN c.id_personne::text + WHEN observateur2 is not null AND observateur3 is null THEN c.id_personne||'&'||d.id_personne + WHEN observateur3 is not null AND observateur4 is null THEN c.id_personne||'&'||d.id_personne||'&'||e.id_personne + WHEN observateur4 is not null AND observateur5 is null THEN c.id_personne||'&'||d.id_personne||'&'||e.id_personne||'&'||f.id_personne + + ELSE c.id_personne||'&'||d.id_personne||'&'||e.id_personne||'&'||f.id_personne||'&'||g.id_personne + END AS observateur, + structure +FROM import.import_data_cen38 a + LEFT JOIN personnel c ON (c.nom||' '||c.prenom) = trim(a.observateur1::text) AND lower(c.nom_structure) = lower(a.structure) + LEFT JOIN personnel d ON (d.nom||' '||d.prenom) = trim(a.observateur2::text) AND lower(d.nom_structure) = lower(a.structure) + LEFT JOIN personnel e ON (e.nom||' '||e.prenom) = trim(a.observateur3::text) AND lower(e.nom_structure) = lower(a.structure) + LEFT JOIN personnel f ON (f.nom||' '||f.prenom) = trim(a.observateur4::text) AND lower(f.nom_structure) = lower(a.structure) + LEFT JOIN personnel g ON (g.nom||' '||g.prenom) = trim(a.observateur5::text) AND lower(g.nom_structure) = lower(a.structure) +; +""".format(id_creator = numerateur.id_personne.item()) +with con.begin() as cnx: + cnx.execute(create_table_pers) + +select_pers = 'SELECT * FROM md.temp_import_personne' +df_pers = pd.read_sql_query( + sql = select_pers, + con = con) + + + + +with_dataTOinsert = """ +WITH personnel AS ( + SELECT a.id_personne,a.nom,a.prenom,b.id_structure,b.nom_structure + FROM md.personne a + JOIN md.structure b USING (id_structure) +), +t as ( +SELECT + CASE WHEN a.heure IS NULL THEN '00:00:00' + ELSE a.heure END heure, + a.date_obs, a.date_debut_obs, a.date_fin_obs, a.duree as date_textuelle, + CASE WHEN b.regne IS NULL THEN bb.regne + ELSE b.regne END regne, + CASE WHEN b.phylum IS NULL THEN bb.phylum + ELSE b.phylum END phylum, + CASE WHEN b.classe IS NULL THEN bb.classe + ELSE b.classe END classe, + CASE WHEN b.ordre IS NULL THEN bb.ordre + ELSE b.ordre END ordre, + CASE WHEN b.famille IS NULL THEN 
bb.famille + ELSE b.famille END famille, + CASE WHEN b.nom_valide IS NULL THEN bb.nom_valide + ELSE b.nom_valide END nom_valide, + CASE WHEN b.nom_vern IS NULL THEN bb.nom_vern + ELSE b.nom_vern END nom_vern, + CASE WHEN b.nom_complet IS NULL THEN bb.nom_complet + ELSE b.nom_complet END nom_complet, + CASE WHEN b.cd_nom IS NULL THEN bb.cd_nom::text + ELSE b.cd_nom END cd_nom, + a.abondance_dominance as effectif_textuel, a.effectif_min, a.effectif_max, + CASE WHEN strate::text is not null THEN strate::text + ELSE age::text END AS type_effectif, + CASE WHEN phenologie is not null THEN phenologie + ELSE sexe END AS phenologie, + + CASE WHEN latitude::numeric > 6000000 + THEN ST_X(st_transform(ST_GeomFromText('POINT('||longitude::double precision||' '||latitude::double precision||')',2154),4326)) -- coordonnée X en lambert93 + WHEN latitude::numeric > 50 AND latitude::numeric < 3000000 + THEN ST_X(st_transform(ST_GeomFromText('POINT('||longitude::double precision||' '||latitude::double precision||')',27572),4326)) -- coordonnées en Lambert II étendu + -- WHEN latitude is null + -- THEN ST_X(st_transform(st_pointonsurface(i.geom),4326)) -- pas de coordonnées XY on récupère la geom du centroide du site CenRA si elle existe et si une partie du nom du site est renseignée dans la colonne LOCALISATION + ELSE longitude::double precision -- coordonnées en WGS84 + END as longitude, + + CASE WHEN latitude::numeric > 6000000 + THEN ST_Y(st_transform(ST_GeomFromText('POINT('||longitude::double precision||' '||latitude::double precision||')',2154),4326)) -- coordonnée X en lambert93 + WHEN latitude::numeric > 50 AND latitude::numeric < 3000000 + THEN ST_Y(st_transform(ST_GeomFromText('POINT('||longitude::double precision||' '||latitude::double precision||')',27572),4326)) -- coordonnées en Lambert II étendu + -- WHEN latitude is null + -- THEN ST_Y(st_transform(st_pointonsurface(i.geom),4326)) -- pas de coordonnées XY on récupère la geom du centroide du site CenRA si elle existe et si une partie du nom du site est renseignée dans la colonne LOCALISATION + ELSE latitude::double precision + END as latitude, + localisation, + + CASE WHEN observateur2 is null THEN c.id_personne::text + WHEN observateur2 is not null AND observateur3 is null THEN c.id_personne||'&'||d.id_personne + WHEN observateur3 is not null AND observateur4 is null THEN c.id_personne||'&'||d.id_personne||'&'||e.id_personne + WHEN observateur4 is not null AND observateur5 is null THEN c.id_personne||'&'||d.id_personne||'&'||e.id_personne||'&'||f.id_personne + + ELSE c.id_personne||'&'||d.id_personne||'&'||e.id_personne||'&'||f.id_personne||'&'||g.id_personne + END AS observateur, + {id_numerateur}::integer as numerisateur, -- adapter l'identifiant du numérisateur en fonction de la personne qui réalise l'import en masse + + -- cas d'une prestation on ajoute le CenRA à la structure de l'observateur '&1' sinon il faut supprimer + CASE WHEN h.id_structure <> 1 THEN (h.id_structure||'&1')::text + ELSE h.id_structure::text END as structure, + remarque_obs, + CASE WHEN lower(a.structure) like '%CEN Isère%' THEN true::boolean + ELSE false::boolean END as diffusable, + precision::saisie.enum_precision as precision, + -- CASE + -- WHEN latitude is null AND round(st_maxdistance(st_PointOnSurface(i.geom), st_convexhull(i.geom))::numeric,-2) < 101 THEN '10 à 100m'::saisie.enum_precision + -- WHEN latitude is null AND round(st_maxdistance(st_PointOnSurface(i.geom), st_convexhull(i.geom))::numeric,-2) < 501 AND 
round(st_maxdistance(ST_PointOnSurface(i.geom), st_convexhull(i.geom))::numeric,-2) > 100 THEN '100 à 500m'::saisie.enum_precision + -- WHEN latitude is null AND round(st_maxdistance(st_PointOnSurface(i.geom), st_convexhull(i.geom))::numeric,-2) < 1001 AND round(st_maxdistance(ST_PointOnSurface(i.geom), st_convexhull(i.geom))::numeric,-2) > 500 THEN '500 à 1000m'::saisie.enum_precision + -- WHEN latitude is null AND round(st_maxdistance(st_PointOnSurface(i.geom), st_convexhull(i.geom))::numeric,-2) > 1001 THEN '> 1000m'::saisie.enum_precision + -- ELSE precision::saisie.enum_precision + -- END as precision, + 'validée'::saisie.enum_statut_validation as statut_validation, + j.id_etude, + k.id_protocole, + effectif::integer, + CASE WHEN statut_repro::text IS NOT NULL THEN (statut_repro::text)::saisie.enum_reprostatut + ELSE NULL END reprostatut, + + CASE WHEN latitude::numeric > 6000000 + THEN ST_GeomFromText('POINT('||longitude::double precision||' '||latitude::double precision||')',2154) -- coordonnées en lambert93 + WHEN latitude::numeric > 50 AND latitude::numeric < 3000000 + THEN ST_transform(ST_GeomFromText('POINT('||longitude::double precision||' '||latitude::double precision||')',27572),2154) -- coordonnées en Lambert II étendu + -- WHEN latitude is null + -- THEN st_pointonsurface(i.geom) -- pas de coordonnées XY on récupère la geom du centroide du site CenRA si elle existe et si une partie du nom du site est renseignée dans la colonne LOCALISATION + ELSE ST_transform(ST_GeomFromText('POINT('||longitude::double precision||' '||latitude::double precision||')',4326),2154) -- coordonnées en WGS84 + END as geometrie, + + code_releve, + CASE WHEN obs_null::text ilike 'oui' THEN true::boolean + ELSE false::boolean END as obs_null + +FROM import.import_data_cen38 a + LEFT JOIN inpn.taxref b ON b.cd_nom::text = a.cd_nom::text + LEFT JOIN inpn.taxref bb ON bb.nom_complet::text = a.nom_complet::text + LEFT JOIN personnel c ON (c.nom||' '||c.prenom) = trim(a.observateur1::text) AND lower(c.nom_structure) = lower(a.structure) + LEFT JOIN personnel d ON (d.nom||' '||d.prenom) = trim(a.observateur2::text) AND lower(d.nom_structure) = lower(a.structure) + LEFT JOIN personnel e ON (e.nom||' '||e.prenom) = trim(a.observateur3::text) AND lower(e.nom_structure) = lower(a.structure) + LEFT JOIN personnel f ON (f.nom||' '||f.prenom) = trim(a.observateur4::text) AND lower(f.nom_structure) = lower(a.structure) + LEFT JOIN personnel g ON (g.nom||' '||g.prenom) = trim(a.observateur5::text) AND lower(g.nom_structure) = lower(a.structure) + JOIN md.structure h ON lower(h.nom_structure) = lower(replace(replace(a.structure,' & ','&'),'CEN38&','')) + LEFT JOIN md.etude j on a.id_etude = j.nom_etude + LEFT JOIN md.protocole k USING (id_protocole) + --LEFT JOIN import.sites_serena_cren_mu_2015 i ON lower(i.nom_site) like '%'||lower(trim(localisation,'s'))||'%' -- on joint la table des sites CenRA pour récupérer éventuellement le centroide d'un site si pas de XY + --JOIN md.etude i ON i.nom_etude = a.id_etude -- à utiliser si les champs id_etude et id_protocole contiennent le libelle plutôt que l'identifiant, modifier aussi au-dessus id_etude en i.id_etude, id_protocole en j.id_protocole + --JOIN md.protocole j ON j.libelle = a.id_protocole +) +""".format(id_numerateur=numerateur.id_personne.item()) + +select_dataTOinsert = """ +SELECT DISTINCT + heure::TIME WITHOUT TIME ZONE as heure_obs, date_obs, date_debut_obs, date_fin_obs, date_textuelle, + regne, nom_vern, nom_complet, cd_nom, effectif_textuel, + 
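+    -- Column order in this SELECT must line up with the INSERT INTO
+    -- saisie.saisie_observation column list further down: the WITH, INSERT and
+    -- SELECT fragments are concatenated into a single statement before execution.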
    effectif_min, effectif_max, type_effectif, phenologie,
+    longitude, latitude,
+    localisation,
+    observateur, numerisateur,
+    structure, remarque_obs,
+    c.code_insee,
+    diffusable, "precision",
+    statut_validation,
+    id_etude, id_protocole,
+    effectif, reprostatut,
+    t.geometrie,
+    code_releve id_waypoint,
+    obs_null
+FROM t, ign_bd_topo.commune c
+WHERE ST_intersects(t.geometrie, c.geometrie)
+--ORDER BY nom_complet, date_obs, longitude, latitude
+;
+"""
+df_data_insert = gpd.read_postgis(
+    sql = text(with_dataTOinsert+select_dataTOinsert),
+    con = con,
+    geom_col='geometrie')
+# pd.read_sql_query(
+#     sql = text(with_dataTOinsert+select_dataTOinsert),
+#     con = con,)
+
+
+if df_data_insert.shape[0] == data_import.shape[0]:
+    insert_data = """
+    INSERT INTO saisie.saisie_observation(
+        heure_obs, date_obs, date_debut_obs, date_fin_obs, date_textuelle, regne, nom_vern, nom_complet, cd_nom, effectif_textuel,
+        effectif_min, effectif_max, type_effectif, phenologie, longitude, latitude, localisation, observateur, numerisateur, structure, remarque_obs, code_insee, diffusable, "precision",
+        statut_validation, id_etude, id_protocole, effectif, reprostatut, geometrie, id_waypoint, obs_null)
+    """
+    with con.begin() as cnx:
+        cnx.execute(
+            text(with_dataTOinsert+
+                 insert_data+
+                 select_dataTOinsert)
+        )
+    update_structure_name = """
+    UPDATE saisie.suivi_saisie_observation a
+    SET utilisateur = REPLACE(utilisateur, 'inconnu', '{email_numerator}')
+    WHERE date_operation > '{today}'
+    ;
+    """.format(
+        email_numerator=numerateur.email.item(),
+        today = today
+    )
+    drop_temp_table = """
+    DROP TABLE IF EXISTS md.temp_import_personne;
+    """
+    with con.begin() as cnx:
+        cnx.execute(update_structure_name)
+        cnx.execute(drop_temp_table)
+else:
+    print(data_import[~data_import.index.isin(df_data_insert.index)])
+    data_import[~data_import.index.isin(data_import.drop_duplicates().index)]
+
+
+
+check_datageom = """
+DROP VIEW IF EXISTS import.test_geom_prestation;
+"""
+with con.begin() as cnx:
+    cnx.execute(check_datageom)
diff --git a/1_SICEN/import_fauneisereTOsicen (biolovision).py b/1_SICEN/import_fauneisereTOsicen (biolovision).py
new file mode 100644
index 0000000..22fa97a
--- /dev/null
+++ b/1_SICEN/import_fauneisereTOsicen (biolovision).py
@@ -0,0 +1,470 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
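+# The import scripts in this directory all rebuild the same SQLAlchemy engine
+# inline. A minimal sketch of a shared helper (hypothetical; credential variable
+# names PGUSER/PGPASSWORD/PGHOST/PGDATABASE are an assumption, read from the
+# environment rather than hardcoded) could look like this:
+#
+#     import os
+#     from sqlalchemy import create_engine
+#     from sqlalchemy.engine import URL
+#
+#     def make_engine():
+#         url = URL.create(
+#             'postgresql+psycopg2',
+#             username=os.environ['PGUSER'],
+#             password=os.environ['PGPASSWORD'],
+#             host=os.environ['PGHOST'],
+#             database=os.environ['PGDATABASE'],
+#         )
+#         return create_engine(url)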
+
+from sqlalchemy import create_engine, text
+from sqlalchemy.engine import URL
+from datetime import datetime as dt
+import pandas as pd
+import geopandas as gpd
+
+nom_numerateur = 'GEIER'
+pnom_numerator = 'Colas'
+today = dt.now().date().isoformat()
+# Database connection parameters
+user = 'cen_admin'
+pwd = '#CEN38@venir'
+adr = '91.134.194.221'
+port = '5432'
+base = 'sicen2'
+
+url = URL.create('postgresql+psycopg2',
+    username=user,
+    password=pwd,
+    host=adr,
+    database=base,
+)
+con = create_engine(url)
+
+nom_numerateur = nom_numerateur.upper()
+pnom_numerator = pnom_numerator[0].upper() + pnom_numerator[1:]
+query_get_numerator = """
+    SELECT * FROM md.personne
+    WHERE nom = '%s'
+    AND prenom = '%s'
+    AND id_structure = 1
+""" % (nom_numerateur,pnom_numerator)
+numerateur = pd.read_sql_query(
+    sql = query_get_numerator,
+    con = con
+)
+
+
+select_import = """
+SELECT * FROM import.import_data_fauneisere_biolovision;
+"""
+data_import = pd.read_sql_query(
+    sql = select_import,
+    con = con)
+
+
+verif_taxref = '''
+SELECT CASE WHEN c.taxref_cdnom IS NULL THEN bb.cd_nom
+            ELSE c.taxref_cdnom END cdnom_taxref,
+       CASE WHEN c.cd_nom IS NULL THEN cc.cd_nom
+            ELSE c.cd_nom END cd_nom,
+       CASE WHEN c.cd_ref IS NULL THEN cc.cd_ref
+            ELSE c.cd_ref END cd_ref,
+       a.ref, a."id espèce biolovision", a."nom espèce", a."nom latin",
+       a.date, a.jour, a.mois, a.année, a."jour de l'année", a.pentade, a."décade",
+       a."numéro de la semaine", a."heure début", a."heure de début",
+       a."minute de début", a."heure fin", a."heure de fin", a."minute de fin", a.horaire,
+       --a.id_place, a.commune, a.municipality, a.county, a.country, "code insee", a.altitude,
+       a."lat (wgs84)", a."lon (wgs84)", a."type de localisation",
+       a.estimation, a.nombre, a."détails", a."code atlas", a.comportement,
+       a."prénom", a.nom, a."abréviation personne morale",
+       --a.search_export_species_sheet_protection_nat, a.search_export_entity_full_name, a.search_export_entity_tra_full_name,
+       a.search_export_species_sheet_dir_oiseaux, a.search_export_species_sheet_dir_habitat,
+       a.search_export_species_sheet_uicn, a.search_export_species_sheet_lr_nat, a.search_export_species_sheet_znieff,
+       a.search_export_species_sheet_id_fnat,
+       --a."contient des détails mortalité", a.protocol,
+       a.remarque, a."remarque privée",a."date d'insertion", a."date de dernière modification"
+  FROM import.import_data_fauneisere_biolovision a
+  LEFT JOIN (
+       SELECT DISTINCT b.taxref_cdnom, b.id_visio, t.*
+       FROM ref_lpo.taxons_fauneisere_biolovision_taxref b
+       JOIN inpn.taxref t ON t.cd_nom::integer = b.taxref_cdnom) c
+    ON a."id espèce biolovision" = c.id_visio
+  LEFT JOIN (inpn.taxons_isere_absents_taxref bb
+       JOIN inpn.taxref cc ON cc.cd_nom::integer = bb.cd_nom)
+    ON a."id espèce biolovision" = bb.id_taxon
+  ;
+'''
+df_verif = pd.read_sql_query(
+    sql = verif_taxref,
+    con = con)
+
+
+if df_verif.shape[0] != data_import.shape[0]:
+    print('shape data_import : %s' % data_import.shape[0])
+    print('shape taxref_verif : %s' % df_verif.shape[0])
+    raise Exception('ERROR: correspondence with TAXREF invalid!')
+
+# df_verif[df_verif.cd_nom.isna()][["id espèce biolovision","nom espèce","nom latin"]].drop_duplicates()
+# data_import[~data_import.id_species.isin(df_verif.id_species)] \
+#     [['name_species','latin_species','id_species']].drop_duplicates()
+
+
+
+update_structure_name = """
+UPDATE import.import_data_fauneisere_biolovision a
+SET "abréviation personne morale" = REPLACE("abréviation personne morale", 'CEN38', 'CEN Isère');
+"""
+with con.begin() as cnx:
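+    # con.begin() opens a transaction-scoped connection; the UPDATE executed in
+    # this block is committed automatically when the block exits without error.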
cnx.execute(update_structure_name) + + + +create_table_pers = """ +DROP TABLE IF EXISTS md.temp_import_personne; +CREATE TABLE md.temp_import_personne AS + +WITH nom_complet_identifies AS +(SELECT DISTINCT + ltrim(unnest(string_to_array(a."prénom",'&'))) as prenom, + ltrim(unnest(CASE WHEN "abréviation personne morale" IN ('CEN Isère','-') THEN string_to_array(''|| upper(split_part(a.nom,' /',1)),'&') + ELSE string_to_array(upper(split_part(a.nom,' (',1)),'&') END)) as nom, + CASE WHEN "abréviation personne morale" ILIKE '-' THEN 'CEN Isère' + ELSE "abréviation personne morale" END as structure, + a.nom || ' ' || a."prénom" as full_name + FROM import.import_data_fauneisere_biolovision a + ORDER BY full_name), + +personne_sicen AS +(SELECT a.id_personne, prenom, nom, nom || ' ' || prenom as nom_complet, role, specialite, + a.id_structure, b.nom_structure + FROM md.personne a + JOIN md.structure b ON a.id_structure = b.id_structure + --WHERE personne.id_personne BETWEEN 100000 AND 999998 +) + +SELECT DISTINCT + CASE WHEN b.id_personne IS NULL THEN nextval('md.personne_id_personne_hors_cen38_seq'::regclass) + ELSE b.id_personne END as id_personne, + b.id_personne id_personne_exist, + 'Observateur Faune-Isère' as remarque, + a.prenom as prenom, + a.nom as nom, + a.full_name as nom_complet, + b.nom_structure, + a.structure, + CASE WHEN a.structure ILIKE 'CEN Isère' THEN 1 + ELSE 1 END AS id_structure +FROM nom_complet_identifies a +LEFT JOIN personne_sicen b ON lower(a.nom || ' ' || a.prenom) = lower(b.nom_complet) AND b.nom_structure = a.structure +; +""" +with con.begin() as cnx: + cnx.execute(create_table_pers) + +select_pers = 'SELECT * FROM md.temp_import_personne' +df_pers = pd.read_sql_query( + sql = select_pers, + con = con) + + +select_persTOinsert = """ +SELECT DISTINCT + a.id_personne, + a.remarque, + a.prenom as prenom, + a.nom as nom, --full_name + NULL as email, + 'observ'::md.enum_role, + NULL::md.enum_specialite as specialite, + 0 as mot_de_passe, + {id_creator} as createur, + 'Mme / M.'::md.enum_titre as titre, + now()::date as date_maj, + a.id_structure +FROM md.temp_import_personne a +WHERE NOT EXISTS (SELECT DISTINCT id_personne FROM md.personne) +; +""".format(id_creator = numerateur.id_personne.item()) +df_pers_insert = pd.read_sql_query( + sql = select_persTOinsert, + con = con) + +if not df_pers_insert.empty: + raise Exception("DEV en cours ...") + insert_pers = """ + INSERT INTO md.personne( + id_personne, + remarque, + prenom, + nom, + email, + role, + specialite, + mot_de_passe, + createur, + titre, + date_maj, + id_structure) + + """ + select_persTOinsert + with con.begin() as cnx: + cnx.execute(insert_pers) +else: + pass + + +select_pers = """ +WITH observateurs_multiples AS +(SELECT DISTINCT + string_agg(a.id_personne::text,'&') as id_personne, + --a.remarque, + --a.prenom as prenom, + --a.nom as nom, + a.nom_complet, + a.id_structure, + s.nom_structure +FROM md.temp_import_personne a +LEFT JOIN md.structure s ON a.id_structure = s.id_structure +GROUP BY a.nom_complet, a.id_structure,s.nom_structure) +""" + +select_dataTOinsert = """ +SELECT DISTINCT + --a."id espèce biolovision", + --a.name_species, + --a.latin_species, + --a.protegee, + --a.embr, + --a.classe1, + --a.classe2, + a.date::date, + CASE WHEN a.horaire <> '00:00:00' THEN a.horaire::time + ELSE NULL END as heure_obs, + --a.date_year, + --a.time_start, + --a.time_stop, + CASE WHEN t.regne IS NULL THEN tt.regne + ELSE t.regne END regne, + CASE WHEN t.phylum IS NULL THEN tt.phylum + ELSE t.phylum END 
phylum, + CASE WHEN t.classe IS NULL THEN tt.classe + ELSE t.classe END classe, + CASE WHEN t.ordre IS NULL THEN tt.ordre + ELSE t.ordre END ordre, + CASE WHEN t.famille IS NULL THEN tt.famille + ELSE t.famille END famille, + CASE WHEN t.nom_valide IS NULL THEN tt.nom_valide + ELSE t.nom_valide END nom_valide, + CASE WHEN t.nom_vern IS NULL THEN tt.nom_vern + ELSE t.nom_vern END nom_vern, + CASE WHEN t.nom_complet IS NULL THEN tt.nom_complet + ELSE t.nom_complet END nom_complet, + CASE WHEN t.cd_nom IS NULL THEN tt.cd_nom + ELSE t.cd_nom END cd_nom, + --t.cd_nom cd_nom_2, + --b.taxref_cdnom, + CASE WHEN a.estimation IS NULL AND (a.nombre IS NOT NULL OR a.nombre > 0) THEN a.nombre + WHEN a.estimation LIKE 'x' THEN '1' + ELSE NULL END AS effectif, + CASE WHEN a.estimation LIKE '>' AND (a.nombre IS NOT NULL OR a.nombre > 0) THEN a.nombre + WHEN a.estimation LIKE '~' AND (a.nombre IS NOT NULL OR a.nombre > 0) THEN a.nombre + ELSE NULL END AS effectif_min, + CASE WHEN a.estimation LIKE '~' AND (a.nombre IS NOT NULL OR a.nombre > 0) THEN (a.nombre + 10) + ELSE NULL END AS effectif_max, + a."lon (wgs84)" as longitude, + a."lat (wgs84)" as latitude, + a.commune as localisation, + o.id_personne as observateur, + /*a."prénom", + a.nom, + a.entity_short_name, + a.entity_full_name, + a.tra_surname, + a.tra_name, + a.tra_full_name,*/ + {id_numerateur} as numerisateur, + o.id_structure, + concat_ws(' | ', + CASE WHEN a."détails" IS NOT NULL THEN 'Détails : '|| a."détails" ELSE NULL END, + CASE WHEN a."contient des détails mortalité" ILIKE 'oui' THEN 'Mortalité' ELSE NULL END, + CASE WHEN a.comportement IS NOT NULL THEN 'Comportement : '|| a.comportement ELSE NULL END, + CASE WHEN a."code atlas" IS NOT NULL THEN 'Code atlas : '|| c.categorie || CONCAT(' (',c.libelle,')') ELSE NULL END, + CASE WHEN a.remarque IS NOT NULL THEN 'Commentaires : '|| a.remarque ELSE NULL END + ) as remarque_obs, + --a.id_place, + --a.municipality, + --a.county, + --a.country, + "code insee" as code_insee, + TRUE as diffusable, + --CASE WHEN a.hidden ILIKE 'oui' THEN TRUE ELSE FALSE END as diffusable, + CASE WHEN lower(a."type de localisation") ILIKE 'lieu-dit' THEN 'lieu-dit' + WHEN lower(a."type de localisation") ILIKE 'localisation précise' THEN '0 à 10m' + ELSE NULL END::saisie.enum_precision AS "precision", + --a.grid_name, + --a.estimation, + 'validée'::saisie.enum_statut_validation as statut_validation, + 99 as id_etude, -- Echange de données (partenaires) + 3 as id_protocole, -- Observation aléatoire + a.altitude as elevation, + --a.hidden, + --a.admin_hidden, + --a.committee_chn, + --a.committee_chr, + --a.second_hand, + --a.private_comment, + --a.insert_date, + --a.update_date, + --a.project_code, + ST_Transform(st_setsrid(st_makepoint("lon (wgs84)", "lat (wgs84)"),4326),2154)::geometry(POINT,2154) AS geometrie, + CASE WHEN (a.nombre IS NULL OR a.nombre = 0) THEN TRUE ELSE FALSE END AS obs_null, + 8 as id_lot, -- Code du lot de données : Import Faune-Isère (exporté par LPO) + a.ref as id_origine + + FROM import.import_data_fauneisere_biolovision a + LEFT JOIN ( + SELECT DISTINCT b.taxref_cdnom, b.id_visio, t.* + FROM ref_lpo.taxons_fauneisere_biolovision_taxref b + JOIN inpn.taxref t ON t.cd_nom::integer = b.taxref_cdnom) t + ON a."id espèce biolovision" = t.id_visio + LEFT JOIN (inpn.taxons_isere_absents_taxref bb + JOIN inpn.taxref tt ON tt.cd_nom::integer = bb.cd_nom) + ON a."id espèce biolovision" = bb.id_taxon + LEFT JOIN ref_lpo.tr_code_atlas_lpo c ON a."code atlas" = c.code_atlas + JOIN observateurs_multiples o 
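+    -- observateurs_multiples (the CTE defined in select_pers) maps each
+    -- "nom prénom" string back to the '&'-separated list of id_personne built
+    -- above; an inner JOIN here drops rows whose observer could not be resolved.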
ON lower(a.nom || ' ' || a."prénom") = lower(o.nom_complet) + --LEFT JOIN saisie.saisie_observation s + --ON ((a.ref::text = s.id_origine) AND (id_lot IN (7.8))) + --AND s.id_origine IS NULL +; +""".format(id_numerateur=numerateur.id_personne.item()) +df_data_insert = gpd.read_postgis( + sql = text(select_pers+select_dataTOinsert), + con = con, + geom_col='geometrie') + +# df_data_insert[df_data_insert.id_origine.duplicated(keep=False)] +# df_data_insert[df_data_insert.id_origine.duplicated(keep=False)].cd_nom.unique() +# df_data_insert[df_data_insert.cd_nom.isna()] + +if df_data_insert.shape[0] == data_import.shape[0]: + insert_data = """ + INSERT INTO saisie.saisie_observation( + --id_obs, + date_obs, + --date_debut_obs, + --date_fin_obs, + heure_obs, + --date_textuelle, + regne, + phylum, + classe, + ordre, + famille, + nom_valide, + nom_vern, + nom_complet, + cd_nom, + --effectif_textuel, + effectif, + effectif_min, + effectif_max, + --type_effectif, + --phenologie, + --id_waypoint, + longitude, + latitude, + localisation, + observateur, + numerisateur, + --validateur, + structure, + remarque_obs, + code_insee, + --id_lieu_dit, + diffusable, + "precision", + statut_validation, + id_etude, + id_protocole, + --url_photo, + --commentaire_photo, + --decision_validation, + --determination, + elevation, + geometrie, + --qualification, + --reprostatut, + obs_null, + --uuid, + id_lot, + id_origine) + """ + with con.begin() as cnx: + cnx.execute( + text(select_pers+ + insert_data+ + select_dataTOinsert) + ) + + + update_structure_name = """ + UPDATE saisie.suivi_saisie_observation a + SET utilisateur = REPLACE(utilisateur, 'inconnu', '{email_numerator}') + WHERE date_operation > '{today}' + ; + """.format( + email_numerator=numerateur.email.item(), + today = today + ) + drop_temp_table = """ + DROP TABLE IF EXISTS md.temp_import_personne; + """ + with con.begin() as cnx: + cnx.execute(update_structure_name) + cnx.execute(drop_temp_table) + + + +{ + 'ref': 'ref', + "id espèce biolovision": 'id_species', + "lat (wgs84)": 'coord_lat', + "lon (wgs84)": 'coord_lon', + "nom espèce": 'name_species', + "nom latin": 'latin_species', + # 'abréviation de la personne morale du transmetteur', + "prénom":'surname', + 'nom':'name', + 'abréviation personne morale':'search_export_entity_short_name', + # 'personne morale', + 'nombre': 'total_count', + 'estimation': 'estimation_code', + 'jour':'date_day', + 'mois':'date_month', + 'année':'date_year', + "jour de l'année":'date_jday', + 'pentade':'date_pentade', + 'décade':'date_decade', + "numéro de la semaine":'date_week', + "heure fin":'time_stop', + "heure de fin":'time_stop_hour', + "minute de fin":'time_stop_min', + "heure début":'time_start', + "heure de début":'time_start_hour', + "minute de début":'time_start_min', + # 'id universel transmetteur', + # 'id transmetteur', + # 'e-mail', + # 'personne morale du transmetteur', + # 'id universel observateur', + # 'nom transmetteur', + # 'prénom transmetteur', + # 'transmetteur anonyme', + # 'anonyme', + # 'liste complète ?', + # 'protégée', + # 'vérification', + # 'donnée de seconde main', + # 'commentaire de la liste', + # 'ordre systématique', + "détails": 'detail', + 'horaire':'timing', + "type de localisation": 'precision', + 'comportement': 'behaviour', + 'remarque': 'comment', + "remarque privée": 'private_comment', + # 'protection nationale', + "date d'insertion": 'insert_date', + "date de dernière modification": 'update_date', + "code atlas":'atlas_code', + "code insee": 'insee', + "altitude": 
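+    # Reference mapping of the Biolovision export headers to the snake_case
+    # column names used by the older Faune-Isère export; the dict is evaluated
+    # but never assigned, so it serves as documentation only.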
    'altitude',
+    "commune": 'place',
+    "contient des détails mortalité":'has_death_info',
+    # 'id liste'
+}
\ No newline at end of file
diff --git a/1_SICEN/import_fauneisereTOsicen.py b/1_SICEN/import_fauneisereTOsicen.py
new file mode 100644
index 0000000..079b06d
--- /dev/null
+++ b/1_SICEN/import_fauneisereTOsicen.py
@@ -0,0 +1,404 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+from sqlalchemy import create_engine, text
+from sqlalchemy.engine import URL
+from datetime import datetime as dt
+import pandas as pd
+import geopandas as gpd
+
+nom_numerateur = 'GEIER'
+pnom_numerator = 'Colas'
+today = dt.now().date().isoformat()
+# Database connection parameters
+user = 'cen_admin'
+pwd = '#CEN38@venir'
+adr = '91.134.194.221'
+port = '5432'
+base = 'sicen2'
+
+url = URL.create('postgresql+psycopg2',
+    username=user,
+    password=pwd,
+    host=adr,
+    database=base,
+)
+con = create_engine(url)
+
+nom_numerateur = nom_numerateur.upper()
+pnom_numerator = pnom_numerator[0].upper() + pnom_numerator[1:]
+query_get_numerator = """
+    SELECT * FROM md.personne
+    WHERE nom = '%s'
+    AND prenom = '%s'
+    AND id_structure = 1
+""" % (nom_numerateur,pnom_numerator)
+numerateur = pd.read_sql_query(
+    sql = query_get_numerator,
+    con = con
+)
+
+create_view='''
+DROP VIEW IF EXISTS import.test_import_faune_isere;
+-- CREATE OR REPLACE VIEW import.test_import_faune_isere AS
+-- SELECT DISTINCT
+--     *,
+--     st_setsrid(st_makepoint(coord_lon_l93, coord_lat_l93),2154)::geometry(POINT,2154) AS geom
+-- FROM import.import_data_fauneisere;
+'''
+with con.begin() as cnx:
+    cnx.execute(create_view)
+
+
+select_import = """
+SELECT * FROM import.import_data_fauneisere;
+"""
+data_import = pd.read_sql_query(
+    sql = select_import,
+    con = con)
+
+
+verif_taxref = '''
+SELECT CASE WHEN b.cdnom_taxref IS NULL THEN bb.cd_nom
+            ELSE b.cdnom_taxref END cdnom_taxref,
+       b.cdnom_taxref,
+       CASE WHEN c.cd_nom IS NULL THEN cc.cd_nom
+            ELSE c.cd_nom END cd_nom,
+       --c.cd_nom,
+       CASE WHEN c.cd_ref IS NULL THEN cc.cd_ref
+            ELSE c.cd_ref END cd_ref,
+       --c.cd_ref,
+       a.id_sighting, a.id_species, a.name_species, a.id_observer, a.latin_species,
+       a.taxonomy_name, a.family_name, a.sys_order, a.date, a.date_day, a.date_month, a.date_year,
+       a.date_jday, a.date_pentade, a.date_decade, a.date_week, a.time_start, a.time_start_hour, a.time_start_min, a.time_stop, a.time_stop_hour, a.time_stop_min,
+       a.full_form, a.timing, a.id_place, a.place, a.municipality, a.county, a.country, a.insee, a.coord_lon_liie, a.coord_lat_liie, a.coord_lon_l93, a.coord_lat_l93,
+       a.coord_lat, a.coord_lon, a.coord_lat_dms, a.coord_lon_dms, a.coord_f, a.coord_e, a.coord_n, a."precision", a.altitude, a.grid_name,
+       a.estimation_code, a.total_count,
+       a.detail, a.atlas_code,
+       a.hidden, a.admin_hidden, a.behaviour,
+       --a.committee_chn, a.committee_chr,
+       a.search_export_species_sheet_dir_oiseaux, a.search_export_species_sheet_protection_nat, a.search_export_species_sheet_dir_habitat,
+       a.search_export_species_sheet_uicn, a.search_export_species_sheet_lr_nat, a.search_export_species_sheet_znieff, a.search_export_species_sheet_id_fnat,
+       a.surname, a.name, a.search_export_entity_short_name, a.search_export_entity_full_name, a.tra_surname, a.tra_name, a.id_form,
+       a.search_export_entity_tra_full_name, a.second_hand, a.comment, a.private_comment, a.daily_text_comment_rem, a.insert_date, a.update_date, a.protocol, a.has_death_info
+  FROM import.import_data_fauneisere a
+  LEFT JOIN (ref_lpo.taxons_faune_isere_taxref b
+       JOIN inpn.taxref c ON c.cd_nom::integer = b.cdnom_taxref)
+    ON a.latin_species =
b.latin_species + LEFT JOIN (inpn.taxons_isere_absents_taxref bb + JOIN inpn.taxref cc ON cc.cd_nom::integer = bb.cd_nom) + ON a.id_species = bb.id_taxon + ; +''' +df_verif = pd.read_sql_query( + sql = verif_taxref, + con = con) + +data_import[~data_import.id_species.isin(df_verif.id_species)] \ + [['name_species','latin_species','id_species']].drop_duplicates() + + + +update_structure_name = """ +UPDATE import.import_data_fauneisere a +SET search_export_entity_short_name = REPLACE(search_export_entity_short_name, 'CEN38', 'CEN Isère'); +""" +with con.begin() as cnx: + cnx.execute(update_structure_name) + + + +create_table_pers = """ +DROP TABLE IF EXISTS md.temp_import_personne; +CREATE TABLE md.temp_import_personne AS + +WITH nom_complet_identifies AS +(SELECT DISTINCT + ltrim(unnest(string_to_array(a.surname,'&'))) as prenom, + ltrim(unnest(CASE WHEN search_export_entity_short_name IN ('CEN Isère','-') THEN string_to_array(''|| upper(split_part(a.name,' /',1)),'&') + ELSE string_to_array(upper(split_part(a.name,' (',1)),'&') END)) as nom, + CASE WHEN search_export_entity_short_name ILIKE '-' THEN 'CEN Isère' + ELSE search_export_entity_short_name END as structure, + a.name || ' ' || a.surname as full_name + FROM import.import_data_fauneisere a + ORDER BY full_name), + +personne_sicen AS +(SELECT a.id_personne, prenom, nom, nom || ' ' || prenom as nom_complet, role, specialite, + a.id_structure, b.nom_structure + FROM md.personne a + JOIN md.structure b ON a.id_structure = b.id_structure + --WHERE personne.id_personne BETWEEN 100000 AND 999998 +) + +SELECT DISTINCT + CASE WHEN b.id_personne IS NULL THEN nextval('md.personne_id_personne_hors_cen38_seq'::regclass) + ELSE b.id_personne END as id_personne, + b.id_personne id_personne_exist, + 'Observateur Faune-Isère' as remarque, + a.prenom as prenom, + a.nom as nom, + a.full_name as nom_complet, + b.nom_structure, + a.structure, + CASE WHEN a.structure ILIKE 'CEN Isère' THEN 1 + ELSE 1 END AS id_structure +FROM nom_complet_identifies a +LEFT JOIN personne_sicen b ON lower(a.nom || ' ' || a.prenom) = lower(b.nom_complet) AND b.nom_structure = a.structure +; +""" +with con.begin() as cnx: + cnx.execute(create_table_pers) + + +select_persTOinsert = """ +SELECT DISTINCT + a.id_personne, + a.remarque, + a.prenom as prenom, + a.nom as nom, --full_name + NULL as email, + 'observ'::md.enum_role, + NULL::md.enum_specialite as specialite, + 0 as mot_de_passe, + {id_creator} as createur, + 'Mme / M.'::md.enum_titre as titre, + now()::date as date_maj, + a.id_structure +FROM md.temp_import_personne a +WHERE NOT EXISTS (SELECT DISTINCT id_personne FROM md.personne) +; +""".format(id_creator = numerateur.id_personne.item()) +df_pers_insert = pd.read_sql_query( + sql = select_persTOinsert, + con = con) + +if not df_pers_insert.empty: + raise Exception("DEV en cours ...") + insert_pers = """ + INSERT INTO md.personne( + id_personne, + remarque, + prenom, + nom, + email, + role, + specialite, + mot_de_passe, + createur, + titre, + date_maj, + id_structure) + + """ + select_persTOinsert + with con.begin() as cnx: + cnx.execute(insert_pers) +else: + pass + + +select_pers = """ +WITH observateurs_multiples AS +(SELECT DISTINCT + string_agg(a.id_personne::text,'&') as id_personne, + --a.remarque, + --a.prenom as prenom, + --a.nom as nom, + a.nom_complet, + a.id_structure, + s.nom_structure +FROM md.temp_import_personne a +LEFT JOIN md.structure s ON a.id_structure = s.id_structure +GROUP BY a.nom_complet, a.id_structure,s.nom_structure) +""" + 
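+# A minimal sketch (not part of the original script) of passing the numerisateur
+# id as a bound parameter instead of interpolating it with str.format, which
+# avoids quoting issues; names reuse the variables defined above:
+#
+#     from sqlalchemy import text
+#     stmt = text("SELECT CAST(:id_numerateur AS integer) AS numerisateur")
+#     with con.begin() as cnx:
+#         res = cnx.execute(stmt, {"id_numerateur": numerateur.id_personne.item()})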
+select_dataTOinsert = """ +SELECT DISTINCT + --a.id_species, + --a.name_species, + --a.latin_species, + --a.protegee, + --a.embr, + --a.classe1, + --a.classe2, + a.date::date, + CASE WHEN a.timing <> '00:00:00' THEN a.timing::time + ELSE NULL END as heure_obs, + --a.date_year, + a.time_start, + a.time_stop, + CASE WHEN t.regne IS NULL THEN tt.regne + ELSE t.regne END regne, + CASE WHEN t.phylum IS NULL THEN tt.phylum + ELSE t.phylum END phylum, + CASE WHEN t.classe IS NULL THEN tt.classe + ELSE t.classe END classe, + CASE WHEN t.ordre IS NULL THEN tt.ordre + ELSE t.ordre END ordre, + CASE WHEN t.famille IS NULL THEN tt.famille + ELSE t.famille END famille, + CASE WHEN t.nom_valide IS NULL THEN tt.nom_valide + ELSE t.nom_valide END nom_valide, + CASE WHEN t.nom_vern IS NULL THEN tt.nom_vern + ELSE t.nom_vern END nom_vern, + CASE WHEN t.nom_complet IS NULL THEN tt.nom_complet + ELSE t.nom_complet END nom_complet, + CASE WHEN t.cd_nom IS NULL THEN tt.cd_nom + ELSE t.cd_nom END cd_nom, + --t.cd_nom cd_nom_2, + --b.cdnom_taxref, + CASE WHEN a.estimation_code IS NULL AND (a.total_count IS NOT NULL OR a.total_count > 0) THEN a.total_count + WHEN a.estimation_code LIKE 'x' THEN '1' + ELSE NULL END AS effectif, + CASE WHEN a.estimation_code LIKE '>' AND (a.total_count IS NOT NULL OR a.total_count > 0) THEN a.total_count + WHEN a.estimation_code LIKE '~' AND (a.total_count IS NOT NULL OR a.total_count > 0) THEN a.total_count + ELSE NULL END AS effectif_min, + CASE WHEN a.estimation_code LIKE '~' AND (a.total_count IS NOT NULL OR a.total_count > 0) THEN (a.total_count + 10) + ELSE NULL END AS effectif_max, + a.coord_lon_l93 as longitude, + a.coord_lat_l93 as latitude, + a.place as localisation, + o.id_personne as observateur, + /*a.surname, + a.name, + a.entity_short_name, + a.entity_full_name, + a.tra_surname, + a.tra_name, + a.tra_full_name,*/ + {id_numerateur} as numerisateur, + o.id_structure, + concat_ws(' | ', + CASE WHEN a.detail IS NOT NULL THEN 'Détails : '|| a.detail ELSE NULL END, + --CASE WHEN a.has_death_info ILIKE 'oui' THEN 'Mortalité' ELSE NULL END, + CASE WHEN a.behaviour IS NOT NULL THEN 'Comportement : '|| a.behaviour ELSE NULL END, + CASE WHEN a.atlas_code IS NOT NULL THEN 'Code atlas : '|| a.atlas_code ELSE NULL END, + CASE WHEN a.comment IS NOT NULL THEN 'Commentaires : '|| a.comment ELSE NULL END + ) as remarque_obs, + --a.id_place, + --a.municipality, + --a.county, + --a.country, + a.insee as code_insee, + TRUE as diffusable, + --CASE WHEN a.hidden ILIKE 'oui' THEN TRUE ELSE FALSE END as diffusable, + CASE WHEN lower(a."precision") ILIKE 'lieu-dit' THEN 'lieu-dit' + WHEN lower(a."precision") ILIKE 'localisation précise' THEN '0 à 10m' + ELSE NULL END::saisie.enum_precision AS "precision", + --a.grid_name, + --a.estimation_code, + 'validée'::saisie.enum_statut_validation as statut_validation, + 99 as id_etude, -- Echange de données (partenaires) + 3 as id_protocole, -- Observation aléatoire + a.altitude as elevation, + --a.hidden, + --a.admin_hidden, + --a.committee_chn, + --a.committee_chr, + --a.second_hand, + --a.private_comment, + --a.insert_date, + --a.update_date, + --a.project_code, + st_setsrid(st_makepoint(coord_lon_l93, coord_lat_l93),2154)::geometry(POINT,2154) AS geometrie, + CASE WHEN (a.total_count IS NULL OR a.total_count = 0) THEN TRUE ELSE FALSE END AS obs_null, + 8 as id_lot, -- Code du lot de données : Import Faune-Isère (exporté par LPO) + a.id_sighting as id_origine + + FROM import.import_data_fauneisere a + LEFT JOIN (ref_lpo.taxons_faune_isere_taxref b + 
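+       -- This parenthesised join pair is evaluated as one unit: inpn.taxref (t)
+       -- is inner-joined to the LPO correspondence table (b) first, and that
+       -- result is LEFT JOINed to the import rows, so unmatched taxa keep NULL
+       -- taxref columns (filled from the bb/tt fallback join instead).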
JOIN inpn.taxref t ON t.cd_nom::integer = b.cdnom_taxref) + ON a.latin_species = b.latin_species + LEFT JOIN (inpn.taxons_isere_absents_taxref bb + JOIN inpn.taxref tt ON tt.cd_nom::integer = bb.cd_nom) + ON a.id_species = bb.id_taxon + JOIN observateurs_multiples o ON lower(a.name || ' ' || a.surname) = lower(o.nom_complet) + --LEFT JOIN saisie.saisie_observation s + --ON ((a.id_sighting::text = s.id_origine) AND (id_lot IN (7.8))) + --AND s.id_origine IS NULL +; +""".format(id_numerateur=numerateur.id_personne.item()) +df_data_insert = gpd.read_postgis( + sql = text(select_pers+select_dataTOinsert), + con = con, + geom_col='geometrie') + + +if df_data_insert.shape[0] == data_import.shape[0]: + insert_data = """ + INSERT INTO saisie.saisie_observation( + --id_obs, + date_obs, + --date_debut_obs, + --date_fin_obs, + heure_obs, + --date_textuelle, + regne, + phylum, + classe, + ordre, + famille, + nom_valide, + nom_vern, + nom_complet, + cd_nom, + --effectif_textuel, + effectif, + effectif_min, + effectif_max, + --type_effectif, + --phenologie, + --id_waypoint, + longitude, + latitude, + localisation, + observateur, + numerisateur, + --validateur, + structure, + remarque_obs, + code_insee, + --id_lieu_dit, + diffusable, + "precision", + statut_validation, + id_etude, + id_protocole, + --url_photo, + --commentaire_photo, + --decision_validation, + --determination, + elevation, + geometrie, + --qualification, + --reprostatut, + obs_null, + --uuid, + id_lot, + id_origine) + + """ + with con.begin() as cnx: + cnx.execute( + text(select_pers+ + insert_data+ + select_dataTOinsert) + ) + + + update_structure_name = """ + UPDATE saisie.suivi_saisie_observation a + SET utilisateur = REPLACE(utilisateur, 'inconnu', '{email_numerator}') + WHERE date_operation > '{today}' + ; + """.format( + email_numerator=numerateur.email.item(), + today = today + ) + drop_temp_table = """ + DROP TABLE IF EXISTS md.temp_import_personne; + """ + with con.begin() as cnx: + cnx.execute(update_structure_name) + cnx.execute(drop_temp_table) + diff --git a/1_SICEN/import_serena_to_sicen.py b/1_SICEN/import_serena_to_sicen.py new file mode 100644 index 0000000..ea614e6 --- /dev/null +++ b/1_SICEN/import_serena_to_sicen.py @@ -0,0 +1,499 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*-. + +from sqlalchemy import create_engine #, text +from sqlalchemy.engine import URL +from shapely.geometry import Point +from shapely.ops import nearest_points +import pandas as pd +import geopandas as gpd + + +numerisateur = ['GEIER Colas'] +lib_etude = 'Echange de données (partenaires)' +# précision possible : +# GPS / 0 à 10m / 10 à 100m / 100 à 500m / commune / lieu-dit +precision_gps = 'GPS' + + +# Parametres bdd +user = 'cgeier' +pwd = 'adm1n*bdCen' +adr = '91.134.194.221' +port = '5432' +base = 'sicen2' + +url = URL.create('postgresql+psycopg2', + username=user, + password=pwd, + host=adr, + database=base, +) +con = create_engine(url) + +dict_sp = { + 'Dilatata dilatata (Gould, 1841)':'Menetus dilatatus (A.A. 
Gould, 1841)', + 'Gomphocerippus biguttulus biguttulus (Linnaeus, 1758)':'Chorthippus biguttulus biguttulus (Linnaeus, 1758)', + 'Gomphocerippus mollis mollis (Charpentier, 1825)':'Chorthippus mollis mollis (Charpentier, 1825)', +} +dict_columns = { + 'TAXO_LATIN_C':'nom_latin', + 'RELV_NOM':'lot_donnees', + 'OBSE_DATE_J':'date_obs', + 'OBSE_LON':'longitude', + 'OBSE_LAT':'latitude', + 'SITE_NOM':'localisation', + 'OBSE_PLACE':'rmq_loc', + 'OBSV_LIBEL':'observateur', + 'DETM_LIBEL':'validateur', + 'OBSE_COMMENT':'remarque_obs', + 'OBSE_PCOLE_CHOI':'protocole', + 'OBSE_PRECIS_CHOI':'effectif_textuel', + 'OBSE_NOMBRE':'effectif', + 'OBSE_TIME':'heure_obs', + 'OBSE_COMP_CHOI':'reprostatut', + 'OBSE_CARACT_CHOI':'reprocaract', + 'OBSE_SIG_OBJ_ID':'id_origine', + 'OBSE_NEBUL_CHOI':'nebulosite', + 'OBSE_TPTAIR':'temperature', + 'OBSE_VENT_CHOI':'vent' +} +# dict_columns = { +# 'Nom scientifique':'nom_latin', +# 'CD_nom':'cd_nom', +# 'Observateur':'observateur', +# 'Date_Relevé':'date_obs', +# 'Code_Relevé':'id_waypoint', +# 'X lambert 93':'longitude', +# 'Y Lambert 93':'latitude', +# 'Altinf_Relevé':'elevation', +# 'Nom_LieuDit':'localisation', +# 'Description_Relevé':'remarque_obs', +# 'Statuts':'remarque_obsStatut', +# 'Lot_données':'lot_donnees', +# 'Localisation_Relevé':'rmq_loc' +# } + + + +# sql = 'SELECT * FROM %s.%s LIMIT 10;' % ('saisie','saisie_observation') +# saisie = gpd.read_postgis( +# sql = sql, +# con = con, +# geom_col = 'geometrie' +# ) + +def near(point,df2,geom_union,src_column): + # find the nearest point and return the corresponding Place value + geom2_col = df2.geometry.name + nearest = df2[geom2_col] == nearest_points(point, geom_union)[1] + # print(nearest) + # print(df2[nearest][src_column]) + return df2[nearest][src_column].values[0] + + +def get_refGeomTable(table, schema,geom_col='geometrie',colnames=None,mask=None,buffer=None): + ''' + Récupération de données géométriques dasn une table de référence + ''' + pkey = con.dialect.get_pk_constraint(con,table,schema)['constrained_columns'][0] + if colnames: + colnames = [pkey,*colnames] + else: + colnames = [pkey] + cols = ','.join(colnames) + sql = 'SELECT %s,%s FROM %s.%s' % (cols,geom_col,schema,table) + if mask: + if buffer : + mask = mask.buffer(buffer) + sql = sql + """ WHERE ST_Intersects({geom_col}, 'SRID=2154;{mask}')""".format( + geom_col=geom_col, + mask=mask) + gdf = gpd.read_postgis( + sql,con, + geom_col=geom_col) + return gdf + + +def get_refTable(table, schema, colnames=None): + pkey = con.dialect.get_pk_constraint(con,table,schema)['constrained_columns'][0] + if colnames: + colnames = [pkey,*colnames] + else: + colnames = [pkey] + + sql = 'SELECT %s' % (','.join(colnames)) + sql = sql + ' FROM %s.%s' % (schema,table) + df = pd.read_sql(sql,con) + return df + + +def create_geometry(x,y): + geom = [Point(xy) for xy in zip(x, y)] + return geom + + +def normalize_obs(obs,name='first',sep=';'): + ''' + :obs: pd.Series. Colonne des observateurs. + :name: str. 'first' si le nom est cité en premier. + 'first',None. Défault 'first'. + :sep: str. séparateur de personne dans le cas de + multi-observateur. Défault ';'. 
+ ''' + obs = obs.replace(sep,',',regex=True) + obs = obs.replace(' ,',',',regex=True) + obs = obs.replace(', ',',',regex=True) + obs = obs.str.split(',',expand=True).stack() + obs = obs.str.split(' ',expand=True) + if name == 'first': + obs[0] = obs[0].str.upper() + obs[1] = obs[1].str[0].str.upper() + obs[1].str[1:] + obs['pers'] = obs[0] + ' ' + obs[1] + else: + obs[1] = obs[1].str.upper() + obs[0] = obs[0].str[0].str.upper() + obs[0].str[1:] + obs['pers'] = obs[1] + ' ' + obs[0] + obs = obs['pers'].unstack() + obs['pers'] = obs[0] + if 1 in obs.columns: + obs.loc[~obs[1].isna(),'pers'] = obs.loc[~obs[1].isna(),'pers'] + '&' +\ + obs.loc[~obs[1].isna(),1] + return obs['pers'] + + +def get_pers(obs): + ''' + :obs: pd.Series. Colonne des observateurs. + ''' + obs = pd.Series(obs.unique()) + obs = obs.str.split(' ',expand=True) + obs_name = "','".join(obs[0].str.upper().tolist()) + obs_surn = "','".join(obs[1].tolist()) + sql = ''' + SELECT + concat(nom,' ',prenom) pers, + id_personne, + id_structure + FROM md.personne + WHERE nom in ('%s') + AND prenom in ('%s') + AND id_structure != 49 + ;''' % (obs_name,obs_surn) + obs = pd.read_sql(sql,con) + return obs + + +# import TAXREF +taxref = pd.read_sql_table( + table_name='taxref', + con=con, + schema='inpn' +) +taxref = taxref[[ + 'regne','phylum','classe','ordre', + 'famille','nom_valide','nom_vern', + 'nom_complet','cd_nom']] + +taxref['nom_latin'] = taxref.nom_complet.str.replace(' \(\#ref\)','',regex=True).copy() +# taxref['nom_latin2'] = taxref.nom_latin.str.split(r' \(|, [0-9]',expand=True)[0] +tmp = taxref[ + (taxref.nom_latin=='Euconulus Reinhardt, 1883')& + (taxref.cd_nom!='192372')].index +taxref.drop(tmp,inplace=True) + + +# import DATA +path = '/home/colas/Documents/9_PROJETS/4_SICEN/IMPORT/' +file = 'Données Le Cheylas GD_2020.xlsx' +# file = 'DonneesMeF.ods' +df = pd.read_excel(path+file) +# df = pd.read_csv(path+file) + +# mise en forme DATA +df.index.name = 'id' +df.rename(columns=dict_columns, inplace=True) +keep_col = df.columns[df.columns.isin(dict_columns.values())] +df = df[keep_col] +df.nom_latin.replace(dict_sp,inplace=True) +df['statut_validation'] = 'validée' +# df['lieu_dit'] = df['localisation'].copy() + +if 'EFFC_OIS_Nb_Adult_Mâl' in df.columns: + df.loc[~df.EFFC_OIS_Nb_Adult_Mâl.isna(),'type_effectif'] = 'Adulte' + del df['EFFC_OIS_Nb_Adult_Mâl'] +if 'id_origine' in df.columns: + df.loc[~df.id_origine.isna(),'id_origine'] = df.loc[~df.id_origine.isna(),'id_origine'].astype(int).astype(str) +# Concaténation des remarques de localisation +if 'rmq_loc' in df.columns: + df.loc[~df.rmq_loc.isna(),'localisation'] = df.loc[~df.rmq_loc.isna(),'localisation'] + \ + ', ' + df.loc[~df.rmq_loc.isna(),'rmq_loc'] + del df['rmq_loc'] +# Concaténation des remarques observateurs +if 'remarque_obs' in df.columns: + df.loc[~df.remarque_obs.isna(),'remarque_obs'] = 'Commentaires : ' + \ + df.loc[~df.remarque_obs.isna(),'remarque_obs'] +if 'remarque_obsStatut' in df.columns: + df.loc[~df.remarque_obsStatut.isna(),'remarque_obs'] = df.loc[~df.remarque_obsStatut.isna(),'remarque_obs'] + \ + '; rmq_statut : ' + df.loc[~df.remarque_obsStatut.isna(),'remarque_obsStatut'] + del df['remarque_obsStatut'] +if 'nebulosite' in df.columns: + df.loc[~df.nebulosite.isna(),'remarque_obs'] = df.loc[~df.nebulosite.isna(),'remarque_obs'] + \ + '; nebul : ' + df.loc[~df.nebulosite.isna(),'nebulosite'] + del df['nebulosite'] +if 'temperature' in df.columns: + df.loc[~df.temperature.isna(),'remarque_obs'] = df.loc[~df.temperature.isna(),'remarque_obs'] + 
\ + '; temp : ' + df.loc[~df.temperature.isna(),'temperature'].astype(str) + del df['temperature'] +if 'vent' in df.columns: + df.loc[~df.vent.isna(),'remarque_obs'] = df.loc[~df.vent.isna(),'remarque_obs'] + \ + '; ' + df.loc[~df.vent.isna(),'vent'] + del df['vent'] +if 'id_waypoint' in df.columns: + df.loc[~df.id_waypoint.isna(),'id_waypoint'] = df.loc[~df.id_waypoint.isna(),'id_waypoint']\ + .astype(int).astype(str) +if 'precision' not in df.columns: + df['precision'] = 'GPS' + +# format date / time +# df['date_obs'] = df['date_obs'].astype(str).astype(int) +# df['date_obs'] = pd.to_datetime(df['date_obs']) +# df['date_obs'].astype('datetime64') +# pd.to_datetime(df['heure_obs'],format='%H:%M:%S') +# df['heure_obs'] + +# create geometrie +df['geometrie'] = create_geometry(df.longitude, df.latitude) +if all(df.longitude < 10): + epsg = 4326 +elif all(df.longitude > 900000): + epsg = 2154 +df = df.set_geometry('geometrie', crs=epsg) +if epsg == 4326: + df.to_crs(2154, inplace=True) +elif epsg == 2154: + df['longitude'] = df.to_crs(4326).geometrie.x + df['latitude'] = df.to_crs(4326).geometrie.y +df_union = df.unary_union + +# join code_INSEE & Lieu-dit +# commune +com = get_refGeomTable( + 'commune', 'ign_bd_topo', + geom_col='geometrie',mask=df_union) +com_union = com.unary_union +# lieu_dit +ldt = get_refGeomTable( + 'lieu_dit', 'ign_bd_topo', colnames=['nom'], + geom_col='geometrie',mask=com_union, buffer=1000) +ldt.rename(columns={'id': 'id_lieu_dit'}, inplace=True) +ldt_union = ldt.unary_union +# jointure lieu_dit / communes +if 'lieu_dit' in df.columns: + df['lieu_dit'] = df.lieu_dit.str.lower() + df = pd.merge(df,ldt[['id_lieu_dit','nom']],left_on='lieu_dit',right_on='nom') + del df['nom'] + del df['lieu_dit'] +else: + df['id_lieu_dit'] = [ + near(geom,ldt,ldt_union,'id_lieu_dit') + for geom in df.geometrie] +df = gpd.sjoin(df,com) +del df['index_right'] + +# get observateur/validateur +df['observateur'] = normalize_obs(df['observateur'], sep='-') +obs = get_pers(df.observateur) +if not obs.pers.is_unique: + raise Exception('duplicate observateur') + ind = obs[(obs.pers == 'GRANGE Benjamin')&(obs.id_structure == 1)].index + obs.drop(ind, inplace=True) + +obs['id_personne'] = obs['id_personne'].astype(str) +obs['id_structure'] = obs['id_structure'].astype(str) +df['observateur'] = df['observateur'].replace( + to_replace=[*obs.pers], + value=[*obs.id_personne], + regex=True) +if 'validateur' in df.columns: + val = get_pers(df.validateur) + val['id_personne'] = val['id_personne'].astype(str) + df['validateur'].replace(*obs.pers,*obs.id_personne, inplace=True) +else: + df['validateur'] = df['observateur'].str.split('&',expand=True)[0] +for p in obs.iterrows(): + o = p[1]['id_personne'] + s = p[1]['id_structure'] + df.loc[df.observateur.str.contains(o),'structure'] = s + +# get numerisateur +num = get_pers(pd.Series(numerisateur)) +df['numerisateur'] = num.id_personne.astype(str)[0] + +# get id_lot / id_etude +lot = get_refTable('lot_donnee','md',colnames=['libelle']) +etude = get_refTable('etude','md',colnames=['nom_etude']) +df = pd.merge(df,lot,left_on='lot_donnees',right_on='libelle') +del df['libelle'] +del df['lot_donnees'] +df['id_etude'] = etude.loc[etude.nom_etude == lib_etude,'id_etude'].values[0] + +# get protocole +if 'protocole' in df.columns: + proto = get_refTable('protocole','md',colnames=['libelle']) + df.protocole = df.protocole \ + .replace( + to_replace=[ + 'Capture à vue (à la main, au filet à papillon...)', + "Recherche ciblée (bouses, cadavres d'animaux...)"], + 
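+            # Map the SERENA protocol labels onto the exact libelle values stored
+            # in md.protocole so the merge on 'libelle' below finds them.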
value=['Capture à vue',"Recherche d'indices de présence"]) + + df = pd.merge(df,proto, left_on='protocole',right_on='libelle') + del df['libelle'] + del df['protocole'] + + +# join taxref +if 'cd_nom' in df.columns: + if df['cd_nom'].dtype == int: df['cd_nom'] = df['cd_nom'].astype(str) + if 'nom_latin' in taxref.columns: del taxref['nom_latin'] + if 'nom_latin' in df.columns: del df['nom_latin'] + df = df.merge(taxref,on='cd_nom') +else: + df.nom_latin = df.nom_latin.str.strip() + tt = taxref[taxref.nom_latin.isin(df.nom_latin.unique())] + len_taxo = df.nom_latin.unique().shape[0] + len_taxref = tt.shape[0] + if len_taxo == len_taxref: + df = df.merge(tt,on='nom_latin') + del df['nom_latin'] + else: + contain = '|'.join(df.nom_latin.unique()) + tt = taxref[taxref.nom_latin.str.contains(contain)] + len_taxref = tt.shape[0] + if len_taxo == len_taxref: + df = df.merge(tt,on='nom_latin') + del df['nom_latin'] + else: + raise Exception('error corespondance cd_nom') + contain = '|'.join(df.nom_latin.str.split(r' \(|, [0-9]',expand=True)[0].unique()) + tt = taxref[taxref.nom_latin2.str.contains(contain)] + + +# get statuts de reproduction +if 'reprostatut' in df.columns: + if 'reprocaract' in df.columns: + df.loc[(~df.reprostatut.isna())&(~df.reprocaract.isna()),'remarque_obs'] = df['remarque_obs'] + \ + '; rmq_repro : ' + df.loc[(~df.reprostatut.isna())&(~df.reprocaract.isna()),'reprocaract'] + df.loc[df.reprostatut.isna(),'reprostatut'] = df.loc[df.reprostatut.isna(),'reprocaract'] + del df['reprocaract'] + + sql = 'SELECT unnest(enum_range(null::{0}.{1})) {1}'.format('saisie','enum_reprostatut') + reprostat = pd.read_sql(sql,con) + reprostat = pd.concat( + [reprostat,reprostat.enum_reprostatut.str.split('_',expand=True)], + axis=1 ) + lst_phyl = df.phylum.unique() + lst_clas = set([*df.classe.unique()]) + lst_ordr = set([*df.ordre.unique()]) + AMP = set(['Amphibia']) + CHIR = set([]) + INV = set(['Bivalvia','Gastropoda']) + MAM = set([]) + ODO = set(['Hexapoda']) + ordr_ODO = set(['Odonata']) + ordr_INV = set(['Orthoptera','Lepidoptera']) + OIS = set(['Aves']) + REP = set([]) + + if any(lst_clas.intersection(AMP)): + # AMP = list(AMP) + repro = reprostat[reprostat[0]=='AMP'] + df.loc[df.classe.isin(AMP),'reprostatut'] = df[df.classe.isin(AMP)].reprostatut \ + .replace( + to_replace='Reproduction confirmée', + value='Accouplement') \ + .replace(to_replace=[*repro[1]],value=[*repro.enum_reprostatut]) + # df.loc[df.classe.isin(AMP),'reprostatut'] = df[df.classe.isin(AMP)].reprostatut \ + # .replace(to_replace=[*repro[1]],value=[*repro.enum_reprostatut]) + + if any(lst_clas.intersection(CHIR)): + # CHIR = list(CHIR) + repro = reprostat[reprostat[0]=='CHIR'] + + if any(lst_clas.intersection(MAM)): + # MAM = list(MAM) + repro = reprostat[reprostat[0]=='MAM'] + + if any(lst_clas.intersection(INV)): + # INV = list(INV) + repro = reprostat[reprostat[0]=='INV'] + df.loc[df.classe.isin(INV),'reprostatut'] = df[df.classe.isin(INV)].reprostatut \ + .replace( + to_replace='Reproduction confirmée', + value='Accouplement') \ + .replace(to_replace=[*repro[1]],value=[*repro.enum_reprostatut]) + # df.loc[df.classe.isin(INV),'reprostatut'] = df[df.classe.isin(INV)].reprostatut \ + # .replace(to_replace=[*repro[1]],value=[*repro.enum_reprostatut]) + + if any(lst_clas.intersection(ODO)) and any(lst_ordr.intersection(ordr_INV)): + # ODO = list(ODO) + # ordr_INV = list(ordr_INV) + repro = reprostat[reprostat[0]=='INV'] + df.loc[(df.classe.isin(ODO))&(df.ordre.isin(ordr_INV)),'reprostatut'] = \ + 
df[(df.classe.isin(ODO))&(df.ordre.isin(ordr_INV))].reprostatut \ + .replace( + to_replace='Reproduction confirmée', + value='Accouplement') \ + .replace(to_replace=[*repro[1]],value=[*repro.enum_reprostatut]) + # df.loc[(df.classe.isin(ODO))&(df.ordre.isin(ordr_INV)),'reprostatut'] = \ + # df[(df.classe.isin(ODO))&(df.ordre.isin(ordr_INV))].reprostatut \ + # .replace(to_replace=[*repro[1]],value=[*repro.enum_reprostatut]) + # ODO = set(ODO) + + if any(lst_clas.intersection(ODO)) and any(lst_ordr.intersection(ordr_ODO)): + # ODO = list(ODO) + # ordr_ODO = list(ordr_ODO) + repro = reprostat[reprostat[0]=='ODO'] + df.loc[(df.classe.isin(ODO))&(df.ordre.isin(ordr_ODO)),'reprostatut'] = \ + df[(df.classe.isin(ODO))&(df.ordre.isin(ordr_ODO))].reprostatut \ + .replace( + to_replace=['Accouplement','Reproduction confirmée','Reproduction possible','Reproduction probable'], + value=['Tandem','Exuvie/émergence','Mâles+Femelles','Immature']) \ + .replace(to_replace=[*repro[1]],value=[*repro.enum_reprostatut]) + # df.loc[(df.classe.isin(ODO))&(df.ordre.isin(ordr_ODO)),'reprostatut'] = \ + # df[(df.classe.isin(ODO))&(df.ordre.isin(ordr_ODO))].reprostatut \ + # .replace(to_replace=[*repro[1]],value=[*repro.enum_reprostatut]) + + if any(lst_clas.intersection(OIS)): + # OIS = list(OIS) + repro = reprostat[reprostat[0]=='OIS'] + df.loc[df.classe.isin(OIS),'reprostatut'] = df[df.classe.isin(OIS)].reprostatut \ + .replace( + to_replace='Déplacement (marche, vol, nage)', + value='Fuite - envol') \ + .replace(to_replace=[*repro[1]],value=[*repro.enum_reprostatut]) + # df.loc[df.classe.isin(OIS),'reprostatut'] = df[df.classe.isin(OIS)].reprostatut \ + # .replace(to_replace=[*repro[1]],value=[*repro.enum_reprostatut]) + + if any(lst_clas.intersection(REP)): + # REP = list(REP) + repro = reprostat[reprostat[0]=='REP'] + + + + + + + + + + +df.to_postgis( + name='saisie_observation', + con=con, + schema='saisie', + if_exists='append', + index=False, + geom_col='geometrie', +) diff --git a/1_SICEN/refresh_vm.py b/1_SICEN/refresh_vm.py new file mode 100644 index 0000000..bbf6431 --- /dev/null +++ b/1_SICEN/refresh_vm.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +from pycen import con_sicen + +def refresh_vm(): + sql = "REFRESH MATERIALIZED VIEW saisie.vm_synthese_observations WITH DATA;" + with con_sicen.begin() as cnx: + cnx.execute(sql) + + +if __name__ == "__main__": + refresh_vm() \ No newline at end of file diff --git a/1_SICEN/sicen2_clean.py b/1_SICEN/sicen2_clean.py new file mode 100644 index 0000000..6cf744e --- /dev/null +++ b/1_SICEN/sicen2_clean.py @@ -0,0 +1,449 @@ +from os import replace +import pandas as pd +import numpy as np +from geoalchemy2 import Geometry +# import numpy as np +from sqlalchemy import create_engine, text +from sqlalchemy.engine import URL +import geopandas as gpd + + +# Parametres bdd +user = 'cgeier' +pwd = 'adm1n*bdCen' +adr = '91.134.194.221' +port = '5432' +base = 'sicen2' + +bckup_user = 'postgres' +bckup_pwd = '1234' +bckup_adr = '192.168.0.81' +bckup_port = '5455' +bckup_base = 'sicen2' + + +url = URL.create('postgresql+psycopg2', + username=user, + password=pwd, + host=adr, + database=base, +) +con = create_engine(url) +bckup_url = URL.create('postgresql+psycopg2', + username=bckup_user, + password=bckup_pwd, + host=bckup_adr, + database=bckup_base, +) +bckup_con = create_engine(bckup_url) +# con = create_engine('postgresql+psycopg2://{1}:{0}@{2}:{3}/{4}'.format(pwd, user, adr, port, base), echo=False) +# bckup_con = 
create_engine('postgresql+psycopg2://{1}:{0}@{2}:{3}/{4}'.format(bckup_pwd, bckup_user, bckup_adr, bckup_port, bckup_base), echo=False)
+
+
+def update_data(df, con, sch, tab, epsg=None):
+    columns = df.columns.to_list()
+    frame = df.copy()
+    frame.replace("'","''", regex=True, inplace=True)
+    pkey = con.dialect.get_pk_constraint(con, table_name=tab, schema=sch)['constrained_columns']
+    for col in frame.select_dtypes(include=['object']).columns:
+        test = frame[frame[col].str.contains("'", na=False)]
+        if not test.empty:
+            frame[col].replace("'", "''", inplace=True, regex=True)
+            # print(frame[col])
+
+
+    if 'geom' in columns or 'geometry' in columns:
+        if epsg or df.crs:
+            if not epsg:
+                epsg = df.crs.to_epsg()
+            name_geom = df.geometry.name
+            frame[name_geom] = 'SRID={epsg};'.format(epsg=epsg) + df[name_geom].map(str)
+        # else: return 'No crs define in update_data or in gdf'
+
+    for c, col in enumerate(columns):
+        if c == 0:
+            frame['insert'] = "('" + frame[col].map(str)
+            # break
+        else:
+            frame['insert'] = frame['insert'] + "','" + frame[col].map(str)
+        if c == len(columns)-1:
+            frame['insert'] = frame['insert'] + "')"
+        # if c == 0:
+        #     frame['insert'] = '("' + frame[col].map(str)
+        # # break
+        # else:
+        #     frame['insert'] = frame['insert'] + '","' + frame[col].map(str)
+        # if c == len(columns)-1:
+        #     frame['insert'] = frame['insert'] + '")'
+
+    lst_cols = ', '.join(columns)
+    lst_vals = ','.join(frame['insert'])
+    lst_vals = lst_vals.replace("'None'", 'NULL')  # str.replace returns a new string; it has no inplace argument
+    lst_dupKey = ', '.join([col + '=EXCLUDED.' + col for col in columns])
+    lst_pkey = ','.join(pkey)
+
+    sql = '''INSERT INTO {sch}.{tab} ({lst_cols}) VALUES {lst_vals} ON CONFLICT ({lst_pkey}) DO UPDATE SET {lst_dupKey} ;'''.format(
+        sch=sch, tab=tab, lst_cols=lst_cols, lst_vals=lst_vals, lst_dupKey=lst_dupKey, lst_pkey=lst_pkey)
+    # sql = '''INSERT INTO {sch}.{tab} ({lst_cols})
+    #     VALUES {lst_vals}
+    #     ON CONFLICT DO NOTHING;
+    #     '''.format(sch=sch, tab=tab, lst_cols=lst_cols, lst_vals=lst_vals)
+    try:
+        con.execute(sql)
+        # con.execute(text(sql))
+        print('''
+Update OK !''')
+    except Exception as exept:
+        print(exept)
+
+
+df = pd.read_sql_table(
+    table_name = 'personne',
+    con = con,
+    schema = 'md',
+    index_col='id_personne')
+
+tmp = pd.DataFrame(df.nom.str.split(' ',1).tolist(), columns = ['nom','prenom'], index=df.index)
+tmp = tmp[~tmp.prenom.isnull()]
+df.loc[tmp.index,'prenom'] = tmp['prenom']
+df.loc[tmp.index,'nom'] = tmp['nom']
+df['nom'] = [nom.replace('CEN38_','') for nom in df['nom']]
+df['nom'] = [nom.replace('GENTIANA_','') for nom in df['nom']]
+df['nom'] = [nom.replace('LPO38_','') for nom in df['nom']]
+df['nom'] = [nom.replace('LPO_','') for nom in df['nom']]
+df['nom'] = [nom.replace('CENRA_','') for nom in df['nom']]
+df['nom'] = [nom.replace('GRPLS_','') for nom in df['nom']]
+
+rm_row = ['ISÈRE', 'Ecrin', 'FUR', 'BRIQUIR', 'BORGNE', '(FMBDS)', 'Isère', 'Rhône', '(Presonnel)', 'Monsieur', 'Batrachologique de France', '(Ecosphère)',
+'PIC VERT Association', 'BAGOUSSE', '(BIOTOPE)', '(Dauphinelle)', 'Cras', '(GRPLS)', 'et Vie Sociale', '(ONCFS)', 'campagne sauvetage amphibiens']
+df = df[~df.prenom.isin(rm_row)]
+tmp = pd.DataFrame(df.prenom.str.split(' ',1).tolist(), columns = ['nom','prenom'], index=df.index)
+tmp = tmp[~tmp.prenom.isnull()]
+tmp.drop(index=[100032,8628,8645,4238,8058,8070,8353,1099,1081,1323,1324], inplace=True)
+df.loc[tmp.index,'nom'] = df.loc[tmp.index,'nom'] + ' ' + tmp['nom']
+df.loc[tmp.index,'prenom'] = tmp['prenom']
+tmp = pd.DataFrame(
+    df.loc[df.nom=='Abbé',['nom','prenom']].prenom.str.split(' ',1).tolist(),
+    columns = ['prenom','nom'],
+    index=df.loc[df.nom=='Abbé',['nom','prenom']].index)
+tmp2 = df.loc[df.nom=='Abbé',['nom','prenom']]
+df.loc[tmp.index,'nom'] = tmp['nom']
+df.loc[tmp.index,'prenom'] = tmp2['nom'] + ' ' + tmp['prenom']
+df.reset_index(inplace=True)
+tmp = df.copy()
+tmp = tmp[['id_personne','nom','prenom', 'id_structure']]
+# update_data(tmp,con,sch='md',tab='personne')
+
+id_old = '8044'
+id_new = '1000014'
+sql='''SELECT * FROM saisie.saisie_observation WHERE observateur = '{0}' OR validateur = {0};'''.format(id_old)
+# sql = 'SELECT nom, prenom FROM md.personne WHERE id_personne= 6191;'
+pd.read_sql(
+    sql=sql,
+    con=con
+)[['observateur','validateur']]
+# df[['prenom', 'nom']].values.tolist()
+
+
+# sql = '''UPDATE saisie.saisie_observation SET validateur = 100077
+#     WHERE validateur = 1110;'''
+try:
+    con.execute(sql)
+    # con.execute(text(sql))
+    print('''
+Update OK !''')
+except Exception as exept:
+    print(exept)
+
+
+tmpx = df.copy()
+tmpx = tmpx[['id_personne','nom','prenom','id_structure']]
+tutux = tmpx.drop_duplicates(['nom','prenom','id_structure'], keep=False)
+tomx = tmpx[~tmpx.id_personne.isin(tutux.id_personne)]
+tomx = tomx.sort_values('nom')
+
+for nom in tomx.nom.unique().tolist():
+    tmp1 = tomx[tomx.nom==nom]
+    keep = max(tmp1.id_personne)
+    rep = min(tmp1.id_personne)
+    sql = '''UPDATE saisie.saisie_observation SET observateur = '{keep}' WHERE observateur = '{replace}';'''.format(
+        keep=keep, replace=rep
+    )
+    # sql = '''UPDATE saisie.saisie_observation SET validateur = 100077
+    #     WHERE validateur = 1110;'''
+    try:
+        con.execute(sql)
+        # con.execute(text(sql))
+        print('''
+    Update OK !''')
+    except Exception as exept:
+        print(exept)
+
+    sql='''SELECT * FROM saisie.saisie_observation WHERE observateur = '{replace}' OR validateur = {replace};'''.format(
+        replace=rep
+    )
+    # sql = 'SELECT nom, prenom FROM md.personne WHERE id_personne= 6191;'
+    test = pd.read_sql(
+        sql=sql,
+        con=con
+    )[['observateur','validateur']]
+    if test.empty:
+        print(''' OK for %s'''%nom)
+
+    else:
+        print('ERROR !!!!!!!!!')
+        break
+
+
+    print(nom)
+    sql='''SELECT * FROM md.personne WHERE nom like '{nom}%';'''.format(nom=nom)
+    tutu = pd.read_sql(
+        sql=sql,
+        con=con
+    )
+    if tutu.shape[0] > 1:
+        # show the candidate duplicate rows so they can be merged by hand
+        print(tutu[['id_personne','nom','prenom','id_structure']])
+
+
+
+
+
+
+
+
+
+
+from sqlalchemy import create_engine, text
+from sqlalchemy.engine import URL
+from shapely.geometry import Point
+import matplotlib.pyplot as plt
+import contextily as cx
+import geopandas as gpd
+import pandas as pd
+# Database connection parameters
+user = 'cgeier'
+pwd = 'adm1n*bdCen'
+adr = '91.134.194.221'
+port = '5432'
+base = 'sicen2'
+url = URL.create('postgresql+psycopg2',
+    username=user,
+    password=pwd,
+    host=adr,
+    database=base,
+)
+con = create_engine(url)
+
+df1 = pd.read_sql_table(
+    table_name = 'saisie_observation',
+    con = con,
+    schema = 'saisie',
+    index_col='id_obs')
+
+# both columns must come from df1: at this point df still holds md.personne
+geometry = [Point(xy) for xy in zip(df1['longitude'], df1['latitude'])]
+crs = 'epsg:4326'
+df = gpd.GeoDataFrame(df1, crs=crs, geometry=geometry)
+df = df.to_crs(epsg=3857)
+df_unkeep = df.drop_duplicates(keep=False)
+df_clean = df.drop_duplicates(keep='first')
+
+df[~df.index.isin(df_clean.index)].sort_values('date_obs').index.values
+df[~df.index.isin(df_unkeep.index)].sort_values('date_obs').index.values
+
+ax = df[~df.index.isin(df_unkeep.index)][['geometry']].plot()
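+# contextily basemap tiles are served in Web Mercator, which is why the frame
+# was reprojected with to_crs(epsg=3857) before plotting.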
+cx.add_basemap(ax)
+plt.show()
+
+df.sort_index(inplace=True)
+data_drop = df[~df.index.isin(df.drop_duplicates(keep=False).index)]
+
+# Collect every individual person id from the '&'-separated observateur field.
+tmp = [i.split('&') for i in df['observateur'].unique()]
+lst_pers = [item for sublist in tmp for item in sublist]
+df_pers = pd.DataFrame(lst_pers, columns=['id_pers']).drop_duplicates()
+
+sql = '''SELECT * FROM md.personne WHERE id_personne IN ({ids});'''.format(ids=','.join(df_pers.id_pers))
+tutu = pd.read_sql(
+    sql=sql,
+    con=con
+)
+df_pers['id_pers'] = df_pers['id_pers'].astype(int)
+# Ids referenced in observations but missing from md.personne:
+df_pers[~df_pers.id_pers.isin(tutu.id_personne)]
+
+
+# Remap old person ids to their new ids inside multi-observer strings.
+# `toto` was never defined in this script; it is assumed to be the
+# observations frame `df` loaded above.
+tom = df.copy()
+tom = tom[tom.observateur.str.contains('&')]
+id_map = {
+    '1043': '100086', '6191': '1000002', '6051': '1000013', '1070': '1000011',
+    '1088': '100043', '6045': '1000004', '6121': '1000009', '6355': '1000003',
+    '8632': '1000005', '1034': '100007', '1110': '100077', '1075': '1000022',
+    '1188': '100038', '8271': '100017', '1049': '100053', '8328': '100034',
+    '1102': '100062', '1016': '1000001', '1187': '100037', '8018': '1000008',
+    '4233': '100067', '8254': '1000012', '8015': '100016',
+}
+# Plain substring replacement, exactly as the original one line per id did;
+# beware that an old id matching inside a longer id would also be rewritten.
+for old, new in id_map.items():
+    tom['observateur'] = tom['observateur'].str.replace(old, new, regex=False)
+
+df.sort_index(inplace=True)
+tom.sort_index(inplace=True)
+df.loc[df.index.isin(tom.index), 'observateur'] = tom['observateur']
+df.reset_index(inplace=True)
+tmp = df.copy()
+tmp = tmp[['id_obs','observateur']]
+update_data(tmp, con, sch='saisie', tab='saisie_observation')
+
+
+# Check that every user id referenced in suivi_saisie_observation exists!
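+# (Added note) The existence check below queries md.personne once per id; a
+# single set-based query would give the same result, e.g.:
+#   known = pd.read_sql('SELECT id_personne FROM md.personne', con)
+#   missing = df_pers[~df_pers.id_pers.astype(int).isin(known.id_personne)]
+# The original per-id loop is kept as-is.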
+from datetime import datetime as dt
+df = pd.read_sql_table(
+    table_name = 'suivi_saisie_observation',
+    con = con,
+    schema = 'saisie',
+)
+df.sort_values('date_operation', ascending=False, inplace=True)
+x = '2021-05-25 22:00:00'
+y = '2021-05-21 12:30:00'
+x = dt.strptime(x, '%Y-%m-%d %H:%M:%S')
+y = dt.strptime(y, '%Y-%m-%d %H:%M:%S')
+
+ddf = df[(df.date_operation < x) & (df.date_operation > y)]
+ddf = ddf[['operation','utilisateur', 'id_obs', 'observateur', 'validateur', 'numerisateur']]
+
+# Gather every person id referenced as observateur, validateur or numerisateur
+tmp_obs = [i.split('&') for i in ddf['observateur'].dropna().unique()]
+lst_pers_obs = [item for sublist in tmp_obs for item in sublist]
+tmp_val = [i.split('&') for i in ddf['validateur'].dropna().unique().astype(int).astype(str)]
+lst_pers_val = [item for sublist in tmp_val for item in sublist]
+tmp_num = [i.split('&') for i in ddf['numerisateur'].dropna().unique().astype(str)]
+lst_pers_num = [item for sublist in tmp_num for item in sublist]
+lst_pers = lst_pers_obs + lst_pers_val + lst_pers_num
+df_pers = pd.DataFrame(lst_pers, columns=['id_pers']).drop_duplicates()
+
+# Check that each person id exists in md.personne
+pers_not_exist = []
+for i in df_pers.id_pers:
+    sql = 'SELECT id_personne, prenom, nom, id_structure FROM md.personne WHERE id_personne = %s;' % i
+    res = pd.read_sql(
+        sql = sql,
+        con = con
+    )
+    if res.empty:
+        print(i)
+        pers_not_exist += [i]
+
+
+sql = '''SELECT * FROM saisie.suivi_saisie_observation
+WHERE utilisateur = 'inconnu'
+AND date_operation < '2021-05-25 22:00:00'
+AND date_operation > '2021-05-21 12:30:00';'''
+pd.read_sql(
+    sql=sql,
+    con=con
+)
+sql = '''UPDATE saisie.suivi_saisie_observation SET utilisateur = 'colas.geier@cen-isere.org'
+WHERE utilisateur = 'inconnu'
+AND date_operation < '2021-05-25 22:00:00'
+AND date_operation > '2021-05-21 12:30:00';'''
+try:
+    con.execute(sql)
+    # con.execute(text(sql))
+    print('''
+Update OK !''')
+except Exception as exept:
+    print(exept)
+
+
+# Merge person id 8044 into 1000014 in both observation tables.
+rep = '8044'
+keep = '1000014'
+
+# A broader pattern-match variant of the check, kept for reference (the
+# original assigned it and immediately overwrote it):
+# sql_check = '''SELECT * FROM saisie.saisie_observation WHERE observateur like '%{replace}%' OR validateur = {replace};'''.format(replace=rep)
+sql_check = '''SELECT * FROM saisie.saisie_observation WHERE observateur = '{replace}' OR validateur = {replace};'''.format(
+    replace=rep
+)
+# Note: the equality test only matches single-observer rows; ids embedded in
+# '&'-separated observateur strings are not updated here.
+sql_updt = '''
+    UPDATE saisie.saisie_observation SET observateur = '{keep}' WHERE observateur = '{replace}';
+    UPDATE saisie.saisie_observation SET numerisateur = {keep} WHERE numerisateur = {replace};
+    '''.format(
+    keep=keep, replace=rep
+)
+# sql = '''UPDATE saisie.saisie_observation SET validateur = 100077
+# WHERE validateur = 1110;'''
+
+test = pd.read_sql(
+    sql=sql_check,
+    con=con
+)[['observateur','validateur']]
+if test.empty:
+    print(''' OK for %s in table saisie''' % rep)
+else:
+    try:
+        con.execute(sql_updt)
+        # con.execute(text(sql))
+        print('''
+    Update OK !''')
+    except Exception as exept:
+        print(exept)
+
+sql_check2 = '''SELECT * FROM saisie.suivi_saisie_observation WHERE observateur = '{replace}' OR validateur = '{replace}';'''.format(
+    replace=rep
+)
+sql_updt2 = '''
+    UPDATE saisie.suivi_saisie_observation SET observateur = '{keep}' WHERE observateur = '{replace}';
+    UPDATE saisie.suivi_saisie_observation SET numerisateur = {keep} WHERE numerisateur = {replace};
+    '''.format(
+    keep=keep, replace=rep
+)
+test2 = pd.read_sql(
+    sql=sql_check2,
+    con=con
+)[['observateur','validateur']]
+if test2.empty:
+    print(''' OK for %s in table suivi_saisie_observation''' % rep)
+else:
+    try:
+        con.execute(sql_updt2)
+        # con.execute(text(sql))
+        print('''
+    Update OK !''')
+    except Exception as exept:
+        print(exept)
+
+
+# Final check: no observation should still reference the replaced id.
+sql = '''SELECT * FROM saisie.saisie_observation WHERE observateur = '{replace}' OR validateur = {replace};'''.format(
+    replace=rep
+)
+# sql = 'SELECT nom, prenom FROM md.personne WHERE id_personne= 6191;'
+test = pd.read_sql(
+    sql=sql,
+    con=con
+)[['observateur','validateur']]
+if test.empty:
+    print(''' OK for %s''' % rep)
+else:
+    print('ERROR !!!!!!!!!')
+    # break
\ No newline at end of file
diff --git a/1_SICEN/sicen2_extract.py b/1_SICEN/sicen2_extract.py
new file mode 100644
index 0000000..907f285
--- /dev/null
+++ b/1_SICEN/sicen2_extract.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+import geopandas as gpd
+import pandas as pd
+from geoalchemy2 import Geometry
+from sqlalchemy.engine import URL
+from sqlalchemy import create_engine
+import sys
+
+
+# List of site codes to export
+lst_site = ['GOUT']
+
+# Columns to drop from the export
+delete = True
+lst_cols_del = ['geometrie','date_debut_obs']
+
+# Export format, one of ['xls', 'xlsx', 'csv']
+FORMAT = 'xlsx'
+# Output directory
+OUTPUT = '~/Documents/tmp/Jean_Luc/'
+# Output file name
+NAME_OUT = 'observation_site_GOUT'
+
+
+# HOMER database settings (out)
+user_hom = 'cen_admin'
+pwd_hom = '#CEN38@venir'
+# user_hom = 'cgeier'
+# pwd_hom = 'adm1n*sIcen'
+adr_hom = '91.134.194.221'
+port_hom = '5432'
+base_hom = 'sicen2'
+schema_hom = 'saisie'
+table_hom = 'saisie_observation'
+url_hom = URL.create('postgresql+psycopg2',
+    username=user_hom,
+    password=pwd_hom,
+    host=adr_hom,
+    database=base_hom,
+)
+engine_hom = create_engine(url_hom)
+# engine_hom = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'.format(user_hom,pwd_hom,adr_hom,port_hom,base_hom), echo=False)
+
+# BART database settings (in)
+user_bart = 'cen_admin'
+pwd_bart = '#CEN38@venir'
+adr_bart = '192.168.0.3'
+port_bart = '5432'
+base_bart = 'bd_cen38'
+schema_bart = 'sites'
+table_bart = "c_sites_zonages"
+url_bart = URL.create('postgresql+psycopg2',
+    username=user_bart,
+    password=pwd_bart,
+    host=adr_bart,
+    database=base_bart,
+)
+con_bart = create_engine(url_bart)
+# con_bart = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'.format(user_bart,pwd_bart,adr_bart,port_bart,base_bart), echo=False)
+
+epsg = '2154'
+crs = 'EPSG:%s' % epsg
+
+
+# Fetch the site polygons and merge them into a single geometry if needed.
+sql = "SELECT geom FROM {sch}.{tab} WHERE code_site in ('{lst_site}') AND type_zonage = 'ZO'".format(sch=schema_bart, tab=table_bart, lst_site="','".join(lst_site))
+gdf_site = gpd.read_postgis(
+    sql = sql,
+    con = con_bart,
+)
+if gdf_site.shape[0] > 1 :
+    # unary_union replaces the deprecated cascaded_union
+    union = gdf_site.geom.unary_union
+    Sunion = gpd.GeoSeries(union)
+    gdf_union = gpd.GeoDataFrame(Sunion, geometry='geom', columns=['geom'], crs=crs)
+    gdf_site = gdf_union
+
+
+# Select every observation intersecting the site geometry.
+sql = '''SELECT * FROM {sch}.{tab} WHERE ST_Intersects (geometrie, 'SRID={epsg};{poly}')'''.format(sch=schema_hom, tab=table_hom, epsg=epsg, poly=gdf_site.geom[0])
+gdf_saisie = gpd.read_postgis(
+    sql = sql,
+    con = engine_hom,
+    geom_col = 'geometrie'
+)
+
+df = pd.DataFrame(gdf_saisie)
+if delete:
+    df.drop(columns= lst_cols_del, inplace=True)
+
+if FORMAT in ['xls', 'xlsx']:
+    df.to_excel(OUTPUT + NAME_OUT + '.' + FORMAT)
+if FORMAT == 'csv':
+    # pandas DataFrames are written with to_csv (to_file does not exist)
+    df.to_csv(OUTPUT + NAME_OUT + '.'
+ FORMAT) + +sys.exit('END') \ No newline at end of file diff --git a/1_SICEN/taxonAbsentTaxref_TO_taxref.py b/1_SICEN/taxonAbsentTaxref_TO_taxref.py new file mode 100644 index 0000000..21b0063 --- /dev/null +++ b/1_SICEN/taxonAbsentTaxref_TO_taxref.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +# -*- coding:UTF-8 -*- + +from sqlalchemy import create_engine #, text +from sqlalchemy.engine import URL +import pandas as pd + + +# Parametres bdd +user = 'cen_admin' +pwd = '#CEN38@venir' +adr = '91.134.194.221' +port = '5432' +base = 'sicen2' + +url = URL.create('postgresql+psycopg2', + username=user, + password=pwd, + host=adr, + database=base, +) +con = create_engine(url) + +sql = ''' + SELECT * FROM inpn.taxons_isere_absents_taxref + WHERE cd_nom::text NOT IN (SELECT cd_nom FROM inpn.taxref)''' +taxabs = pd.read_sql_query( + sql=sql,con=con +) +taxabs.cd_ref = taxabs.cd_ref.astype(int).astype(str) +taxabs.drop(columns=['id_taxon','source'], inplace=True) +taxabs.to_sql( + name='taxref', + con=con, + schema='inpn', + if_exists='append', + index=False +) \ No newline at end of file diff --git a/1_SICEN/update_codeInsee.py b/1_SICEN/update_codeInsee.py new file mode 100644 index 0000000..7a4915f --- /dev/null +++ b/1_SICEN/update_codeInsee.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*-. + +from sqlalchemy import create_engine, text +from sqlalchemy.engine import URL +from datetime import datetime as dt +from pycen import update_to_sql +import pandas as pd +import geopandas as gpd +from sqlalchemy.sql.expression import column + + +nom_numerateur = 'GEIER' +pnom_numerator = 'Colas' +today = dt.now().date().isoformat() +# Parametres bdd +user = 'cen_admin' +pwd = '#CEN38@venir' +adr = '91.134.194.221' +port = '5432' +base = 'sicen2' + +url = URL.create('postgresql+psycopg2', + username=user, + password=pwd, + host=adr, + database=base, +) +con = create_engine(url) + +# FONCTION +def get_numerator(nom,pnom,id_structure=1): + nom_numerateur = nom.upper() + pnom_numerator = pnom[0].upper() + pnom[1:] + query_get_numerator = """ + SELECT * FROM md.personne + WHERE nom = '%s' + AND prenom = '%s' + AND id_structure = %i + """ % (nom_numerateur,pnom_numerator,id_structure) + numerateur = pd.read_sql_query( + sql = query_get_numerator, + con = con + ) + return numerateur + +# SELECT OBSERVATION +sql = 'SELECT * FROM saisie.saisie_observation' +saisie = gpd.read_postgis( + sql, + con, + 'geometrie') +saisie.sort_values('id_obs', inplace=True) + +# SELECT COMMUNE +sql = 'SELECT * FROM ign_bd_topo.commune' +com = gpd.read_postgis( + sql, + con, + 'geometrie') + + +# JOINTURE spatiale +df = gpd.sjoin(saisie[['id_obs','code_insee','geometrie']],com[['code_insee','geometrie']],op='within') +df.sort_values('id_obs', inplace=True) +tmp = df[~df.code_insee_left.eq(df.code_insee_right)].copy() +tmp.rename(columns={'code_insee_right':'code_insee'}, inplace=True) +# UPDATE SET new code_insee +update_to_sql( + tmp[['id_obs','code_insee']], + con=con, + table_name='saisie_observation', + schema_name='saisie', + key_name='id_obs', + geom_col='geometrie' +) + + +# UPDATE utilisateur OF saisie.suivi_saisie_observation +numerateur = get_numerator(nom_numerateur,pnom_numerator) +update_structure_name = """ +UPDATE saisie.suivi_saisie_observation a +SET utilisateur = REPLACE(utilisateur, 'inconnu', '{email_numerator}') +WHERE date_operation > '{today}' +; +""".format( + email_numerator=numerateur.email.item(), + today = today + ) +with con.begin() as cnx: + cnx.execute(update_structure_name) \ No newline 
at end of file diff --git a/1_SICEN/update_dataONsicen.py b/1_SICEN/update_dataONsicen.py new file mode 100644 index 0000000..14600ad --- /dev/null +++ b/1_SICEN/update_dataONsicen.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- + +from sqlalchemy import create_engine +from sqlalchemy.engine import URL +from datetime import datetime as dt +import pandas as pd +import pycen + + + +# Parametres bdd +user = 'cgeier' +pwd = 'adm1n*bdCen' +adr = '91.134.194.221' +port = '5432' +base = 'sicen2' +url = URL.create('postgresql+psycopg2', + username=user, + password=pwd, + host=adr, + database=base, +) +con = create_engine(url) + +sch = 'saisie' +tab = 'saisie_observation' +ids = 'id_obs' +col_updt = 'phenologie' + +sql = """ +SELECT + {ids}, + {col} +FROM {sch}.{tab} +WHERE {col} = 'Inderterminé' +""".format(sch=sch,tab=tab,ids=ids,col=col_updt) +df = pd.read_sql_query(sql,con) +df[col_updt] = 'Indéterminé' +pycen.update_to_sql(df,con,tab,sch,ids,geom_col=None) + + +tab_suivi = 'suivi_saisie_observation' +ids_suivi = ['operation','date_operation','id_obs'] +col_updtSuivi = 'utilisateur' +date = dt.today().date().isoformat() +sql = """ +SELECT + {ids}, + {col} +FROM {sch}.{tab} +WHERE {col} = 'inconnu' + AND date_operation > '{date}' +""".format(sch=sch,tab=tab_suivi,ids=','.join(ids_suivi),col=col_updtSuivi, date=date) +df = pd.read_sql_query(sql,con) +df[col_updtSuivi] = 'colas.geier@cen-isere.org' +pycen.update_to_sql( + df, + con, + table_name=tab_suivi, + schema_name=sch, + key_name = ids_suivi, + geom_col=None) \ No newline at end of file diff --git a/1_SICEN/update_useroperator_suivisaisie.py b/1_SICEN/update_useroperator_suivisaisie.py new file mode 100644 index 0000000..8b86c35 --- /dev/null +++ b/1_SICEN/update_useroperator_suivisaisie.py @@ -0,0 +1,43 @@ +from pycen import con_sicen as con +import pandas as pd +from datetime import datetime as dt + +# FONCTION +def get_numerator(nom,pnom,id_structure=1): + nom_numerateur = nom.upper() + pnom_numerator = pnom[0].upper() + pnom[1:] + query_get_numerator = """ + SELECT * FROM md.personne + WHERE nom = '%s' + AND prenom = '%s' + AND id_structure = %i + """ % (nom_numerateur,pnom_numerator,id_structure) + numerateur = pd.read_sql_query( + sql = query_get_numerator, + con = con + ) + return numerateur + + +if __name__ == "__main__": + + nom_numerateur = 'GEIER' + pnom_numerator = 'Colas' + today = dt.now().date().isoformat() + + # UPDATE utilisateur OF saisie.suivi_saisie_observation + numerateur = get_numerator(nom_numerateur,pnom_numerator) + + update_structure_name = """ + UPDATE saisie.suivi_saisie_observation a + SET utilisateur = REPLACE(utilisateur, 'inconnu', '{email_numerator}') + WHERE date_operation > '{today}' + ; + """.format( + email_numerator=numerateur.email.item(), + today = today + ) + + with con.begin() as cnx: + cnx.execute(update_structure_name) + print('END UPDATE utilisateur OF saisie.suivi_saisie_observation') \ No newline at end of file diff --git a/1_SICEN/v_saisie_.py b/1_SICEN/v_saisie_.py new file mode 100644 index 0000000..dd7878f --- /dev/null +++ b/1_SICEN/v_saisie_.py @@ -0,0 +1,273 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- + +import geopandas as gpd +from pycen import con_sicen + +drop_v_saisie_ = "DROP VIEW IF EXISTS saisie.v_saisie_observation" +v_saisie_observation = """ +CREATE OR REPLACE VIEW saisie.v_saisie_observation +AS WITH auteur AS ( + SELECT p_1.id_personne, + CASE + WHEN p_1.prenom ~~ '(%%'::text THEN p_1.nom + ELSE COALESCE((p_1.nom || ' '::text) || p_1.prenom) + 
END AS personne, + s_1.nom_structure AS structure + FROM md.personne p_1 + JOIN md.structure s_1 USING (id_structure) +) +SELECT s.id_obs, + s.date_obs, + s.date_debut_obs, + s.date_fin_obs, + s.date_textuelle, + s.regne, + s.nom_vern, + s.nom_complet, + s.cd_nom, + t.cd_ref, + s.effectif_textuel, + s.effectif_min, + s.effectif_max, + CASE + WHEN s.regne = 'Plantae'::text THEN s.type_effectif + ELSE NULL::text + END AS strate_flore, + CASE + WHEN s.regne = 'Plantae'::text THEN s.phenologie + ELSE NULL::text + END AS phenologie_flore, + CASE + WHEN s.regne = 'Animalia'::text THEN s.type_effectif + ELSE NULL::text + END AS age_faune, + CASE + WHEN s.regne = 'Animalia'::text THEN s.phenologie + ELSE NULL::text + END AS sexe_faune, + s.id_waypoint, + s.longitude, + s.latitude, + s.localisation, + md.liste_nom_auteur(s.observateur::character varying) AS observateur, + a1.personne AS numerisateur, + a2.personne AS validateur, + md.liste_nom_structure(s.structure::character varying) AS structure, + s.remarque_obs, + s.code_insee, + s.id_lieu_dit, + s.diffusable, + s."precision", + s.statut_validation, + e.nom_etude AS etude, + p.libelle AS protocole, + s.effectif, + s.url_photo, + s.commentaire_photo, + s.decision_validation, + s.heure_obs, + s.determination, + s.elevation, + s.geometrie, + s.phylum, + s.classe, + s.ordre, + s.famille, + s.nom_valide, + s.qualification, + s.reprostatut AS reprostatut_faune, + s.obs_null, + s.uuid, + ld.libelle AS lot_donnee, + s.id_origine +FROM saisie.saisie_observation s + LEFT JOIN auteur a1 ON a1.id_personne = s.numerisateur + LEFT JOIN auteur a2 ON a2.id_personne = s.validateur + LEFT JOIN md.etude e ON s.id_etude = e.id_etude + LEFT JOIN md.protocole p ON s.id_protocole = p.id_protocole + LEFT JOIN md.lot_donnee ld ON s.id_lot = ld.id_lot + LEFT JOIN inpn.taxref t using (cd_nom) +; +""" + +grant_v_saisie = """ +ALTER TABLE saisie.v_saisie_observation OWNER TO cgeier; +GRANT ALL ON TABLE saisie.v_saisie_observation TO cgeier; +""" + +with con_sicen.begin() as cnx: + cnx.execute(drop_v_saisie_) + cnx.execute(v_saisie_observation) + cnx.execute(grant_v_saisie) + + +drop_vm_synthese = "DROP MATERIALIZED VIEW IF EXISTS saisie.vm_synthese_observations CASCADE;" +vm_synthese_observations = """ +-- saisie.vm_synthese_observations source + +CREATE MATERIALIZED VIEW saisie.vm_synthese_observations +TABLESPACE pg_default +AS WITH + observateurs AS ( + SELECT personne.id_personne, + (personne.nom || ' '::text) || personne.prenom AS nom_complet, + personne.role, + personne.id_structure + FROM md.personne + ), + structures AS ( + SELECT structure.id_structure, + structure.nom_structure, + structure.diffusable + FROM md.structure + ) +SELECT DISTINCT s.id_obs, + e.nom_etude AS etude, + p.libelle AS protocole, + ld.libelle AS lot_donnee, + s.date_obs, + s.date_debut_obs, + s.date_fin_obs, + s.heure_obs, + CASE + WHEN tx.regne IS NOT NULL THEN tx.regne + ELSE ta.regne + END AS regne, + --tx.phylum, + CASE + WHEN tx.phylum IS NOT NULL THEN tx.phylum + ELSE ta.phylum + END AS phylum, + --tx.classe, + CASE + WHEN tx.classe IS NOT NULL THEN tx.classe + ELSE ta.classe + END AS classe, + --tx.ordre, + CASE + WHEN tx.ordre IS NOT NULL THEN tx.ordre + ELSE ta.ordre + END AS ordre, + tx.famille, + CASE + WHEN tx.group1_inpn IS NOT NULL THEN tx.group1_inpn + ELSE ta.group1_inpn + END AS group1_inpn, + CASE + WHEN tx.group2_inpn IS NOT NULL THEN tx.group2_inpn + ELSE ta.group2_inpn + END AS group2_inpn, + CASE + WHEN tx.lb_nom IS NOT NULL THEN tx.lb_nom + ELSE ta.lb_nom + END AS 
nom_latin, + CASE + WHEN tx.nom_vern IS NOT NULL THEN tx.nom_vern + ELSE ta.lb_nom + END AS nom_vern, + CASE + WHEN tx.nom_complet IS NOT NULL THEN tx.nom_complet + ELSE ta.nom_complet + END AS nom_complet, + CASE + WHEN tx.cd_nom IS NOT NULL THEN tx.cd_nom + ELSE ta.cd_nom + END AS cd_nom, + CASE + WHEN tx.cd_ref IS NOT NULL THEN tx.cd_ref + ELSE ta.cd_ref + END AS cd_ref, + --s.cd_nom, + --t.cd_ref, + CASE + WHEN s.obs_null IS TRUE THEN 'oui'::text + ELSE 'non'::text + END::character(3) AS absence_observation, + s.effectif, + s.effectif_min, + s.effectif_max, + s.effectif_textuel, + CASE + WHEN s.regne ~~ 'Plantae'::text THEN s.type_effectif + ELSE NULL::text + END::saisie.enum_type_effectif AS strate_flore, + CASE + WHEN s.regne ~~ 'Plantae'::text THEN s.phenologie + ELSE NULL::text + END::saisie.enum_phenologie AS phenologie_flore, + CASE + WHEN s.regne ~~ 'Animalia'::text THEN s.type_effectif + ELSE NULL::text + END::saisie.enum_age AS age_faune, + CASE + WHEN s.regne ~~ 'Animalia'::text THEN s.phenologie + ELSE NULL::text + END::saisie.enum_sexe AS sexe_faune, + s.reprostatut AS reprostatut_faune, + s.determination, + s.remarque_obs AS rmq_observation, + md.liste_nom_auteur(s.observateur::character varying) AS observateurs, + md.liste_nom_auteur_structure(s.observateur::character varying) AS observateurs_v2, + num.nom_complet AS numerisateur, + md.liste_nom_structure(s.structure::character varying) AS structures, + CASE + WHEN s.diffusable IS TRUE THEN 'oui'::text + ELSE 'non'::text + END::character(3) AS diffusable, + s.statut_validation, + val.nom_complet AS validateur, + s.decision_validation, + s.code_insee AS insee_commune, + c.nom AS nom_commune, + l.nom AS lieu_dit, + s.elevation AS altitude_z, + s.longitude AS longitude_x, + s.latitude AS latitude_y, + s."precision", + s.localisation AS rmq_localisation, + s.id_origine, + s.geometrie::geometry(Point,2154) AS geom + FROM saisie.saisie_observation s + LEFT JOIN inpn.taxref t ON s.cd_nom = t.cd_nom + LEFT JOIN inpn.taxons_isere_absents_taxref ta ON s.cd_nom = ta.cd_nom::text + LEFT JOIN ref_inpn_taxref.taxref_v11 tx ON s.cd_nom = tx.cd_nom::text + JOIN md.etude e ON s.id_etude = e.id_etude + JOIN md.protocole p ON s.id_protocole = p.id_protocole + LEFT JOIN md.lot_donnee ld ON s.id_lot = ld.id_lot + LEFT JOIN observateurs num ON s.numerisateur = num.id_personne + LEFT JOIN observateurs val ON s.validateur = val.id_personne + LEFT JOIN ign_bd_topo.commune c ON s.code_insee = c.code_insee + LEFT JOIN ign_bd_topo.lieu_dit l ON s.id_lieu_dit = l.id +WITH DATA; + +-- View indexes: +CREATE INDEX idx_vm_synthese_observations_date_debut_obs ON saisie.vm_synthese_observations USING btree (date_debut_obs); +CREATE INDEX idx_vm_synthese_observations_date_fin_obs ON saisie.vm_synthese_observations USING btree (date_fin_obs); +CREATE INDEX idx_vm_synthese_observations_date_obs ON saisie.vm_synthese_observations USING btree (date_obs); +CREATE INDEX idx_vm_synthese_observations_group1_inpn ON saisie.vm_synthese_observations USING btree (group1_inpn); +CREATE INDEX idx_vm_synthese_observations_group2_inpn ON saisie.vm_synthese_observations USING btree (group2_inpn); +CREATE UNIQUE INDEX idx_vm_synthese_observations_id_obs ON saisie.vm_synthese_observations USING btree (id_obs); +CREATE INDEX idx_vm_synthese_observations_nom_commune ON saisie.vm_synthese_observations USING btree (nom_commune); +CREATE INDEX idx_vm_synthese_observations_nom_latin ON saisie.vm_synthese_observations USING btree (nom_latin); +CREATE INDEX 
idx_vm_synthese_observations_observateurs ON saisie.vm_synthese_observations USING btree (observateurs); +CREATE INDEX idx_vm_synthese_observations_structures ON saisie.vm_synthese_observations USING btree (structures); +CREATE INDEX sidx_vm_synthese_observations_geom ON saisie.vm_synthese_observations USING gist (geom); + + +COMMENT ON MATERIALIZED VIEW saisie.vm_synthese_observations IS 'Vue matérialisée de synthèse des données d''observations issuent de la table "saisie.saise_observation" et reformattée pour faciliter son utilisation dans QGIS. La vue est rafraîchie toutes les heures avec un script PSQL executer en SHELL via une tâche planifiée CRON côté serveur. Cette vue matérialisée peut également être rafraîchi manuellement avec la commande : REFRESH MATERIALIZED VIEW CONCURRENTLY'; +""" + +grant_vm_synthese = """ +-- Permissions + +ALTER TABLE saisie.vm_synthese_observations OWNER TO admin_ra; +GRANT ALL ON TABLE saisie.vm_synthese_observations TO admin_ra; +GRANT SELECT ON TABLE saisie.vm_synthese_observations TO grp_consult; +GRANT ALL ON TABLE saisie.vm_synthese_observations TO grp_admin; +GRANT SELECT ON TABLE saisie.vm_synthese_observations TO sicen2_gr_consult; +""" +with con_sicen.begin() as cnx: + cnx.execute(drop_vm_synthese) + cnx.execute(vm_synthese_observations) + cnx.execute(grant_vm_synthese) \ No newline at end of file diff --git a/1_SICEN/v_synthese[pole].py b/1_SICEN/v_synthese[pole].py new file mode 100644 index 0000000..4536998 --- /dev/null +++ b/1_SICEN/v_synthese[pole].py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- + +import geopandas as gpd +from pycen import con_sicen + +drop_v_inv = "DROP VIEW IF EXISTS saisie.v_synthese_invertebre" +v_synthese_invertebre = """ +CREATE OR REPLACE VIEW saisie.v_synthese_invertebre +AS SELECT + vm.id_obs, + vm.etude, + vm.protocole, + vm.lot_donnee, + vm.date_obs::date, + vm.date_debut_obs::date, + vm.date_fin_obs::date, + vm.heure_obs::time, + vm.regne, + vm.classe, + vm.ordre, + vm.famille, + vm.group1_inpn, + vm.group2_inpn, + vm.nom_latin, + vm.nom_vern, + vm.nom_complet, + vm.cd_nom, + vm.cd_ref, + vm.absence_observation, + vm.effectif, + vm.effectif_min, + vm.effectif_max, + vm.effectif_textuel, + vm.strate_flore, + vm.phenologie_flore, + vm.age_faune, + vm.sexe_faune, + vm.reprostatut_faune, + vm.determination, + vm.rmq_observation, + vm.observateurs, + vm.numerisateur, + vm.structures, + vm.diffusable, + vm.statut_validation, + vm.validateur, + vm.decision_validation, + vm.insee_commune, + vm.nom_commune, + vm.lieu_dit, + vm.altitude_z, + vm.longitude_x, + vm.latitude_y, + vm."precision", + vm.rmq_localisation, + vm.geom, + so.id_origine +FROM saisie.vm_synthese_observations vm + JOIN ref_inpn_taxref.taxref_v11 tax ON vm.cd_nom::integer = tax.cd_nom + JOIN saisie.saisie_observation so USING (id_obs) +WHERE tax.regne = 'Animalia'::text AND tax.phylum <> 'Chordata'::text OR (vm.cd_nom = ANY (ARRAY['9999024'::text, '9999025'::text, '9999032'::text, '9999055'::text, '9999061'::text, '9999062'::text, '9999069'::text, '9999036'::text, '9999040'::text, '9999072'::text, '9999045'::text, '9999047'::text, '9999051'::text, '9999052'::text, '9999053'::text, '9999081'::text, '9999026'::text, '9999050'::text, '9999048'::text, '9999037'::text, '9999066'::text, '9999065'::text, '9999080'::text])); +""" +grant_v_inv = """ +ALTER TABLE saisie.v_synthese_invertebre OWNER TO cen_admin; +GRANT ALL ON TABLE saisie.v_synthese_invertebre TO cen_admin; +GRANT SELECT ON TABLE saisie.v_synthese_invertebre TO grp_consult; 
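+-- (Added note) The same owner/consult/admin grant pattern is repeated below
+-- for the v_synthese_vertebre and v_synthese_flore views.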
+GRANT ALL ON TABLE saisie.v_synthese_invertebre TO grp_admin; +""" +with con_sicen.begin() as cnx: + cnx.execute(drop_v_inv) + cnx.execute(v_synthese_invertebre) + cnx.execute(grant_v_inv) + + +drop_v_vert = "DROP VIEW IF EXISTS saisie.v_synthese_vertebre" +v_synthese_vertebre = """ +CREATE OR REPLACE VIEW saisie.v_synthese_vertebre +AS SELECT + vm.id_obs, + vm.etude, + vm.protocole, + vm.lot_donnee, + vm.date_obs::date, + vm.date_debut_obs::date, + vm.date_fin_obs::date, + vm.heure_obs::time, + vm.regne, + vm.classe, + vm.ordre, + vm.famille, + vm.group1_inpn, + vm.group2_inpn, + vm.nom_latin, + vm.nom_vern, + vm.nom_complet, + vm.cd_nom, + vm.cd_ref, + vm.absence_observation, + vm.effectif, + vm.effectif_min, + vm.effectif_max, + vm.effectif_textuel, + vm.strate_flore, + vm.phenologie_flore, + vm.age_faune, + vm.sexe_faune, + vm.reprostatut_faune, + vm.determination, + vm.rmq_observation, + vm.observateurs, + vm.numerisateur, + vm.structures, + vm.diffusable, + vm.statut_validation, + vm.validateur, + vm.decision_validation, + vm.insee_commune, + vm.nom_commune, + vm.lieu_dit, + vm.altitude_z, + vm.longitude_x, + vm.latitude_y, + vm."precision", + vm.rmq_localisation, + vm.geom, + so.id_origine +FROM saisie.vm_synthese_observations vm + JOIN ref_inpn_taxref.taxref_v11 tax ON vm.cd_nom::integer = tax.cd_nom + JOIN saisie.saisie_observation so USING (id_obs) +WHERE tax.regne = 'Animalia'::text AND tax.phylum = 'Chordata'::text OR (vm.cd_nom = ANY (ARRAY['9999042'::text, '9999028'::text, '9999029'::text, '9999056'::text, '9999058'::text, '9999067'::text, '9999068'::text, '9999030'::text, '9999031'::text, '9999034'::text, '9999034'::text, '9999035'::text, '9999035'::text, '9999038'::text, '9999039'::text, '9999070'::text, '9999073'::text, '9999057'::text, '9999054'::text, '9999049'::text, '9999022'::text, '9999027'::text, '9999043'::text, '9999044'::text, '9999046'::text, '9999041'::text, '9999033'::text, '9999071'::text, '9999064'::text, '9999063'::text, '9999060'::text, '9999059'::text, '9999074'::text, '9999023'::text, '9999082'::text, '9999083'::text])); +""" +grant_v_vert = """ +ALTER TABLE saisie.v_synthese_vertebre OWNER TO cen_admin; +GRANT ALL ON TABLE saisie.v_synthese_vertebre TO cen_admin; +GRANT SELECT ON TABLE saisie.v_synthese_vertebre TO grp_consult; +GRANT ALL ON TABLE saisie.v_synthese_vertebre TO grp_admin; +""" +with con_sicen.begin() as cnx: + cnx.execute(drop_v_vert) + cnx.execute(v_synthese_vertebre) + cnx.execute(grant_v_vert) + + +drop_v_flo = "DROP VIEW IF EXISTS saisie.v_synthese_flore" +v_synthese_flore = """ +CREATE OR REPLACE VIEW saisie.v_synthese_flore +AS SELECT + vm.id_obs, + vm.etude, + vm.protocole, + vm.lot_donnee, + vm.date_obs::date, + vm.date_debut_obs::date, + vm.date_fin_obs::date, + vm.heure_obs::time, + vm.regne, + vm.classe, + vm.ordre, + vm.famille, + vm.group1_inpn, + vm.group2_inpn, + vm.nom_latin, + vm.nom_vern, + vm.nom_complet, + vm.cd_nom, + vm.cd_ref, + vm.absence_observation, + vm.effectif, + vm.effectif_min, + vm.effectif_max, + vm.effectif_textuel, + vm.strate_flore, + vm.phenologie_flore, + vm.age_faune, + vm.sexe_faune, + vm.reprostatut_faune, + vm.determination, + vm.rmq_observation, + vm.observateurs, + vm.numerisateur, + vm.structures, + vm.diffusable, + vm.statut_validation, + vm.validateur, + vm.decision_validation, + vm.insee_commune, + vm.nom_commune, + vm.lieu_dit, + vm.altitude_z, + vm.longitude_x, + vm.latitude_y, + vm."precision", + vm.rmq_localisation, + vm.geom, + so.id_origine +FROM saisie.vm_synthese_observations vm + 
JOIN ref_inpn_taxref.taxref_v11 tax ON vm.cd_nom::integer = tax.cd_nom + JOIN saisie.saisie_observation so USING (id_obs) +WHERE tax.regne <> 'Animalia'::text OR (vm.cd_nom = ANY (ARRAY['9999008'::text, '9999003'::text, '9999005'::text, '9999006'::text, '9999004'::text, '9999009'::text, '9999010'::text, '9999011'::text, '9999013'::text, '9999014'::text, '9999015'::text, '9999016'::text, '9999017'::text, '9999018'::text, '9999019'::text, '9999020'::text, '9999021'::text, '9999007'::text, '9999012'::text, '9999001'::text, '9999002'::text])); +""" +grant_v_flo = """ +ALTER TABLE saisie.v_synthese_flore OWNER TO cen_admin; +GRANT ALL ON TABLE saisie.v_synthese_flore TO cen_admin; +GRANT SELECT ON TABLE saisie.v_synthese_flore TO grp_consult; +GRANT ALL ON TABLE saisie.v_synthese_flore TO grp_admin; +""" +with con_sicen.begin() as cnx: + cnx.execute(drop_v_flo) + cnx.execute(v_synthese_flore) + cnx.execute(grant_v_flo) diff --git a/2_MEDWET/0_BROUILLONS/medwet.py b/2_MEDWET/0_BROUILLONS/medwet.py new file mode 100644 index 0000000..2c6ba38 --- /dev/null +++ b/2_MEDWET/0_BROUILLONS/medwet.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +#Nom : : medwet.py +#Description : +#Copyright : 2021, CEN38 +#Auteur : Colas Geier +#Version : 1.0 + +import pandas as pd +import numpy as np +from pycen import bdd +from sqlalchemy import create_engine + + +# Parametres bdd +user = 'cen_admin' +pwd = '#CEN38@venir' +adr = '192.168.0.3' +base = 'bd_cen38' +schema = 'zh' +table = 'cr_cen38_zh_medwet_v2021' + +dict_col = [{'criete_delimit': 'critere_delim'}] + +con = create_engine('postgresql+psycopg2://{0}:{1}@{2}/{3}'.format(user,pwd,adr,'bd_cen_new'), echo=False) +# Connexion bdd +bd = bdd.CEN( + user = user, + pwd = pwd, + adr = adr, + base = base + # schema = schema + ) + +df = bd.get_table( + schema = schema, + table = table) + + +##### critere_delimitation +critere_delimitation = pd.DataFrame(df.criete_delimit) +# remplacement d'une chaine de caractère +critere_delimitation.criete_delimit = critere_delimitation.criete_delimit.str.replace('critère de délimitation ZH : ', '') +# elimination des lignes vides +critere_delimitation.dropna(inplace = True) +# split des champs regroupant plusieurs infos et concaténation +critere_delimitation.criete_delimit = pd.concat( + [pd.Series(row['criete_delimit'].split(' // ')) for _, row in critere_delimitation.iterrows()] +).reset_index(drop=True) +liste_critere_delim = pd.DataFrame(critere_delimitation.criete_delimit.unique(), columns=['caracteristique']) + +liste_critere_delim = liste_critere_delim.append(pd.DataFrame(['tutu'], columns=['caracteristique'])) +liste_critere_delim.reset_index(inplace=True, drop=True) + +liste_critere_delim.to_sql(name='zh_delimitation', con=con, schema='zone_humide', index=False, if_exists='append') + + +##### regime_hydrique_entree +regime_hydrique_entree = pd.DataFrame(df.regime_hydrique_entree) +regime_hydrique_entree.dropna(inplace = True) +regime_hydrique_entree.regime_hydrique_entree = pd.concat( + [pd.Series(row['regime_hydrique_entree'].split(' // ')) for _, row in regime_hydrique_entree.iterrows()] +).reset_index(drop=True) +regime_hydrique_entree.reset_index(drop=True, inplace=True) +regime_hydrique_entree[['ecoulement_entree', 'toponymie', 'temporalite']] = regime_hydrique_entree.regime_hydrique_entree.str.split(' ; ', 2, expand=True) +# regime_hydrique_entree[['toponymie', 'temporalite']] = regime_hydrique_entree.temporalite.str.split(' ; ', 1, expand=True) 
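+# (Added note) Each regime_hydrique_entree entry is expected to look like
+#   "Entrée d'eau : <ecoulement> ; Toponymie : <toponyme> ; Permanence : <temporalite>"
+# which is why the three-way split above yields one column per field and the
+# label prefixes are stripped just below.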
+regime_hydrique_entree.drop(columns='regime_hydrique_entree', inplace = True)
+regime_hydrique_entree.ecoulement_entree = regime_hydrique_entree.ecoulement_entree.str.replace('Entrée d\'eau : ', '')
+regime_hydrique_entree.toponymie = regime_hydrique_entree.toponymie.str.replace('Toponymie : ', '')
+regime_hydrique_entree.temporalite = regime_hydrique_entree.temporalite.str.replace('Permanence : ', '')
+regime_hydrique_entree.ecoulement_entree.unique()
+regime_hydrique_entree.toponymie.unique()
+regime_hydrique_entree.temporalite.unique()
+
+
+##### regime_hydrique_sortie
+regime_hydrique_sortie = pd.DataFrame(df.regime_hydrique_sortie)
+# The original dropped NaN from regime_hydrique_entree here (copy-paste slip);
+# the sortie frame is the one being cleaned.
+regime_hydrique_sortie.dropna(inplace = True)
+regime_hydrique_sortie.regime_hydrique_sortie = pd.concat(
+    [pd.Series(row['regime_hydrique_sortie'].split(' // ')) for _, row in regime_hydrique_sortie.iterrows()]
+).reset_index(drop=True)
+# The original left a dangling tuple expression here; split the field into its
+# three parts, mirroring the entree handling above.
+regime_hydrique_sortie[['ecoulement', 'toponymie', 'permanence']] = regime_hydrique_sortie.regime_hydrique_sortie.str.split(' ; ', 2, expand=True)
+
+
+regime_hydrique_freq = pd.DataFrame(df.regime_hydrique_freq)
+regime_hydrique_origine = pd.DataFrame(df.regime_hydrique_orig)
+regime_hydrique_etendue = pd.DataFrame(df.regime_hydrique_etendue)
\ No newline at end of file
diff --git a/2_MEDWET/0_BROUILLONS/recup_zh.py b/2_MEDWET/0_BROUILLONS/recup_zh.py
new file mode 100644
index 0000000..4c786c3
--- /dev/null
+++ b/2_MEDWET/0_BROUILLONS/recup_zh.py
@@ -0,0 +1,2256 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+#Name : recup_zh.py
+#Description :
+#Copyright : 2021, CEN38
+#Author : Colas Geier
+#Version : 1.0
+
+import re
+import pandas as pd
+import pandas_access as mdb
+import numpy as np
+from sqlalchemy.sql.expression import column
+from pycen import bdd
+from sqlalchemy import create_engine
+from geoalchemy2 import Geometry
+
+
+
+isin_bdd = True
+
+# Source database settings (IN)
+user = 'cen_admin'
+pwd = '#CEN38@venir'
+adr = '192.168.0.189'
+base = 'bd-cen-38'
+schema = 'zh'
+table = 'cr_cen38_zh_medwet_v2021'
+con = create_engine('postgresql+psycopg2://{0}:{1}@{2}/{3}'.format(user,pwd,adr,base), echo=False)
+bd = bdd.CEN(
+    user = user,
+    pwd = pwd,
+    adr = adr,
+    base = base
+    # schema = schema
+    )
+
+# Target database settings (OUT)
+user_zh = 'postgres'
+pwd_zh = 'tutu'
+adr_zh = '192.168.60.10'
+base_zh = 'bd_cen'
+con_zh = create_engine('postgresql+psycopg2://{0}:{1}@{2}/{3}'.format(user_zh,pwd_zh,adr_zh,base_zh), echo=False)
+
+# Read the MS Access databases
+db_file1 = '/home/colas/Documents/5_BDD/ZONES_HUMIDES/MEDWET_v1.mdb'
+db_file2 = '/home/colas/Documents/5_BDD/ZONES_HUMIDES/MEDWET_V2.mdb'
+df_med1 = mdb.read_table(db_file1, "SITEINFO")
+df_med2 = mdb.read_table(db_file2, "SITEINFO")
+
+# FILE = db_file2
+# for tab in mdb.list_tables(FILE):
+#     if tab not in ['SIG', 'List', 'Switchboard', 'Items'] and not tab.startswith(('DicGen','DIcGen')):
+#         # df = mdb.read_table(FILE, tab, keep_default_na=False,skipinitialspace=True)
+#         df = mdb.read_table(FILE, tab)
+#         if 'SITE_COD' in df.columns or 'SIT_COD' in df.columns:
+#             print(tab)
+
+# FILE = db_file2
+# for tab in mdb.list_tables(FILE):
+#     if tab not in ['SIG', 'List', 'Switchboard', 'Items'] and not tab.startswith(('DicGen','DIcGen')):
+#         # df = mdb.read_table(FILE, tab, keep_default_na=False,skipinitialspace=True)
+#         df = mdb.read_table(FILE, tab)
+#         if 'ORG' in df.columns:
+#             print(tab)
+
+df = bd.get_table(
+    schema = schema,
+    table = table)
+df.sort_values('site_code', inplace=True)
+df.auteur_fiche.fillna('Inconnu', inplace=True)
+df[['auteur_fiche','auteur_fiche_remarque']] = 
df[['auteur_fiche','auteur_fiche_remarque']].replace( + ['Biron N.','BIRON N\.','Balmain C.','Feuvrier B.','Souvignet N.','Billard G.','BELLUT', 'C. Balmain','E. JOURDAN','E. Jordan','Juton M.','P. Bellut', 'Folgar H.',], + ['BIRON Nicolas','BIRON Nicolas','BALMAIN Céline','FEUVRIER Benoit','SOUVIGNET Nicolas','BILLARD Gilbert','BELLUT P.','BALMAIN Céline','JOURDAN Elise','JOURDAN Elise','JUTON Mathieu','BELLUT P.','FOGLAR Hélène'], + regex=True) +# Récupération des structures +df_structure = pd.DataFrame( + df['organisme_auteur'].drop_duplicates(), ) +df_structure.rename(columns={'organisme_auteur':'nom'}, inplace=True) +# df_structure.drop( +# labels=[ +# # 1092,748,1088, +# 17], +# axis=0, +# inplace=True) +df_structure['nom_autres'] = None +df_structure.loc[df_structure.nom == 'Acer campestre', 'nom_autres'] = 'ACER CAMPESTRE' +df_structure.loc[df_structure.nom == 'FRAPNA Isère', 'nom_autres'] = 'Asso. FRAPNA' +df_structure.loc[df_structure.nom == 'DRAC NATURE', 'nom_autres'] = 'Asso. Drac Nature' +df_structure.loc[df_structure.nom == 'Comité Gère vivante', 'nom_autres'] = 'Asso. GERE VIVANTE' +df_structure.nom.fillna('Inconnu', inplace=True) +df_structure.reset_index(inplace=True, drop=True) +# Envoie des structures en bdd +if not isin_bdd: + df_structure['nom'].to_sql( + name='organisme', + con = con_zh, + schema='personnes', + index=True, + index_label='id', + if_exists='append', + ) + print("INSERT ... ok !") +# Correction des structures dans le df global +df.organisme_auteur.fillna('Inconnu', inplace=True) +for d,j in df_structure[~df_structure.nom_autres.isna()].iterrows(): + df.loc[df.organisme_auteur==j.nom_autres, 'organisme_auteur'] = j.nom + + +# Récupération des personnes +df_pers = df[['auteur_fiche', 'organisme_auteur']].drop_duplicates() +df_pers.auteur_fiche.fillna('Inconnu', inplace=True) +tmp = [i.split('&') for i in df_pers['auteur_fiche'].dropna().unique() ] +lst_pers = [item for sublist in tmp for item in sublist] +tmp = pd.DataFrame(data=lst_pers, columns=['nom_prenom']) +tmp['nom_prenom'] = tmp.nom_prenom.str.strip() +tmp[['nom','prenom','autre']] = tmp['nom_prenom'].str.split(' ', 2, expand=True) +for i,j in tmp[~tmp.autre.isna()].iterrows(): + tmp.loc[tmp.nom==j.nom, 'nom'] = j.nom + ' ' + j.prenom + tmp.loc[tmp.autre==j.autre, 'prenom'] = j.autre +tmp.drop(columns=['nom_prenom', 'autre'], inplace=True) +tmp['organisme'] = None +for nom in tmp.nom: + orga = df_pers.loc[df_pers.auteur_fiche.str.contains(nom),'organisme_auteur'] + orga = orga.unique() + tmp.loc[tmp.nom == nom,'organisme'] = orga[0] +tmp['id_organisme'] = tmp['organisme'] +tmp['id_organisme'] = tmp['id_organisme'].replace(df_structure.nom.to_list(),df_structure.index.to_list()) +tmp.nom = tmp.nom.str.upper() +tmp.drop_duplicates(inplace=True) +df_pers = tmp +df_pers.drop(columns='organisme', inplace=True) +df_pers.reset_index(inplace=True, drop=True) +# Envoie des personnes en bdd +if not isin_bdd: + df_pers.to_sql( + name='personne', + con = con_zh, + schema='personnes', + index=True, + index_label='id', + if_exists='append', + ) + print("INSERT ... ok !") +# Correction des personnes dans le df global +df_pers = pd.read_sql_table( + table_name='personne', + con = con_zh, + schema='personnes', + index_col='id', +) +# df.auteur_fiche.fillna('Inconnu', inplace=True) +# df[['auteur_fiche','auteur_fiche_remarque']] = df[['auteur_fiche','auteur_fiche_remarque']].replace( +# ['Biron N.','Balmain C.','Feuvrier B.','Souvignet N.','Billard G.','BELLUT', 'C. Balmain','E. JOURDAN','E. Jordan','Juton M.','P. 
Bellut' ], +# ['BIRON Nicolas','BALMAIN Céline','FEUVRIER Benoit','SOUVIGNET Nicolas','BILLARD Gilbert','BELLUT P.','BALMAIN Céline','JOURDAN Elise','JOURDAN Elise','JUTON Mathieu','BELLUT P.'], +# regex=True) +df_pers['nom_prenom'] = df_pers.nom.str[0] + df_pers.nom.str[1:].str.lower() + ' ' + df_pers.prenom +for pers in df_pers.nom_prenom.dropna(): + val = df_pers[df_pers.nom_prenom == pers].index[0] + val = str(val) + df.auteur_fiche = df.auteur_fiche.str.replace(pers,val, regex=True) +df_pers['nom_prenom'] = df_pers.nom + ' ' + df_pers.prenom +for pers in df_pers.nom_prenom.dropna(): + val = df_pers[df_pers.nom_prenom == pers].index[0] + val = str(val) + df.auteur_fiche = df.auteur_fiche.str.replace(pers,val, regex=False) +df.auteur_fiche.replace(df_pers.nom.to_list(),df_pers.index.to_list(), inplace=True) +NOM = df_pers.nom.str[0] + df_pers.nom.str[1:].str.lower() +df.auteur_fiche.replace(NOM.to_list(),NOM.index.to_list(), inplace=True) +df.loc[df.auteur_fiche == 'SETIS Groupe Degaud', 'auteur_fiche'] = df_pers.loc[df_pers.prenom == 'Degaud'].index[0] + + +# Récupération des sites +df_site = df[['site_code', 'date_init', 'name_zone', 'auteur_fiche', 'typo_sdage', ]] +df_site = df_site.rename(columns={ + 'site_code': 'id', + 'date_init': 'date_deb', + 'name_zone': 'nom', + 'auteur_fiche': 'id_auteur', + 'typo_sdage': 'id_typo_sdage' +}) +df_site.sort_values('date_deb', inplace=True) +df_site.reset_index(inplace=True, drop=True) +typ_sdage = pd.read_sql_table( + table_name = 'typo_sdage', + con = con_zh, + schema = 'sites', + index_col = 'id', +) +typ_milieu = pd.read_sql_table( + table_name = 'type_milieu', + con = con_zh, + schema = 'sites', + index_col = 'id', +) +typ_site = pd.read_sql_table( + table_name = 'type_site', + con = con_zh, + schema = 'sites', + index_col = 'id', +) +df_site.id_typo_sdage.replace(typ_sdage.nom.str.lower().to_list(),typ_sdage.index.to_list(), inplace=True) +df_site.id_typo_sdage.fillna(typ_sdage[typ_sdage.nom.str.lower() == 'inconnu'].index[0], inplace=True) +df_site.loc[df_site.id_typo_sdage == "bordures de cours d'eau", 'id_typo_sdage'] = typ_sdage[typ_sdage.nom.str.lower().str.contains("cours d'eau")].index[0] +df_site.loc[df_site.id_typo_sdage == "petits plans d'eau et bordures de plans d'eau", 'id_typo_sdage'] = typ_sdage[typ_sdage.nom.str.lower().str.contains("plans d'eau")].index[0] +df_site.loc[df_site.id_typo_sdage == 'zones humides de bas-fond en tête de bassin versant', 'id_typo_sdage'] = typ_sdage[typ_sdage.nom.str.lower().str.contains("bas-fond")].index[0] +df_site.loc[df_site.id_typo_sdage == 'plaines alluviales', 'id_typo_sdage'] = typ_sdage[typ_sdage.nom.str.lower().str.contains("plaines alluviales")].index[0] + +df_site['id_type_milieu'] = typ_milieu[typ_milieu.nom_court.str.contains('humides')].index[0] +df_site['id_type_site'] = typ_site[typ_site.nom == 'N.D.'].index[0] +df_site.set_index('id', inplace=True) + +df_site.date_deb.fillna('0001-01-01', inplace=True) +df_site.nom.fillna('Inconnu', inplace=True) +# Envoie des sites en bdd +if not isin_bdd: + df_site.to_sql( + name='sites', + con = con_zh, + schema='sites', + index=True, + index_label='id', + if_exists='append', + ) + print("INSERT ... 
ok !") + + +# Récupération des géometries +df_geomsite = df[['site_code', 'geom', 'date_der_modif', 'link_pdf', 'fonction_majeur', 'interet_patri', 'bilan_menaces', 'orient_act', 'usages_process_natu_comm' ]] +df_geomsite = df_geomsite.rename(columns={ + 'site_code': 'id_site', + 'date_der_modif': 'date', + 'fonction_majeur': 'rmq_fct_majeur', + 'interet_patri': 'rmq_interet_patri', + 'bilan_menaces': 'rmq_bilan_menace', + 'orient_act': 'rmq_orient_act', + 'usages_process_natu_comm': 'rmq_usage_process' +}) +df_geomsite = df_geomsite.merge(df_site[['id_auteur']].reset_index(), left_on='id_site', right_on='id') +df_geomsite.drop(columns=['id'], inplace=True) +df_geomsite.date.fillna('0001-01-01', inplace=True) +df_geomsite.reset_index(inplace=True, drop=True) +# Envoie des géometries en bdd +if not isin_bdd: + df_geomsite.to_postgis( + name='r_sites_geom', + con = con_zh, + schema='sites', + index=True, + index_label='id', + if_exists='append', + geom_col='geom' + ) + print("INSERT ... ok !") +df_rgsite = df_geomsite[['id_site']] +df_rgsite.index.name = 'id' +df_rgsite.reset_index(inplace=True) + + +# Récupération des types de connexions +df_typconex = df[['connex_type']] +df_typconex = df_typconex.rename(columns={ + 'connex_type': 'nom' +}) +df_typconex.dropna(inplace=True) +df_typconex = pd.DataFrame(df_typconex.nom.unique(), columns=['nom']) +df_typconex = pd.concat( + [ pd.DataFrame(['inconnu'], columns=['nom']),df_typconex ], + ignore_index=True) +df_typconex['nom'] = df_typconex.nom.str[0].str.upper() + df_typconex.nom.str[1:] +# Envoie des types de connexions en bdd +if not isin_bdd: + df_typconex.to_sql( + name='param_type_connect', + con = con_zh, + schema='zones_humides', + index=True, + index_label='id', + if_exists='append', + ) + print("INSERT ... ok !") + + +# Récupération des relations sites / types de connexions +df_Rtypconex = df[['site_code','connex_type']] +df_Rtypconex = pd.merge(df_Rtypconex, df_rgsite, left_on='site_code', right_on='id_site') +df_Rtypconex = df_Rtypconex.rename(columns={ + 'id': 'id_geom_site', + 'connex_type': 'id_param_connect' +}) +df_Rtypconex.id_param_connect.fillna('inconnu', inplace=True) +df_Rtypconex.id_param_connect.replace(df_typconex.nom.str.lower().to_list(),df_typconex.index.to_list(), inplace=True) +df_Rtypconex.drop(columns=['site_code', 'id_site'], inplace=True) +# Envoie des relations sites / types de connexions +if not isin_bdd: + df_Rtypconex.to_sql( + name='r_site_type_connect', + con = con_zh, + schema='zones_humides', + index=True, + index_label='id', + if_exists='append', + ) + print("INSERT ... ok !") +# Incrémentation des types de paramettres fctEcoSocioPatri +d = { + 'nom': ['Fonctions hydroligiques', 'Fonctions biologiques', 'Valeurs socio-économiques', 'Interêt patrimonial',], + 'nom_court': ['fct_hydro', 'fct_bio', 'val_socioEco', 'int_patri']} +df_typFct = pd.DataFrame(data=d) +if not isin_bdd: + df_typFct.to_sql( + name='type_param_fct', + con = con_zh, + schema='zones_humides', + index=True, + index_label='id', + if_exists='append', + ) + print("INSERT ... 
ok !") + +# Récupération des fct Hydro, Bio, Socio-eco, Patri +columns_fct = ['fct_bio', 'val_socio_eco', 'int_patri', 'fct_hydro'] +df_rSiteFct = df[['site_code'] + columns_fct ] +df_rSiteFct = pd.merge(df_rSiteFct, df_rgsite, left_on='site_code', right_on='id_site') +df_rSiteFct = df_rSiteFct.rename(columns={ + 'id': 'id_geom_site', +}) +df_rSiteFct.drop(columns=['site_code', 'id_site'], inplace=True) +df_rSiteFct.index.name = 'id' +# df_rSiteFct.dropna(axis=0,subset=columns_fct, inplace=True) +lst_df = {} +for col in columns_fct: + print(col) + lst_df[col] = df_rSiteFct[['id_geom_site', col]] + d = lst_df[col][col].str.split('//').apply(pd.Series).stack() + d = pd.DataFrame(d, columns=[col]) + d.index.name = 'id' + del lst_df[col][col] + lst_df[col] = lst_df[col].merge(d, on='id',how='left') + lst_df[col][[col, col+'_rmq']] = lst_df[col][col].str.split('; Justification :', expand=True) + lst_df[col][col] = lst_df[col][col].str.replace('Critère :','') + lst_df[col][col+'_rmq'] = lst_df[col][col+'_rmq'].str.replace('Justification :','') + lst_df[col][col] = lst_df[col][col].str.strip() + lst_df[col][col+'_rmq'] = lst_df[col][col+'_rmq'].str.strip() + lst_df[col].dropna(subset=[col], inplace=True) +# Isolement des paramètres des fct Hydro, Bio, Socio-eco, Patri +df_paramFct = pd.DataFrame(columns=['nom', 'type']) +for col in columns_fct: + # x = lst_df[col][col].drop_duplicates().dropna() + # y = pd.Series([col]*len(x)) + x = lst_df[col][col].drop_duplicates().dropna().tolist() + y = [col]*len(x) + xy = {'nom': x, 'type': y} + xy = pd.DataFrame(data=xy) + df_paramFct = df_paramFct.append(xy, ignore_index=True) +# Incrémentation des paramètres des fct Hydro, Bio, Socio-eco, Patri +df_paramFct['id_type'] = df_paramFct.type.copy() +df_paramFct.id_type.replace('val_socio_eco','val_socioeco', inplace=True) +df_paramFct.id_type.replace(df_typFct.nom_court.str.lower().to_list(),df_typFct.index.to_list(), inplace=True) +del df_paramFct['type'] +df_paramFct[['nom','description']] = df_paramFct.nom.str.split('(', n=1, expand=True) +df_paramFct['description'] = df_paramFct['description'].str.replace(';',',') +df_paramFct['description'] = df_paramFct['description'].str.strip(')') +df_paramFct['description'] = df_paramFct['description'].replace('),',':') +df_paramFct['nom'] = df_paramFct['nom'].str.strip() +if not isin_bdd: + df_paramFct.to_sql( + name='param_fct_eco_socio_patri', + con = con_zh, + schema='zones_humides', + index=True, + index_label='id', + if_exists='append', + ) + print("INSERT ... 
ok !") +# Incrémentation des relations sites / paramètres des fct Hydro, Bio, Socio-eco, Patri +df_paramFct.id_type.replace(df_typFct.index.to_list(), df_typFct.nom_court.str.lower().to_list(), inplace=True) +for col in columns_fct: + lst_df[col].rename(columns={ + col : 'id_fct', + col+'_rmq' : 'description' + }, inplace=True) + if col == 'int_patri': + lst_df[col]['quantite'] = [ + sum(map(int, filter(str.isdigit, row.split()))) + if isinstance(row,str) else None + for row in lst_df[col]['description'] ] +for col in columns_fct: + id_type = col + if col == 'val_socio_eco': + id_type = 'val_socioeco' + tmp = df_paramFct[df_paramFct.id_type==id_type] + for i,row in tmp.iterrows(): + lst_df[col].loc[lst_df[col]['id_fct'].str.contains(row.nom, na=False), 'id_fct'] = row.name +df_RsiteFct = pd.DataFrame() +for col in columns_fct: + df_RsiteFct = pd.concat([df_RsiteFct, lst_df[col]]) +df_RsiteFct.sort_values('id_geom_site') +df_RsiteFct.reset_index(drop=True,inplace=True) +df_RsiteFct['description'] = df_RsiteFct['description'].replace('prioriatire','prioritaire', regex=True) +if not isin_bdd: + df_RsiteFct.to_sql( + name='r_site_fctecosociopatri', + con = con_zh, + schema='zones_humides', + index=True, + index_label='id', + if_exists='append', + ) + print("INSERT ... ok !") + +# Récupération des critères de délimitation +df_critDelm = df[['site_code','criete_delimit', 'criete_delimit_rmq']] +df_critDelm = pd.merge(df_critDelm, df_rgsite, left_on='site_code', right_on='id_site') +df_critDelm.rename(columns={ + 'id': 'id_geom_site', +}, inplace=True) +df_critDelm.drop(columns=['site_code', 'id_site'], inplace=True) +df_critDelm.index.name = 'id' +df_critDelm.dropna(subset=['criete_delimit'], inplace=True) +d = df_critDelm['criete_delimit'].str.split('//').apply(pd.Series).stack() +d = pd.DataFrame(d, columns=['criete_delimit']) +d.index.name = 'id' +del df_critDelm['criete_delimit'] +df_critDelm = df_critDelm.merge(d, on='id',how='left') +df_critDelm['criete_delimit'] = df_critDelm['criete_delimit'].str.replace('critère de délimitation ZH : ','') +df2_critDelm = df_rgsite[['id']].copy() +df2_critDelm.rename(columns={ + 'id': 'id_geom_site', +}, inplace=True) +df2_critDelm['criete_delimit'] = 'Non déterminé' +df_critDelm = pd.concat([df_critDelm, df2_critDelm]) +# Récupération des paramètres de délimitation de fct +# df_PcritDelm = pd.DataFrame(columns=['id_type','nom_court','nom', 'description'] ) +# df_PcritDelm = df_PcritDelm.append(df_critDelm[['criete_delimit']].drop_duplicates()) +# df_PcritDelm[['nom', 'description']] = df_PcritDelm.criete_delimit.str.split('(', expand=True) +# del df_PcritDelm['criete_delimit'] +# df_PcritDelm['nom'] = df_PcritDelm['nom'].str.strip() +# df_PcritDelm['description'] = df_PcritDelm['description'].str.strip(')') +# df_PcritDelm['id_type'] = 0 +# df_PcritDelm.drop_duplicates(inplace=True) +# df_PcritDelm.reset_index(drop=True, inplace=True) +# # Incrémentation des paramètres de délimitation de fct +# if not isin_bdd: + # df_PcritDelm.to_sql( + # name='param_delim_fct', + # con = con_zh, + # schema='zones_humides', + # index=True, + # index_label='id', + # if_exists='append', + # ) +# print("INSERT ... 
ok !") +df_PcritDelm = pd.read_sql_table( + table_name = 'param_delim_fct', + con = con_zh, + schema = 'zones_humides', + index_col = 'id', +) +# Récupération des relations sites / paramètres de délimitation de fct +for i,row in df_PcritDelm.iterrows(): + df_critDelm.loc[df_critDelm['criete_delimit'].astype(str).str.contains(row.nom, na=False), 'criete_delimit'] = row.name +df_critDelm.rename(columns={ + 'criete_delimit' : 'id_crit_delim', + 'criete_delimit_rmq': 'description' +}, inplace=True) +# Incrémentation des relations sites / paramètres de délimitation de fct +df_critDelm.reset_index(drop=True,inplace=True) +if not isin_bdd: + df_critDelm.to_sql( + name='r_site_critdelim', + con = con_zh, + schema='zones_humides', + index=True, + index_label='id', + if_exists='append', + ) + print("INSERT ... ok !") + +# Récupération des codes Corine Biotope +df_corbio = df[['site_code', 'corine_biotope']] +df_corbio = pd.merge(df_corbio, df_rgsite, left_on='site_code', right_on='id_site') +df_corbio = df_corbio.rename(columns={ + 'id': 'id_geom_site', +}) +df_corbio.drop(columns=['site_code', 'id_site'], inplace=True) +df_corbio.index.name = 'id' +df_corbio.dropna(subset=['corine_biotope'], inplace=True) +d = df_corbio['corine_biotope'].str.split(' ; ').apply(pd.Series).stack() +d = pd.DataFrame(d, columns=['corine_biotope']) +d.index.name = 'id' +del df_corbio['corine_biotope'] +df_corbio = df_corbio.merge(d, on='id',how='left') +df_corbio[['id_cb', 'libelle']] = df_corbio['corine_biotope'].str.split(' : ', expand=True) +df_corbio = df_corbio[['id_geom_site', 'id_cb']] +df_corbio.reset_index(inplace=True, drop=True) +# Incrémentation des relations sites / code corine biotope +if not isin_bdd: + df_corbio.to_sql( + name='r_site_habitat', + con = con_zh, + schema='zones_humides', + index=True, + index_label='id', + if_exists='append', + ) + print("INSERT ... ok !") + +# Récupération des éléments de submersion +df_sub = df[['site_code','regime_hydrique_freq','regime_hydrique_etendue', 'regime_hydrique_orig']] +df_sub = pd.merge(df_sub, df_rgsite, left_on='site_code', right_on='id_site') +df_sub = df_sub.rename(columns={ + 'id': 'id_geom_site', +}) +df_sub.drop(columns=['site_code', 'id_site'], inplace=True) +df_sub.index.name = 'id' +df_sub.dropna(subset=['regime_hydrique_freq','regime_hydrique_etendue', 'regime_hydrique_orig'], inplace=True, how='all') +df_typePsub = pd.read_sql_table( + table_name = 'type_param_sub', + con = con_zh, + schema = 'zones_humides', + index_col = 'id', +) +# Récupération des critères de submersion +df_paramSub = df_sub[['regime_hydrique_freq','regime_hydrique_etendue']].stack() +df_paramSub = df_paramSub.reset_index(level=1) +df_paramSub.set_axis(['id_type','nom'], axis=1, inplace=True) +df_paramSub = df_paramSub.drop_duplicates().sort_values('id_type') +df_paramSub.id_type.replace(['regime_hydrique_freq', 'regime_hydrique_etendue'],[0,1], inplace=True) +df_paramSub.reset_index(inplace=True, drop=True) +df_paramSub2 = pd.DataFrame({ + 'id_type' : [0, 1], + 'nom' : ['Inconnu', 'Inconnu'] + }) +df_paramSub = df_paramSub.append(df_paramSub2, ignore_index=True) +# Incrémentation des critères de submersion +if not isin_bdd: + df_paramSub.to_sql( + name='param_sub', + con = con_zh, + schema='zones_humides', + index=True, + index_label='id', + if_exists='append', + ) + print("INSERT ... 
ok !") +# Récupération des relations sites / critères de submersion +df_sub.rename( + columns={'regime_hydrique_orig': 'id_origsub', 'regime_hydrique_etendue': 'id_etendsub', 'regime_hydrique_freq': 'id_freqsub'}, + inplace=True) +df_sub.replace(df_paramSub.nom.to_list(), df_paramSub.index.to_list(), inplace=True, regex=True) +df_sub.id_freqsub.fillna(6, inplace=True) +df_sub.id_etendsub.fillna(7, inplace=True) +df_sub.reset_index(inplace=True, drop=True) +# Incrémentation des relations sites / critères de submersion +if not isin_bdd: + df_sub.to_sql( + name='r_site_sub', + con = con_zh, + schema='zones_humides', + index=True, + index_label='id', + if_exists='append', + ) + print("INSERT ... ok !") + +# Récupération des régimes hydriques +df_hydri0 = df[['site_code','regime_hydrique_entree','regime_hydrique_sortie']] +df_hydri0 = pd.merge(df_hydri0, df_rgsite, left_on='site_code', right_on='id_site') +df_hydri0 = df_hydri0.rename(columns={ + 'id': 'id_geom_site', +}) +df_hydri0.drop(columns=['site_code', 'id_site'], inplace=True) +df_hydri0.index.name = 'id' +df_hydri0.dropna(subset=['regime_hydrique_entree','regime_hydrique_sortie'], inplace=True, how='all') +# Mise à plat des entrées d'eau +df_hydriE = df_hydri0[['id_geom_site','regime_hydrique_entree']].reset_index(drop=True) +df_hydriE.index.name = 'id' +d= df_hydriE['regime_hydrique_entree'].str.split(' // ').apply(pd.Series).stack() +d = pd.DataFrame(d, columns=['regime_hydrique_entree']) +d.index.name = 'id' +del df_hydriE['regime_hydrique_entree'] +df_hydriE = df_hydriE.merge(d, on='id',how='left') +df_hydriE.rename(columns={'regime_hydrique_entree': 'id_reg_hydro'}, inplace=True) +df_hydriE.id_reg_hydro.replace("Entrée d'eau : ", '', inplace=True, regex=True) +df_hydriE[['id_reg_hydro', 'id_permanance']] = df_hydriE['id_reg_hydro'].str.split(' ; Permanence : ', expand=True) +df_hydriE[['id_reg_hydro','id_toponymie']] = df_hydriE['id_reg_hydro'].str.split(' ; Toponymie : ', expand=True) +df_hydriE = df_hydriE.reset_index(drop=True) +df_hydriE.index.name = 'id' + +d = df_hydriE[df_hydriE.id_toponymie.str.contains(';',na=False)] +d = d.merge( + pd.DataFrame({'toponymie':d['id_toponymie'].str.split(' ; ').apply(pd.Series).stack()}), + on='id',how='left') \ + .drop(columns=['id_toponymie']) \ + .rename(columns={'toponymie':'id_toponymie'}) +# df_hydriE = pd.concat([ +# df_hydriE[~df_hydriE.index.isin(d.index)], +# d ]).sort_values('id_geom_site').reset_index(drop=True) + + +# d = d.merge( +# pd.DataFrame({'toponymie':d['id_toponymie'].str.split(', ').apply(pd.Series).stack()}), +# on='id',how='left') \ +# .drop(columns=['id_toponymie']) \ +# .rename(columns={'toponymie':'id_toponymie'}) \ +# .drop_duplicates() +# d = d.merge( +# pd.DataFrame({'toponymie':d['id_toponymie'].str.split(' ou ').apply(pd.Series).stack()}), +# on='id',how='left') \ +# .drop(columns=['id_toponymie']) \ +# .rename(columns={'toponymie':'id_toponymie'}) \ +# .drop_duplicates() + +# df_hydriE[['id_toponymie', 'id_permanance']] = df_hydriE['id_toponymie'].str.split(' ; Permanence : ', expand=True) +# d = df_hydriE['id_toponymie'].str.split(' ; ').apply(pd.Series).stack() +# d = pd.DataFrame(d, columns=['id_toponymie']) +# d.index.name = 'id' +# del df_hydriE['id_toponymie'] +# df_hydriE = df_hydriE.merge(d, on='id',how='left') +df_hydriE['entree_sortie'] = 0 +# Mise à plat des sorties d'eau +df_hydriS = df_hydri0[['id_geom_site','regime_hydrique_sortie']].reset_index(drop=True) +df_hydriS.index.name = 'id' +d = df_hydriS['regime_hydrique_sortie'].str.split(' // 
').apply(pd.Series).stack() +d = pd.DataFrame(d, columns=['regime_hydrique_sortie']) +d.index.name = 'id' +del df_hydriS['regime_hydrique_sortie'] +df_hydriS = df_hydriS.merge(d, on='id',how='left') +df_hydriS.rename(columns={'regime_hydrique_sortie': 'id_reg_hydro'}, inplace=True) +df_hydriS.id_reg_hydro.replace("Sortie d'eau : ", '', inplace=True, regex=True) +df_hydriS[['id_reg_hydro', 'id_permanance']] = df_hydriS['id_reg_hydro'].str.split(' ; Permanence : ', expand=True) +df_hydriS[['id_reg_hydro','id_toponymie']] = df_hydriS['id_reg_hydro'].str.split(' ; Toponymie : ', expand=True) +# df_hydriS[['id_toponymie', 'id_permanance']] = df_hydriS['id_toponymie'].str.split(' ; Permanence : ', expand=True) +# d = df_hydriS['id_toponymie'].str.split(' ; ').apply(pd.Series).stack() +# d = pd.DataFrame(d, columns=['id_toponymie']) +# d.index.name = 'id' +# del df_hydriS['id_toponymie'] +# df_hydriS = df_hydriS.merge(d, on='id',how='left') +df_hydriS['entree_sortie'] = 1 +# Regroupement des régimes hydriques +df_hydri = pd.concat([df_hydriE, df_hydriS], ignore_index=True) +for col in df_hydri.columns: + if not col in ['id_geom_site', 'entree_sortie']: + df_hydri[col] = df_hydri[col].str.strip() +# df_hydri.drop_duplicates(inplace=True) +# df_hydri.id_toponymie.replace('', None, inplace=True, regex=True) + +df_hydri = pd.merge(df_hydri, df_rgsite, left_on='id_geom_site', right_on='id') +# Récupération des critères de régimes hydriques +df_Preghydro = df_hydri[['id_reg_hydro']].drop_duplicates() +df_Preghydro.dropna(inplace=True) +df_Preghydro['nom'] = df_Preghydro.id_reg_hydro.str[0].str.upper() + df_Preghydro.id_reg_hydro.str[1:].str.lower() +del df_Preghydro['id_reg_hydro'] +df_Preghydro.sort_values('nom', inplace=True) +df_Preghydro.reset_index(inplace=True, drop=True) +# Incrémentation des critères de régimes hydriques +if not isin_bdd: + df_Preghydro.to_sql( + name='param_reg_hydro', + con = con_zh, + schema='zones_humides', + index=True, + index_label='id', + if_exists='append', + ) + print("INSERT ... ok !") +# Récupération des critères de permanances hydriques +df_PpermHydro = df_hydri[['id_permanance']].drop_duplicates() +df_PpermHydro.columns = ['nom'] +df_PpermHydro.nom = df_PpermHydro.nom.replace([''],[None]) +df_PpermHydro.dropna(inplace=True) +df_PpermHydro.sort_values('nom', inplace=True) +df_PpermHydro.reset_index(inplace=True, drop=True) +df_PpermHydro.loc[5] = 'inconnu' +# Incrémentation des critères de permanances hydriques +if not isin_bdd: + df_PpermHydro.to_sql( + name='param_permanence', + con = con_zh, + schema='zones_humides', + index=True, + index_label='id', + if_exists='append', + ) + print("INSERT ... 
ok !") +# Récupération des Toponymie +df_hydri[['id_toponymie']] = df_hydri[['id_toponymie']] \ + .replace( + ['ruisseau','canal','canaux','torrent','catelan','\nRuisseau de la Grande Valloire',', …'], + ['Ruisseau','Canal','Canal','Torrent','Catelan','',''], + regex=True) \ + .replace(['Ruisseaux'],['Ruisseau'], regex=True) +# df_topony = df_hydri[['id_toponymie']] \ +# .replace([''],[None]) \ +# .dropna() \ +# .drop_duplicates() +# df_tron = pd.read_sql_table( +# table_name='troncon_hydro', +# con=con_zh, +# schema='ref_hydro', +# columns=['id', 'nom'], +# ).dropna() +# df_topony[df_topony.id_toponymie.isin(df_tron.nom)] +# df_topony[~df_topony.id_toponymie.isin(df_tron.nom)].id_toponymie.unique() +df_RegHydro = df_hydri.drop(columns='id').copy() +df_RegHydro.rename(columns={ + 'id_toponymie':'rmq_toponymie', + 'id_permanance':'id_permanence', + 'entree_sortie': 'in_out'}, inplace=True) +df_RegHydro.id_reg_hydro = df_RegHydro.id_reg_hydro.str[0].str.upper() + df_RegHydro.id_reg_hydro.str[1:].str.lower() +df_RegHydro.dropna(subset=['id_reg_hydro'],inplace=True) +df_RegHydro.id_permanence.fillna('inconnu', inplace=True) +d1 = dict(df_Preghydro.nom) +d2 = dict(df_PpermHydro.nom) +d1 = {v: str(k) for k, v in d1.items()} +d2 = {v: str(k) for k, v in d2.items()} +dic = { + 'id_reg_hydro': d1, + 'id_permanence': {'':None, **d2} +} +df_RegHydro.in_out = ~df_RegHydro.in_out.astype(bool) +df_RegHydro = df_RegHydro.replace(dic) \ + .drop(columns='id_site') \ + .reset_index(drop=True) +# Incrémentation des régimes hydriques +if not isin_bdd: + df_RegHydro.to_sql( + name='r_site_reghydro', + con = con_zh, + schema='zones_humides', + index=False, + # index_label='id', + if_exists='append', + ) + print("INSERT ... ok !") + + + + +# Récupération des usages et process +df_Ppostion = pd.read_sql_table( + table_name = 'param_position', + con = con_zh, + schema = 'zones_humides', + index_col = 'id', +) +df.usages_process_natu = df.usages_process_natu.replace( + ['avce', 'Entretine', 'Espèce invasive','espèce invasive'], + ['avec', 'Entretien', 'Espèces invasives','espèces invasives'], regex=True) + +df_Rprocess = df[['site_code', 'usages_process_natu']].copy() +df_Rprocess = pd.merge(df_Rprocess, df_rgsite, left_on='site_code', right_on='id_site') +df_Rprocess = df_Rprocess.rename(columns={ + 'id': 'id_geom_site', +}) +df_Rprocess.drop(columns=['site_code'], inplace=True) +# df_Rprocess.dropna(subset=['usages_process_natu'], inplace=True) +df_Rprocess.reset_index(drop=True, inplace=True) +df_Rprocess.index.name = 'id' +d = df_Rprocess['usages_process_natu'].str.split('//', expand=True).stack() +d = pd.DataFrame(d, columns=['activ_humaine']) +d.index.name = 'id' +df_Rprocess = df_Rprocess \ + .drop(columns=['usages_process_natu']) \ + .merge(d, on='id', how='left') +df_Rprocess[['activ_humaine', 'position']] = df_Rprocess['activ_humaine'].str.split(', Localisation :', expand=True) +df_Rprocess[['activ_humaine', 'rmq_activ_hum']] = df_Rprocess['activ_humaine'].str.split(pat=' \(Remarques :', expand=True) +df_Rprocess['rmq_activ_hum'] = df_Rprocess['rmq_activ_hum'].str.replace(' \)','', regex=True).str.strip() +df_Rprocess['rmq_activ_hum'] = df_Rprocess['rmq_activ_hum'].replace([''],[None]) +df_Rprocess.activ_humaine = df_Rprocess.activ_humaine.str.strip() +df_Rprocess.position = df_Rprocess.position.str.strip() +df_Rprocess.activ_humaine = df_Rprocess.activ_humaine.str[0].str.upper() + df_Rprocess.activ_humaine.str[1:].str.lower() +df_Rprocess.activ_humaine.replace( + ['Dépots', 'Dépôt sauvage', 
'Atterissement', "Entretien plan d'eau", "Création plan d'eau", 'Déchèterie communale' ,"Canalisation d'eau", 'Surpiétinement'], + ['Dépôts', 'Dépôts sauvages', 'Atterrissement', "Entretien de plan d'eau", "Création de plan d'eau", 'Déchèterie', 'Canalisation', 'Piétinement'], + regex=True, + inplace=True +) +df_Rprocess.activ_humaine.replace(['Canalisation', 'Step'], ["Canalisation d'eau", 'STEP'], + regex=True, inplace=True +) +# Récupération du dictionnaire activité humaine +df_ActHum = pd.read_sql_table( + table_name='param_activ_hum', + schema = 'zones_humides', + con=con_zh +) +df_Rprocess['activ_hum_autre'] = None +df_Rprocess.loc[~df_Rprocess.activ_humaine.isin(df_ActHum.nom),'activ_hum_autre'] = df_Rprocess.loc[~df_Rprocess.activ_humaine.isin(df_ActHum.nom),'activ_humaine'] +df_Rprocess.loc[~df_Rprocess.activ_hum_autre.isna(),'activ_humaine'] = df_ActHum.loc[df_ActHum.id==21, 'nom'].reset_index(drop=True)[0] +df_Rprocess.loc[(df_Rprocess.id_site=='38BB0073') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position','rmq_activ_hum']] = ["Pas d'activité marquante", 'ZH + EF','Anciennes mines'] +df_Rprocess.loc[(df_Rprocess.id_site=='38BO0013') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38BO0081') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38BO0112') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38BO0303') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38CG0054') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38CG0055') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38CG0056') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38CG0059') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38CG0060') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38CG0104') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38CG0108') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38DA0018') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38GC0008') & (df_Rprocess.activ_humaine.isna()), 'activ_humaine'] = "Autre (préciser dans l'encart réservé aux remarques)" +df_Rprocess.loc[(df_Rprocess.id_site=='38GC0096') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38GC0098') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38GL0003') & 
(df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38GR0006') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38MA0049') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38MA0054') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38MA0058') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38MA0060') & (df_Rprocess.activ_humaine.isna()),['activ_humaine','rmq_activ_hum','position']] = ["Sylviculture",'Coupe des arbres sous THT','ZH'] +df_Rprocess.loc[(df_Rprocess.id_site=='38QV0020') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38QV0033') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RH0038') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RH0056') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RH0101') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0005') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0023') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0032') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0048') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0055') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0061') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0063') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0065') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0083') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0085') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0086') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0099') & 
(df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0126') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Autre (préciser dans l'encart réservé aux remarques)", 'ZH'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0143') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0146') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0147') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0160') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0164') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VA0005') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VA0007') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VA0012') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VA0021') & (df_Rprocess.activ_humaine == "Pas d'activité marquante"),['position']] = ['ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VA0025') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VE0227') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VE0344') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='26PNRV0111') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité marquante", 'ZH + EF'] + + + +# '38RD0163' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38RD0163') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ["Infrastructures linéaires (routes, voies ferrées)", 'EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0163') & (df_Rprocess.activ_humaine.isna()),['activ_humaine','rmq_activ_hum', 'position']] = [ + "Élevage / pastoralisme",'Equin','ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1]).sort_values('id_site') +# '38RD0162' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38RD0162') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine','rmq_activ_hum', 'position']] = ["Élevage / pastoralisme",'Ovin', 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0162') & (df_Rprocess.activ_humaine.isna()),['activ_humaine','rmq_activ_hum', 'position']] = [ + 'Tourisme et loisirs (camping, zone de stationnement)','Randonnée', 'ZH + EF'] +# '38RD0161' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38RD0161') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ["Élevage / pastoralisme", 'ZH + EF'] 
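+# NOTE (editor's sketch, not wired in): every manual block in this section repeats
+# the same three steps -> copy the rows of one site where activ_humaine is still
+# NaN, fill them, and concat them back. A hypothetical helper factoring that out
+# (name and signature invented; assumes the pandas import `pd` already in this file):
+def _add_activity(frame, site, **values):
+    """Duplicate the still-empty rows of `site`, fill the given columns, append."""
+    mask = (frame.id_site == site) & (frame.activ_humaine.isna())
+    extra = frame[mask].copy()
+    for column, value in values.items():
+        extra[column] = value
+    return pd.concat([frame, extra]).sort_values('id_site')
+# e.g. df_Rprocess = _add_activity(df_Rprocess, '38VS0057', activ_humaine='Pêche', position='ZH')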
+df_Rprocess.loc[(df_Rprocess.id_site=='38RD0161') & (df_Rprocess.activ_humaine.isna()),['activ_humaine','rmq_activ_hum', 'position']] = [ + 'Tourisme et loisirs (camping, zone de stationnement)','Randonnée', 'ZH + EF'] +# '38RD0159' ADD ['Urbanisation', 'ZH'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0159') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = [ + "Élevage / pastoralisme",'ZH + EF'] +# '38RD0158' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38RD0158') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ["Élevage / pastoralisme", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0158') & (df_Rprocess.activ_humaine.isna()),['activ_humaine','rmq_activ_hum', 'position']] = [ + "Tourisme et loisirs (camping, zone de stationnement)",'VTT','ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1]).sort_values('id_site') +# '38RD0157' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38RD0157') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ["Infrastructures linéaires (routes, voies ferrées)", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0157') & (df_Rprocess.activ_humaine.isna()),['activ_humaine','rmq_activ_hum', 'position']] = [ + "Élevage / pastoralisme",'Equin','ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1]).sort_values('id_site') +# '38RD0156' ADD ['Urbanisation', 'ZH'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0156') & (df_Rprocess.activ_humaine.isna()),['activ_humaine','rmq_activ_hum', 'position']] = [ + "Élevage / pastoralisme",'Bovin','ZH + EF'] +# '38RD0155' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38RD0155') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine','rmq_activ_hum', 'position']] = ["Élevage / pastoralisme",'Bovin', 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0155') & (df_Rprocess.activ_humaine.isna()),['activ_humaine','rmq_activ_hum', 'position']] = [ + 'Tourisme et loisirs (camping, zone de stationnement)','Ski', 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1]).sort_values('id_site') +# '38RD0153' ADD ['Urbanisation', 'ZH'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0153') & (df_Rprocess.activ_humaine.isna()),['activ_humaine','rmq_activ_hum', 'position']] = [ + "Élevage / pastoralisme",'Bovin','ZH + EF'] +# '38RD0152' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38RD0152') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38RD0152') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine','rmq_activ_hum', 'position']] = ["Élevage / pastoralisme",'Bovin', 'ZH + EF'] +d2[['activ_humaine', 'position']] = ['Urbanisation', 'EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0152') & (df_Rprocess.activ_humaine.isna()),['activ_humaine','rmq_activ_hum', 'position']] = [ + 'Tourisme et loisirs (camping, zone de stationnement)','Randonnée, Ski', 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1, d2]).sort_values('id_site') +# '38RD0151' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38RD0151') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38RD0151') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine','rmq_activ_hum', 'position']] = ["Élevage / pastoralisme",'Equin', 'ZH + EF'] +d2[['activ_humaine','rmq_activ_hum', 'position']] = ["Prélèvements d'eau",'Captage', 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0151') & 
(df_Rprocess.activ_humaine.isna()),['activ_humaine','rmq_activ_hum', 'position']] = [ + 'Tourisme et loisirs (camping, zone de stationnement)','Randonnée', 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1, d2]).sort_values('id_site') +# '38RD0150' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38RD0150') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine','rmq_activ_hum', 'position']] = ["Élevage / pastoralisme",'Equin', 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0150') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Chasse', 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1]).sort_values('id_site') +# '38RD0149' ADD ['Urbanisation', 'ZH'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0149') & (df_Rprocess.activ_humaine.isna()),['activ_humaine','rmq_activ_hum', 'position']] = [ + 'Tourisme et loisirs (camping, zone de stationnement)', 'Randonnée','ZH + EF'] +# '38RD0148' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38RD0148') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine','rmq_activ_hum', 'position']] = ["Tourisme et loisirs (camping, zone de stationnement)",'Randonnée', 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0148') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Chasse', 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1]).sort_values('id_site') +# '38VS0057' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38VS0057') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38VS0057') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ['Pêche', 'ZH'] +d2[['activ_humaine', 'position']] = ["Tourisme et loisirs (camping, zone de stationnement)", 'EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VS0057') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Urbanisation', 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2]).sort_values('id_site') +# '38VS0056' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38VS0056') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ["Autre (préciser dans l'encart réservé aux remarques)", 'ZH'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VS0056') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1]).sort_values('id_site') +# '38VS0055' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38VS0055') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38VS0055') & (df_Rprocess.activ_humaine.isna())].copy() +d3 = df_Rprocess[(df_Rprocess.id_site=='38VS0055') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ['Sylviculture', 'ZH + EF'] +d2[['activ_humaine', 'position']] = ['Pêche', 'ZH'] +d3[['activ_humaine', 'position']] = ["Tourisme et loisirs (camping, zone de stationnement)", 'EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VS0055') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Urbanisation', 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2,d3]).sort_values('id_site') +# '38VS0054' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38VS0054') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38VS0054') & (df_Rprocess.activ_humaine.isna())].copy() +d3 = df_Rprocess[(df_Rprocess.id_site=='38VS0054') & 
(df_Rprocess.activ_humaine.isna())].copy() +d4 = df_Rprocess[(df_Rprocess.id_site=='38VS0054') & (df_Rprocess.activ_humaine.isna())].copy() +d5 = df_Rprocess[(df_Rprocess.id_site=='38VS0054') & (df_Rprocess.activ_humaine.isna())].copy() +d6 = df_Rprocess[(df_Rprocess.id_site=='38VS0054') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ['Sylviculture', 'ZH'] +d2[['activ_humaine', 'position']] = ['Pêche', 'ZH'] +d3[['activ_humaine', 'position']] = ['Chasse', 'ZH + EF'] +d4[['activ_humaine', 'position']] = ["Tourisme et loisirs (camping, zone de stationnement)", 'ZH + EF'] +d5[['activ_humaine','rmq_activ_hum', 'position']] = ["Prélèvements d'eau",'Irrigation', 'ZH'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VS0054') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'EF'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2,d3,d4,d5]).sort_values('id_site') +# '38VS0053' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38VS0053') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38VS0053') & (df_Rprocess.activ_humaine.isna())].copy() +d3 = df_Rprocess[(df_Rprocess.id_site=='38VS0053') & (df_Rprocess.activ_humaine.isna())].copy() +d4 = df_Rprocess[(df_Rprocess.id_site=='38VS0053') & (df_Rprocess.activ_humaine.isna())].copy() +d5 = df_Rprocess[(df_Rprocess.id_site=='38VS0053') & (df_Rprocess.activ_humaine.isna())].copy() +d6 = df_Rprocess[(df_Rprocess.id_site=='38VS0053') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ['Urbanisation', 'EF'] +d2[['activ_humaine', 'position']] = ['Infrastructures linéaires (routes, voies ferrées)', 'EF'] +d3[['activ_humaine', 'position']] = ["Autre (préciser dans l'encart réservé aux remarques)", 'EF'] +d4[['activ_humaine','rmq_activ_hum', 'position']] = ["Autre (préciser dans l'encart réservé aux remarques)",'Remblais', 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VS0053') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2,d3,d4]).sort_values('id_site') +# '38VS0052' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38VS0052') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38VS0052') & (df_Rprocess.activ_humaine.isna())].copy() +d3 = df_Rprocess[(df_Rprocess.id_site=='38VS0052') & (df_Rprocess.activ_humaine.isna())].copy() +d4 = df_Rprocess[(df_Rprocess.id_site=='38VS0052') & (df_Rprocess.activ_humaine.isna())].copy() +d5 = df_Rprocess[(df_Rprocess.id_site=='38VS0052') & (df_Rprocess.activ_humaine.isna())].copy() +d6 = df_Rprocess[(df_Rprocess.id_site=='38VS0052') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ["Sylviculture", 'EF'] +d2[['activ_humaine','rmq_activ_hum', 'position']] = ["Élevage / pastoralisme",'Equin', 'EF'] +d3[['activ_humaine', 'position']] = ["Pêche", 'ZH'] +d4[['activ_humaine', 'position']] = ['Tourisme et loisirs (camping, zone de stationnement)', 'ZH + EF'] +d5[['activ_humaine', 'position']] = ['Urbanisation', 'EF'] +d6[['activ_humaine', 'position']] = ['Infrastructures linéaires (routes, voies ferrées)', 'EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VS0052') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'EF'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2,d3,d4,d5,d6]).sort_values('id_site') +# '38VS0051' ADD ["Prélèvements d'eau", 'ZH'] +d1 = 
df_Rprocess[(df_Rprocess.id_site=='38VS0051') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38VS0051') & (df_Rprocess.activ_humaine.isna())].copy() +d3 = df_Rprocess[(df_Rprocess.id_site=='38VS0051') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ["Urbanisation", 'EF'] +d2[['activ_humaine','rmq_activ_hum', 'position']] = ["Élevage / pastoralisme",'Bovin', 'ZH + EF'] +d3[['activ_humaine','rmq_activ_hum', 'position']] = ["Prélèvements d'eau",'Pompage agricole, Abrevoir', 'ZH'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VS0051') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Agriculture", 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2,d3]) +# '38VS0050' ADD ["Prélèvements d'eau", 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38VS0050') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38VS0050') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ["Urbanisation", 'EF'] +d2[['activ_humaine','rmq_activ_hum', 'position']] = ["Infrastructures linéaires (routes, voies ferrées)",'Autoroute', 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VS0050') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Agriculture", 'EF'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2]) +# '38VS0049' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38VS0049') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38VS0049') & (df_Rprocess.activ_humaine.isna())].copy() +d3 = df_Rprocess[(df_Rprocess.id_site=='38VS0049') & (df_Rprocess.activ_humaine.isna())].copy() +d4 = df_Rprocess[(df_Rprocess.id_site=='38VS0049') & (df_Rprocess.activ_humaine.isna())].copy() +d5 = df_Rprocess[(df_Rprocess.id_site=='38VS0049') & (df_Rprocess.activ_humaine.isna())].copy() +d6 = df_Rprocess[(df_Rprocess.id_site=='38VS0049') & (df_Rprocess.activ_humaine.isna())].copy() +d7 = df_Rprocess[(df_Rprocess.id_site=='38VS0049') & (df_Rprocess.activ_humaine.isna())].copy() +d8 = df_Rprocess[(df_Rprocess.id_site=='38VS0049') & (df_Rprocess.activ_humaine.isna())].copy() +d9 = df_Rprocess[(df_Rprocess.id_site=='38VS0049') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ["Sylviculture", 'ZH + EF'] +d2[['activ_humaine', 'position']] = ["Élevage / pastoralisme", 'ZH + EF'] +d3[['activ_humaine', 'position']] = ["Pêche", 'ZH'] +d4[['activ_humaine', 'position']] = ['Tourisme et loisirs (camping, zone de stationnement)', 'ZH'] +d5[['activ_humaine', 'position']] = ['Urbanisation', 'ZH + EF'] +d6[['activ_humaine', 'position']] = ['Industrie', 'EF'] +d7[['activ_humaine', 'position']] = ["Infrastructures linéaires (routes, voies ferrées)", 'EF'] +d8[['activ_humaine', 'position','rmq_activ_hum']] = ["Prélèvements d'eau", 'ZH','Pompage, lavoir'] +d9[['activ_humaine', 'position']] = ["Autre (préciser dans l'encart réservé aux remarques)", 'ZH'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VS0049') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2,d3,d4,d5,d6,d7,d8,d9]).sort_values('id_site') +# '38VS0048' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38VS0048') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38VS0048') & (df_Rprocess.activ_humaine.isna())].copy() +d3 = df_Rprocess[(df_Rprocess.id_site=='38VS0048') & 
(df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ["Autre (préciser dans l'encart réservé aux remarques)", 'EF'] +d2[['activ_humaine', 'position']] = ["Infrastructures linéaires (routes, voies ferrées)", 'EF'] +d3[['activ_humaine', 'position']] = ["Industrie", 'EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VS0048') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Urbanisation', 'EF'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2,d3]).sort_values('id_site') +# '38VS0047' ADD ["Prélèvements d'eau", 'ZH'] +d = df_Rprocess[(df_Rprocess.id_site=='38VS0047') & (df_Rprocess.activ_humaine.isna())].copy() +d[['activ_humaine','rmq_activ_hum', 'position']] = ["Élevage / pastoralisme",'Bovin', 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VS0047') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Agriculture", 'EF'] +df_Rprocess = pd.concat([df_Rprocess, d]) +# '38VS0046' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38VS0046') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38VS0046') & (df_Rprocess.activ_humaine.isna())].copy() +d3 = df_Rprocess[(df_Rprocess.id_site=='38VS0046') & (df_Rprocess.activ_humaine.isna())].copy() +d4 = df_Rprocess[(df_Rprocess.id_site=='38VS0046') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ["Autre (préciser dans l'encart réservé aux remarques)", 'EF'] +d2[['activ_humaine','rmq_activ_hum', 'position']] = ["Élevage / pastoralisme",'Bovin, abeilles', 'ZH + EF'] +d3[['activ_humaine', 'position']] = ["Prélèvements d'eau", 'ZH'] +d4[['activ_humaine', 'position']] = ['Urbanisation', 'EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VS0046') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2,d3,d4]).sort_values('id_site') +# '38RD0128' ADD ["Prélèvements d'eau", 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38RD0128') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38RD0128') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ["Élevage / pastoralisme", 'ZH + EF'] +d2[['activ_humaine', 'position']] = ["Chasse", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0128') & (df_Rprocess.activ_humaine.isna()),['activ_humaine','rmq_activ_hum', 'position']] = ["Activité militaire",'Zone de tirs temporaires du Galibier-Grandes Rousses', 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2]) +# '38RD0165' ADD ["Prélèvements d'eau", 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38RD0165') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38RD0165') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ["Infrastructures linéaires (routes, voies ferrées)", 'EF'] +d2[['activ_humaine', 'position']] = ["Extraction de granulats, mines", 'ZH'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RD0165') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Activité hydroélectrique, barrage", 'ZH'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2]) +# '38RH0292' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38RH0292') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38RH0292') & (df_Rprocess.activ_humaine.isna())].copy() +d3 = df_Rprocess[(df_Rprocess.id_site=='38RH0292') & (df_Rprocess.activ_humaine.isna())].copy() +d4 = 
df_Rprocess[(df_Rprocess.id_site=='38RH0292') & (df_Rprocess.activ_humaine.isna())].copy() +d5 = df_Rprocess[(df_Rprocess.id_site=='38RH0292') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position','activ_hum_autre']] = ["Autre (préciser dans l'encart réservé aux remarques)", 'ZH', 'Remblais'] +d2[['activ_humaine', 'position']] = ["Élevage / pastoralisme", 'ZH + EF'] +d3[['activ_humaine', 'position']] = ['Tourisme et loisirs (camping, zone de stationnement)', 'ZH'] +d4[['activ_humaine', 'position']] = ['Urbanisation', 'ZH'] +d5[['activ_humaine', 'position']] = ["Infrastructures linéaires (routes, voies ferrées)", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38RH0292') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'ZH'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2,d3,d4,d5]).sort_values('id_site') +# '38MA0059' ADD ["Prélèvements d'eau", 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38MA0059') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38MA0059') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ["Élevage / pastoralisme", 'ZH + EF'] +d2[['activ_humaine', 'position']] = ["Chasse", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38MA0059') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Agriculture", 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2]) +# '38DA0019' ADD ["Prélèvements d'eau", 'ZH'] +d = df_Rprocess[(df_Rprocess.id_site=='38DA0019') & (df_Rprocess.activ_humaine.isna())].copy() +d[['activ_humaine', 'position']] = ["Élevage / pastoralisme", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38DA0019') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Chasse", 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d]) +# '38DA0020' ADD ["Prélèvements d'eau", 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38DA0020') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38DA0020') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ["Élevage / pastoralisme", 'ZH + EF'] +d2[['activ_humaine', 'position']] = ["Prélèvements d'eau", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38DA0020') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Chasse", 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2]) +# '38CG0110' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38CG0110') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38CG0110') & (df_Rprocess.activ_humaine.isna())].copy() +d3 = df_Rprocess[(df_Rprocess.id_site=='38CG0110') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ['Chasse', 'ZH'] +d2[['activ_humaine', 'position']] = ['Tourisme et loisirs (camping, zone de stationnement)', 'EF'] +d3[['activ_humaine', 'position']] = ["Prélèvements d'eau", 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38CG0110') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'EF'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2,d3]).sort_values('id_site') +# '26PNRV0208' ADD ["Prélèvements d'eau", 'ZH'] +d = df_Rprocess[(df_Rprocess.id_site=='26PNRV0208') & (df_Rprocess.activ_humaine.isna())].copy() +d[['activ_humaine', 'position']] = ["Prélèvements d'eau", 'ZH'] +df_Rprocess.loc[(df_Rprocess.id_site=='26PNRV0208') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ["Pas d'activité 
marquante", 'ZH'] +df_Rprocess = pd.concat([df_Rprocess, d]) +# '38BO0058' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38BO0058') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38BO0058') & (df_Rprocess.activ_humaine.isna())].copy() +d3 = df_Rprocess[(df_Rprocess.id_site=='38BO0058') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ['Pêche', 'ZH'] +d2[['activ_humaine', 'position']] = ['Urbanisation', 'EF'] +d3[['activ_humaine', 'position']] = ["Infrastructures linéaires (routes, voies ferrées)", 'EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38BO0058') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2,d3]).sort_values('id_site') +# '38VE0119' ADD ['Agriculture', 'EF'] +d = df_Rprocess[(df_Rprocess.id_site=='38VE0119') & (df_Rprocess.activ_humaine.isna())].copy() +d[['activ_humaine', 'position']] = ['Agriculture', 'EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VE0119') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Élevage / pastoralisme', 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d]) +# '38VE0147' ADD ['Infrastructures linéaires (routes, voies ferrées)', 'EF'] +d = df_Rprocess[(df_Rprocess.id_site=='38VE0147') & (df_Rprocess.activ_humaine.isna())].copy() +d[['activ_humaine', 'position']] = ['Infrastructures linéaires (routes, voies ferrées)', 'EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VE0147') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Élevage / pastoralisme', 'ZH'] +df_Rprocess = pd.concat([df_Rprocess, d]) +# '38VE0184' ADD ['Agriculture', 'EF'] +d = df_Rprocess[(df_Rprocess.id_site=='38VE0184') & (df_Rprocess.activ_humaine.isna())].copy() +d[['activ_humaine', 'position']] = ['Agriculture', 'EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VE0184') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Pêche', 'ZH'] +df_Rprocess = pd.concat([df_Rprocess, d]) +# '38VE0280' ADD ['Urbanisation', 'ZH'] +d = df_Rprocess[(df_Rprocess.id_site=='38VE0280') & (df_Rprocess.activ_humaine.isna())].copy() +d[['activ_humaine', 'position']] = ['Urbanisation', 'ZH'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VE0280') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d]) +# '38VE0345' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38VE0345') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38VE0345') & (df_Rprocess.activ_humaine.isna())].copy() +d3 = df_Rprocess[(df_Rprocess.id_site=='38VE0345') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ['Élevage / pastoralisme', 'ZH + EF'] +d2[['activ_humaine', 'position']] = ['Infrastructures linéaires (routes, voies ferrées)', 'EF'] +d3[['activ_humaine', 'position','activ_hum_autre']] = ["Autre (préciser dans l'encart réservé aux remarques)", 'ZH + EF', 'Remblais'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VE0345') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2,d3]).sort_values('id_site') +# '38VE0331' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38VE0331') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38VE0331') & (df_Rprocess.activ_humaine.isna())].copy() 
+d1[['activ_humaine', 'position']] = ['Élevage / pastoralisme', 'ZH + EF'] +d2[['activ_humaine', 'position']] = ['Infrastructures linéaires (routes, voies ferrées)', 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VE0331') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Agriculture', 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2]).sort_values('id_site') +# '38VE0338' ADD ['Urbanisation', 'ZH'] +d = df_Rprocess[(df_Rprocess.id_site=='38VE0338') & (df_Rprocess.activ_humaine.isna())].copy() +d[['activ_humaine', 'position']] = ['Infrastructures linéaires (routes, voies ferrées)', 'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VE0338') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Élevage / pastoralisme', 'ZH + EF'] +df_Rprocess = pd.concat([df_Rprocess, d]).sort_values('id_site') +# '38VA0006' ADD ['Urbanisation', 'ZH'] +d1 = df_Rprocess[(df_Rprocess.id_site=='38VA0006') & (df_Rprocess.activ_humaine.isna())].copy() +d2 = df_Rprocess[(df_Rprocess.id_site=='38VA0006') & (df_Rprocess.activ_humaine.isna())].copy() +d1[['activ_humaine', 'position']] = ['Pêche', 'ZH'] +d2[['activ_humaine', 'position']] = ['Tourisme et loisirs (camping, zone de stationnement)', 'ZH'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VA0006') & (df_Rprocess.activ_humaine.isna()),['activ_humaine', 'position']] = ['Activité hydroélectrique, barrage', 'ZH'] +df_Rprocess = pd.concat([df_Rprocess, d1,d2]).sort_values('id_site') + + +import pymedwet as pym +# Récupération du dictionnaire des impacts +df_imp = pym.medwet.__get_DicGenIMP__() \ + .rename(columns={'CODE':'id','DESCR':'nom'}) +df_impact = df_imp.copy() +df_impact.nom = df_impact.nom.str[0].str.upper() + df_impact.nom.str[1:] +# Incrémentation du dictionnaire des impacts +if not isin_bdd: + df_impact.to_sql( + name='param_impact', + con = con_zh, + schema='zones_humides', + index=False, + # index_label='id', + if_exists='append', + method='multi' + ) + print("INSERT ... 
ok !") +# Récupération des impacts +df_imp = pd.read_sql_table( + table_name='param_impact', + con = con_zh, + schema='zones_humides', + columns=['id','nom'] +) +med1 = pym.medwet.get_usage_process(pym.db_file1) +med2 = pym.medwet.get_usage_process(pym.db_file2) +med = pd.concat([med1, med2]).sort_values('SITE_COD') +med.ACTIVITE_HUM = med.ACTIVITE_HUM.str[0].str.upper() + med.ACTIVITE_HUM.str[1:].str.lower() +med.LOCALISATION = med.LOCALISATION.str[0].str.upper() + med.LOCALISATION.str[1:].str.lower() +med.LOCALISATION = med.LOCALISATION.replace(df_Ppostion.description.to_list(),df_Ppostion.nom.to_list()) +merge_med = med[['SITE_COD', 'ACTIVITE_HUM','LOCALISATION', 'IMPACT']].copy() # 'ACTIV_TYPO' +# Merge usage_process - impact +df_Rprocess = pd.merge(df_Rprocess, merge_med, + left_on = ['id_site','activ_humaine','position'], + right_on = ['SITE_COD','ACTIVITE_HUM','LOCALISATION'], + how = 'left') \ + .drop(columns=['SITE_COD','ACTIVITE_HUM','LOCALISATION']) \ + .rename(columns={'IMPACT':'impact'}) +# Ajout des impacts pour les zones + + +# '38RH0261' +df_Rprocess.loc[(df_Rprocess.id_site=='38RH0261') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Élevage / pastoralisme"), + ['impact', 'position']] = [ df_imp.loc[df_imp.id == '45.0','nom'].item() ,'EF'] +# '38VE0281' +df_Rprocess.loc[(df_Rprocess.id_site=='38VE0281') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Industrie"), + ['impact', 'position']] = [ df_imp.loc[df_imp.id == '31.0','nom'].item() ,'ZH'] +d1 = df_Rprocess.loc[(df_Rprocess.id_site=='38VE0281') & (df_Rprocess.activ_humaine=="Élevage / pastoralisme"),].copy() +d2 = d1.copy() +d1[['activ_humaine','impact','position']] = ["Élevage / pastoralisme", df_imp.loc[df_imp.id == '45.0','nom'].item() ,'EF'] +d2[['activ_humaine','impact','rmq_activ_hum','position']] = ['Élevage / pastoralisme', df_imp.loc[df_imp.id == '46.0','nom'].item() ,'Fauche','EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38VE0281') & (df_Rprocess.activ_humaine=="Élevage / pastoralisme"), + ['rmq_activ_hum']] = ['Prairies labourées, resemées, pour parties les moins pentues'] +df_Rprocess = pd.concat([df_Rprocess, d1, d2]).sort_values('id_site') +# '38VS0033' +df_Rprocess.drop( + df_Rprocess.loc[(df_Rprocess.id_site=='38VS0033') & (df_Rprocess.position=='ERROR') & + (df_Rprocess.activ_humaine=="Autre (préciser dans l'encart réservé aux remarques)")].index, inplace=True) +# '38QV0065' +df_Rprocess.loc[(df_Rprocess.id_site=='38QV0065') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Sylviculture"), + ['impact', 'position']] = [ df_imp.loc[df_imp.id == '54.0','nom'].item() ,'EF'] +# '38MA0057' +df_Rprocess.loc[(df_Rprocess.id_site=='38MA0057') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Sylviculture"), + ['impact', 'position']] = [ df_imp.loc[df_imp.id == '54.0','nom'].item() ,'EF'] +# '38FP0084' +df_Rprocess.loc[(df_Rprocess.id_site=='38FP0084') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Autre (préciser dans l'encart réservé aux remarques)"), + ['impact','activ_hum_autre', 'position']] = [ df_imp.loc[df_imp.id == '91.4','nom'].item() ,'Espèces invasives','ZH + EF'] +# '38FP0081' +df_Rprocess.loc[(df_Rprocess.id_site=='38FP0081') & (df_Rprocess.position=='ERROR') & +(df_Rprocess.activ_humaine=="Autre (préciser dans l'encart réservé aux remarques)") & (df_Rprocess.rmq_activ_hum=="épandage de granules type NPK sur pâture"), + ['impact','rmq_activ_hum', 'position']] = [ df_imp.loc[df_imp.id == '44.0','nom'].item() 
,'Drainage','EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38FP0081') & (df_Rprocess.position=='ERROR') & +(df_Rprocess.activ_humaine=="Autre (préciser dans l'encart réservé aux remarques)") & (df_Rprocess.rmq_activ_hum=="solidage"), + ['impact','rmq_activ_hum', 'position']] = [ df_imp.loc[df_imp.id == '91.4','nom'].item() ,'Solidage','EF'] +# '38FP0072' +df_Rprocess.loc[(df_Rprocess.id_site=='38FP0072') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Autre (préciser dans l'encart réservé aux remarques)"), + ['impact', 'position']] = [ df_imp.loc[df_imp.id == '46.0','nom'].item() ,'EF'] +# '38FP0069' +df_Rprocess.loc[(df_Rprocess.id_site=='38FP0069') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Autre (préciser dans l'encart réservé aux remarques)"), + ['impact','rmq_activ_hum', 'position']] = [ df_imp.loc[df_imp.id == '34.0','nom'].item() ,'Remblais','EF'] +# '38FP0067' +df_Rprocess.loc[(df_Rprocess.id_site=='38FP0067') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Chasse"), + ['impact', 'position']] = [ df_imp.loc[df_imp.id == '62.0','nom'].item() ,'ZH + EF'] +# '38FP0065' +df_Rprocess.loc[(df_Rprocess.id_site=='38FP0065') & (df_Rprocess.position=='ERROR') & +(df_Rprocess.activ_humaine=="Autre (préciser dans l'encart réservé aux remarques)") & (df_Rprocess.rmq_activ_hum=="drainage"), + ['impact','rmq_activ_hum', 'position']] = [ df_imp.loc[df_imp.id == '31.0','nom'].item() ,'Drainage','ZH'] +df_Rprocess.loc[(df_Rprocess.id_site=='38FP0065') & (df_Rprocess.position=='ERROR') & +(df_Rprocess.activ_humaine=="Autre (préciser dans l'encart réservé aux remarques)") & (df_Rprocess.rmq_activ_hum=="solidage"), + ['impact','rmq_activ_hum', 'position']] = [ df_imp.loc[df_imp.id == '91.4','nom'].item() ,'Solidage','ZH'] +# '38GL0025' +df_Rprocess.loc[(df_Rprocess.id_site=='38GL0025') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Prélèvements d'eau"), + ['impact', 'position']] = [ df_imp.loc[df_imp.id == '36.0','nom'].item() ,'ZH'] +# '38GL0024' +df_Rprocess.loc[(df_Rprocess.id_site=='38GL0024') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Élevage / pastoralisme"), + ['impact', 'position']] = [ df_imp.loc[df_imp.id == '45.0','nom'].item() ,'ZH + EF'] +# '38GL0023' +df_Rprocess.loc[(df_Rprocess.id_site=='38GL0023') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Élevage / pastoralisme"), + ['impact', 'position']] = [ df_imp.loc[df_imp.id == '45.0','nom'].item() ,'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38GL0023') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Tourisme et loisirs (camping, zone de stationnement)"), + ['impact','rmq_activ_hum', 'position']] = [ df_imp.loc[df_imp.id == '30','nom'].item() ,'Ski de fond','ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38GL0023') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Autre (préciser dans l'encart réservé aux remarques)"), + ['impact', 'position']] = [ df_imp.loc[df_imp.id == '15.0','nom'].item() ,'ZH + EF'] +# '38GL0022' +df_Rprocess.loc[(df_Rprocess.id_site=='38GL0022') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Agriculture"), + ['impact', 'position']] = [ df_imp.loc[df_imp.id == '45.0','nom'].item() ,'ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38GL0022') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Tourisme et loisirs (camping, zone de stationnement)"), + ['impact','rmq_activ_hum', 'position']] = [ df_imp.loc[df_imp.id == '30','nom'].item() 
,'Ski','ZH + EF'] +df_Rprocess.loc[(df_Rprocess.id_site=='38GL0022') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Prélèvements d'eau"), + ['impact','rmq_activ_hum', 'position']] = [ df_imp.loc[df_imp.id == '31.0','nom'].item() ,'Drainage','ZH + EF'] +# '38GL0021' +df_Rprocess.loc[(df_Rprocess.id_site=='38GL0021') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Agriculture"), + ['impact', 'position']] = [ df_imp.loc[df_imp.id == '45.0','nom'].item() ,'ZH + EF'] +# '38GL0020' +df_Rprocess.drop( + df_Rprocess.loc[(df_Rprocess.id_site=='38GL0020') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Agriculture"),].index, + inplace=True) +# '38GL0019' +df_Rprocess.loc[(df_Rprocess.id_site=='38GL0019') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Agriculture"), + ['impact', 'position']] = [ df_imp.loc[df_imp.id == '45.0','nom'].item() ,'ZH + EF'] +# '38DA0017' +df_Rprocess.loc[(df_Rprocess.id_site=='38DA0017') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Pas d'activité marquante"), + ['impact', 'position']] = [ df_imp.loc[df_imp.id == '0','nom'].item() ,'ZH + EF'] +# '38DA0010' +df_Rprocess.loc[(df_Rprocess.id_site=='38DA0010') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Pas d'activité marquante"), + ['impact', 'position']] = [ df_imp.loc[df_imp.id == '0','nom'].item() ,'ZH + EF'] +# '38CG0142' +df_Rprocess.loc[(df_Rprocess.id_site=='38CG0142') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Prélèvements d'eau"), + ['impact', 'position']] = [ df_imp.loc[df_imp.id == '36.0','nom'].item() ,'ZH'] +# '38CG0135' +df_Rprocess.loc[(df_Rprocess.id_site=='38CG0135') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Infrastructures linéaires (routes, voies ferrées)"), + ['impact', 'position']] = [ df_imp.loc[df_imp.id == '53.0','nom'].item() ,'ZH + EF'] +# '38CG0131' +df_Rprocess.loc[(df_Rprocess.id_site=='38CG0131') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Chasse"), + ['impact', 'position']] = [ df_imp.loc[df_imp.id == '13.0','nom'].item() ,'ZH + EF'] +# '38CG0112' +df_Rprocess.loc[(df_Rprocess.id_site=='38CG0112') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Sylviculture"), + ['impact', 'position']] = [ df_imp.loc[df_imp.id == '62.0','nom'].item() ,'ZH + EF'] +# '38BO0271' +df_Rprocess.drop(df_Rprocess.loc[(df_Rprocess.id_site=='38BO0271') & (df_Rprocess.position=='ERROR') & +(df_Rprocess.activ_humaine=="Autre (préciser dans l'encart réservé aux remarques)")].index, inplace=True) +# '38BI0127' +df_Rprocess.loc[(df_Rprocess.id_site=='38BI0127') & (df_Rprocess.position=='ERROR') & (df_Rprocess.activ_humaine=="Autre (préciser dans l'encart réservé aux remarques)"), + ['activ_hum_autre','impact', 'position']] = ['Gestion privée', df_imp.loc[df_imp.id == '35.0','nom'].item() ,'ZH'] + + +# 38RD0163 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0163') & + (df_Rprocess.activ_humaine =="Infrastructures linéaires (routes, voies ferrées)") & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '13.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0163') & + (df_Rprocess.activ_humaine =="Élevage / pastoralisme") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '45.0','nom'].item() +# df_Rprocess = pd.concat([df_Rprocess, d1,d2]).sort_values('id_site') +# 38RD0162 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0162') & + (df_Rprocess.activ_humaine =="Tourisme et loisirs 
(camping, zone de stationnement)") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '61.0','nom'].item() +# df_Rprocess = pd.concat([df_Rprocess, d1]).sort_values('id_site') +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0162') & + (df_Rprocess.activ_humaine =='Élevage / pastoralisme') & + (df_Rprocess.position == 'ZH + EF'),].copy() +d1['impact'] = df_imp.loc[df_imp.id == '21.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0162') & + (df_Rprocess.activ_humaine =="Élevage / pastoralisme") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '45.0','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1]).sort_values('id_site') +# 38RD0161 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0161') & + (df_Rprocess.activ_humaine =="Tourisme et loisirs (camping, zone de stationnement)") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '24.0','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1]).sort_values('id_site') +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0161') & + (df_Rprocess.activ_humaine =="Élevage / pastoralisme") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '45.0','nom'].item() +# 38RD0159 +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0159') & + (df_Rprocess.activ_humaine =='Élevage / pastoralisme') & + (df_Rprocess.position == 'ZH + EF'),].copy() +d2 = d1.copy() +d1['impact'] = df_imp.loc[df_imp.id == '45.0','nom'].item() +d2['impact'] = df_imp.loc[df_imp.id == '21.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0159') & + (df_Rprocess.activ_humaine =="Élevage / pastoralisme") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '91.2','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1,d2]).sort_values('id_site') +# 38RD0158 +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0158') & + (df_Rprocess.activ_humaine =='Tourisme et loisirs (camping, zone de stationnement)') & + (df_Rprocess.position == 'ZH + EF'),].copy() +d1['impact'] = df_imp.loc[df_imp.id == '21.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0158') & + (df_Rprocess.activ_humaine =="Tourisme et loisirs (camping, zone de stationnement)") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '22.0','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1]).sort_values('id_site') +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0158') & + (df_Rprocess.activ_humaine =='Élevage / pastoralisme') & + (df_Rprocess.position == 'ZH + EF'),].copy() +d2 = d1.copy() +d1['impact'] = df_imp.loc[df_imp.id == '45.0','nom'].item() +d2['impact'] = df_imp.loc[df_imp.id == '21.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0158') & + (df_Rprocess.activ_humaine =="Élevage / pastoralisme") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '91.2','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1,d2]).sort_values('id_site') +# 38RD0157 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0157') & + (df_Rprocess.activ_humaine =="Infrastructures linéaires (routes, voies ferrées)") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '36.0','nom'].item() +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0157') & + (df_Rprocess.activ_humaine =='Élevage / pastoralisme') & + (df_Rprocess.position == 'ZH + EF'),].copy() +d2 = d1.copy() +d1['impact'] = df_imp.loc[df_imp.id == '45.0','nom'].item() +d2['impact'] = df_imp.loc[df_imp.id == 
'21.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0157') & + (df_Rprocess.activ_humaine =="Élevage / pastoralisme") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '91.2','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1,d2]).sort_values('id_site') +# 38RD0156 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0156') & + (df_Rprocess.activ_humaine =="Élevage / pastoralisme") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '45.0','nom'].item() +# 38RD0155 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0155') & + (df_Rprocess.activ_humaine =="Tourisme et loisirs (camping, zone de stationnement)") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '24.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0155') & + (df_Rprocess.activ_humaine =="Élevage / pastoralisme") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '45.0','nom'].item() +# 38RD0153 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0153') & + (df_Rprocess.activ_humaine =="Élevage / pastoralisme") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '45.0','nom'].item() +# 38RD0152 +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0152') & + (df_Rprocess.activ_humaine =='Tourisme et loisirs (camping, zone de stationnement)') & + (df_Rprocess.position == 'ZH + EF'),].copy() +d2 = d1.copy() +d1['impact'] = df_imp.loc[df_imp.id == '24.0','nom'].item() +d2['impact'] = df_imp.loc[df_imp.id == '21.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0152') & + (df_Rprocess.activ_humaine =="Tourisme et loisirs (camping, zone de stationnement)") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '61.0','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1,d2]).sort_values('id_site') +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0152') & + (df_Rprocess.activ_humaine =='Élevage / pastoralisme') & + (df_Rprocess.position == 'ZH + EF'),].copy() +d2 = d1.copy() +d1['impact'] = df_imp.loc[df_imp.id == '21.0','nom'].item() +d2['impact'] = df_imp.loc[df_imp.id == '45.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0152') & + (df_Rprocess.activ_humaine =='Élevage / pastoralisme') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '24.0','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1,d2]).sort_values('id_site') +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0152') & + (df_Rprocess.activ_humaine =="Urbanisation") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '11.0','nom'].item() +# 38RD0151 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0151') & + (df_Rprocess.activ_humaine =="Tourisme et loisirs (camping, zone de stationnement)") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '61.0','nom'].item() +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0151') & + (df_Rprocess.activ_humaine =='Élevage / pastoralisme') & + (df_Rprocess.position == 'ZH + EF'),].copy() +d2 = d1.copy() +d1['impact'] = df_imp.loc[df_imp.id == '21.0','nom'].item() +d2['impact'] = df_imp.loc[df_imp.id == '45.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0151') & + (df_Rprocess.activ_humaine =='Élevage / pastoralisme') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '24.0','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1,d2]).sort_values('id_site') +df_Rprocess.loc[ + 
(df_Rprocess.id_site=='38RD0151') & + (df_Rprocess.activ_humaine =="Prélèvements d'eau") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '36.0','nom'].item() +# 38RD0150 +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0150') & + (df_Rprocess.activ_humaine =='Élevage / pastoralisme') & + (df_Rprocess.position == 'ZH + EF'),].copy() +d2 = d1.copy() +d1['impact'] = df_imp.loc[df_imp.id == '21.0','nom'].item() +d2['impact'] = df_imp.loc[df_imp.id == '45.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0150') & + (df_Rprocess.activ_humaine =='Élevage / pastoralisme') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '24.0','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1,d2]).sort_values('id_site') +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0150') & + (df_Rprocess.activ_humaine =="Chasse") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '62.0','nom'].item() +# 38RD0149 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0149') & + (df_Rprocess.activ_humaine =='Tourisme et loisirs (camping, zone de stationnement)') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '61.0','nom'].item() +# 38RD0148 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0148') & + (df_Rprocess.activ_humaine =='Tourisme et loisirs (camping, zone de stationnement)') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '61.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0148') & + (df_Rprocess.activ_humaine =="Chasse") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '62.0','nom'].item() +# 38VS0057 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0057') & + (df_Rprocess.activ_humaine =='Pêche') & + (df_Rprocess.position == 'ZH'), + 'impact'] = df_imp.loc[df_imp.id == '63.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0057') & + (df_Rprocess.activ_humaine =='Tourisme et loisirs (camping, zone de stationnement)') & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '61.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0057') & + (df_Rprocess.activ_humaine =='Urbanisation') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '11.0','nom'].item() +# 38VS0056 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0056') & + (df_Rprocess.activ_humaine =='Agriculture') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '41.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0056') & + (df_Rprocess.activ_humaine =="Autre (préciser dans l'encart réservé aux remarques)") & + (df_Rprocess.position == 'ZH'), + 'impact'] = df_imp.loc[df_imp.id == '34.0','nom'].item() +# 38VS0055 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0055') & + (df_Rprocess.activ_humaine =='Sylviculture') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '51.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0055') & + (df_Rprocess.activ_humaine =='Pêche') & + (df_Rprocess.position == 'ZH'), + 'impact'] = df_imp.loc[df_imp.id == '63.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0055') & + (df_Rprocess.activ_humaine =='Tourisme et loisirs (camping, zone de stationnement)') & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '16.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0055') & + (df_Rprocess.activ_humaine =='Urbanisation') & + 
(df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '11.0','nom'].item() +# 38VS0054 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0054') & + (df_Rprocess.activ_humaine =='Agriculture') & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '41.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0054') & + (df_Rprocess.activ_humaine =='Sylviculture') & + (df_Rprocess.position == 'ZH'), + 'impact'] = df_imp.loc[df_imp.id == '53.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0054') & + (df_Rprocess.activ_humaine =='Pêche') & + (df_Rprocess.position == 'ZH'), + 'impact'] = df_imp.loc[df_imp.id == '63.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0054') & + (df_Rprocess.activ_humaine =='Chasse') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '62.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0054') & + (df_Rprocess.activ_humaine =='Tourisme et loisirs (camping, zone de stationnement)') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '61.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0054') & + (df_Rprocess.activ_humaine =="Prélèvements d'eau") & + (df_Rprocess.position == 'ZH'), + 'impact'] = df_imp.loc[df_imp.id == '36.0','nom'].item() +# 38VS0053 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0053') & + (df_Rprocess.activ_humaine =='Agriculture') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '41.0','nom'].item() +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0053') & + (df_Rprocess.activ_humaine =='Urbanisation') & + (df_Rprocess.position == 'EF'),].copy() +d1['impact'] = df_imp.loc[df_imp.id == '11.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0053') & + (df_Rprocess.activ_humaine =='Urbanisation') & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '17.0','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1]).sort_values('id_site') +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0053') & + (df_Rprocess.activ_humaine =='Infrastructures linéaires (routes, voies ferrées)') & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '13.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0053') & + (df_Rprocess.activ_humaine =="Autre (préciser dans l'encart réservé aux remarques)") & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '15.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0053') & + (df_Rprocess.activ_humaine =="Autre (préciser dans l'encart réservé aux remarques)") & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '34.0','nom'].item() +# 38VS0052 +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0052') & + (df_Rprocess.activ_humaine =='Agriculture') & + (df_Rprocess.position == 'EF'),].copy() +d1['impact'] = df_imp.loc[df_imp.id == '41.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0052') & + (df_Rprocess.activ_humaine =='Agriculture') & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '43.0','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1]).sort_values('id_site') +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0052') & + (df_Rprocess.activ_humaine =='Sylviculture') & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '53.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0052') & + (df_Rprocess.activ_humaine =='Élevage / pastoralisme') & + 
(df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '45.0','nom'].item() +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0052') & + (df_Rprocess.activ_humaine =='Pêche') & + (df_Rprocess.position == 'ZH'),].copy() +d1['impact'] = df_imp.loc[df_imp.id == '73.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0052') & + (df_Rprocess.activ_humaine =='Pêche') & + (df_Rprocess.position == 'ZH'), + 'impact'] = df_imp.loc[df_imp.id == '63.0','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1]).sort_values('id_site') +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0052') & + (df_Rprocess.activ_humaine =='Tourisme et loisirs (camping, zone de stationnement)') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '61.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0052') & + (df_Rprocess.activ_humaine =='Urbanisation') & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '11.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0052') & + (df_Rprocess.activ_humaine =='Infrastructures linéaires (routes, voies ferrées)') & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '13.0','nom'].item() +# 38VS0051 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0051') & + (df_Rprocess.activ_humaine =='Agriculture') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '41.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0051') & + (df_Rprocess.activ_humaine =='Élevage / pastoralisme') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '45.0','nom'].item() +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0051') & + (df_Rprocess.activ_humaine =='Urbanisation') & + (df_Rprocess.position == 'EF'),].copy() +d1['impact'] = df_imp.loc[df_imp.id == '11.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0051') & + (df_Rprocess.activ_humaine =='Urbanisation') & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '17.0','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1]).sort_values('id_site') +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0051') & + (df_Rprocess.activ_humaine =="Prélèvements d'eau") & + (df_Rprocess.position == 'ZH'), + 'impact'] = df_imp.loc[df_imp.id == '36.0','nom'].item() +# 38VS0050 +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0050') & + (df_Rprocess.activ_humaine =='Agriculture') & + (df_Rprocess.position == 'EF'),].copy() +d1['impact'] = df_imp.loc[df_imp.id == '41.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0050') & + (df_Rprocess.activ_humaine =='Agriculture') & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '47.4','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1]).sort_values('id_site') +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0050') & + (df_Rprocess.activ_humaine =='Urbanisation') & + (df_Rprocess.position == 'EF'),].copy() +d1['impact'] = df_imp.loc[df_imp.id == '11.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0050') & + (df_Rprocess.activ_humaine =='Urbanisation') & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '17.0','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1]).sort_values('id_site') +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0050') & + (df_Rprocess.activ_humaine =='Infrastructures linéaires (routes, voies ferrées)') & + (df_Rprocess.position == 'ZH + EF'),].copy() +d1['impact'] = df_imp.loc[df_imp.id == 
'13.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0050') & + (df_Rprocess.activ_humaine =='Infrastructures linéaires (routes, voies ferrées)') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '21.0','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1]).sort_values('id_site') +# 38VS0049 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0049') & + (df_Rprocess.activ_humaine =='Agriculture') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '41.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0049') & + (df_Rprocess.activ_humaine =='Élevage / pastoralisme') & + (df_Rprocess.position == 'ZH + EF'), + ['impact','rmq_activ_hum']] = [df_imp.loc[df_imp.id == '45.0','nom'].item(), 'Equin'] +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0049') & + (df_Rprocess.activ_humaine =='Pêche') & + (df_Rprocess.position == 'ZH'), + 'impact'] = df_imp.loc[df_imp.id == '63.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0049') & + (df_Rprocess.activ_humaine =='Industrie') & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '12.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0049') & + (df_Rprocess.activ_humaine =='Infrastructures linéaires (routes, voies ferrées)') & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '13.0','nom'].item() + +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0049') & + (df_Rprocess.activ_humaine =='Sylviculture') & + (df_Rprocess.position == 'ZH + EF'),].copy() +d2 = d1.copy() +d1['impact'] = df_imp.loc[df_imp.id == '51.0','nom'].item() +d2['impact'] = df_imp.loc[df_imp.id == '53.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0049') & + (df_Rprocess.activ_humaine =='Sylviculture') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '55.0','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1,d2]).sort_values('id_site') +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0049') & + (df_Rprocess.activ_humaine =='Tourisme et loisirs (camping, zone de stationnement)') & + (df_Rprocess.position == 'ZH'),].copy() +d2 = d1.copy() +d3 = d1.copy() +d1['impact'] = df_imp.loc[df_imp.id == '16.0','nom'].item() +d2['impact'] = df_imp.loc[df_imp.id == '32.0','nom'].item() +d3['impact'] = df_imp.loc[df_imp.id == '61.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0049') & + (df_Rprocess.activ_humaine =='Tourisme et loisirs (camping, zone de stationnement)') & + (df_Rprocess.position == 'ZH'), + 'impact'] = df_imp.loc[df_imp.id == '73.0','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1,d2,d3]).sort_values('id_site') +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0049') & + (df_Rprocess.activ_humaine =='Urbanisation') & + (df_Rprocess.position == 'ZH + EF'),].copy() +d2 = d1.copy() +d3 = d1.copy() +d1['impact'] = df_imp.loc[df_imp.id == '11.0','nom'].item() +d2['impact'] = df_imp.loc[df_imp.id == '15.0','nom'].item() +d3['impact'] = df_imp.loc[df_imp.id == '34.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0049') & + (df_Rprocess.activ_humaine =='Urbanisation') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '17.0','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d1,d2,d3]).sort_values('id_site') +d1 = df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0049') & + (df_Rprocess.activ_humaine =="Autre (préciser dans l'encart réservé aux remarques)") & + (df_Rprocess.position == 'ZH'),].copy() +d1['impact'] = 
df_imp.loc[df_imp.id == '34.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0049') & + (df_Rprocess.activ_humaine =="Autre (préciser dans l'encart réservé aux remarques)") & + (df_Rprocess.position == 'ZH'), + ['impact','rmq_activ_hum']] = [df_imp.loc[df_imp.id == '15.0','nom'].item(), 'Plusieurs remblaiement constatés'] +df_Rprocess = pd.concat([df_Rprocess, d1]).sort_values('id_site') +# 38VS0048 +d = df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0048') & + (df_Rprocess.activ_humaine =='Urbanisation') & + (df_Rprocess.position == 'EF'),].copy() +d['impact'] = df_imp.loc[df_imp.id == '11.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0048') & + (df_Rprocess.activ_humaine =='Urbanisation') & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '15.0','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d]).sort_values('id_site') +d = df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0048') & + (df_Rprocess.activ_humaine =="Autre (préciser dans l'encart réservé aux remarques)") & + (df_Rprocess.position == 'EF'),].copy() +d['impact'] = df_imp.loc[df_imp.id == '91.2','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0048') & + (df_Rprocess.activ_humaine =="Autre (préciser dans l'encart réservé aux remarques)") & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '91.4','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d]).sort_values('id_site') +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0048') & + (df_Rprocess.activ_humaine =="Industrie") & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '12.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0048') & + (df_Rprocess.activ_humaine =="Infrastructures linéaires (routes, voies ferrées)") & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '13.0','nom'].item() +# 38VS0047 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0047') & + (df_Rprocess.activ_humaine =='Élevage / pastoralisme') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '45.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0047') & + (df_Rprocess.activ_humaine =='Agriculture') & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '41.0','nom'].item() +# 38VS0046 +d = df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0046') & + (df_Rprocess.activ_humaine =='Agriculture') & + (df_Rprocess.position == 'ZH + EF'),].copy() +d['impact'] = df_imp.loc[df_imp.id == '17.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0046') & + (df_Rprocess.activ_humaine =='Agriculture') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '45.0','nom'].item() +df_Rprocess = pd.concat([df_Rprocess, d]).sort_values('id_site') +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0046') & + (df_Rprocess.activ_humaine =='Urbanisation') & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '11.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38VS0046') & + (df_Rprocess.activ_humaine =="Autre (préciser dans l'encart réservé aux remarques)") & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '81.0','nom'].item() +# 38RD0128 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0128') & + (df_Rprocess.activ_humaine =='Élevage / pastoralisme') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '45.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0128') & + (df_Rprocess.activ_humaine =='Chasse') & 
+ (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '62.0','nom'].item() +# 38RD0165 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0165') & + (df_Rprocess.activ_humaine =='Infrastructures linéaires (routes, voies ferrées)') & + (df_Rprocess.position == 'EF'), + ['impact','rmq_activ_hum']] = [df_imp.loc[df_imp.id == '13.0','nom'].item(), 'D526'] +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0165') & + (df_Rprocess.activ_humaine =='Extraction de granulats, mines') & + (df_Rprocess.position == 'ZH'), + ['impact','rmq_activ_hum']] = [df_imp.loc[df_imp.id == '34.0','nom'].item(), 'Extraction de gravats'] +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RD0165') & + (df_Rprocess.activ_humaine =='Activité hydroélectrique, barrage') & + (df_Rprocess.position == 'ZH'), + ['impact','rmq_activ_hum']] = [df_imp.loc[df_imp.id == '11.0','nom'].item(), 'Barrage'] +# 38RH0292 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RH0292') & + (df_Rprocess.activ_humaine =='Agriculture') & + (df_Rprocess.position == 'ZH'), + ['impact','rmq_activ_hum']] = [df_imp.loc[df_imp.id == '41.0','nom'].item(), 'Maïs'] +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RH0292') & + (df_Rprocess.activ_humaine =='Élevage / pastoralisme') & + (df_Rprocess.position == 'ZH + EF'), + ['impact','rmq_activ_hum']] = [df_imp.loc[df_imp.id == '41.0','nom'].item(), 'Equin'] +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RH0292') & + (df_Rprocess.activ_humaine =='Urbanisation') & + (df_Rprocess.position == 'ZH'), + ['impact','rmq_activ_hum']] = [df_imp.loc[df_imp.id == '11.0','nom'].item(), 'Camping, maison'] +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RH0292') & + (df_Rprocess.activ_humaine =='Tourisme et loisirs (camping, zone de stationnement)') & + (df_Rprocess.position == 'ZH'), + ['impact','rmq_activ_hum']] = [df_imp.loc[df_imp.id == '16.0','nom'].item(), "Paintball, parcours sur câble"] +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RH0292') & + (df_Rprocess.activ_humaine =="Infrastructures linéaires (routes, voies ferrées)") & + (df_Rprocess.position == 'ZH + EF'), + ['impact','rmq_activ_hum']] = [df_imp.loc[df_imp.id == '13.0','nom'].item(), 'Routes'] +df_Rprocess.loc[ + (df_Rprocess.id_site=='38RH0292') & + (df_Rprocess.activ_humaine =="Autre (préciser dans l'encart réservé aux remarques)") & + (df_Rprocess.position == 'ZH'), + 'impact'] = df_imp.loc[df_imp.id == '31.0','nom'].item() +# 38CG0110 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38CG0110') & + (df_Rprocess.activ_humaine =='Agriculture') & + (df_Rprocess.position == 'ZH'), + 'impact'] = df_imp.loc[df_imp.id == '41.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38CG0110') & + (df_Rprocess.activ_humaine =='Chasse') & + (df_Rprocess.position == 'EF'), + 'impact'] = df_imp.loc[df_imp.id == '62.0','nom'].item() +df_Rprocess.loc[ + (df_Rprocess.id_site=='38CG0110') & + (df_Rprocess.activ_humaine =='Tourisme et loisirs (camping, zone de stationnement)') & + (df_Rprocess.position == 'EF'), + ['impact','rmq_activ_hum']] = [df_imp.loc[df_imp.id == '61.0','nom'].item(), "Tir à l'arc"] +df_Rprocess.loc[ + (df_Rprocess.id_site=='38CG0110') & + (df_Rprocess.activ_humaine =="Prélèvements d'eau") & + (df_Rprocess.position == 'ZH + EF'), + ['impact','rmq_activ_hum']] = [df_imp.loc[df_imp.id == '30','nom'].item(), 'Irrigation'] +# 38BO0058 +df_Rprocess.loc[ + (df_Rprocess.id_site=='38BO0058') & + (df_Rprocess.activ_humaine =='Agriculture') & + (df_Rprocess.position == 'ZH + EF'), + 'impact'] = df_imp.loc[df_imp.id == '41.0','nom'].item() +df_Rprocess.loc[ + 
(df_Rprocess.id_site=='38BO0058') &
+    (df_Rprocess.activ_humaine =='Pêche') &
+    (df_Rprocess.position == 'ZH'),
+    'impact'] = df_imp.loc[df_imp.id == '63.0','nom'].item()
+df_Rprocess.loc[
+    (df_Rprocess.id_site=='38BO0058') &
+    (df_Rprocess.activ_humaine =='Urbanisation') &
+    (df_Rprocess.position == 'EF'),
+    'impact'] = df_imp.loc[df_imp.id == '11.0','nom'].item()
+df_Rprocess.loc[
+    (df_Rprocess.id_site=='38BO0058') &
+    (df_Rprocess.activ_humaine =='Infrastructures linéaires (routes, voies ferrées)') &
+    (df_Rprocess.position == 'EF'),
+    'impact'] = df_imp.loc[df_imp.id == '13.0','nom'].item()
+# 38VE0331
+df_Rprocess.loc[
+    (df_Rprocess.id_site=='38VE0331') &
+    (df_Rprocess.activ_humaine =='Agriculture') &
+    (df_Rprocess.position == 'ZH + EF'),
+    'impact'] = df_imp.loc[df_imp.id == '41.0','nom'].item()
+df_Rprocess.loc[
+    (df_Rprocess.id_site=='38VE0331') &
+    (df_Rprocess.activ_humaine =='Élevage / pastoralisme') &
+    (df_Rprocess.position == 'ZH + EF'),
+    'impact'] = df_imp.loc[df_imp.id == '45.0','nom'].item()
+df_Rprocess.loc[
+    (df_Rprocess.id_site=='38VE0331') &
+    (df_Rprocess.activ_humaine =='Infrastructures linéaires (routes, voies ferrées)') &
+    (df_Rprocess.position == 'ZH + EF'),
+    'impact'] = df_imp.loc[df_imp.id == '31.0','nom'].item()
+# 38VE0338
+df_Rprocess.loc[
+    (df_Rprocess.id_site=='38VE0338') &
+    (df_Rprocess.activ_humaine =='Infrastructures linéaires (routes, voies ferrées)') &
+    (df_Rprocess.position == 'ZH + EF'),
+    'impact'] = df_imp.loc[df_imp.id == '13.0','nom'].item()
+# 38VE0345
+d = df_Rprocess.loc[
+    (df_Rprocess.id_site=='38VE0345') &
+    (df_Rprocess.activ_humaine =='Agriculture') &
+    (df_Rprocess.position == 'ZH + EF'),].copy()
+d['impact'] = df_imp.loc[df_imp.id == '31.0','nom'].item()
+df_Rprocess.loc[
+    (df_Rprocess.id_site=='38VE0345') &
+    (df_Rprocess.activ_humaine =='Agriculture') &
+    (df_Rprocess.position == 'ZH + EF'),
+    'impact'] = df_imp.loc[df_imp.id == '41.0','nom'].item()
+df_Rprocess = pd.concat([df_Rprocess, d]).sort_values('id_site')
+df_Rprocess.loc[
+    (df_Rprocess.id_site=='38VE0345') &
+    (df_Rprocess.activ_humaine =='Élevage / pastoralisme') &
+    (df_Rprocess.position == 'ZH + EF'),
+    'impact'] = df_imp.loc[df_imp.id == '45.0','nom'].item()
+df_Rprocess.loc[
+    (df_Rprocess.id_site=='38VE0345') &
+    (df_Rprocess.activ_humaine =='Infrastructures linéaires (routes, voies ferrées)') &
+    (df_Rprocess.position == 'EF'),
+    'impact'] = df_imp.loc[df_imp.id == '13.0','nom'].item()
+df_Rprocess.loc[
+    (df_Rprocess.id_site=='38VE0345') &
+    (df_Rprocess.activ_humaine =="Autre (préciser dans l'encart réservé aux remarques)") &
+    (df_Rprocess.position == 'ZH + EF'),
+    'impact'] = df_imp.loc[df_imp.id == '15.0','nom'].item()
+
+df_Rprocess.loc[df_Rprocess.activ_humaine=="Pas d'activité marquante",'impact'] = df_imp.loc[df_imp.id == '0','nom'].item()
+
+
+
+
+# Replace the literal values with their identifiers
+df_Rprocess['id_position'] = df_Rprocess.position.replace(df_Ppostion.nom.to_list(),df_Ppostion.index.to_list())
+df_Rprocess['id_activ_hum'] = df_Rprocess.activ_humaine.replace(df_ActHum.nom.to_list(),df_ActHum.id.to_list())
+df_Rprocess.impact = df_Rprocess.impact.str.lower()
+df_Rprocess['id_impact'] = df_Rprocess.impact.replace(df_imp.nom.str.lower().to_list(),df_imp.id.to_list())
+df_Rprocess.drop(columns=['id_site', 'activ_humaine', 'position','impact'], inplace=True)
+df_Rprocess.rename(
+    columns = {'rmq_activ_hum': 'remarques'},
+    inplace = True
+)
+df_Rprocess.reset_index(drop=True, inplace=True)
+df_Rprocess.dropna(subset=['id_activ_hum'], inplace=True)
+df_Rprocess.id_activ_hum = df_Rprocess.id_activ_hum.astype(int)
+df_Rprocess.id_position = df_Rprocess.id_position.astype(int)
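+# NB - editor's sketch, not part of the original pipeline: replace() with two
+# parallel lists leaves any unmatched label untouched, so a stale wording slips
+# through silently; a dict-based map() would surface it as NaN instead, e.g.:
+# df_Rprocess['id_impact'] = df_Rprocess.impact.map(
+#     dict(zip(df_imp.nom.str.lower(), df_imp.id)))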
+# Insert the usages and processes
+if not isin_bdd:
+    df_Rprocess.to_sql(
+        name='r_site_usageprocess',
+        con = con_zh,
+        schema='zones_humides',
+        index=True,
+        index_label='id',
+        if_exists='append',
+        method='multi'
+    )
+    print("INSERT ... ok !")
+
+
+
+
+# df_Rprocess.loc[~df_Rprocess.activ_hum_autre.isna(),['id_site','rmq_activ_hum','activ_hum_autre']]
+# df_Rprocess.loc[~(
+#     (df_Rprocess.activ_humaine == df_Rprocess.ACTIV_TYPO) | (df_Rprocess.activ_hum_autre == df_Rprocess.ACTIV_TYPO) | df_Rprocess.ACTIV_TYPO.isna())
+#     , ['id_geom_site','id_site','activ_hum_autre','ACTIV_TYPO']] = None
+
+
+# Look for the 'other activities' that medwet specifies but that zh leaves unspecified
+# lst_zhsit = df_Rprocess[~df_Rprocess.activ_hum_autre.isna()].id_site.tolist()
+# tmp = med1[~med1.ACTIV_TYPO.eq(med1.ACTIVITE_HUM) & (~med1.ACTIV_TYPO.isna()) & (~med1.SITE_COD.isin(lst_zhsit))]
+# df_Rprocess.loc[
+#     (df_Rprocess.id_site.isin(tmp.SITE_COD)) &
+#     (df_Rprocess.activ_humaine == df_ActHum.loc[df_ActHum.id==21, 'nom'].values[0])
+#     , :]
+#     , 'activ_hum_autre']
+
+# df_Rprocess[df_Rprocess.activ_humaine.isna()].sort_values('id_site')
+# df_Rprocess[df_Rprocess.id_site=='38VE0331']
+# dc[dc.CODE=='13.0'].DESCR
+# '38RD0023' ==> ????? No idea where this one is ...
+# '38RD0126' ==> ????? No idea where this one is ...
+# '38VE0227' ==> Gone since 2021
+# '38VE0331' ==> Unknown
+# '38VE0338' ==> Unknown
+# '38VE0344' ==> Unknown
+# '38VE0345' ==> Unknown
+# '38VA0006' ==> Dilemma: DB = "Pas d'activité marquante" / field sheet = "Activité hydroélectrique, barrage"
+
+
+######## NOT NaN IN activ_humaine FOR NEXT ---> ###########
+df_Rprocess.position.replace(df_Ppostion.nom.to_list(),df_Ppostion.index.to_list(), inplace=True)
+df_Rprocess.activ_humaine.replace(df_ActHum.nom.to_list(),df_ActHum.id.to_list(), inplace=True)
+df_Rprocess.impact.replace(df_imp.nom.to_list(),df_imp.id.to_list(), inplace=True)
+######## WHAT activ_hum_autre ??? FOR NEXT ---> ###########
+
+
+if not isin_bdd:
+    df_Rprocess.to_sql(
+        name='r_site_usageprocess',
+        con = con_zh,
+        schema='zones_humides',
+        index=True,
+        index_label='id',
+        if_exists='append',
+    )
+    print("INSERT ... ok !")
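+# The block below splits 'usage (description)' strings in two passes (split on
+# the opening bracket, then strip the closing one); a single-pass equivalent
+# (editor's sketch, assuming the same 'name (details)' layout) would be:
+# df_Rprocess[['usages_process', 'description']] = (
+#     df_Rprocess['usages_process_natu']
+#     .str.extract(r'^(?P<usages_process>[^(]*)(?:\((?P<description>[^)]*)\))?'))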
ok !") + + + + +df_Rprocess[['usages_process', 'description']] = df_Rprocess['usages_process_natu'].str.split('\(', expand=True) +df_Rprocess['description'] = df_Rprocess['description'].str.replace('\)','', regex=True) +# Récupération des descriptions d'usages +process_describ = df_Rprocess.loc[~df_Rprocess.description.isna(), ['usages_process','description']] +process_describ['usages_process'] = process_describ['usages_process'].str.strip() +process_describ['description'] = process_describ['description'].str.strip() +process_describ['usages_process'] = process_describ['usages_process'].str[0].str.upper() + process_describ['usages_process'].str[1:].str.lower() +process_describ.drop_duplicates(inplace=True) +del df_Rprocess['description'] +del df_Rprocess['usages_process_natu'] +tmp = ['tourisme et loisirs', 'aérodrome, aéroport, héliport', 'extraction de granulats, mines', 'activité hydroélectrique, barrage'] +d = df_Rprocess[['usages_process']] +splt = '|'.join([',', ' et ']) +dd = d[~d.usages_process.str.lower().str.contains('|'.join(tmp))] +dd = dd.usages_process.str.split(splt).apply(pd.Series).stack() +d = pd.concat( + [d[d.usages_process.str.lower().str.contains('|'.join(tmp))], + pd.DataFrame(dd, columns=['usages_process'], index=dd.index).droplevel(1) ] +).sort_index() +del df_Rprocess['usages_process'] +df_Rprocess = df_Rprocess.merge(d, on='id',how='left') +for col in df_Rprocess.columns: + if col != 'id_geom_site': + df_Rprocess[col] = df_Rprocess[col].str.strip() +df_Rprocess['usages_process'] = df_Rprocess['usages_process'].str[0].str.upper() + df_Rprocess['usages_process'].str[1:].str.lower() +df_Rprocess.drop_duplicates(inplace=True) +df_Rprocess.dropna(subset=['usages_process'], inplace=True) +df_Rprocess.reset_index(inplace=True) +df_Rprocess = df_Rprocess.groupby(['id','id_geom_site', 'usages_process', 'position'])['rmq_activ_hum'].apply(' '.join).str.strip() +df_Rprocess.replace(' ', ', ', inplace=True, regex=True) +df_Rprocess = df_Rprocess.reset_index(['id_geom_site', 'usages_process', 'position']) +df_Rprocess['description'] = None +for i, row in process_describ.iterrows(): + df_Rprocess.loc[df_Rprocess.usages_process == row.usages_process,'description'] = row.description +df_Rprocess.usages_process.replace( + ['Dépots', 'Dépôt sauvage', 'Atterissement', "Entretien plan d'eau", "Création plan d'eau", 'Déchèterie communale' ,"Canalisation d'eau", 'Surpiétinement'], + ['Dépôts', 'Dépôts sauvages', 'Atterrissement', "Entretien de plan d'eau", "Création de plan d'eau", 'Déchèterie', 'Canalisation', 'Piétinement'], + regex=True, + inplace=True +) +df_Rprocess.usages_process.replace(['Canalisation', 'Step'], ["Canalisation d'eau", 'STEP'], + regex=True, inplace=True +) +df_Rprocess.impact.replace( + [', ,'], [', '], + regex=True, inplace=True +) +df_Rprocess.impact.replace({'':None}, inplace=True) +df_Rprocess.reset_index(inplace=True, drop=True) +df_Rprocess.index.name = 'id' +# Loop who slow code ... 
+for i, row in df_Rprocess[df_Rprocess.impact.isna()].iterrows():
+    # if row.id_geom_site==18:
+    #     break
+    tmp = df_Rprocess.loc[(df_Rprocess.usages_process == row.usages_process) & (df_Rprocess.id_geom_site == row.id_geom_site) & (df_Rprocess.index != row.name)]
+    if not tmp.empty:
+        df_Rprocess.drop(row.name, inplace=True)
+df_Rprocess.reset_index(inplace=True, drop=True)
+df_Rprocess.index.name = 'id'
+# df_Rprocess.loc[df_Rprocess.usages_process.str.contains("égout"), ['id_geom_site','usages_process', 'rmq_activ_hum', 'position']]
+# Build the list of usages and processes
+df_process = pd.DataFrame(
+    {
+        'nom': df_Rprocess['usages_process'],
+        'description': df_Rprocess['description']
+    },
+    index = df_Rprocess[['usages_process', 'description']].index)
+df_process.drop_duplicates(inplace=True)
+df_process.sort_values('nom', inplace=True)
+df_process.reset_index(inplace=True, drop=True)
+df_process.index.name = 'id'
+# Insert the usages and processes
+# if not isin_bdd:
+#     df_process.to_sql(
+#         name='param_usageprocess',
+#         con = con_zh,
+#         schema='zones_humides',
+#         index=True,
+#         index_label='id',
+#         if_exists='append',
+#     )
+#     print("INSERT ... ok !")
+# # Shape the site / usage-process relations
+# df_Rprocess.position.replace(df_Ppostion.nom.to_list(),df_Ppostion.index.to_list(), inplace=True)
+# df_Rprocess.usages_process.replace(df_process.nom.to_list(),df_process.index.to_list(), inplace=True)
+# df_Rprocess.reset_index(inplace=True, drop=True)
+# df_Rprocess.index.name = 'id'
+# Insert the site / usage-process relations
+# if not isin_bdd:
+#     df_Rprocess.to_sql(
+#         name='r_site_usageprocess',
+#         con = con_zh,
+#         schema='zones_humides',
+#         index=True,
+#         index_label='id',
+#         if_exists='append',
+#     )
+#     print("INSERT ... ok !")
+
+
+
+d = df_process.copy()
+d['nom'] = df_process['nom'].str.lower()
+d
+df_process = d
+df_process.reset_index(drop=True, inplace=True)
+df_process.index.name = 'id'
+df_process[['nom','description']] = df_process['nom'].str.split(r'\(', expand=True)
+df_process['nom'] = df_process['nom'].str.strip()
+df_process['description'] = df_process['description'].str.replace(r'\)','', regex=True).str.strip()
+d = df_process['nom'].str.split(' et ').apply(pd.Series).stack()
+d = pd.DataFrame(d, columns=['nom']).droplevel(1)
+d.index.name = 'id'
+del df_process['nom']
+df_process = df_process.merge(d, on='id',how='left')
+df_process['nom'] = df_process['nom'].str[0].str.upper() + df_process['nom'].str[1:]
+df_process.drop_duplicates(inplace=True)
+
+d = d['nom'].str.split(' et ').apply(pd.Series).stack()
+
+if not isin_bdd:
+    df_critDelm.to_sql(
+        name='r_site_usageprocess',
+        con = con_zh,
+        schema='zones_humides',
+        index=True,
+        index_label='id',
+        if_exists='append',
+    )
+    print("INSERT ... ok !")
ok !") \ No newline at end of file diff --git a/2_MEDWET/get_medwet.py b/2_MEDWET/get_medwet.py new file mode 100644 index 0000000..2fedbb3 --- /dev/null +++ b/2_MEDWET/get_medwet.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +#Nom : : get_medwet.py +#Description : +#Copyright : 2021, CEN38 +#Auteur : Colas Geier +#Version : 1.0 + +import pymedwet as pym +import pandas as pd + +if __name__ == "__main__": + + NAME_OUT = '~/Documents/9_PROJETS/1_ZH/inventaire_zh_2021.xlsx' + lst_fct = { + 'Infos générales': pym.get_SiteInfo, + 'Corine Biotope' : pym.get_cb, + 'Délimitation de la zh': pym.get_Delim_espaceFct, + 'Description de la zh' : pym.get_descrp_zh, + 'Fonctmt de la zh': pym.get_fctmt_zh, + 'Fonctions de la zh' : pym.get_fct_zh, + 'Evaluation de la zh': pym.get_eval_glob, + 'Observations' : pym.get_obs, + 'Bibliographie': pym.get_biblio + } + print('INIT récupération des données ...') + df = {} + for f,fonction in enumerate(lst_fct): + name = fonction + fct = lst_fct[fonction] + df1 = fct(pym.db_file1) + df2 = fct(pym.db_file2) + if f == 0: + lst_stcd = df1[df1.SITE_COD.isin(df2.SITE_COD)].SITE_COD + if fct == pym.get_SiteInfo: + df2.loc[df2.SITE_COD.isin(lst_stcd), 'OTHER_NAME'] = df1.loc[df1.SITE_COD.isin(lst_stcd),'SITE_NAME'].tolist() + if isinstance(df1, pd.DataFrame) and isinstance(df2, pd.DataFrame): + df[f] = pd.concat([df1[~df1.SITE_COD.isin(lst_stcd)], df2]) + df[f].name = name + elif isinstance(df1, dict) and isinstance(df2, dict): + df[f] = {} + df[f]['title'] = name + for d in df1: + df[f][d] = pd.concat([df1[d][~df1[d].SITE_COD.isin(lst_stcd)], df2[d]]) + + print('INIT écriture du fichier ...') + # Ecriture des données + with pd.ExcelWriter(NAME_OUT) as writer: + for d in df: + DF = df[d] + if isinstance(DF, pd.DataFrame): + DF.to_excel(writer,sheet_name=DF.name,startrow=1 , startcol=0, index=False, header=DF.columns) + ws = writer.book.active + writer.sheets[DF.name].cell(1,1,value=DF.name) + writer.save() + elif isinstance(DF, dict): + for i,d in enumerate(DF): + if d == 'title': + continue + if i == 1: + row = 1 + col = 0 + else: + col = DF[d].shape[1] + col + 3 + DF[d].to_excel(writer,sheet_name=DF['title'],startrow=row , startcol=col, index=False) + ws = writer.book.active + writer.sheets[DF['title']].cell(column=col+1,row=row,value=d) + writer.save() + + import sys + sys.exit('END SCRIPT ...') \ No newline at end of file diff --git a/2_MEDWET/get_zh_cen.py b/2_MEDWET/get_zh_cen.py new file mode 100644 index 0000000..b700709 --- /dev/null +++ b/2_MEDWET/get_zh_cen.py @@ -0,0 +1,511 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +#Nom : : get_zh_cen.py +#Description : +#Copyright : 2021, CEN38 +#Auteur : Colas Geier +#Version : 1.0 + + + +import re +import pandas as pd +import pandas_access as mdb +import numpy as np +from sqlalchemy.sql.expression import column +from sqlalchemy import create_engine +from geoalchemy2 import Geometry + + +isin_bdd = True +# Parametres bdd OUT +user_zh = 'postgres' +pwd_zh = 'tutu' +adr_zh = '192.168.60.10' +base_zh = 'bd_cen' +con_zh = create_engine('postgresql+psycopg2://{0}:{1}@{2}/{3}'.format(user_zh,pwd_zh,adr_zh,base_zh), echo=False) + + + +##################################### +### Fonctions générales ### +##################################### +def _aggr_cols(df, lst_col, sep=''): + df['aggreg'] = '' + for c,col in enumerate(lst_col): + add = '' + if c > 0: + add = sep + df.loc[~df[col].isna(),'aggreg'] = df.loc[~df[col].isna(),'aggreg'] + add + df.loc[~df[col].isna(),col] + return df + +def 
to_tuple(obj): + if isinstance(obj, list): obj = tuple(obj) + if isinstance(obj, (int, str)) : obj = tuple([obj]) + return obj + +def to_colStringSQL(obj): + if isinstance(obj, (int, str)) : obj = str(obj) + if isinstance(obj, list): obj = ",".join(obj) + return obj + +def to_upper(obj): + if isinstance(obj, tuple): obj = tuple([o.upper() for o in list(obj)]) + if isinstance(obj, list) : obj = [o.upper() for o in obj] + if isinstance(obj, str) : obj = obj.upper() + return obj + +def to_upperfirst(obj): + if isinstance(obj, tuple): obj = tuple([o.upper()[0] + o.lower()[1:] for o in list(obj)]) + if isinstance(obj, list) : obj = [o.upper()[0] + o.lower()[1:] for o in obj] + if isinstance(obj, str) : obj = obj.upper()[0] + obj.lower()[1:] + return obj + +def _get_table(con, schema, table, ids=None, nom=None, cols=None, params_col={}): + sql = 'SELECT * FROM {sch}.{tab}'.format(sch=schema, tab=table) + if cols : sql = sql.replace('*', to_colStringSQL(cols) ) + if ids or nom or params_col : sql = sql + ' WHERE ' + if ids : sql = sql + 'id IN %(ids)s' + if ids and (nom or params_col) : sql = sql + ' AND ' + if nom : sql = sql + 'nom IN %(nom)s' + if nom and params_col : sql = sql + ' AND ' + if params_col : + sql = sql + ' AND '.join([k + ' IN %({})s'.format(k) for k in params_col.keys()]) + params_col = {key:to_tuple(params_col[key]) for key in params_col.keys()} + df = pd.read_sql( + sql = sql, + con = con, + params = {'ids': to_tuple(ids), 'nom': to_tuple(nom), **params_col }) + return df + + + +##################################### +### schema personnes ### +##################################### +class pers: + def __init__(self): + self.schema = 'personnes' + self.con = con_zh + # self._get_table = _get_table + + def get_auteur(self, nom=None, prenom=None): + sql = 'SELECT * FROM %s.personne'%self.schema + if nom or prenom : sql = sql + ' WHERE ' + if nom : + sql = sql + 'nom IN %(nom)s' + nom = to_upper(nom) + if nom and prenom : sql = sql + ' AND ' + if prenom : + sql = sql + 'prenom IN %(prenom)s' + prenom = to_upperfirst(prenom) + df = pd.read_sql( + sql = sql, + con = self.con, + params = {'nom': to_tuple(nom), 'prenom': to_tuple(prenom) }) + return df + + def get_organisme(self, ids=None, nom=None): + table = 'organisme' + return _get_table(self.con, self.schema, table, ids=ids, nom=nom) + + + +##################################### +### schema sites ### +##################################### +class sites: + def __init__(self): + self.schema = 'sites' + self.con = con_zh + self.typ_milieux = self._get_typ_milieux() + self.typo_sdage = self._get_typo_sdage() + self.typ_site = self._get_typ_site() + self.auteur = pers().get_auteur() + self.organisme = pers().get_organisme() + + def _get_typ_milieux(self, ids=None, nom=None): + table = 'type_milieu' + df = _get_table(self.con, self.schema, table, ids=ids, nom=nom) + return df + + def _get_typo_sdage(self, ids=None, nom=None): + table = 'typo_sdage' + df = _get_table(self.con, self.schema, table, ids=ids, nom=nom) + return df + + def _get_typ_site(self, ids=None, nom=None): + table = 'type_site' + df = _get_table(self.con, self.schema, table, ids=ids, nom=nom) + return df + + def _merge_orga(self, df, split_cols): + org = self.organisme + aut = self.auteur + df = df.copy() + for c in split_cols: + if not isinstance(df[c], int): df[c] = df[c].astype(float) + df[c].replace(aut.id.tolist(), aut.id_organisme.tolist(), inplace=True) + df[c].replace(org.id.tolist(), org.nom.tolist(), inplace=True) + df['organisme'] = None + for c in split_cols: + 
df.loc[df.organisme.isna(), 'organisme'] = df.loc[df['organisme'].isna(), c] + for c in split_cols: + comp = df.loc[~df[c].isna(),'organisme'].compare(df.loc[~df[c].isna(), c]) + if not comp.empty: + comp['test'] = comp.apply(lambda x: x['other'] in x['self'], axis=1) + comp = comp[~comp.test] + if not comp.empty: + df.loc[comp.index,'organisme'] = comp.self + ' & ' + comp.other + df.drop(columns=split_cols, inplace=True) + return df + + def _merge_author(self, df, col_aut, orga=False): + # récupération des auteurs + aut = self.auteur.fillna('') + aut['nom_prenom'] = (aut['nom'] + ' ' + aut['prenom']).str.strip() + aut['id'] = aut['id'].astype(str) + # merge des auteurs + r_id = df[['id', col_aut]].copy() + r_idSplit = r_id[col_aut].str.split(' & ', expand=True) + r_id = r_id.join(r_idSplit) + cSplit = r_idSplit.shape[1] + cSplit = list(range(cSplit)) + if orga: + # récup des organismes + org = self._merge_orga(r_id, cSplit) + r_id[cSplit] = r_id[cSplit].replace(aut['id'].tolist(),aut['nom_prenom'].tolist()) + r_id = _aggr_cols(r_id,cSplit,' & ') \ + .rename(columns={'aggreg': 'auteur'}) \ + .drop(columns=cSplit) + if orga: + # merge des organismes + r_id = pd.merge(r_id,org, on=['id', col_aut]) + df = pd.merge(df,r_id, on=['id', col_aut]) \ + .drop(columns=[col_aut]) + return df + + def get_sitesInfos(self, ids=None, nom=None, columns=None, with_nameOrga=False, details=False): + drop = [] + table = 'sites' + df = _get_table(self.con, self.schema, table, ids=ids, nom=nom, cols=columns) + # récupération des auteurs + if 'id_auteur' in df.columns: + df = self._merge_author(df=df, col_aut='id_auteur', orga=with_nameOrga) + # merge type_site + if 'id_type_site' in df.columns: + df = pd.merge(df, self.typ_site, left_on='id_type_site', right_on='id', suffixes=('','_y') ) \ + .drop(columns=['id_type_site', 'id_y']) \ + .rename(columns={'nom_y': 'type_site', 'description': 'desc_type_site'}) + drop += ['desc_type_site'] + # merge typo_sdage + if 'id_typo_sdage' in df.columns: + df = pd.merge(df, self.typo_sdage, left_on='id_typo_sdage', right_on='id', suffixes=('','_y') ) \ + .drop(columns=['id_typo_sdage', 'id_y']) \ + .rename(columns={'nom_y': 'typo_sdage', 'description': 'desc_typo_sdage'}) + drop += ['desc_typo_sdage'] + # merge type_milieu + if 'id_type_milieu' in df.columns: + df = pd.merge(df, self.typ_milieux, left_on='id_type_milieu', right_on='id', suffixes=('','_y') ) \ + .drop(columns=['id_type_milieu', 'id_y']) \ + .rename(columns={'nom_y': 'type_milieu', 'description': 'desc_type_milieu', 'nom_court': 'nom_court_milieu'}) + drop += ['desc_type_milieu', 'nom_court_milieu'] + + if not details: + df.drop(columns=drop, inplace=True) + + return df.sort_values('id') + + def get_sitesGeom(self, id_site=None, nom_site=None, columns=None, last_update=False, with_nameOrga=False): + from shapely.wkb import loads + import geopandas as gpd # set_geometry + + if columns: + if not isinstance(columns, list): columns = [columns] + if 'id' not in columns: columns.insert(0,'id') + if 'id_site' not in columns: columns.insert(1,'id_site') + if 'geom' not in columns: columns.insert(2,'geom') + + table = 'sites' + df = _get_table(self.con, self.schema, table, ids=id_site, nom=nom_site, cols='id') + idSite = df.id.tolist() + table = 'r_sites_geom' + df = _get_table(self.con, self.schema, table, params_col={'id_site':idSite}, cols=columns) + if last_update: + df.drop_duplicates(subset=['id_site'], keep='last', inplace=True) + df.reset_index(inplace=True, drop=True) + + df['geom'] = [(loads(geom, hex=True)) 
for geom in df['geom']] + df = df.set_geometry('geom', crs='EPSG:2154') + # merge auteur + if 'id_auteur' in df.columns: + df = self._merge_author(df=df, col_aut='id_auteur', orga=with_nameOrga) + + return df + + + +##################################### +### schema sites ### +##################################### +class zh: + def __init__(self): + self.schema = 'zones_humides' + self.con = con_zh + + def _get_param(self, param_table, type_table=None, type_court=True): + if type_table: + typ = _get_table(self.con, self.schema, table=type_table) + par = _get_table(self.con, self.schema, table=param_table, params_col={'id_type':typ.id.tolist()}) + df = pd.merge(par, typ, left_on='id_type', right_on='id', how='left', suffixes=(None, '_typ')) \ + .drop(columns=['id_type','id_typ']) + if 'description_typ' in df.columns: del df['description_typ'] + if type_court: df = df.drop(columns=['nom_typ']).rename(columns={'nom_court_typ':'type'}) + else : df = df.drop(columns=['nom_court_typ'],errors='ignore').rename(columns={'nom_typ':'type'}) + df = df.set_index(['id', 'type']).reset_index() + else: + df = _get_table(self.con, self.schema, table=param_table) + return df + + def _get_relation_tab(self, tab, id_site=None, nom_site=None, last_update=False, geom=False): + table = 'sites' + dfSG = sites().get_sitesGeom(columns='date', id_site=id_site, nom_site=nom_site, last_update=last_update) + if not geom and not dfSG.empty: + dfSG.drop('geom',1,inplace=True) + ids = dfSG.id.tolist() + table = tab + if ids : + df = _get_table(self.con, self.schema, table, params_col={'id_geom_site':ids}) + if not df.empty: + df = pd.merge(dfSG,df, left_on='id', right_on='id_geom_site', suffixes=('_x', None)) \ + .drop(['id_x','id_geom_site'],1) \ + .set_index('id').reset_index() + return df + else: + print('PAS de géometries de sites sélectionnées ...') + + def get_delim(self, id_site=None, nom_site=None, last_update=False, geom=False, nom_type_court=True): + table = 'r_site_critdelim' + df = self._get_relation_tab(tab=table,id_site=id_site,nom_site=nom_site,last_update=last_update,geom=geom) + dic = self._get_param(type_table='type_param_delim_fct', param_table='param_delim_fct', type_court=nom_type_court) + if not df.empty: + df = pd.merge(df,dic, left_on='id_crit_delim', right_on='id', suffixes=(None,'_y')) \ + .drop(['id_y','id_crit_delim'],1) \ + .rename(columns={'description_y':'desc_param', 'nom_court':'nom_court_crit','nom':'nom_crit'}) \ + .sort_values('id_site') + if df.nom_court_crit.isnull().sum() == df.shape[0] : del df['nom_court_crit'] + # typ = df.type.unique() + # x = {} + # for t in typ: + # x[t] = df[df.type == t] + # x[t] = x[t].rename(columns={'nom': t}) \ + # .reset_index(drop=True) + return df + + def get_fct(self, id_site=None, nom_site=None, last_update=False, geom=False, nom_type_court=True): + table = 'r_site_fctecosociopatri' + df = self._get_relation_tab(tab=table,id_site=id_site,nom_site=nom_site,last_update=last_update,geom=geom) + dic = self._get_param(type_table='type_param_fct', param_table='param_fct_eco_socio_patri', type_court=nom_type_court) + if not df.empty: + df = pd.merge(df,dic, left_on='id_fct', right_on='id', suffixes=(None,'_y')) \ + .drop(['id_y','id_fct'],1) \ + .rename(columns={'description_y':'desc_param', 'nom_court':'nom_court_fct','nom':'nom_fct'}) \ + .sort_values('id_site') + if df.nom_court_fct.isnull().sum() == df.shape[0] : del df['nom_court_fct'] + return df + + def get_connex(self, id_site=None, nom_site=None, last_update=False, geom=False): + table = 
'r_site_type_connect' + df = self._get_relation_tab(tab=table,id_site=id_site,nom_site=nom_site,last_update=last_update,geom=geom) + dic = self._get_param(param_table='param_type_connect') + if not df.empty: + df = pd.merge(df,dic, left_on='id_param_connect', right_on='id', suffixes=(None,'_y')) \ + .drop(['id_y','id_param_connect'],1) \ + .rename(columns={'description_y':'desc_param', 'nom':'connexion'}) \ + .sort_values('id_site') + return df + + def get_sub(self, id_site=None, nom_site=None, last_update=False, geom=False): + table = 'r_site_sub' + df = self._get_relation_tab(tab=table,id_site=id_site,nom_site=nom_site,last_update=last_update,geom=geom) + dic = self._get_param(type_table='type_param_sub', param_table='param_sub', type_court=False) + d1 = dic[dic.type == 'Submersion étendue'] + d2 = dic[dic.type == 'Submersion fréquente'] + if not df.empty: + df = pd.merge(df,d1, how='left', left_on='id_etendsub', right_on='id', suffixes=(None,'_y')) \ + .drop(['id_y','id_etendsub', 'type'],1) \ + .rename(columns={'description':'desc_param_etend', 'nom':'Submersion étendue'}) + df = pd.merge(df,d2, how='left', left_on='id_freqsub', right_on='id', suffixes=(None,'_y')) \ + .drop(['id_y','id_freqsub', 'type'],1) \ + .rename(columns={'description':'desc_param_freq', 'nom':'Submersion fréquente'}) \ + .sort_values('id_site') + df.rename(columns={'id_origsub': 'origine_sub'}, inplace=True) + if df['desc_param_etend'].isnull().sum() == df.shape[0] : del df['desc_param_etend'] + if df['desc_param_freq'].isnull().sum() == df.shape[0] : del df['desc_param_freq'] + return df + + def get_usageprocess(self, id_site=None, nom_site=None, last_update=False, geom=False): + table = 'r_site_usageprocess' + df = self._get_relation_tab(tab=table,id_site=id_site,nom_site=nom_site,last_update=last_update,geom=geom) + dic1 = self._get_param(param_table='param_usageprocess') + dic2 = self._get_param(param_table='param_position') + if not df.empty: + df = pd.merge(df,dic1, how='left', left_on='id_usageprocess', right_on='id', suffixes=(None,'_y')) \ + .drop(['id_y','id_usageprocess'],1) \ + .rename(columns={'description':'desc_param_usag', 'nom':'usageprocess'}) + df = pd.merge(df,dic2, how='left', left_on='id_position', right_on='id', suffixes=(None,'_y')) \ + .drop(['id_y','id_position'],1) \ + .rename(columns={'description':'desc_param_pos', 'nom':'position'}) \ + .sort_values('id_site') + return df + + def _get_r_toponymie(self, ids=None): + table = 'r_toponymie' + df = _get_table(self.con, self.schema, table=table, ids=ids) + dic1 = self._get_param(param_table='liste_table_topohydro') + if not df.empty: + df = pd.merge(df,dic1, left_on='id_orig', right_on='id', suffixes=(None,'_y')) \ + .drop(['id_y','id_orig'],1) + n_tab = df.nom_table.unique() + for tab in n_tab: + iids = df.loc[df.nom_table == tab, 'id_topo'].to_list() + if tab == 'orig_hydro': dic = _get_table(self.con, self.schema, table='orig_hydro', ids=iids) + if tab == 'troncon_hydro': dic = ref_hydro.get_troncon(cols=['id','nom'], ids=iids) + df.loc[df.nom_table == tab, 'id_topo'] = df.loc[df.nom_table == tab, 'id_topo'].replace(dic.id.to_list(),dic.nom.to_list()) + if tab == 'troncon_hydro': df = pd.merge(df, dic, left_on='id_topo', right_on='nom', suffixes=(None,'_y')) \ + .drop(columns=['id_y', 'nom']) + df.rename(columns={'id_topo':'toponymie'}) + return df + + def get_regHydro(self, id_site=None, nom_site=None, last_update=False, geom=False): + table = 'r_site_reghydro' + df = 
+        dic1 = self._get_param(param_table='param_reg_hydro')
+        dic2 = self._get_param(param_table='param_permanance')
+        if not df.empty:
+            dic3 = self._get_r_toponymie(ids=df.id_toponymie.unique().tolist())
+            df.in_out = df.in_out.replace([True, False], ['entree', 'sortie'])
+            df = pd.merge(df, dic1, left_on='id_reg_hydro', right_on='id', suffixes=(None, '_y')) \
+                .drop(columns=['id_y', 'id_reg_hydro']) \
+                .rename(columns={'description': 'desc_param_regHydri', 'nom': 'regime_hydri'})
+            df = pd.merge(df, dic2, left_on='id_permanance', right_on='id', suffixes=(None, '_y')) \
+                .drop(columns=['id_y', 'id_permanance']) \
+                .rename(columns={'description': 'desc_param_perm', 'nom': 'permanance'})
+            df = pd.merge(df, dic3, left_on='id_toponymie', right_on='id', suffixes=(None, '_y')) \
+                .drop(columns=['id_y', 'id_toponymie']) \
+                .rename(columns={'description': 'desc_topo'}) \
+                .sort_values('id_site')
+        return df
+
+    def get_habitat(self, id_site=None, nom_site=None, last_update=False, geom=False):
+        table = 'r_site_habitat'
+        df = self._get_relation_tab(tab=table, id_site=id_site, nom_site=nom_site, last_update=last_update, geom=geom)
+        if not df.empty:
+            dic = ref_hab().get_CB(ids=df.id_cb.unique().tolist(), cols=['id', 'lb_hab_fr'])
+            df = pd.merge(df, dic, left_on='id_cb', right_on='id', suffixes=(None, '_y')) \
+                .drop(columns=['id_y']) \
+                .rename(columns={'id_cb': 'code_cb'}) \
+                .sort_values('id_site')
+        return df
+
+
+
+#####################################
+###      schema ref_habitats      ###
+#####################################
+class ref_hab:
+    def __init__(self):
+        self.schema = 'ref_habitats'
+        self.con = con_zh
+
+    def get_CB(self, ids=None, cols=None, params_col={}):
+        table = 'corine_biotope'
+        df = _get_table(self.con, self.schema, table=table, ids=ids, cols=cols, params_col=params_col)
+        return df
+
+
+
+#####################################
+###       schema ref_hydro        ###
+#####################################
+class ref_hydro:
+    def __init__(self):
+        self.schema = 'ref_hydro'
+        self.con = con_zh
+
+    def get_troncon(self, ids=None, cols=None, params_col={}):
+        table = 'troncon_hydro'
+        df = _get_table(self.con, self.schema, table=table, ids=ids, cols=cols, params_col=params_col)
+        return df
+
+
+
+#####################################
+###             Bilan             ###
+#####################################
+def get_bilan(code_site=None, nom_site=None):
+    '''
+    :code_site: list or str. Code(s) of the wetland site(s).
+    :nom_site: list or str. Site name(s).
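+    Sketch of a typical call (the site code and output path below are
+    illustrative placeholders, not real data):
+    >>> bilan = get_bilan(code_site='38BB0001')
+    >>> write_bilan(bilan, output='/tmp/bilan_zh.xlsx')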
+    '''
+    SITES = sites()
+    ZH = zh()
+    info = SITES.get_sitesInfos(ids=code_site, nom=nom_site)
+    CB = ZH.get_habitat(id_site=code_site, nom_site=nom_site)
+    delim = ZH.get_delim(id_site=code_site, nom_site=nom_site)
+    desc = ZH.get_usageprocess(id_site=code_site, nom_site=nom_site)
+    rghyd = ZH.get_regHydro(id_site=code_site, nom_site=nom_site)
+    subm = ZH.get_sub(id_site=code_site, nom_site=nom_site)
+    conn = ZH.get_connex(id_site=code_site, nom_site=nom_site)
+    fct = ZH.get_fct(id_site=code_site, nom_site=nom_site)
+    evall = SITES.get_sitesGeom().drop(columns=['geom'])
+    sub_con = pd.merge(subm, conn, how='outer', on=['id', 'id_site', 'date']) \
+        .rename(columns={'description': 'desc_connex'})
+    fctmt = {
+        'entree_eau': rghyd[rghyd.in_out == 'entree'].drop(columns=['in_out']),
+        'sortie_eau': rghyd[rghyd.in_out == 'sortie'].drop(columns=['in_out']),
+        'sub_connex': sub_con,
+    }
+    lst_df = {
+        'infos': info,
+        'corine_biotope': CB,
+        'delimitation': delim,
+        'description': desc,
+        'fonctionnement': fctmt,
+        'fonction': fct,
+        'evaluation': evall}
+    for key in lst_df:
+        if isinstance(lst_df[key], pd.DataFrame): lst_df[key].name = key
+        if isinstance(lst_df[key], dict):
+            for d in lst_df[key]:
+                lst_df[key][d].name = d
+            lst_df[key]['title'] = key
+    return lst_df
+
+def write_bilan(df, output):
+    '''
+    :df: dict. Dictionary of DataFrames. One sheet is written per entry;
+    the DataFrame's name is used as the sheet title.
+    :output: str. path_to_my_file/my_file.xlsx
+    '''
+    # Write the data. The explicit writer.save() calls were removed:
+    # the `with` block already saves the workbook on exit, and the
+    # unused `ws = writer.book.active` assignments were dropped.
+    with pd.ExcelWriter(output) as writer:
+        for d in df:
+            DF = df[d]
+            if isinstance(DF, pd.DataFrame):
+                DF.to_excel(writer, sheet_name=DF.name, startrow=1, startcol=0, index=False, header=DF.columns)
+                writer.sheets[DF.name].cell(1, 1, value=DF.name)
+            elif isinstance(DF, dict):
+                # `k` (not `d`) to avoid shadowing the outer loop variable
+                for i, k in enumerate(DF):
+                    if k == 'title': continue
+                    if i == 0:
+                        row = 1
+                        col = 0
+                    else:
+                        col = DF[k].shape[1] + col + 3
+                    DF[k].to_excel(writer, sheet_name=DF['title'], startrow=row, startcol=col, index=False)
+                    writer.sheets[DF['title']].cell(column=col + 1, row=row, value=k)
diff --git a/2_MEDWET/recup_zh_from_medwet.py b/2_MEDWET/recup_zh_from_medwet.py
new file mode 100644
index 0000000..87be6ed
--- /dev/null
+++ b/2_MEDWET/recup_zh_from_medwet.py
@@ -0,0 +1,939 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+#Nom : recup_zh_from_medwet.py
+#Description :
+#Copyright : 2021, CEN38
+#Auteur : Colas Geier
+#Version : 1.0
+
+import re
+import pandas as pd
+import pandas_access as mdb
+import numpy as np
+from sqlalchemy import create_engine
+from geoalchemy2 import Geometry
+
+
+isin_bdd = True
+# Output database (PostgreSQL) connection settings
+user_zh = 'postgres'
+pwd_zh = 'tutu'
+adr_zh = '192.168.60.10'
+base_zh = 'bd_cen'
+con_zh = create_engine('postgresql+psycopg2://{0}:{1}@{2}/{3}'.format(user_zh, pwd_zh, adr_zh, base_zh), echo=False)
+
+# MS Access source databases
+db_file1 = '/home/colas/Documents/5_BDD/ZONES_HUMIDES/MEDWET_v1.mdb'
+db_file2 = '/home/colas/Documents/5_BDD/ZONES_HUMIDES/MEDWET_V2.mdb'
+db_file74 = '/home/colas/Documents/13_CEN74/medwet2gn_ZH/bdd/Inventaires ZHRMCvivante.mdb'
+# df_med1 = mdb.read_table(db_file1, "SITEINFO")
+# df_med2 = mdb.read_table(db_file2, "SITEINFO")
+
+
+
+#####################################
+### Observation de la zone humide ###
+#####################################
+def _aggr_cols(df, lst_col):
+    df['aggreg'] = None
+
+    for col in lst_col:
+        iscol = ~df[col].isna()
+        isagg = 
~df['aggreg'].isna() + + df.loc[iscol & isagg, 'aggreg'] = df.loc[iscol & isagg, 'aggreg'] + \ + ' ; ' + col + ' : ' + df.loc[iscol & isagg, col] + df.loc[iscol & ~isagg, 'aggreg'] = col + ' : ' + df.loc[iscol & ~isagg, col] + + return df + +def _del_na_col(df): + for col in df.columns: + tmp = pd.notna(df[col]).unique() + if len(tmp) == 1 and False in tmp: + del df[col] + return df + +def __get_auteur__(db_file): + df = mdb.read_table(db_file, 'MWDKC') + df = _del_na_col(df) + df.drop(columns=['FAX', 'E_MAIL', 'COUNTRY', 'PHONE', 'ADDRESS', 'CITY'], inplace=True) + return df + +def __get_DicGenREGLE__(): + d = {'CODE':['1','2','3','4','5'], 'DESCR':[ + 'protection : nationale (PN) / régionale (PR)', + 'livres rouges : nationale (LN) / régionale (LR)', + 'Directives Habitats (annexe II ou annexe IV)', + 'Directive Oiseaux (annexe I)', + 'autres statuts réglementaires (Convention de Bonn, Convention de Bern…)', + ]} + return pd.DataFrame(data=d) + +def __get_DicEsp__(db_file, dic, detail=False): + df = mdb.read_table(db_file, dic) + df = _del_na_col(df) + df.drop(columns=[ + 'FRANCE', 'ITALY', 'GREECE', 'SPAIN', 'PORTUGAL', + 'ANNEX_II', 'SPCLIV', 'SPCLV', 'SPBCAX1','IUCN_REDL'], inplace=True) + df_cols = df.columns + df_cols = df_cols[df_cols.str.startswith(('SPBONN','ANNEX'))] + if not detail and not df_cols.empty: + df.drop(columns=df_cols, inplace=True) + return df + +def __get_ObsEsp__(db_file, dic): + df = mdb.read_table(db_file, dic) + df = _del_na_col(df) + return df + +def __get_SiteInfo__(db_file): + df = mdb.read_table(db_file, 'SITEINFO') + df = _del_na_col(df) + df.drop(columns=[ + 'COMPLE_COD', 'AREA_EF', 'AREA_COMP' + ], inplace=True) + for col in ['PROTOCOLE', 'VALEUR_HUMIDITE']: + if col in df.columns: + df.drop(columns=[col], inplace=True) + return df + +def __insert_type_esp__(df, type_esp_name): + for col in df.columns: + if col == 'ORDER': break + fix_col = col + tmp = df.loc[:,:fix_col] + tmp['TYPE_ESP'] = type_esp_name + return tmp.join(df.loc[:,'ORDER':]) + +def _get_obs(db_file, tab_obs, dic_obs, detail=False): + df_obs = __get_ObsEsp__(db_file, tab_obs) + df_dic = __get_DicEsp__(db_file, dic_obs, detail) + df = pd.merge(df_obs,df_dic,how='left', left_on='SPEC_COD', right_on='ID_COD') + if tab_obs == 'OBSFLO': + df.rename(columns={'FLO_MOIS': 'BRIDDATE_M', 'FLO_YEAR': 'BRIDDATE_Y'}, inplace=True) + if not detail: + df.drop(columns=['SPEC_COD','ID_COD'], inplace=True) + return df + +def get_obs(db_file, type_obs='all', detail=False): + ''' + :db_file: str. Nom de la base de données access + :type_obs: str ou list. Code des observations à extraire de + la bdd access MEDWET : 'amphi', 'bird', 'flore', 'fish', + 'invert', 'mamm', 'rept'. 'all' si tous. + :detail: bool. 
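+    Hypothetical usage sketch (db_file74 is one of the .mdb paths
+    configured at the top of this script; the TYPE_ESP column is added
+    by __insert_type_esp__ above):
+    >>> obs = get_obs(db_file74, type_obs=['flore', 'bird'])
+    >>> obs['TYPE_ESP'].value_counts()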
+ ''' + obs_amp = pd.DataFrame() + obs_brd = pd.DataFrame() + obs_flo = pd.DataFrame() + obs_fsh = pd.DataFrame() + obs_inv = pd.DataFrame() + obs_mam = pd.DataFrame() + obs_rep = pd.DataFrame() + if 'all' == type_obs or 'all' in type_obs: + type_obs = ['amphi', 'bird', 'flore', 'fish', 'invert', 'mamm', 'rept'] + if 'amphi' == type_obs or 'amphi' in type_obs: + tab_obs = 'OBSAMP' + dic_obs = 'DICAMP' + obs_amp = _get_obs(db_file, tab_obs, dic_obs, detail) + obs_amp = __insert_type_esp__(obs_amp, 'Amphibien') + if 'bird' == type_obs or 'bird' in type_obs: + tab_obs = 'OBSBRD' + dic_obs = 'DICBRD' + obs_brd = _get_obs(db_file, tab_obs, dic_obs, detail) + obs_brd = __insert_type_esp__(obs_brd, 'Oiseau') + if 'flore' == type_obs or 'flore' in type_obs: + tab_obs = 'OBSFLO' + dic_obs = 'DICFLO' + obs_flo = _get_obs(db_file, tab_obs, dic_obs, detail) + obs_flo = __insert_type_esp__(obs_flo, 'Flore') + if 'fish' == type_obs or 'fish' in type_obs: + tab_obs = 'OBSFSH' + dic_obs = 'DICFSH' + obs_fsh = _get_obs(db_file, tab_obs, dic_obs, detail) + obs_fsh = __insert_type_esp__(obs_fsh, 'Poisson') + if 'invert' == type_obs or 'invert' in type_obs: + tab_obs = 'OBSINV' + dic_obs = 'DICINV' + obs_inv = _get_obs(db_file, tab_obs, dic_obs, detail) + obs_inv = __insert_type_esp__(obs_inv, 'Invertébré') + if 'mamm' == type_obs or 'mamm' in type_obs: + tab_obs = 'OBSMAM' + dic_obs = 'DICMAM' + obs_mam = _get_obs(db_file, tab_obs, dic_obs, detail) + obs_mam = __insert_type_esp__(obs_mam, 'Mammifère') + if 'rept' == type_obs or 'rept' in type_obs: + tab_obs = 'OBSREP' + dic_obs = 'DICREP' + obs_rep = _get_obs(db_file, tab_obs, dic_obs, detail) + obs_rep = __insert_type_esp__(obs_rep, 'Reptile') + + df_obs = pd.concat([obs_amp,obs_brd,obs_flo,obs_fsh,obs_inv,obs_mam,obs_rep]) + df_obs = _del_na_col(df_obs) + df_rgl = __get_DicGenREGLE__() + df_aut = __get_auteur__(db_file) + if 'REGLE_COD' in df_obs.columns: + df_obs = pd.merge(df_obs,df_rgl,how='left', left_on='REGLE_COD', right_on='CODE') + df_obs.rename(columns={'DESCR':'NIV_PROTECT', 'CODE': 'CD_PROTECT'}, inplace=True) + if not detail: + df_obs.drop(columns=['REGLE_COD', 'CD_PROTECT'], inplace=True) + df_obs = pd.merge(df_obs,df_aut,how='left', left_on='AUTEUR', right_on='CODE') + df_obs.rename(columns={'DESCR':'NOM_AUT', 'CODE': 'CD_AUT'}, inplace=True) + if not detail: + df_obs.drop(columns=['AUTEUR', 'CD_AUT'], inplace=True) + return df_obs + + + +######################################## +### Fonctionnement de la zone humide ### +######################################## +def __get_DicGenIN1__(): + d = {'CODE':['1','2','3','4','5','6','7','8','9'], 'DESCR':[ + 'mer/océan', "cours d'eau", + 'Canaux/fossés','Sources', + 'nappes','précipitations', + "plans d'eau",'Ruissellement diffus', + 'Eaux de crues', + ]} + return pd.DataFrame(data=d) + +def __get_DicGenIN2__(): + d = {'CODE':['1','2','3','4'], 'DESCR':[ + 'permanent','saisonnier', + 'temporaire','intermittent', + ]} + return pd.DataFrame(data=d) + +def __get_DicGenOUT__(): + d = {'CODE':['1','2','3','4','5','6','7'], 'DESCR':[ + 'mer/océan', "cours d'eau", + 'Canaux/fossés','Pompage, drainage', + 'nappes','évaporation', + "plans d'eau", + ]} + return pd.DataFrame(data=d) + +def __get_DicGenOUT2__(): + d = {'CODE':['1','2','3','4','5'], 'DESCR':[ + 'aucune','permanent', + 'saisonnier','temporaire', + 'intermittent', + ]} + return pd.DataFrame(data=d) + +def __get_DicGenFREQ__(): + d = {'CODE':['1','2','3','4'], 'DESCR':[ + 'Jamais submergé','Toujours submergé', + 'Exceptionnellement submergé', + 
'Régulièrement submergé', + ]} + return pd.DataFrame(data=d) + +def __get_DicGenETEND__(): + d = {'CODE':['1','2'], 'DESCR':[ + 'Totalement submergé', + 'Partiellement submergé', + ]} + return pd.DataFrame(data=d) + +def __get_DicGenCONNEX__(): + d = {'CODE':['1','2','3','4','5','6'], 'DESCR':[ + 'traversée', + 'entrée et sortie', + 'entrée', + 'sortie', + 'passe à coté', + 'aucune connexion', + ], 'NOMFICH':[ + 'MWD1.JPG','MWD2.JPG','MWD3.JPG','MWD4.JPG','MWD5.JPG','MWD6.JPG']} + return pd.DataFrame(data=d) + +def get_regHydroEntree(db_file, detail=False): + ''' + :db_file: str. Nom de la base de données access + :detail: bool. + ''' + df_in = mdb.read_table(db_file, 'SITEINFL') + df_dic1= __get_DicGenIN1__() + df_dic2= __get_DicGenIN2__() + df_in = pd.merge(df_in,df_dic1, how='left', left_on='INFLOW1', right_on='CODE') + df_in.rename(columns={'CODE':'CD_ENTREE_EAU', 'DESCR':'ENTREE_EAU'}, inplace=True) + df_in = pd.merge(df_in,df_dic2, how='left', left_on='INFLOW2', right_on='CODE') + df_in.rename(columns={'CODE':'CD_PERM', 'DESCR':'PERM_IN'}, inplace=True) + if not detail: + df_in.drop(columns=['INFLOW1','INFLOW2','CD_ENTREE_EAU','CD_PERM'], inplace=True) + return df_in + +def get_regHydroSortie(db_file, detail=False): + ''' + :db_file: str. Nom de la base de données access + :detail: bool. + ''' + df_out = mdb.read_table(db_file, 'SITEOUTF') + df_dic1= __get_DicGenOUT__() + df_dic2= __get_DicGenOUT2__() + df_out = pd.merge(df_out,df_dic1, how='left', left_on='OUTFLOW', right_on='CODE') + df_out.rename(columns={'CODE':'CD_SORTIE_EAU', 'DESCR':'SORTIE_EAU'}, inplace=True) + if not isinstance(df_out.PERMANENCE,str): + df_out.PERMANENCE.fillna(0, inplace=True) + df_out.PERMANENCE = df_out.PERMANENCE.astype(int).astype(str) + df_out = pd.merge(df_out,df_dic2, how='left', left_on='PERMANENCE', right_on='CODE') + df_out.rename(columns={'PERMANENCE': 'CD_PERM1','CODE':'CD_PERM2', 'DESCR':'PERM_OUT'}, inplace=True) + if not detail: + df_out.drop(columns=['OUTFLOW','CD_SORTIE_EAU','CD_PERM1','CD_PERM2'], inplace=True) + return df_out + +def get_regSubmersion(db_file, detail=False): + ''' + :db_file: str. Nom de la base de données access + :detail: bool. 
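+    Returns one row per SITE_COD with the decoded submersion labels
+    (sketch, assuming the MEDWET SITEINFO table is populated):
+    >>> sub = get_regSubmersion(db_file74)
+    >>> sub[['SITE_COD', 'SUB_FREQ', 'SUB_ETEND', 'SUB_ORIG']].head()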
+ ''' + df_inf = __get_SiteInfo__(db_file) + df = df_inf[['SITE_COD', 'FREQUENCE', 'ORIGINE', 'ETENDUE']] + # df.rename(columns={'ORIGINE':'SUB_ORIG'}, inplace=True) + df_dic1 = __get_DicGenFREQ__() + df_dic2 = __get_DicGenETEND__() + df = pd.merge(df,df_dic1, how='left', left_on='FREQUENCE', right_on='CODE') + df.rename(columns={'ORIGINE':'SUB_ORIG','FREQUENCE':'CD_FREQ1' ,'CODE':'CD_FREQ2', 'DESCR':'SUB_FREQ'}, inplace=True) + df = pd.merge(df,df_dic2, how='left', left_on='ETENDUE', right_on='CODE') + df.rename(columns={'ETENDUE':'CD_ETEND1' ,'CODE':'CD_ETEND2', 'DESCR':'SUB_ETEND'}, inplace=True) + if not detail: + df.drop(columns=['CD_FREQ1','CD_FREQ2','CD_ETEND1','CD_ETEND2'], inplace=True) + return df + +def get_connex(db_file, detail=False): + df_inf = __get_SiteInfo__(db_file) + df_inf = df_inf[['SITE_COD', 'CONNEX']] + df_dic = __get_DicGenCONNEX__() + df = pd.merge(df_inf, df_dic, how='left', left_on='CONNEX', right_on='CODE') + df.rename(columns={'CODE':'CD_CONNEX', 'DESCR':'CONNEXION'}, inplace=True) + if not detail: + df.drop(columns=['CONNEX','CD_CONNEX','NOMFICH'], inplace=True) + return df + +def get_fctmt_zh(db_file, detail=False): + df_in = get_regHydroEntree(db_file, detail) + if df_in.COORD_X.max() == 0 : df_in.drop(columns=['COORD_X'], inplace=True) + if df_in.COORD_Y.max() == 0 : df_in.drop(columns=['COORD_Y'], inplace=True) + df_out = get_regHydroSortie(db_file, detail) + if df_out.COORD_X.max() == 0 : df_out.drop(columns=['COORD_X'], inplace=True) + if df_out.COORD_Y.max() == 0 : df_out.drop(columns=['COORD_Y'], inplace=True) + df_sub = get_regSubmersion(db_file, detail) + df_con = get_connex(db_file, detail) + df_sub_con = pd.merge(df_sub,df_con, how='outer', on='SITE_COD') + lst_df = { + 'entree_eau': df_in, + 'sortie_eau': df_out, + 'sub_connex': df_sub_con, + } + return lst_df + + + +###################################### +### Delimitation de la zone humide ### +###################################### +def __get_DicGenLIM__(): + d = {'CODE':['1','2','3','4','5','6','7'], 'DESCR': [ + "hydrologie (balancement des eaux, crues, zones d'inondation, fluctuation de la nappe)", + 'présence ou absence de sols hydromorphes', + "présence ou absence d'une végétation hygrophile", + 'périodicité des inondations ou saturation du sol en eau', + 'occupation des terres (limite entre les espaces naturels et les milieux anthropisés)', + 'répartition et agencement spatial des habitats (types de milieux)', + 'fonctionnement écologique (espace nécessaire à la biologie des espèces : connexions biologiques, relations entre écosystèmes)' + ]} + return pd.DataFrame(data=d) + +def __get_DicGenLIM_1__(): + d = {'CODE':['1','2','3','4','5','6','7','8','9','10','11','12'], 'DESCR':[ + 'limites du bassin ou sous-bassin versant', + 'limites des zones inondables', + "bassin d'alimentation souterrain", + "zone de recharge d'une nappe", + 'occupation du sol', + 'formations végétales, étages de végétation', + 'limites paysagères', + 'répartition et agencement spatial des habitats (types de milieux)', + "zone nécessaire à la vie d'une espèce", + 'espace de transition entre des zones humides', + 'zone humide altérée en partie ou totalement, restaurable', + 'non déterminé', + ]} + return pd.DataFrame(data=d) + +def _get_espFct(db_file, detail=False): + ''' + :db_file: str. Nom de la base de données access + :detail: bool. 
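+    Decodes the EFLIM table against the DicGenLIM_1 dictionary defined
+    above; illustrative call:
+    >>> _get_espFct(db_file74)[['SITE_COD', 'DEF_ESPACE_FTC']].head()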
+ ''' + df = mdb.read_table(db_file, 'EFLIM') + if not isinstance(df.LIM1_COD,str): + df.LIM1_COD = df.LIM1_COD.astype(str) + df_dic = __get_DicGenLIM_1__() + df = pd.merge(df,df_dic, how='left', left_on='LIM1_COD', right_on='CODE') + df.rename(columns={'SIT_COD':'SITE_COD', 'DESCR':'DEF_ESPACE_FTC'}, inplace=True) + if not detail: + df.drop(columns=['LIM1_COD','CODE'], inplace=True) + return df + +def _get_delim(db_file, detail=False): + ''' + :db_file: str. Nom de la base de données access + :detail: bool. + ''' + df = mdb.read_table(db_file, 'SITELIM') + if not isinstance(df.LIM_COD,str): + df.LIM_COD = df.LIM_COD.astype(str) + df_dic = __get_DicGenLIM__() + df = pd.merge(df,df_dic, how='left', left_on='LIM_COD', right_on='CODE') + df.rename(columns={'SIT_COD':'SITE_COD', 'DESCR':'CRIT_DELIM'}, inplace=True) + if not detail: + df.drop(columns=['LIM_COD','CODE'], inplace=True) + return df + +def get_Delim_espaceFct(db_file, detail=False): + df_espFct = _get_espFct(db_file, detail) + df_delim = _get_delim(db_file, detail) + df = pd.merge(df_espFct,df_delim, how='left', on='SITE_COD') + return df + + + +##################################### +### Description de la zone humide ### +##################################### +def __get_DicGenIMP__(): + d = {'CODE':[ + '0','10','11.0','12.0','13.0','14.0','15.0','16.0','17.0','20','21.0','22.0','23.0','24.0', + '30','31.0','32.0','33.0','34.0','35.0','36.0','37.0','38.0','40','41.0','42.0','43.0','44.0', + '45.0','46.0','47.4','48.0','50','51.0','52.0','53.0','54.0','55.0','61.0','62.0','63.0','64.0', + '70','71.0','72.0','73.0','74.0','75','75.0','76.0','77.0','78.0','79.0','80.0','81.0','82.0', + '83.0','84.0','85.0 ','86.0','90','91.0','91.1','91.2 ','91.3','91.4','91.5', + ], 'DESCR':[ + 'AUCUN', + "IMPLANTATION, MODIFICATION OU FONCTIONNEMENT D'INFRASTRUCTURES ET AMENAGEMENTS LOURDS", + 'habitats humain, zone urbanisée','zone industrielle ou commerciale', + 'infrastructure linéaire, réseaux de communication', + 'extraction de matériaux','dépôt de matériaux, décharge', + 'équipement sportif et de loisirs','Infrastructure et équipement agricoles', + 'POLLUTIONS ET NUISANCES', + 'rejets substances polluantes dans les eaux','rejets substances polluantes dans les sols', + "rejets substances polluantes dans l'atmosphère",'nuisances liées à la surfréquentation, au piétinement', + 'PRATIQUES LIÉES À LA GESTION DES EAUX', + 'comblement, assèchement, drainage, poldérisation des zones humides', + "mise en eau, submersion, création de plan d'eau",'modification des fonds, des courants', + 'création ou modification des berges et des digues, îles et îlots artificiels, remblais et déblais, fossés', + "entretien rivières, canaux, fossés, plan d'eau",'modification du fonctionnement hydraulique', + 'action sur la végétation immergée, flottante ou amphibie, y compris faucardage et démottage', + 'pêche professionnelle', + 'PRATIQUES AGRICOLES ET PASTORALES', + 'mise en culture, travaux du sol', + 'débroussaillage, suppression haies et bosquets, remembrement et travaux connexes', + 'jachère, abandon provisoire','traitement de fertilisation et pesticides','pâturage', + 'suppression ou entretien de la végétation fauchage et fenaison', + 'abandon de systèmes culturaux et pastoraux, apparition de friches', + 'plantation de haies et de bosquets', + 'PRATIQUES ET TRAVAUX FORESTIERS', + 'coupes, abattages, arrachages et déboisements','taille, élagage', + 'plantation, semis et travaux connexes', + 'entretien liés à la sylviculture, nettoyage, épandage', + 'autre 
aménagement forestier, accueil du public, création de pistesPRATIQUES LIEES AUX LOISIRS', + 'Sport et loisir de plein air','Chasse','Pêche','Cueillette et ramassage', + "PRATIQUES DE GESTION OU D'EXPLOITATION DES ESPÈCES ET HABITATS", + 'prélèvement sur la faune ou la flore', + 'introduction, gestion ou limitation des populations', + "gestion des habitats pour l'accueil et l'information du public", + "autre (préciser dans l'encart réservé aux remarques)", + 'PRATIQUES AQUACOLES', + 'aménagements liés à la pisciculture ou à la conchyliculture', + 'fertilisation, amendements','alimentation artificielle','rejets de déchets', + 'vidanges', + 'PROCESSUS NATURELS ABIOTIQUES', + 'érosion','atterrissement, envasement, assèchement','submersion', + 'mouvement de terrain','incendie','catastrophe naturelle', + 'PROCESSUS BIOLOGIQUES ET ÉCOLOGIQUES', + 'évolution écologique, appauvrissement, enrichissement', + 'atterrissement','eutrophisation','acidification',"envahissement d'une espèce", + 'fermeture du milieu', + ]} + return pd.DataFrame(data=d) + +def __get_DicGenPOS__(): + d = {'CODE':['0','1','2','3'], 'DESCR':[ + 'inconnu','au niveau de la zone humide', + "au niveau de l'espace de fonctionnalité", + "au niveau de la zone humide et de l'espace de fonctionnalité", + ]} + return pd.DataFrame(data=d) + +def __get_DicACT__(db_file): + return mdb.read_table(db_file, 'DICACT') + +def __get_DicGenSDA__(): + d = {'CODE':['0','01','02','03','04','05','06','07','08','9','10','11','12','13'], 'DESCR':[ + 'None', + 'grands estuaires', + 'baies et estuaires moyens-plats', + 'marais et lagunes côtiers', + 'marais saumâtres aménagés', + "bordures de cours d'eau", + 'plaines alluviales', + 'zones humides de bas-fond en tête de bassin versant', + "régions d'étangs", + "petits plans d'eau et bordures de plans d'eau", + 'marais et landes humides de plaines et plateaux', + 'zones humides ponctuelles', + 'marais aménagés dans un but agricole', + 'zones humides artificielles', + ]} + return pd.DataFrame(data=d) + +def _get_SITEACT(db_file, detail=False): + df = mdb.read_table(db_file, 'SITEACT') + df = _del_na_col(df) + df_dic1 = __get_DicGenIMP__() + df_dic2 = __get_DicGenPOS__() + df_dic3 = __get_DicACT__(db_file) + if 'IMPACT_COD' in df.columns: + if not isinstance(df.IMPACT_COD,str): + df.IMPACT_COD = df.IMPACT_COD.astype(str) + df = pd.merge(df,df_dic1, how='left', left_on='IMPACT_COD', right_on='CODE') + df.rename(columns={'CODE':'CD_IMP', 'DESCR':'IMPACT'}, inplace=True) + if not detail: + df.drop(columns=['IMPACT_COD','CD_IMP'],inplace=True) + df = pd.merge(df,df_dic2, how='left', left_on='POSITION', right_on='CODE') + df.rename(columns={'CODE':'CD_LOC', 'DESCR':'LOCALISATION'}, inplace=True) + df = pd.merge(df,df_dic3, how='left', left_on='ACTIV_COD', right_on='CODE') + df.rename(columns={'CODE':'CD_ACTIV', 'DESCR':'ACTIVITE_HUM'}, inplace=True) + if not detail: + df.drop( + columns=['COORD_X','COORD_Y','POSITION','CD_LOC','ACTIV_COD','CD_ACTIV'], + inplace=True) + return df + +def _get_SITEIMP(db_file, detail=False): + df = mdb.read_table(db_file, 'SITEIMP') + df = _del_na_col(df) + df_dic1 = __get_DicGenIMP__() + df_dic2 = __get_DicGenPOS__() + df_dic3 = __get_DicACT__(db_file) + if 'IMPACT_COD' in df.columns: + if not isinstance(df.IMPACT_COD,str): + df.IMPACT_COD = df.IMPACT_COD.astype(str) + df = pd.merge(df,df_dic1, how='left', left_on='IMPACT_COD', right_on='CODE') + df.rename(columns={'CODE':'CD_IMP', 'DESCR':'IMPACT'}, inplace=True) + if not detail: + 
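+        # detail=False keeps only the human-readable labels and drops
+        # the raw MEDWET code columns (same convention as _get_SITEACT)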
df.drop(columns=['IMPACT_COD','CD_IMP'],inplace=True) + # df = pd.merge(df,df_dic2, how='left', left_on='POSITION', right_on='CODE') + # df.rename(columns={'CODE':'CD_LOC', 'DESCR':'LOCALISATION'}, inplace=True) + df = pd.merge(df,df_dic3, how='left', left_on='ACTIV_COD', right_on='CODE') + df.rename(columns={'CODE':'CD_ACTIV', 'DESCR':'ACTIVITE_HUM'}, inplace=True) + if not detail: + df.drop( + columns=[ + # 'POSITION','CD_LOC', + 'ACTIV_COD','CD_ACTIV'], + inplace=True) + return df + +def get_usage_process(db_file, detail=False): + df_sitImp = _get_SITEIMP(db_file, detail) + df_sitAct = _get_SITEACT(db_file, detail) + if 'IMPACT' not in df_sitAct.columns: + df_sitAct.merge(df_sitImp, how='left', on=['SITE_COD', 'ACTIVITE_HUM']) + # Complexe.......... + # df = (pd.concat([df_sitAct,df_sitImp]) + # .sort_values(by=['SITE_COD','ACTIVITE_HUM','IMPACT'], na_position='last') + # .drop_duplicates(subset=['SITE_COD','ACTIVITE_HUM','IMPACT'], keep='first') + # .reset_index(drop=True)) + # dup = df[df.duplicated(subset=['SITE_COD','ACTIVITE_HUM'])].index + # rmq = df[df.index.isin(dup)].REMARKS.dropna().index + # df.drop(dup).reset_index(drop=True) + return df_sitAct + +def get_sdage(db_file, detail=False): + df_inf = __get_SiteInfo__(db_file) + df_inf = df_inf[['SITE_COD','SDAGE_COD']] + df_dic = __get_DicGenSDA__() + df = pd.merge(df_inf,df_dic, how='left', left_on='SDAGE_COD', right_on='CODE') + df.rename(columns={'CODE':'CD_SDAGE', 'DESCR':'TYPO_SDAGE'}, inplace=True) + if not detail: + df.drop(columns=['CD_SDAGE','SDAGE_COD'],inplace=True) + return df + +def get_descrp_zh(db_file, detail=False): + df_usgP = get_usage_process(db_file, detail) + df_sdag = get_sdage(db_file, detail) + df_usgP = df_usgP.merge(df_sdag, how='outer', on=['SITE_COD']) + return df_usgP + + + +####################################### +### Fonctions eclologiques, valeurs ### +### socio-économiques, interêt ### +### patrimonial de la zone humide ### +####################################### +def __get_DicGenTYPE__(): + d = {'CODE':['1','2','3','4'], 'DESCR':[ + 'Fonctions hydrologiques', + 'Fonctions biologiques', + 'Valeurs socio-économiques', + 'Intérêt patrimonial', + ]} + return pd.DataFrame(data=d) + +def __get_DicGenFVI__(): + d = { + 'CODE':['01','02','03','04','05','06','07','08','09','10', + '20','21','22','23','24','25','26','27','30','31', + '32','33','34','35','36','40','41','42','43','44', + '50','51','60','61','62','63','64','70'], + 'TYPE': ['3','3','3','3','3','3','3','3','3','4','4','4', + '4','4','4','4','4','4','4','4','4','4','4','4','4', + '1','1','1','1','1','1','1','2','2','2','2','2','2'], + 'DESCR':[ + "réservoir pour l'alimentation en eau potable", + 'production biologique (pâturage; fauche; sylviculture; aquaculture; pêche; chasse)', + 'production de matière première (irrigation; granulats; énergie; tourbe; roseaux; etc.)', + 'intérêt pour la valorisation pédagogique/éducation', + 'intérêt paysager','intérêt pour les loisirs/valeurs récréatives', + 'valeur scientifique','valeur culturelle', + 'nuisances sur les conditions de vie des populations humaines résidentes (transmission parasitaire; moustiques; etc.)', + 'HABITATS','FAUNISTIQUES','invertébrés (sauf insectes)','insectes','poissons', + 'amphibiens','reptiles','oiseaux','mammifères','FLORISTIQUES','algues', + 'champignons','lichens','bryophytes','ptéridophytes','phanérogames', + 'FONCTIONS DE REGULATION HYDRAULIQUE', + 'expansion naturelle des crues (contrôle des crues; écrêtement des crues; stockage des eaux de crues; prévention des 
inondations)', + 'ralentissement du ruissellement', + "soutien naturel d'étiage (alimentation des nappes phréatiques; émergence des nappes phréatiques; recharge et protection des nappes phréatiques)", + "fonctions d'épuration (rétention de sédiments et de produits toxiques; recyclage et stockage de matière en suspension; régulation des cycles trophiques par exportation de matière organique; influence sur les cycles du carbone et de l'azote)", + 'FONCTIONS DE PROTECTION DU MILIEU PHYSIQUE',"rôle naturel de protection contre l'érosion", + "FONCTION D'HABITAT POUR LES POPULATIONS ANIMALES OU VEGETALES", + "connexions biologiques (continuité avec d'autres milieux naturels), zone d'échanges, zone de passages, corridor écologique (faune, flore)", + 'étapes migratoires, zones de stationnement, dortoirs', + "zone particulière d'alimentation pour la faune",'zone particulière liée à la reproduction', + "AUTRE INTERET FONCTIONNEL D'ORDRE ECOLOGIQUE (préciser dans l'encart réservé aux remarques)", + ]} + return pd.DataFrame(data=d) + +def _get_siteFVI(db_file, tab_obs): + return mdb.read_table(db_file,tab_obs) + +def get_fct_zh(db_file, type_fct='all', detail=False): + ''' + :db_file: str. Nom de la base de données access + :type_fct: str ou list. Code des observations à extraire de + la bdd access MEDWET : 'amphi', 'bird', 'flore', 'fish', + 'invert', 'mamm', 'rept'. 'all' si tous. + :detail: bool. + ''' + df_vib = pd.DataFrame() + df_vih = pd.DataFrame() + df_vis = pd.DataFrame() + df_vip = pd.DataFrame() + if type_fct == 'all' or 'all' in type_fct: + type_fct = ['hydro', 'bio', 'socio-eco', 'patri'] + if 'bio' == type_fct or 'bio' in type_fct: + tab_obs = 'SITEFVIB' + df_vib = _get_siteFVI(db_file, tab_obs) + if 'hydro' == type_fct or 'hydro' in type_fct: + tab_obs = 'SITEFVIH' + df_vih = _get_siteFVI(db_file, tab_obs) + if 'socio-eco' == type_fct or 'socio-eco' in type_fct: + tab_obs = 'SITEFVIS' + df_vis = _get_siteFVI(db_file, tab_obs) + if 'patri' == type_fct or 'patri' in type_fct: + tab_obs = 'SITEFVIP' + df_vip = _get_siteFVI(db_file, tab_obs) + df = pd.concat([df_vib,df_vih,df_vis,df_vip]) + df_dic1 = __get_DicGenFVI__() + df_dic2 = __get_DicGenTYPE__() + df = pd.merge(df,df_dic1, how='left', left_on='FVI_COD', right_on='CODE') + df.rename(columns={'CODE': 'CD_FVI','DESCR':'FONCTION'}, inplace=True) + df = pd.merge(df,df_dic2, how='left', left_on='TYPE', right_on='CODE') + df.rename(columns={'CODE': 'CD_TYPE','DESCR':'TYPE_FCT'}, inplace=True) + if not detail: + df.drop(columns=['FVI_COD','CD_FVI','CD_TYPE','TYPE'],inplace=True) + + cols = df.columns + lst_memo = cols[cols.str.startswith('MEMO')] + df = _aggr_cols(df, lst_memo) + df.rename(columns={'aggreg': 'MEMO'}, inplace=True) + df.drop(columns=lst_memo, inplace=True) + return df + + + +######################################## +### CORINE BIOTOPE de la zone humide ### +######################################## +def get_cb(db_file, detail=False): + df_cb = mdb.read_table(db_file, 'CATCHCBiot') + df_dic = mdb.read_table(db_file, 'DicGen-CBio') + df = pd.merge(df_cb,df_dic, how='left', left_on='CB_COD', right_on='CODE') + df.rename(columns={'DESCR':'DESC_CBIOTOPE'}, inplace=True) + if not detail: + df.drop(columns=['CODE'],inplace=True) + return df + + +############################################# +### Evaluation générale de la zone humide ### +############################################# +# get_eval_glob() +def get_eval_glob(db_file): + df_inf = __get_SiteInfo__(db_file) + df = 
df_inf[['SITE_COD','SITE_RAP1','SITE_RAP2','SITE_RAP3','SITE_RAP4']] + df.columns = ['SITE_COD', 'FCT_VALEURS_MAJEURS', 'INT_PATR_MAJEUR','BILAN_MENACES_FACTEURS', 'ORIENT_ACTIONS'] + return df + + + +##################################### +### Référence de la zone humide ### +##################################### +def __get_MWDREF__(db_file): + df = mdb.read_table(db_file, 'MWDREF') + df = _del_na_col(df) + return df + +def get_biblio(db_file, detail=False): + df_ref = mdb.read_table(db_file, 'SITEREF') + if not isinstance(df_ref.REF_NO,str): + df_ref.REF_NO = df_ref.REF_NO.astype(str) + df_dic = __get_MWDREF__(db_file) + df = pd.merge(df_ref,df_dic, how='left', on='REF_NO') + if not detail: + df.drop(columns=['REF_NO'],inplace=True) + return df + + +##################################### +### Information de la zone humide ### +##################################### +def __get_CATCHINF__(db_file): + df = mdb.read_table(db_file, 'CATCHINF') + df = df[~df.CATCH_NAME.isna()] + df = df[['CATCH_COD', 'CATCH_NAME']] + return df + +def get_SiteInfo(db_file, detail=False): + """Récupération des informations génériques + des zones humides. + + Paramètres : + ------------ + db_file : string. Chemin d'accès ver la bdd access MEDWET. + detail : bool. Affichage des relations entre les tables. + """ + + # lst_col = [ + # 'SITE_COD','DEPT', 'ORG','NUM','SITE_NAME','OTHER_NAME','COMPILER', + # 'EDITDATE','UPDDATE','AREA_WET','COORD_X','COORD_Y','SITE_RAP1','SITE_RAP2', + # 'SITE_RAP2','SITE_RAP3','SITE_RAP4','OTHER3_COD','OTHER4_COD','OTHER_INV', + # 'HISTORIQ','ZH_REMARKS','MEMO_ACTIV'] ###### NON FINI !! voir df_inf.columns + df_inf = __get_SiteInfo__(db_file) + # if 'CATCH_COD' in df_inf.columns: + # lst_col += ['CATCH_COD'] + # df_inf = df_inf[lst_col] + df_inf.drop(columns=['CONNEX','FREQUENCE','ORIGINE','ETENDUE','SDAGE_COD', + 'SITE_RAP1','SITE_RAP2','SITE_RAP3','SITE_RAP4'], inplace=True) + df_inf.rename(columns={ + 'OTHER1_COD':'CD_ZNIEFF_1G', + 'OTHER2_COD':'CD_ZNIEFF_2G', + 'OTHER3_COD':'CD_ZICO', + 'OTHER4_COD':'CD_N2000', + }, inplace=True) + df_dic1 = __get_auteur__(db_file) + df_inf = df_inf.merge(df_dic1,how='left', left_on='COMPILER', right_on='CODE') + df_inf.rename(columns={'DESCR': 'AUTEUR'}, inplace=True) + if not detail: + df_inf.drop(columns=['COMPILER','CODE'], inplace=True) + + if 'CATCH_COD' in df_inf.columns: + df_dic2 = __get_CATCHINF__(db_file) + df_inf = df_inf.merge(df_dic2,how='left', on='CATCH_COD') + if not detail: + df_inf.drop(columns=['CATCH_COD'],inplace=True) + return df_inf + + + +if __name__ == "__main__": + + # writer = pd.ExcelWriter('~/Documents/9_PROJETS/1_ZH/inventaire_zh_2021.xlsx', engine='xlsxwriter') + # workbook=writer.book + NAME_OUT = '/home/colas/Documents/13_CEN74/medwet2gn_ZH/inventaire_zh74_test.xlsx' + lst_fct = { + 'Infos générales': get_SiteInfo, + 'Corine Biotope' : get_cb, + 'Délimitation de la zh': get_Delim_espaceFct, + 'Description de la zh' : get_descrp_zh, + 'Fonctmt de la zh': get_fctmt_zh, + 'Fonctions de la zh' : get_fct_zh, + 'Evaluation de la zh': get_eval_glob, + 'Observations' : get_obs, + 'Bibliographie': get_biblio + } + print('INIT récupération des données ...') + df = {} + for f,fonction in enumerate(lst_fct): + name = fonction + fct = lst_fct[fonction] + df1 = fct(db_file74) + # df2 = fct(db_file2) + # if f == 0: + # lst_stcd = df1[df1.SITE_COD.isin(df2.SITE_COD)].SITE_COD + # if fct == get_SiteInfo: + # df2.loc[df2.SITE_COD.isin(lst_stcd), 'OTHER_NAME'] = df1.loc[df1.SITE_COD.isin(lst_stcd),'SITE_NAME'].tolist() + if 
isinstance(df1, pd.DataFrame) :#and isinstance(df2, pd.DataFrame): + # df[f] = pd.concat([df1[~df1.SITE_COD.isin(lst_stcd)], df2]) + df[f] = df1 + df[f].name = name + elif isinstance(df1, dict) :#and isinstance(df2, dict): + df[f] = {} + df[f]['title'] = name + for d in df1: + # df[f][d] = pd.concat([df1[d][~df1[d].SITE_COD.isin(lst_stcd)], df2[d]]) + df[f][d] = df1[d] + + print('INIT écriture du fichier ...') + # Ecriture des données + with pd.ExcelWriter(NAME_OUT) as writer: + for d in df: + DF = df[d] + if isinstance(DF, pd.DataFrame): + DF.to_excel(writer,sheet_name=DF.name,startrow=1 , startcol=0, index=False, header=DF.columns) + ws = writer.book.active + writer.sheets[DF.name].cell(1,1,value=DF.name) + writer.save() + elif isinstance(DF, dict): + for i,d in enumerate(DF): + if d == 'title': + continue + if i == 1: + row = 1 + col = 0 + else: + col = DF[d].shape[1] + col + 3 + DF[d].to_excel(writer,sheet_name=DF['title'],startrow=row , startcol=col, index=False) + ws = writer.book.active + writer.sheets[DF['title']].cell(column=col+1,row=row,value=d) + writer.save() + + import sys + sys.exit('END SCRIPT ...') + + + + # ws.cell(1,1,value=df_cb.name) + # writer.save() + # writer.write_cells(df_cb.name,sheet_name='infos_site',startrow=0,startcol=0) + # worksheet.write_string(df_inf.shape[0] + 4, 0, df2.name) + # df2.to_excel(writer,sheet_name='infos_site',startrow=df_inf.shape[0] + 5, startcol=0) + + # df_inf = get_SiteInfo(db_file) + # df_inf.name = 'Infos générales' + # df_cb = get_cb(db_file) + # df_cb.name = 'Corine Biotope' + # df_evl = get_eval_glob(db_file) + # df_evl.name = 'Evaluation de la zh' + # df_dlm = get_Delim_espaceFct(db_file) + # df_dlm.name = 'Délimitation de la zh' + # df_dsc = get_descrp_zh(db_file) + # df_dsc.name = 'Description de la zh' + # df_ftm = get_fctmt_zh(db_file) + # df_ftm['title'] = 'Fonctmt de la zh' + # df_fct = get_fct_zh(db_file) + # df_fct.name = 'Fonctions de la zh' + # df_obs = get_obs(db_file) + # df_obs.name = 'Observations' + + + lst_df = [df_inf,df_cb,df_evl,df_dlm,df_dsc,df_ftm,df_fct,df_obs] + with pd.ExcelWriter(NAME_OUT) as writer: + for df in lst_df: + if isinstance(df, pd.DataFrame): + df.to_excel(writer,sheet_name=df.name,startrow=1 , startcol=0, index=False) + ws = writer.book.active + writer.sheets[df.name].cell(1,1,value=df.name) + writer.save() + elif isinstance(df, dict): + for i,d in enumerate(df): + if d == 'title': + continue + if i == 0: + row = 1 + col = 0 + else: + col = df[d].shape[1] + col + 3 + df[d].to_excel(writer,sheet_name=df['title'],startrow=row , startcol=col, index=False) + ws = writer.book.active + writer.sheets[df['title']].cell(column=col+1,row=row,value=d) + writer.save() + + import numpy as np + df1 = get_SiteInfo(db_file1) + df2 = get_SiteInfo(db_file2) + # df1 = get_fctmt_zh(db_file1) + # df2 = get_fctmt_zh(db_file2) + lst_stcd = df1[df1.SITE_COD.isin(df2.SITE_COD)].SITE_COD + # Get same columns + cols1 = df1.columns + cols2 = df2.columns + same_cols = cols1[cols1.isin(cols2)] + # tmp1 = df1.loc[df1.SITE_COD.isin(lst_stcd),same_cols].sort_values('SITE_COD').reset_index(drop=True) + # tmp2 = df2.loc[df2.SITE_COD.isin(lst_stcd),same_cols].sort_values('SITE_COD').reset_index(drop=True) + tmp1 = df1.loc[df1.SITE_COD.isin(lst_stcd),same_cols].sort_values('SITE_COD').set_index('SITE_COD',drop=True) + tmp2 = df2.loc[df2.SITE_COD.isin(lst_stcd),same_cols].sort_values('SITE_COD').set_index('SITE_COD',drop=True) + ne_stacked = (tmp1 != tmp2).stack() + changed = ne_stacked[ne_stacked] + changed.index.names = ['id', 
'col'] + difference_locations = np.where(tmp1 != tmp2) + changed_from = tmp1.values[difference_locations] + changed_to = tmp2.values[difference_locations] + pd.DataFrame({'from': changed_from, 'to': changed_to}, index=changed.index) + + print(df1[df1.SITE_COD.isin(lst_stcd)].sort_values('SITE_COD').iloc[:,:15]) + print(df2[df2.SITE_COD.isin(lst_stcd)].sort_values('SITE_COD').iloc[:,:15]) + + + + + get_regHydroEntree(db_file) + get_regHydroSortie(db_file) + get_regSubmersion(db_file) + + df = mdb.read_table(db_file, 'SITEINFO') + df = _del_na_col(df) + + # Trouver un code parmis les tables dictionnaires + # paramètres + code = '26CCRV0012' + db_file = db_file1 + # run + not_in = [ + 'SIG','DicGen','List','Switchboard','Items', # MEDWET2 + '#Save_DicGen-FVI','~TMPCLP308581','#Save_SITEFVIS', # MEDWET1 + '#Save_SITEFVIH','#Save_SITEFVIB','#Save_SITEFVIP', # MEDWET1 + '$$$$1','$$$$2','#Save_SITEACT','$TEMP_ACTIV$', # MEDWET1 + ] + tmp = mdb.list_tables(db_file) + lst_t = [t for t in tmp if t.startswith('DicGen') and not t.endswith('TEMP')] + lst_t = [t for t in tmp] + for t in lst_t: + if t in not_in: + continue + df = mdb.read_table(db_file, t) + if not df.empty and 'SITE_COD' in df.columns: + boolean_findings = df.SITE_COD.str.contains(code) + total_occurence = boolean_findings.sum() + if(total_occurence > 0): + print(t) + elif not df.empty and not 'SITE_COD' in df.columns: + print('SITE_COD column is not "%s" '%t) + diff --git a/3_AZALEE/RHEZO-TUF/identification_tufiere.py b/3_AZALEE/RHEZO-TUF/identification_tufiere.py new file mode 100644 index 0000000..f29f07c --- /dev/null +++ b/3_AZALEE/RHEZO-TUF/identification_tufiere.py @@ -0,0 +1,31 @@ +from pycen import zh,con_bdcen +import geopandas as gpd + +zh = zh() + +dzh = zh.v_zoneshumides() +col_date = dzh.columns[dzh.columns.str.contains('date')] +dzh[col_date] = dzh[col_date].astype(str) +site = gpd.read_postgis('SELECT * FROM sites.c_sites_zonages WHERE date_fin IS NULL',con_bdcen) + +col_rmq = dzh.columns[dzh.columns.str.contains('rmq')] + +c1 = dzh[dzh.code_cb.str.contains('54.1',na=False)] +c2 = c1[c1.intersects(site.unary_union)] +c3 = dzh[dzh.code_cb.str.contains('54.1') & + ( + dzh.rmq_interet_patri.str.contains('tufi',case=False) + # dzh.rmq_site.str.contains('tufi',case=False) | + # dzh.rmq_fct_majeur.str.contains('tufi',case=False) | + # dzh.rmq_bilan_menace.str.contains('tufi',case=False) | + # dzh.rmq_orient_act.str.contains('tufi',case=False) | + # dzh.rmq_usage_process.str.contains('tufi',case=False) | + # dzh.rmq_activ_hum.str.contains('tufi',case=False) + ) + ] + + +to_file = '/home/colas/Documents/tmp/diagnos_tuffière.gpkg' +c1.to_file(to_file,layer='541 only',driver='GPKG') +c2.to_file(to_file,layer='541 ENS',driver='GPKG') +c3.to_file(to_file,layer='541 TUF',driver='GPKG') diff --git a/3_AZALEE/azaleeTOgeonature.py b/3_AZALEE/azaleeTOgeonature.py new file mode 100644 index 0000000..ffd956f --- /dev/null +++ b/3_AZALEE/azaleeTOgeonature.py @@ -0,0 +1,1157 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*-. 
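+# Purpose (inferred from the functions below): migrate AZALEE wetland
+# inventory data into the GeoNature pr_zh schema (t_zh, cor_* relation
+# tables and t_activity).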
+ +from pycen import con_gn, zh, pers +import pandas as pd +import geopandas as gpd +import uuid +import re +zh = zh() + +DICT_TZH = { + 'site_code':'code', + 'nom':'main_name', + 'autre_nom':'secondary_name', + 'auteur_site':'create_author', + ##'auteur_geom', + 'auteur_last_maj':'update_author', + 'date_site':'create_date', + ##'date_geom', + 'date_last_maj':'update_date', + # 'CEN Isère': 'id_org', # organisme opérateur + ##'type_milieu', + ##'type_site', + # 'typo_sdage':'id_sdage', + # 'id_sage' #A récup + # 'rmq_site':'remark_pres', + # 'rmq_fct_majeur':'', + # 'rmq_interet_patri':'', + # 'rmq_bilan_menace':'', + # 'rmq_orient_act':'', + # 'rmq_usage_process':'', + # 'code_cb':'', + # 'lib_cb':'', + # 'activ_hum':'', + # 'impact':'', + # 'position':'', + # 'rmq_activ_hum':'', + # 'connexion':'', + # 'subm_orig':'', + # 'subm_freq':'', + # 'subm_etend':'', + # 'fct_bio':'', + # 'fct_hydro':'', + # 'int_patri':'', + # 'val_socioEco':'', + # 'crit_delim':'', + # 'crit_def_esp':'', + # 'entree_eau_reg':'', + # 'entree_eau_perm':'', + # 'entree_eau_topo':'', + # 'sortie_eau_reg':'', + # 'sortie_eau_perm':'', + # 'sortie_eau_topo':'', + # 'geom':'geom' +} + + +def get_id_organisme(nom): + if "'" in nom: + nom = nom.replace("'","’") + sql = "SELECT id_organisme FROM utilisateurs.bib_organismes WHERE nom_organisme = '%s'"%nom + return pd.read_sql_query(sql,con_gn)['id_organisme'].values + + +def remove_special_char(obj,space=False): + dict_char = { + r'[’]':"'", + r'[àáâãäå]':'a', + r'[èéêë]':'e', + r'[ìíîï]':'i', + r'[òóôõö]':'o', + r'[ùúûü]':'u', + # r'[ ]':"", + r'[–]':"-" + } + if space: + dict_char = {**dict_char, **{r'[ ]':""}} + return obj.replace(dict_char,regex=True) + + +def recup_cols_table(table,con,schema='pr_zh'): + lst_cols = con.dialect.get_columns(con,table,schema) + return [x['name'] for x in lst_cols] + + +def t_nomenclature_ZH(bib_mnemo=None,source='ZONES_HUMIDES'): + sch = 'ref_nomenclatures' + tab = ['t_nomenclatures','bib_nomenclatures_types'] + sql = """ + SELECT + a.id_nomenclature, + a.cd_nomenclature, + a.mnemonique mnemo, + a.label_default as label, + a.definition_default def, + b.mnemonique bib_mnemo, + b.label_default bib_label, + b.definition_default bib_def, + a.active + FROM {sch}.{tab0} a + JOIN {sch}.{tab1} b USING (id_type) + """.format(sch=sch,tab0=tab[0],tab1=tab[1]) + if source is not None or bib_mnemo is not None: + sql += " WHERE " + if source is not None: + sql += """b."source" = '%s'"""%source + sql += " AND " if bib_mnemo is not None else '' + if bib_mnemo is not None: + sql += "b.mnemonique = '%s'"%bib_mnemo + return pd.read_sql_query(sql,con_gn).replace({r'[’]':"'"},regex=True) + + +def recup_sdage(df): + # Récupération de l'id SDAGE dans la typo + df['id_sdage'] = [str(int(re.search(r'\d+', x).group())) for x in df.typo_sdage.sort_index()] + sdage = t_nomenclature_ZH(bib_mnemo='SDAGE') + dict_sdage = dict(zip(sdage.cd_nomenclature,sdage.id_nomenclature)) + df['id_sdage'].replace(dict_sdage,inplace=True) + return df + + +def recup_sage(df): + # Identification de la précision SAGE + df['id_sage'] = [df.loc[df.typo_sdage==x,'mnemo_sdage'].values[0] if re.search(r'\.\d+', x) else None for x in df.typo_sdage.sort_index()] + sage = t_nomenclature_ZH(bib_mnemo='SAGE') + dict_sage = dict(zip(sage.mnemo,sage.id_nomenclature)) + df['id_sage'].replace(dict_sage,inplace=True) + return df + + +def cor_lim_list(crit_delim): + """Remplis la table pr_zh.cor_lim_list et retourn les uuid associés + """ + + delim = t_nomenclature_ZH(bib_mnemo='CRIT_DELIM') + 
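+    # strip the trailing ' (...)' remarks from the nomenclature mnemonics
+    # so they can be matched against the raw crit_delim labels below
+    # (the remarks themselves are recovered by recup_delim_rmq)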
delim.mnemo = delim.mnemo.str.replace(r'.\(.*\)','',regex=True) + dict_delim = dict(zip(delim.mnemo.str.lower(),delim.id_nomenclature)) + + serie = crit_delim\ + .fillna('non déterminé')\ + .str.split(';',expand=True).stack().droplevel(-1).reset_index() + serie.columns = ['id','delim'] + serie.set_index('id',inplace=True) + serie['id_lim'] = serie.delim.str.replace(r'.\(.*\)','',regex=True) + + serie.id_lim.replace(dict_delim,inplace=True) + genuuid = serie.index.to_frame().drop_duplicates() + del genuuid['id'] + genuuid['id_lim_list'] = [uuid.uuid4() for x in genuuid.index] + + _cor_lim_list = pd.merge(serie,genuuid,how='inner',right_index=True,left_index=True) + uuidreturn = pd.merge(crit_delim,genuuid,how='left',right_index=True,left_index=True) + + # Remplissage de la table pr_zh.cor_lim_list + _cor_lim_list[['id_lim_list','id_lim']].to_sql( + name='cor_lim_list',con=con_gn,schema='pr_zh',if_exists='append',index=False, + # dtype={ + # 'id_lim_list':uuid.SafeUUID + # } + ) + return uuidreturn.id_lim_list.sort_index() + + +def recup_delim_rmq(crit_delim): + serie = crit_delim.str.split(';',expand=True).stack().droplevel(-1).reset_index() + serie.columns = ['id','delim'] + serie.set_index('id',inplace=True) + serie['remarks'] = [ x[x.find("(")+1:x.rfind(")")] if x.find("(") > -1 else None for x in serie.delim ] + uniserie = serie.groupby('id')['remarks'].apply(list).reset_index() + uniserie.columns = ['id','remarks'] + uniserie.set_index('id',inplace=True) + uniserie.remarks = ['\n'.join(list(set(filter(None,x)))).strip() for x in uniserie.remarks] + uniserie.remarks.replace({'': None},inplace=True) + df_remarks = pd.merge( + crit_delim, + # serie.reset_index().drop_duplicates(subset='id').set_index('id'), + uniserie, + how='left',right_index=True,left_index=True + ) + return df_remarks.remarks + + +def recup_subm(col_subm,typ_subm): + """Correspondance subm--id_nomenclature. + @col_subm : Series. Colonne de submersion + @typ_subm : str. 
Type de submersion ['frequente','etendue','connexion'] + """ + # Manque la fréquence "partiellement submergé" + if typ_subm == 'frequente': + nom_subm = t_nomenclature_ZH(bib_mnemo='SUBMERSION_FREQ') + elif typ_subm == 'etendue': + nom_subm = t_nomenclature_ZH(bib_mnemo='SUBMERSION_ETENDUE') + elif typ_subm == 'connexion': + nom_subm = t_nomenclature_ZH(bib_mnemo='TYPE_CONNEXION') + dict_submfreq = dict(zip(nom_subm.mnemo,nom_subm.id_nomenclature)) + serie = col_subm.replace({'Inconnu':'Non déterminé'}) + return serie.replace(dict_submfreq) + + +def to_bib_organismes_util(): + table = 'bib_organismes' + isin_db = pd.read_sql_table( + table,con_gn,'utilisateurs',['id_organisme'],columns=['nom_organisme'] + ).replace({r'[’]':"'"},regex=True) + insert_from = pers.get_organisme() + to_insert = insert_from[~insert_from.nom.str.lower().isin(isin_db.nom_organisme.str.lower())] + to_insert\ + .drop(columns='abbrev')\ + .rename(columns={'nom':'nom_organisme'})\ + .to_sql(name=table,con=con_gn,schema='utilisateurs',if_exists='append',index=False) + + +def to_bib_organismes_przh(): + table = 'bib_organismes' + + # if first_time: + # sql = 'DELETE FROM pr_zh.%s'%table + # with con_gn.begin() as cnx: + # cnx.execute(sql) + + isin_db = pd.read_sql_table( + table,con_gn,'pr_zh',['id_org'],columns=['name'] + ).replace({r'[’]':"'"},regex=True) + insert_from = pers.get_organisme()\ + .replace({'Inconnu':'Autre'}) + + to_insert = insert_from[~insert_from.nom.str.lower().isin(isin_db.name.str.lower())] + to_insert.abbrev = remove_special_char(to_insert.abbrev,space=True)\ + .str.upper()\ + .str[:6] + to_insert.loc[to_insert.abbrev.notna()]\ + .rename(columns={'nom':'name','abbrev':'abbrevation'})\ + .to_sql(name=table,con=con_gn,schema='pr_zh',if_exists='append',index=False) + to_insert.loc[to_insert.abbrev.isna()]\ + .rename(columns={'nom':'name'})\ + .drop(columns=['abbrev'])\ + .to_sql(name=table,con=con_gn,schema='pr_zh',if_exists='append',index=False) + + +def get_bib_organismes(schema='utilisateurs'): + table = 'bib_organismes' + return pd.read_sql_table( + table,con_gn,schema,['id_organisme'],columns=['nom_organisme'] + ).replace({r'[’]':"'"},regex=True) + + + +def to_t_roles(): + table = 't_roles' + isin_db = pd.read_sql_table( + table,con_gn,'utilisateurs',['id_role'],columns=['nom_role','prenom_role','id_organisme'] + ).replace({r'[’]':"'"},regex=True) + + bib_organismes = get_bib_organismes(schema='utilisateurs') + t_roles = pd.merge(isin_db,bib_organismes,'left',left_on='id_organisme',right_index=True) + insert_from = pers.get_auteur2().replace({'GENTIANA':'Gentiana'}) + ident_notin_db = [ + x for x in insert_from.index.sort_values() + if t_roles[(t_roles.nom_role==insert_from.nom[x]) & (t_roles.prenom_role==insert_from.prenom[x]) & (t_roles.nom_organisme==insert_from.organisme[x])].empty + ] + + dict_orga = dict(zip(bib_organismes.nom_organisme,bib_organismes.index)) + to_insert = insert_from[insert_from.index.isin(ident_notin_db)]\ + .drop(columns=['nom_prenom'])\ + .rename(columns={'nom':'nom_role','prenom':'prenom_role','organisme':'id_organisme',})\ + .replace({**dict_orga,**{'Inconnu':-1}}) + + to_insert.to_sql( + name=table,con=con_gn,schema='utilisateurs',if_exists='append',index=False + ) + + +def get_t_roles(id_role=None): + table = 't_roles' + t_roles = pd.read_sql_table( + table,con_gn,'utilisateurs',['id_role'],columns=['nom_role','prenom_role','id_organisme'] + ).replace({r'[’]':"'"},regex=True).sort_index() + if id_role: + t_roles = t_roles.iloc[[id_role]] + return 
pd.merge(t_roles,get_bib_organismes(),'left',left_on='id_organisme',right_index=True) + + +def recup_id_role(author): # A finir ! + adapt_auth = author.replace({' \(Inconnu\)':'',' ':' '},regex=True).str.strip().unique() + azalee_auth = pers.get_auteur2().sort_index()#.replace({' ':' '},regex=True) + azalee_auth = azalee_auth[azalee_auth.nom_prenom.isin(adapt_auth)].replace({'Inconnu':'Autre'}) + # azalee_auth.nom_prenom.replace({'Inconnu':'Autre'},regex=True,inplace=True) + t_roles = pd.merge(get_t_roles().reset_index(),azalee_auth, how='inner',left_on=['nom_role','prenom_role','nom_organisme'],right_on=['nom','prenom','organisme']) + dict_role = dict(zip(t_roles.nom_prenom,t_roles.id_role)) + return author.replace({' \(Inconnu\)':'',' ':' '},regex=True).str.strip().replace(dict_role) + + +def get_id_t_zh(code=None): + """@code : str, list, Serie, Index. Code à 12 characters maximum de la zone humide. + """ + sql = "SELECT id_zh,zh_uuid,code FROM pr_zh.t_zh" + if isinstance(code,str): + sql += " WHERE code='%s'"%code + elif isinstance(code,list) or isinstance(code,pd.Series) or isinstance(code,pd.Index): + sql += " WHERE code IN %s"%str(tuple(code)) + return pd.read_sql_query(sql,con_gn) + + +def get_id_org_przh(): + return pd.read_sql_table('bib_organismes',con_gn,'pr_zh') + + + +def _cor_zh_hydro(tzh_code): + """ + @tzh : pd.Serie. Série de valeurs + correspondants à la colonne pr_zh.t_zh."code". + """ + table = 'cor_zh_hydro' + sql = ''' + SELECT h.id_hydro,zh.id_zh + FROM pr_zh.t_hydro_area h, pr_zh.t_zh zh + WHERE zh."code" in {tzh_code} + AND ST_INTERSECTS( ST_SetSRID(h.geom,4326),ST_MakeValid(ST_SetSRID(zh.geom,4326))) + AND (h.id_hydro,zh.id_zh) NOT IN (SELECT id_hydro,id_zh FROM pr_zh.cor_zh_hydro) + '''.format(tzh_code=tuple(tzh_code)) + df = pd.read_sql_query(sql,con_gn) + + if not df.empty: + df.to_sql( + name=table, con=con_gn, schema='pr_zh', + if_exists='append', index=False + ) + print('INSERT %i correspondances'%df.shape[0]) + else: + print('AUCUNE nouvelles correspondances identifiées') + + +def _cor_zh_(tzh_code,typ): + """ + @tzh : pd.Serie. Série de valeurs + correspondants à la colonne pr_zh.t_zh."code". + @typ : str. [hydro,rb] + """ + typ = typ.lower() + table = 'cor_zh_%s'%typ + tab_typ = 't_hydro_area' if typ == 'hydro' else 't_river_basin' + id_typ = 'id_hydro' if typ == 'hydro' else 'id_rb' + sql = ''' + SELECT h.{id_typ},zh.id_zh + FROM pr_zh.{tab_typ} h, pr_zh.t_zh zh + WHERE zh."code" in {tzh_code} + AND ST_INTERSECTS( ST_SetSRID(h.geom,4326),ST_MakeValid(ST_SetSRID(zh.geom,4326))) + AND (h.{id_typ},zh.id_zh) NOT IN (SELECT {id_typ},id_zh FROM pr_zh.{tab_to}) + ;'''.format( + tzh_code = tuple(tzh_code), + id_typ = id_typ, + tab_typ = tab_typ, + tab_to = table) + df = pd.read_sql_query(sql,con_gn) + + if not df.empty: + df.to_sql( + name=table, con=con_gn, schema='pr_zh', + if_exists='append', index=False + ) + print('INSERT %i correspondances'%df.shape[0]) + else: + print('AUCUNE nouvelles correspondances identifiées') + + +def _calc_recouvrmt(df1,df2,how='inner'): + ''' + Calcule le recouvrement de df2 sur df1 + pour chaque géométrie de df1: + Parameters + ---------- + df1 : GeoDataFrame. + df2 : GeoDataFrame. 
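+    Returns
+    -------
+    DataFrame [id_df1, id_df2, 'perc_rcvmt'] where perc_rcvmt is the
+    percentage of each df1 geometry covered by the intersecting df2
+    geometry (assumes the id sits in the first column of each frame
+    and geometries are in a 'geom' column).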
+ ''' + iddf1 = df1.columns[0] + iddf2 = df2.columns[0] + + # Jointure spaciale + tmp = gpd.sjoin( + df1, + df2[['geom']], + predicate = 'intersects', + how = how) + + tmp.dropna(subset=['index_right'],inplace=True) + tmp.index_right = tmp.index_right.astype(int) + tmp.reset_index(inplace=True) + + tmp = tmp.join( + df2[['geom',iddf2]].rename(columns={'geom': 'right_geom'}), + on=['index_right'], how='left') + + tmp2 = tmp[['index_right','right_geom',iddf2]].copy() \ + .rename(columns={'right_geom': 'geom'}) \ + .set_geometry('geom') + + tmp1 = tmp[[iddf1,'geom']].copy() \ + .set_geometry('geom') + + if not tmp1.geom.values.is_valid.all(): + tmp1.loc[~tmp1.geom.values.is_valid,'geom'] = tmp1.loc[~tmp1.geom.values.is_valid,'geom'].buffer(0) + + if not tmp2.geom.values.is_valid.all(): + tmp2.loc[~tmp2.geom.values.is_valid,'geom'] = tmp2.loc[~tmp2.geom.values.is_valid,'geom'].buffer(0) + + tmp['perc_rcvmt'] = (tmp1.intersection(tmp2).area/tmp1.area)*100 + + return tmp[[iddf1,iddf2,'perc_rcvmt']] + + +def _cor_zh_areaBis(tzh_code,typ,cover=False): + """ + @tzh : pd.Serie. Série de valeurs + correspondants à la colonne pr_zh.t_zh."code". + @typ : str. COM, DEP, ref_geo + """ + from math import ceil + + table = 'cor_zh_area' + + sqltzh = """ + SELECT zh.id_zh, zh.geom FROM pr_zh.t_zh zh WHERE zh."code" in {tzh_code} + """.format(tzh_code=tuple(tzh_code)) + tzh = gpd.read_postgis(sqltzh,con_gn,crs=4326) + + if tzh.crs.srs=='epsg:4326': + tzh.to_crs(2154,inplace=True) + + sqllarea = """ + SELECT l.id_area, l.geom FROM ref_geo.l_areas l + JOIN ref_geo.bib_areas_types bib USING (id_type) + WHERE bib.type_code='{typ}' and l."enable" + """.format(typ=typ) + larea = gpd.read_postgis(sqllarea,con_gn,crs=2154) + + df = _calc_recouvrmt(larea,tzh).rename(columns={'perc_rcvmt':'cover'}) + + if cover: + df['cover'] = [ceil(x) for x in df.cover] + else : + df.drop(columns=['cover'],inplace=True) + + # return df + if not df.empty: + df.to_sql( + name=table, con=con_gn, schema='pr_zh', + if_exists='append', index=False + ) + print('INSERT %i correspondances'%df.shape[0]) + else: + print('AUCUNE nouvelles correspondances identifiées') + + +def _cor_zh_area(tzh_code,typ): + """ + @tzh : pd.Serie. Série de valeurs + correspondants à la colonne pr_zh.t_zh."code". + @typ : str. 
COM, DEP, ref_geo + """ + from math import ceil + + table = 'cor_zh_area' + if typ == 'COM': + cd1 = """, + ( ST_Area(ST_INTERSECTION( l.geom,ST_MakeValid(ST_Transform(ST_SetSRID(zh.geom,4326),2154)) )) *100) / ST_Area(l.geom) AS cover + """ + cd2 = ' AND l."enable"' + else: cd1 = cd2 = '' + + sql = ''' + SELECT l.id_area,zh.id_zh {cover1} + FROM ref_geo.l_areas l + JOIN ref_geo.bib_areas_types bib USING (id_type), + pr_zh.t_zh zh + WHERE zh."code" in {tzh_code} + AND bib.type_code='{typ}' + AND ST_INTERSECTS( ST_SetSRID(l.geom,2154), ST_MakeValid(ST_Transform(ST_SetSRID(zh.geom,4326),2154)) ) + AND (l.id_area,zh.id_zh) NOT IN (SELECT id_area,id_zh FROM pr_zh.cor_zh_area) + {cd2} + '''.format(tzh_code=tuple(tzh_code),typ=typ,cover1=cd1,cd2=cd2) + df = pd.read_sql_query(sql,con_gn) + + if cd1 != '': + df['cover'] = [ceil(x) for x in df.cover.sort_index()] + + if not df.empty: + df.to_sql( + name=table, con=con_gn, schema='pr_zh', + if_exists='append', index=False + ) + print('INSERT %i correspondances'%df.shape[0]) + else: + sql = ''' + SELECT l.id_area + FROM ref_geo.l_areas l + JOIN ref_geo.bib_areas_types bib USING (id_type) + WHERE bib.type_code='{typ}' + '''.format(typ=typ) + res = pd.read_sql_query(sql,con_gn) + if not res.empty: + print('AUCUNE nouvelles correspondances identifiées') + else : + print('AUCUNE geometrie dans la table `ref_geo.l_areas` pour le `type_code` %s'%typ) + + +def find_nb_hab_bylbcode(df): + to_corzhcb, not_bib = __filter_lb_code__( + df, join_ch=True + ) + return to_corzhcb.groupby('id_zh',dropna=False)\ + .agg({'is_ch':sum})\ + .rename(columns={'is_ch':'nb_hab'})\ + .reset_index() + + +def to_t_zh(DF): + """Need IN : columns['typo_sdage','mnemo_sdage'] + """ + from geoalchemy2 import Geometry + df = DF.copy() + table = 't_zh' + t_role = get_t_roles().sort_index() + org = get_id_org_przh() + dict_org = dict(zip(org.name,org.id_org)) + # First modif = update_author. Important + test_auth = df.create_author.str.contains(';',na=False) + if test_auth.any(): + df.loc[test_auth,'update_author'] = df.loc[test_auth,'create_author'].str.split(';',expand=True)[1] + df.loc[test_auth,'create_author'] = df.loc[test_auth,'create_author'].str.split(';',expand=True)[0] + + df['create_author'] = recup_id_role(author=df['create_author']) + df['update_author'] = recup_id_role(author=df['update_author']) + + df['id_org'] = [t_role.loc[t_role.index==x,'nom_organisme'].values[0] for x in df['create_author']] + df.id_org.replace(dict_org,inplace=True) + df['id_lim_list'] = cor_lim_list(crit_delim=df.crit_delim) + df['remark_lim'] = recup_delim_rmq(crit_delim=df.crit_delim) + recup_sdage(df) + recup_sage(df) + df['remark_pres'] = df.rmq_site.copy() + # df['v_habref'] = None + # df['ef_area'] = None # A ne pas remplir. Nos inventaires ne s'en sont pas préocupé. + # df['global_remark_activity'] = None # A ne pas remplir. (MEDWET a vérifier) Infos non présente dans nos inventaires. + df['id_thread'] = None + df['id_frequency'] = recup_subm(col_subm=df.subm_freq,typ_subm='frequente') # Manque la fréquence "partiellement submergé" + df['id_spread'] = recup_subm(col_subm=df.subm_etend,typ_subm='etendue') + df['id_connexion'] = recup_subm(col_subm=df.connexion,typ_subm='connexion')\ + .replace({'Non déterminé':None}) + # df['id_diag_hydro'] = None # A ne pas remplir. (MEDWET a vérifier) Infos non présente dans nos inventaires. + # df['id_diag_bio'] = None # A ne pas remplir. (MEDWET a vérifier) Infos non présente dans nos inventaires. + # df['id_strat_gestion'] = None # A ne pas remplir. 
+    # df['v_habref'] = None
+    # df['ef_area'] = None  # Do not fill: our inventories never recorded it.
+    # df['global_remark_activity'] = None  # Do not fill (to check against MEDWET): info absent from our inventories.
+    df['id_thread'] = None
+    df['id_frequency'] = recup_subm(col_subm=df.subm_freq, typ_subm='frequente')  # The "partiellement submergé" frequency has no match yet
+    df['id_spread'] = recup_subm(col_subm=df.subm_etend, typ_subm='etendue')
+    df['id_connexion'] = recup_subm(col_subm=df.connexion, typ_subm='connexion')\
+        .replace({'Non déterminé': None})
+    # df['id_diag_hydro'] = None  # Do not fill (to check against MEDWET): info absent from our inventories.
+    # df['id_diag_bio'] = None  # Do not fill (to check against MEDWET): info absent from our inventories.
+    # df['id_strat_gestion'] = None  # Do not fill (to check against MEDWET): info absent from our inventories.
+    # df['remark_diag'] = None
+    # df['is_other_inventory'] = None  # To be updated from MEDWET
+    # df['is_carto_hab'] = None  # default: False
+    # df['nb_hab'] = [len(x) if x else None for x in df.code_cb.str.split(';')]  # Heritage habitats only
+    df = df.merge(find_nb_hab_bylbcode(df), on='id_zh', how='left')
+    # df['total_hab_cover'] = 100  # Heritage habitats only; cannot be filled.
+    df['remark_eval_functions'] = df.rmq_fct_majeur.copy()
+    df['remark_eval_heritage'] = df.rmq_interet_patri.copy()
+    df['remark_eval_thread'] = df.rmq_bilan_menace.copy()
+    df['remark_eval_actions'] = df.rmq_orient_act.copy()
+    df['area'] = round(df.geom.area, 2)
+
+    tzh_cols = recup_cols_table(table, con_gn)
+    lst_cols = df.columns[df.columns.isin(tzh_cols)]
+    to_tzh = df[lst_cols].copy()
+    print('t_zh columns not populated: %s' % str([x for x in tzh_cols if x not in lst_cols]))
+    if to_tzh.crs.to_epsg() == 2154:
+        to_tzh.to_crs(4326, inplace=True)
+
+    # dict_crs = to_tzh.crs.to_json_dict()
+    # dict_crs['id']['code'] = 0
+    # to_tzh.crs.from_json_dict(dict_crs)
+
+    # to_tzh.geom = to_tzh.geom.to_wkt().copy()
+    to_tzh.to_wkt().to_sql(
+        name=table, con=con_gn, schema='pr_zh',
+        if_exists='append', index=False,
+        dtype={
+            'geom': Geometry(srid=4326)
+            # 'id_lim_list':uuid.SafeUUID
+        }
+    )
+    print('INSERT t_zh OK !')
+
+    _cor_zh_area(tzh_code=to_tzh.code, typ='DEP')
+    print('INSERT cor_zh_area DEP OK !')
+    _cor_zh_areaBis(tzh_code=to_tzh.code, typ='COM', cover=True)
+    print('INSERT cor_zh_area COM OK !')
+    _cor_zh_area(tzh_code=to_tzh.code, typ='ZPS')
+    print('INSERT cor_zh_area ZPS OK !')
+    _cor_zh_area(tzh_code=to_tzh.code, typ='SIC')
+    print('INSERT cor_zh_area SIC OK !')
+    # _cor_zh_area(tzh_code=to_tzh.code, typ='ZSC')
+    # print('INSERT cor_zh_area ZSC OK !')
+    # _cor_zh_area(tzh_code=to_tzh.code, typ='PSIC')
+    # print('INSERT cor_zh_area PSIC OK !')
+    _cor_zh_area(tzh_code=to_tzh.code, typ='ZNIEFF1')
+    print('INSERT cor_zh_area ZNIEFF1 OK !')
+    _cor_zh_area(tzh_code=to_tzh.code, typ='ZNIEFF2')
+    print('INSERT cor_zh_area ZNIEFF2 OK !')
+    _cor_zh_hydro(tzh_code=to_tzh.code)
+    print('INSERT cor_zh_hydro OK !')
+    _cor_zh_(tzh_code=to_tzh.code, typ='rb')
+    print('INSERT cor_zh_rb OK !')
+
+
+
+def to_cor_zh_lim_fs(df):
+    df = df[['code', 'crit_def_esp']]\
+        .fillna('Non déterminé')
+    cor_zh_lim_fs = pd.merge(df, get_id_t_zh(df.code), on='code')\
+        .rename(columns={'crit_def_esp': 'id_lim_fs'})
+    crit_def_esp_fct = t_nomenclature_ZH('CRIT_DEF_ESP_FCT')
+    dict_crit = dict(zip(crit_def_esp_fct.mnemo, crit_def_esp_fct.id_nomenclature))
+    cor_zh_lim_fs.id_lim_fs.replace(dict_crit, inplace=True)
+    cor_zh_lim_fs[['id_zh', 'id_lim_fs']].to_sql(
+        name='cor_zh_lim_fs', con=con_gn, schema='pr_zh', if_exists='append', index=False
+    )
+
+
+def get_azalee_activity():
+    sql = """
+    SELECT
+        g.id_site code,
+        CASE WHEN length(pa.id::varchar)=1
+            THEN '0'||pa.id::varchar||' - '||pa.nom
+            ELSE pa.id::varchar||' - '||pa.nom
+        END activ_hum,
+        pp.description "position",
+        pi.nom impact,
+        a.activ_hum_autre||'\n'||a.remarques rmq_activ_hum,
+        a."valid"
+    FROM zones_humides.r_site_usageprocess a
+        LEFT JOIN zones_humides.param_activ_hum pa ON pa.id = a.id_activ_hum
+        LEFT JOIN zones_humides.param_position pp ON pp.id = a.id_position
+        LEFT JOIN zones_humides.param_impact pi ON pi.id = a.id_impact
+        JOIN (sites.r_sites_geom g JOIN sites.sites s ON s.id = g.id_site)
+            ON g.id = a.id_geom_site
+    WHERE g.date = (SELECT max(dat) FROM (values (g.date)) AS value(dat))
+        and 
a."valid" + and s.date_fin is NULL + """ + return pd.read_sql_query(sql,zh.con) + + +def get_cor_impact_types(): + return pd.read_sql_table('cor_impact_types',con_gn,'pr_zh') + + +def to_t_activity(df=None): + table = 't_activity' + + if df is None: + df = get_azalee_activity()\ + .drop_duplicates() + else: + df = df[['code','activ_hum','impact','position','rmq_activ_hum']] + activ_hum = df.activ_hum.str.split(';',expand=True).stack() + impact = df.impact.str.split(';',expand=True).stack() + position = df.position.str.split(';',expand=True).stack() + rmq_activ_hum = df.rmq_activ_hum.str.split(';',expand=True).stack() + + # df['activ_hum'] = remove_special_char(df['activ_hum'],space=True) + # df['impact'] = remove_special_char(df['impact'],space=True) + df['impact'] = remove_special_char(df['impact'].str.lower(),space=True) + df['impact'].fillna('aucun',inplace=True) + # df['position'] = remove_special_char(df['position'],space=True) + + no_activ_hum = t_nomenclature_ZH('ACTIV_HUM') + dict_activ_hum = dict(zip(no_activ_hum.mnemo,no_activ_hum.id_nomenclature)) + no_impact = t_nomenclature_ZH('IMPACTS').rename(columns={'id_nomenclature':'id_impact'}) + cor_impact_types = pd.merge(get_cor_impact_types(),no_impact[['id_impact','mnemo']],on='id_impact') + dict_impact = dict(zip(remove_special_char(cor_impact_types.mnemo.str.lower(),space=True),cor_impact_types.id_cor_impact_types)) + no_position = t_nomenclature_ZH('LOCALISATION') + dict_position = dict(zip(no_position.mnemo,no_position.id_nomenclature)) + + df['activ_hum'].replace(dict_activ_hum, inplace=True) + df['impact'].replace(dict_impact, inplace=True) + df['position'].replace(dict_position, inplace=True) + df.rename(columns={ + 'activ_hum':'id_activity', + 'impact':'id_cor_impact_types', + 'position':'id_position', + 'rmq_activ_hum':'remark_activity' + },inplace=True) + + # group_df = df.groupby(['code','id_activity','id_position'])['id_cor_impact_types'].apply(list).reset_index() + group_df = df.groupby(['code','id_activity'],dropna=False)\ + .agg({'id_position':list,'id_cor_impact_types':list,'remark_activity':list}).reset_index() + group_df.id_position = [list(set(x)) for x in group_df.id_position ] + group_df.id_position = [ + x[0] if len(x)==1 else no_position.loc[no_position.cd_nomenclature=='3','id_nomenclature'].values[0] + for x in group_df.id_position + ] + group_df.remark_activity = ['\n'.join(list(set(x))) if list(set(x)) != [None] else None for x in group_df.remark_activity] + + group_df['id_impact_list'] = [uuid.uuid4() for x in group_df.index] + cor_impact_list = group_df[['id_impact_list','id_cor_impact_types']]\ + .explode('id_cor_impact_types')\ + .drop_duplicates() + + # activity = pd.merge(group_df[['code','id_activity','id_impact_list','id_position']],df,on=['code','id_activity','id_position'],how='left') + # t_activity = pd.merge(activity,get_id_t_zh(df.code),on='code') + t_activity = pd.merge(group_df,get_id_t_zh(df.code),on='code',how='left') + tactiv_cols = recup_cols_table(table,con_gn) + lst_cols = t_activity.columns[t_activity.columns.isin(tactiv_cols)] + to_tactiv = t_activity[lst_cols] + + to_tactiv.to_sql( + name=table, con=con_gn, schema='pr_zh', + if_exists='append', index=False, + # dtype={ + # 'id_impact_list':uuid.UUID + # } + ) + + cor_impact_list.to_sql( + name='cor_impact_list', con=con_gn, schema='pr_zh', + if_exists='append', index=False, + # dtype={ + # 'id_impact_list':uuid.UUID + # } + ) + + +def get_azalee_functions(): + sql = """ + SELECT + g.id_site code, + pa.nom id_function, + 
a."quantite", + a.description justification, + a."valid" + FROM zones_humides.r_site_fctecosociopatri a + LEFT JOIN zones_humides.param_fct_eco_socio_patri pa ON pa.id = a.id_fct + JOIN (sites.r_sites_geom g JOIN sites.sites s ON s.id = g.id_site) + ON g.id = a.id_geom_site + WHERE g.date = (SELECT max(dat) FROM (values (g.date)) AS value(dat)) + and a."valid" + and s.date_fin is NULL + """ + return pd.read_sql_query(sql,zh.con) + + +def to_t_functions(df=None): + table = 't_functions' + + if df is None: + func = get_azalee_functions() + func.id_function.replace({' / ':'/'},regex=True,inplace=True) + else: + func = df[['code','fct_bio','fct_hydro','int_patri','val_socioEco']].set_index('code').unstack() + func = func.str.split(';',expand=True).stack()\ + .str.split(' \(',1,expand=True) + func.columns = ['id_function','justification'] + func.justification = func.justification.str.rsplit('\)',1,expand=True)[0] + + functions = pd.concat([ + t_nomenclature_ZH('FONCTIONS_HYDRO'),t_nomenclature_ZH('FONCTIONS_BIO'), + t_nomenclature_ZH('VAL_SOC_ECO'),t_nomenclature_ZH('INTERET_PATRIM'), + ]) + functions.mnemo.replace({ + r'..- ':'', + r' \(.*\)':'' + },regex=True,inplace=True) + dict_func = dict(zip(functions.mnemo,functions.id_nomenclature)) + func.id_function.replace(dict_func,inplace=True) + not_idfunc = ['non documenté','aucune fonction hydrologique','aucune valeur socio-économique'] + del_index = func[func.id_function.isin(not_idfunc)].index + func.drop(del_index,inplace=True) + funct = func.groupby(['code','id_function']).agg(list).reset_index() + funct.justification = ['\n'.join(x) if x != [None] else None for x in funct.justification] + + qualif = t_nomenclature_ZH('FONCTIONS_QUALIF') + knowle = t_nomenclature_ZH('FONCTIONS_CONNAISSANCE') + funct['id_qualification'] = qualif.loc[qualif.mnemo=='Non évaluée','id_nomenclature'].values[0] + funct['id_knowledge'] = knowle.loc[knowle.mnemo=='Lacunaire ou nulle','id_nomenclature'].values[0] + + t_func = pd.merge(funct,get_id_t_zh(funct.code),on='code') + tactiv_cols = recup_cols_table(table,con_gn) + lst_cols = t_func.columns[t_func.columns.isin(tactiv_cols)] + to_tfunction = t_func[lst_cols] + + to_tfunction.to_sql( + name=table, con=con_gn, schema='pr_zh', + if_exists='append', index=False + ) + +def add_remark_pres(not_bib): + table = 't_zh' + rmq_pres = pd.read_sql_table(table,con_gn,'pr_zh',columns=['id_zh','remark_pres']) + rmq_pres.remark_pres.replace({'':None},inplace=True) + not_bib = rmq_pres.merge(not_bib,on='id_zh') + + not_bib.loc[not_bib.remark_pres.notna(),'remark_pres'] = \ + not_bib[not_bib.remark_pres.notna()].remark_pres+ '\n' + \ + not_bib[not_bib.remark_pres.notna()].lb_code + + not_bib.loc[not_bib.remark_pres.isna(),'remark_pres'] = \ + not_bib[not_bib.remark_pres.isna()].lb_code + + not_bib.drop(columns='lb_code',inplace=True) + + from pycen import update_to_sql + update_to_sql( + df=not_bib, + con=con_gn, + table_name=table, + schema_name='pr_zh', + key_name='id_zh', + ) + + +def __format_lb_code__(t): + table = 'cor_zh_cb' + cols = ['code','code_cb'] + df_cb = t[cols].copy() + df_cb.set_index('code',inplace=True) + + zh_cb = df_cb.code_cb.str.split(';',expand=True)\ + .stack()\ + .droplevel(-1)\ + .reset_index() + zh_cb.columns = cols + zh_cb.rename(columns={'code_cb':'lb_code'},inplace=True) + + cor_zh_cb = pd.merge(zh_cb,get_id_t_zh(zh_cb.code.unique()),on='code') + tzhcb = recup_cols_table(table,con_gn) + lst_cols = cor_zh_cb.columns[cor_zh_cb.columns.isin(tzhcb)] + to_corzhcb = cor_zh_cb[lst_cols].copy() + 
to_corzhcb.lb_code = to_corzhcb.lb_code.astype(str) + return to_corzhcb + + +def __filter_lb_code__(t, join_ch=False): + to_corzhcb = __format_lb_code__(t) + + bib_cb = pd.read_sql_table('bib_cb',con_gn,'pr_zh') + bib_cb.lb_code = bib_cb.lb_code.astype(str) + + not_bib = to_corzhcb[~to_corzhcb.lb_code.isin(bib_cb.lb_code)] + to_corzhcb.drop(not_bib.index,inplace=True) + not_bib = not_bib\ + .groupby('id_zh').agg(','.join) + + if join_ch: + to_corzhcb = to_corzhcb.merge( + bib_cb.drop(columns='humidity'),on='lb_code',how='left' + ) + + return to_corzhcb, not_bib + + +def to_cor_zh_cb(t): + table = 'cor_zh_cb' + + to_corzhcb, not_bib = __filter_lb_code__(t, join_ch=False) + not_bib.lb_code = 'Autre(s) habitat(s) décrit(s) :\n' + not_bib.lb_code + + add_remark_pres(not_bib) + + to_corzhcb.to_sql( + name=table, con=con_gn, schema='pr_zh', + if_exists='append', index=False + ) + + +def to_t_flow(DF, type_flow=None): + """ + @df : pd.DataFrame + @type_flow : str. ['inflow' or 'outflow'] + """ + if type_flow=='inflow': + table = 't_inflow' + cols = ['entree_eau_reg', 'entree_eau_perm', 'entree_eau_topo'] + _flow = t_nomenclature_ZH('ENTREE_EAU') + _perm = t_nomenclature_ZH('PERMANENCE_ENTREE') + dict_table = { + 'flow':'id_inflow', + 'perm':'id_permanance'} + elif type_flow=='outflow': + table = 't_outflow' + cols = ['sortie_eau_reg', 'sortie_eau_perm', 'sortie_eau_topo'] + _flow = t_nomenclature_ZH('SORTIE_EAU') + _perm = t_nomenclature_ZH('PERMANENCE_SORTIE') + dict_table = { + 'flow':'id_outflow', + 'perm':'id_permanance'} + df = DF.copy().set_index('code') + # df[cols] + flow = df[cols[0]].str.split(';',expand=True).stack() + perm = df[cols[1]].str.split(';',expand=True).stack() + topo = df[cols[2]].str.split(';',expand=True).stack() + + inflow = pd.concat( + [flow,perm,topo],axis=1,join='outer' + ).droplevel(-1) + # inflow2 = flow.to_frame()\ + # .merge(perm.to_frame(),left_index=True,right_index=True,how='outer')\ + # .merge(topo.to_frame(),left_index=True,right_index=True,how='outer') + inflow.columns = ['flow','perm','topo'] + inflow.reset_index(drop=False,inplace=True) + + dict_flow = dict(zip(_flow.mnemo,_flow.id_nomenclature)) + dict_perm = dict(zip(_perm.mnemo.str.lower(),_perm.id_nomenclature)) + + inflow.flow.replace(dict_flow,inplace=True) + inflow.perm.fillna('non déterminé',inplace=True) + inflow.perm.replace({'inconnu':'non déterminé','':'non déterminé'},inplace=True) + inflow.perm.replace(dict_perm,inplace=True) + inflow.rename(columns=dict_table, inplace=True) + + t_flow = pd.merge(inflow,get_id_t_zh(inflow.code.unique()),on='code') + tflow = recup_cols_table(table,con_gn) + lst_cols = t_flow.columns[t_flow.columns.isin(tflow)] + to_tflow = t_flow[lst_cols] + + to_tflow.to_sql( + name=table, con=con_gn, schema='pr_zh', + if_exists='append', index=False + ) + + +def cor_zh_corine_cover(): + table = 'cor_zh_corine_cover' + + sql = 'SELECT id_zh,geom FROM pr_zh.t_zh' + df = gpd.read_postgis(sql,con_gn,crs=4326) + df.to_crs(2154,inplace=True) + df.geom = df.buffer(-0.5) + + mask = df.to_crs(2154).unary_union + clc_path = '/home/colas/Documents/5_BDD/CLC/' + clc_file = clc_path+'CLC12_FR_RGF.shp' + clc_mfil = clc_path+'Metadonnees/CLC_nomenclature.xls' + clc = gpd.read_file(clc_file,mask=mask) + clc.rename_geometry('geom', inplace=True) + meta_clc0 = pd.read_excel(clc_mfil,0).rename(columns={'code_clc_niveau_1':'code_clc'}) + meta_clc1 = pd.read_excel(clc_mfil,1).rename(columns={'code_clc_niveau_2':'code_clc'}) + meta_clc2 = 
pd.read_excel(clc_mfil,2).rename(columns={'code_clc_niveau_3':'code_clc'}) + meta_clc = pd.concat([meta_clc0,meta_clc1,meta_clc2]) + meta_clc.code_clc = meta_clc.code_clc.astype(str) + + gn_occsol = t_nomenclature_ZH('OCCUPATION_SOLS',source=None) + dict_clc1 = dict(zip(meta_clc.code_clc,['.'.join(x) for x in meta_clc.code_clc])) + dict_clc2 = dict(zip(gn_occsol.cd_nomenclature,gn_occsol.id_nomenclature)) + + tmp = gpd.sjoin( + df, + clc[['CODE_12','geom']], + predicate = 'intersects', + how = 'inner') + + cor_zh_clc = tmp[['id_zh','CODE_12']]\ + .drop_duplicates()\ + .rename(columns={'CODE_12':'id_cover'})\ + .replace({'id_cover':dict_clc1})\ + .replace({'id_cover':dict_clc2}) + + cor_zh_clc.to_sql( + name=table, con=con_gn, schema='pr_zh', + if_exists='append', index=False + ) + + +def migrate_to_gnZH(df:pd.DataFrame=None): + to_bib_organismes_przh() + to_t_zh(df) + to_cor_zh_lim_fs(df) + to_t_activity(df) + to_t_functions(df) + to_t_flow(df,type_flow='inflow') + to_t_flow(df,type_flow='outflow') + cor_zh_corine_cover() + + + +def to_t_references(db_file, suffixe_refnum=None): + import pandas_access as mdb + table = 't_references' + t_ref = pd.read_sql_table(table,con_gn,'pr_zh') + + dic_col_ref = { + 'REF_NO':'ref_number', + 'REFERENCE':'reference', + 'AUTHOR':'authors', + 'TITLE':'title', + 'YEAR':'pub_year', + 'PUBLISHER':'editor', + 'LOCATION':'editor_location', + } + df = mdb.read_table(db_file, 'MWDREF')\ + .rename(columns=dic_col_ref) + df.ref_number = df.ref_number.astype(str) + + siteref = mdb.read_table(db_file, 'SITEREF') + siteref.REF_NO = siteref.REF_NO.astype(str) + + df = df[df.ref_number.isin(siteref.REF_NO)] + df.loc[df.title.isna(),'title'] = df[df.title.isna()].reference + if suffixe_refnum is not None: + df.ref_number = suffixe_refnum + df.ref_number + + df.to_sql(name=table,con=con_gn,schema='pr_zh',if_exists='append',index=False) + + +def to_cor_zh_ref(db_file, suffixe_refnum=None): + import pandas_access as mdb + dict_col_cor = { + 'REF_NO':'id_ref' + } + table = 'cor_zh_ref' + ref = pd.read_sql_table('t_references',con_gn,'pr_zh') + dict_idref = dict(zip(ref.ref_number,ref.id_reference)) + + # SITEREF + cor = mdb.read_table(db_file, 'SITEREF')\ + .rename(columns=dict_col_cor) + cor.id_ref = cor.id_ref.astype(str) + + if suffixe_refnum is not None: + cor.id_ref = suffixe_refnum + cor.id_ref + + t_zh = get_id_t_zh(cor.SITE_COD) + to_tab = cor.merge( + t_zh.drop(columns='zh_uuid'), + left_on='SITE_COD', + right_on='code', + ).drop(columns=['SITE_COD','code'])\ + .replace(dict_idref) + + to_tab.to_sql( + name=table,con=con_gn,schema='pr_zh', + if_exists='append',index=False) + + + +def OTHERINV_to_tref(db_file): + import pandas_access as mdb + table = 't_zh' + + dic = { + 'FFn' :'FF n', + r'n \° ' :'n°', + r'n \°' :'n°', + r'n\+' :'n°', + r'n\° ':'n°', + r'n\° ':'n°', + r'n\° ' :'n°', + r' ' :' ', + } + sitinfo = mdb.read_table(db_file, 'SITEINFO')\ + .set_index('SITE_COD') + otinv = sitinfo[['OTHER_INV']]\ + .dropna().OTHER_INV\ + .str.split(';',expand=True).stack()\ + .str.strip()\ + .replace(dic,regex=True)\ + .str.split(', Z',expand=True,regex=True).stack()\ + .str.strip() + + znieff = otinv[otinv.str.startswith('ZNIEF')] + znieff = pd.concat([znieff,otinv[otinv.str.startswith('NIEF')]]) + znieff = pd.concat([znieff,otinv[otinv.str.startswith('ZNEIF')]]) + znieff = pd.concat([znieff,otinv[otinv.str.startswith('n°')]]) + znieff = pd.concat([znieff,otinv[otinv.str.startswith('Site N')]]) + znieff = pd.concat([znieff,otinv[otinv.str.startswith('Sites N')]]) + znieff 
= pd.concat([znieff,otinv[otinv.str.startswith('2606')]])
+    znieff = pd.concat([znieff,otinv[otinv.str.startswith('3817')]])
+    znieff = pd.concat([znieff,otinv[otinv.str.startswith('Inventaire N')]])
+    znieff = pd.concat([znieff,otinv[otinv.str.startswith('Mais aussi ZPS')]])
+    filter_inv = otinv[~otinv.isin(znieff)]\
+        .droplevel([-1, -2])\
+        .reset_index()
+
+    other_inv = filter_inv.groupby('SITE_COD').agg('\n'.join)\
+        .reset_index()\
+        .rename(columns={0: 'remark_is_other_inventory'})
+
+    t_zh = get_id_t_zh(other_inv.SITE_COD)
+    to_tab = other_inv.merge(
+        t_zh.drop(columns='zh_uuid'),
+        left_on='SITE_COD',
+        right_on='code',
+    ).drop(columns=['SITE_COD', 'code'])
+    to_tab['is_other_inventory'] = True
+
+    from pycen import update_to_sql
+    update_to_sql(
+        df=to_tab,
+        con=con_gn,
+        table_name=table,
+        schema_name='pr_zh',
+        key_name='id_zh',
+    )
+
+
+
+if __name__ == "__main__":
+
+    from pycen.geonature import pr_zh
+    t_zh = pr_zh.t_zh()
+
+    drop_cols = ['auteur_geom', 'date_geom', 'type_milieu', 'type_site']
+    DF = zh.v_zoneshumides()
+    DF.rename(columns=DICT_TZH, inplace=True)
+    DF.drop(columns=drop_cols, inplace=True)
+    # only migrate wetlands not already present in GeoNature
+    df = DF[~DF.code.isin(t_zh.code)].copy()
+
+    migrate_to_gnZH(df)
+
+    # to_bib_organismes_util()  # done, schema 'utilisateurs'
+    # to_bib_organismes_przh()  # done, schema 'pr_zh'
+    # to_t_roles()  # done
+    # to_t_zh(df)  # done
+    # to_cor_zh_lim_fs(df)  # done
+    # to_t_activity(df)  # done
+    # to_t_functions(df)  # done
+    # to_t_flow(df, type_flow='inflow')  # done
+    # to_t_flow(df, type_flow='outflow')  # done
+
+    # t_river_basin  # OK! cf. insert_lareas.py (SDAGE sub-basins)
+    # t_hydro_area  # OK! cf. insert_lareas.py (topographic catchments)
+    # cor_zh_area  # OK with to_t_zh!; bib_area = [COM,DEP,ref for ref_geo_referentiels of conf_gn_module.toml]
+    # cor_zh_rb  # OK with to_t_zh!
+    # cor_zh_hydro  # OK with to_t_zh!
+    # cor_zh_fct_area  # depends on t_fct_area (empty): table of functional areas
+    # cor_zh_corine_cover  # OK!
+
+    # fct_delim
+
+    # to_cor_zh_cb()  # ready
+
+    # DF[DF.sortie_eau_reg.str.contains('diffus',na=False)].code.tolist()
+    # ['38BB0109', '38BB0128', '38BB0129']
+
+    # get_cor_zh_corine_cover()  # TODO via MEDWET
+    # get_cor_zh_protection()  # TODO via MEDWET
+    # get_t_ownership()  # TODO via MEDWET
+
+    # get_t_table_heritage()  # TODO (not sure..)
+    # get_t_instruments()  # TODO (not sure..)
+    # get_t_management_structures()  # TODO (not sure..)
+
+    t_nomenclature_ZH(bib_mnemo='EVAL_GLOB_MENACES')
+
+    def drop_table(table):
+        sql = 'TRUNCATE pr_zh.%s' % table
+        # con_gn.begin() commits on success and releases the connection on
+        # exit, so no explicit commit()/close() is needed
+        with con_gn.begin() as cnx:
+            cnx.execute(sql)
+
diff --git a/3_AZALEE/create_view.py b/3_AZALEE/create_view.py new file mode 100644 index 0000000..3fe8bee --- /dev/null +++ b/3_AZALEE/create_view.py @@ -0,0 +1,465 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*-
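+# Rebuilds and re-grants the zones_humides.v_zoneshumides and ps.v_pelouseseches
+# views on the AZALEE database. NOTE: raw SQL strings are passed directly to
+# Connection.execute(), which only works with SQLAlchemy 1.x; under 2.x each
+# statement would need to be wrapped in sqlalchemy.text().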
+ +from sqlalchemy import create_engine, text +from sqlalchemy.engine import URL +from datetime import datetime as dt +import pandas as pd +import geopandas as gpd + + +# Parametres bdd +user = 'cen_admin' +pwd = '#CEN38@venir' +adr = '91.134.194.221' +port = '5432' +base = 'azalee' + +url = URL.create('postgresql+psycopg2', + username=user, + password=pwd, + host=adr, + database=base, +) +con = create_engine(url) + +v_zoneshumides = """ +DROP VIEW IF EXISTS zones_humides.v_zoneshumides; +CREATE OR REPLACE VIEW zones_humides.v_zoneshumides +AS WITH temp1 as ( + SELECT DISTINCT ON (s.id) + s.id, + max(s6.date) "date" + FROM sites.sites s + JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id + JOIN sites.r_sites_geom s6 ON s.id::text = s6.id_site::text + WHERE s.date_fin IS NULL + AND s3.nom_court::text = 'Milieux humides'::text + GROUP BY s.id +), hab as ( + SELECT + s62.id_geom_site, + string_agg(s62.id_cb,';') code_cb, + string_agg(s621.lb_hab_fr,';') lib_cb + FROM zones_humides.r_site_habitat s62 JOIN ref_habitats.corine_biotope s621 ON s62.id_cb = s621.id + WHERE s62."valid" + GROUP BY 1 +), usagprocess AS ( + SELECT + s65.id_geom_site, + string_agg(s651.nom,';') activ_hum, + string_agg(s652.nom,';') impact, + string_agg(s653.nom,';') "position", + string_agg(s65.remarques,';') rmq_activ_hum + FROM zones_humides.r_site_usageprocess s65 + JOIN zones_humides.param_activ_hum s651 ON s65.id_activ_hum = s651.id + JOIN zones_humides.param_impact s652 ON s65.id_impact = s652.id + JOIN zones_humides.param_position s653 ON s65.id_position = s653.id + WHERE s65."valid" + GROUP BY 1 +), cnx as ( + SELECT + s63.id_geom_site, + s631.nom connexion + FROM zones_humides.r_site_type_connect s63 + JOIN zones_humides.param_type_connect s631 ON s63.id_param_connect = s631.id + WHERE s63."valid" +), tmp_auteur_site as ( + SELECT + s.id, + string_agg(btrim(concat(s11.nom, ' ', s11.prenom,' (',COALESCE(s111.abbrev, s111.nom),')')),';') AS auteur_site + FROM sites.sites s + JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id + LEFT JOIN (sites.r_sites_auteur s1 JOIN (personnes.personne s11 + JOIN personnes.organisme s111 ON s11.id_organisme = s111.id) ON s1.id_auteur = s11.id + ) ON s.id::text = s1.id_site::text + WHERE s.date_fin IS NULL + AND s3.nom_court::text = 'Milieux humides'::text + GROUP BY s.id +), tmp_auteur_geom as ( + SELECT + s6.id, + string_agg(btrim(concat(s611.nom, ' ', s611.prenom,' (',COALESCE(s6111.abbrev, s6111.nom),')')),';') AS auteur_geom + FROM sites.sites s + JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id + LEFT JOIN (sites.r_sites_geom s6 + LEFT JOIN (sites.r_geomsites_auteur s61 JOIN (personnes.personne s611 + JOIN personnes.organisme s6111 ON s611.id_organisme = s6111.id) ON s61.id_auteur = s611.id + ) ON s6.id = s61.id_geom_site + ) ON s.id = s6.id_site + WHERE s.date_fin IS NULL + AND s3.nom_court::text = 'Milieux humides'::text + GROUP BY s6.id +) +SELECT + s.id AS site_code, + s.nom, + s1.auteur_site, + s61.auteur_geom, + s6.geom, + s.date_deb AS date_site, + s6.date AS date_geom, + s3.nom_court AS type_milieu, + s4.nom AS type_site, + s5.nom AS typo_sdage, + s.remarques AS rmq_site, + s6.rmq_fct_majeur, + s6.rmq_interet_patri, + s6.rmq_bilan_menace, + s6.rmq_orient_act, + s6.rmq_usage_process, + s62.code_cb, + s62.lib_cb, + ct1.fct_bio, + ct1.fct_hydro, + ct1.int_patri, + ct1."val_socioEco", + ct2.crit_delim, + ct2.crit_def_esp, + s65.activ_hum, + s65.impact, + s65."position", + s65.rmq_activ_hum, + (string_to_array(ct3.entree_eau,'//'))[1] entree_eau_reg, + 
(string_to_array(ct3.entree_eau,'//'))[2] entree_eau_perm, + (string_to_array(ct3.entree_eau,'//'))[3] entree_eau_topo, + (string_to_array(ct3.sortie_eau,'//'))[1] sortie_eau_reg, + (string_to_array(ct3.sortie_eau,'//'))[2] sortie_eau_perm, + (string_to_array(ct3.sortie_eau,'//'))[3] sortie_eau_topo, + s63.connexion, + CASE WHEN s64."valid" THEN s64.id_origsub END origine_sub, + CASE WHEN s64."valid" THEN s641.nom END freq_sub, + CASE WHEN s64."valid" THEN s642.nom END etendu_sub +FROM sites.sites s + JOIN temp1 w USING (id) + LEFT JOIN tmp_auteur_site s1 ON s.id = s1.id + LEFT JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id + LEFT JOIN sites.type_site s4 ON s.id_type_site::text = s4.id::text + LEFT JOIN sites.typo_sdage s5 ON s.id_typo_sdage::text = s5.id::text + LEFT JOIN (sites.r_sites_geom s6 + LEFT JOIN tmp_auteur_geom s61 ON s6.id = s61.id + LEFT JOIN hab s62 ON s6.id = s62.id_geom_site + LEFT JOIN cnx s63 ON s6.id = s63.id_geom_site + LEFT JOIN (zones_humides.r_site_sub s64 + JOIN (zones_humides.param_sub s641 JOIN zones_humides.type_param_sub s6411 ON s6411.id = s641.id_type) ON s64.id_freqsub = s641.id + JOIN (zones_humides.param_sub s642 JOIN zones_humides.type_param_sub s6421 ON s6421.id = s642.id_type) ON s64.id_etendsub = s642.id + ) ON s6.id = s64.id_geom_site + LEFT JOIN usagprocess s65 ON s6.id = s65.id_geom_site + LEFT JOIN crosstab( + 'SELECT + s63.id_geom_site::bigint, + s632.nom_court type_param, + string_agg( + CASE WHEN s63.description IS NULL THEN s631.nom + WHEN s63.description = '''' THEN s631.nom + ELSE CONCAT(s631.nom,'' ('',s63.description,'')'') END, + '';'') fct + FROM zones_humides.r_site_fctecosociopatri s63 + JOIN (zones_humides.param_fct_eco_socio_patri s631 + JOIN zones_humides.type_param_fct s632 ON s631.id_type = s632.id + ) ON s63.id_fct = s631.id + WHERE s63."valid" + GROUP BY 1,2 + ORDER BY 1,2,3', + 'SELECT DISTINCT s632.nom_court FROM zones_humides.r_site_fctecosociopatri s63 + JOIN (zones_humides.param_fct_eco_socio_patri s631 + JOIN zones_humides.type_param_fct s632 ON s631.id_type = s632.id + ) ON s63.id_fct = s631.id + ORDER BY 1 DESC;' + ) AS ct1 ("id_geom_site" bigint, "fct_bio" text, "fct_hydro" text, "int_patri" text, "val_socioEco" text) ON s6.id = ct1.id_geom_site + LEFT JOIN crosstab( + 'SELECT + s64.id_geom_site::bigint, + s642.nom_court type_param, + string_agg( + CASE WHEN s64.description IS NULL THEN s641.nom + WHEN s64.description = '''' THEN s641.nom + ELSE CONCAT(s641.nom,'' ('',s64.description,'')'') END, + '';'') fct + FROM zones_humides.r_site_critdelim s64 + JOIN (zones_humides.param_delim_fct s641 + JOIN zones_humides.type_param_delim_fct s642 ON s641.id_type = s642.id + ) ON s64.id_crit_delim = s641.id + WHERE s64."valid" + GROUP BY 1,2 + ORDER BY 1,2,3' + ) AS ct2 ("id_geom_site" bigint, "crit_delim" text, "crit_def_esp" text) ON s6.id = ct2.id_geom_site + LEFT JOIN crosstab( + 'SELECT + s64.id_geom_site, + s64.in_out, + CONCAT( + --''reg_hydro : '', + string_agg(s641.nom,'';''),''//'', + --''permanence : '', + string_agg(s642.nom,'';''),''//'', + --''rmq_toponymie : '', + string_agg(s64.rmq_toponymie,'';'') + ) hydro + FROM zones_humides.r_site_reghydro s64 + LEFT JOIN zones_humides.param_reg_hydro s641 ON s64.id_reg_hydro = s641.id + LEFT JOIN zones_humides.param_permanence s642 ON s64.id_permanence = s642.id + WHERE s64."valid" + GROUP BY 1,2 + ORDER BY 1,2', + 'SELECT DISTINCT in_out FROM zones_humides.r_site_reghydro ORDER BY 1 DESC;' + ) AS ct3 ("id_geom_site" bigint, "entree_eau" text, "sortie_eau" text) ON 
s6.id = ct3.id_geom_site + + ) ON s.id::text = s6.id_site::text +WHERE s.date_fin IS NULL + AND s3.nom_court::text = 'Milieux humides'::text + AND s6.date = w.date + --AND s63."valid" AND s64."valid" +ORDER BY s.id ASC NULLS FIRST; +""" +grant = """ +GRANT ALL ON TABLE zones_humides.v_zoneshumides TO grp_admin; +GRANT SELECT ON TABLE zones_humides.v_zoneshumides TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(v_zoneshumides) + cnx.execute(grant) + + + + +v_pelouseseche = """ +DROP VIEW IF EXISTS ps.v_pelouseseches; +CREATE OR REPLACE VIEW ps.v_pelouseseches +AS WITH temp1 as ( + SELECT DISTINCT ON (s.id) + s.id, + max(s6.date) "date" + FROM sites.sites s + JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id + JOIN sites.r_sites_geom s6 ON s.id::text = s6.id_site::text + WHERE s.date_fin IS NULL + AND s3.nom_court::text = 'Pelouses sèches'::text + GROUP BY s.id +), hab as ( + SELECT + id_geom_site, + date, + (string_to_array(hab1,'//'))[1] n_hab1, + (string_to_array(hab1,'//'))[2] cb_hab1, + (string_to_array(hab1,'//'))[3] lb_hab1, + (string_to_array(hab2,'//'))[1] n_hab2, + (string_to_array(hab2,'//'))[2] cb_hab2, + (string_to_array(hab2,'//'))[3] lb_hab2, + (string_to_array(hab3,'//'))[1] n_hab3, + (string_to_array(hab3,'//'))[2] cb_hab3, + (string_to_array(hab3,'//'))[3] lb_hab3, + (string_to_array(hab4,'//'))[1] n_hab4, + (string_to_array(hab4,'//'))[2] cb_hab4, + (string_to_array(hab4,'//'))[3] lb_hab4 + FROM crosstab( + $$WITH tmp as ( + SELECT + id_geom_site, + MAX("date") date + FROM ps.r_site_habitat + GROUP BY 1 + ) + SELECT + s61.id_geom_site, + s61.date, + s61.index, + CONCAT(s61.n_hab,'//', + s62.code_hab,'//', + s621.lb_hab_fr) hab + FROM ps.r_site_habitat s61 + JOIN (ps.r_hab_cb s62 JOIN ref_habitats.corine_biotope s621 ON s62.code_hab = s621.id) ON s61.id = s62.id_sitehab + JOIN tmp USING (id_geom_site) + WHERE s61.date = tmp.date + ORDER BY 1,2,3;$$, + 'SELECT DISTINCT index FROM ps.r_site_habitat' + ) AS ct ("id_geom_site" bigint, "date" date, "hab1" text, "hab2" text, "hab3" text, "hab4" text) +), tmp_auteur_site as ( + SELECT + s.id, + string_agg(btrim(concat(s11.nom, ' ', s11.prenom,' (',COALESCE(s111.abbrev, s111.nom),')')),';') AS auteur_site + FROM sites.sites s + JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id + LEFT JOIN (sites.r_sites_auteur s1 JOIN (personnes.personne s11 + JOIN personnes.organisme s111 ON s11.id_organisme = s111.id) ON s1.id_auteur = s11.id + ) ON s.id::text = s1.id_site::text + WHERE s.date_fin IS NULL + AND s3.nom_court::text = 'Pelouses sèches'::text + GROUP BY s.id +), tmp_auteur_geom as ( + SELECT + s6.id, + string_agg(btrim(concat(s611.nom, ' ', s611.prenom,' (',COALESCE(s6111.abbrev, s6111.nom),')')),';') AS auteur_geom + FROM sites.sites s + JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id + LEFT JOIN (sites.r_sites_geom s6 + LEFT JOIN (sites.r_geomsites_auteur s61 JOIN (personnes.personne s611 + JOIN personnes.organisme s6111 ON s611.id_organisme = s6111.id) ON s61.id_auteur = s611.id + ) ON s6.id = s61.id_geom_site + ) ON s.id = s6.id_site + WHERE s.date_fin IS NULL + AND s3.nom_court::text = 'Pelouses sèches'::text + GROUP BY s6.id +) +SELECT DISTINCT ON (s.id) + s.id AS site_code, + s.nom, + s1.auteur_site, + s61.auteur_geom, + s6.geom, + s.date_deb AS date_site, + s6.date AS date_geom, + s3.nom_court AS type_milieu, + s4.nom AS type_site, + s.remarques AS rmq_site, + s6.rmq_fct_majeur, + s6.rmq_interet_patri, + s6.rmq_bilan_menace, + s6.rmq_orient_act, + s6.rmq_usage_process, + ct1.statut, + ct1.pratique, + 
ct1.recouvrement, + ct1.embrouss, + ct1.taux tx_embrouss, + hab.n_hab1, + hab.cb_hab1, + hab.lb_hab1, + hab.n_hab2, + hab.cb_hab2, + hab.lb_hab2, + hab.n_hab3, + hab.cb_hab3, + hab.lb_hab3, + hab.n_hab4, + hab.cb_hab4, + hab.lb_hab4 +FROM sites.sites s + JOIN temp1 w USING (id) + LEFT JOIN tmp_auteur_site s1 ON s.id = s1.id + LEFT JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id + LEFT JOIN sites.type_site s4 ON s.id_type_site::text = s4.id::text + LEFT JOIN (sites.r_sites_geom s6 + LEFT JOIN tmp_auteur_geom s61 ON s6.id = s61.id + LEFT JOIN hab ON s6.id = hab.id_geom_site + LEFT JOIN crosstab( + 'SELECT + s63.id_geom_site::bigint, + s63.taux, + s632.nom, + s631.description + FROM ps.r_site_param s63 + JOIN (ps.param s631 + JOIN ps.type_param s632 ON s631.id_type = s632.id + ) ON s63.id_param = s631.id + --GROUP BY 1,2 + ORDER BY 1,2,3', + 'SELECT nom FROM ps.type_param' + ) AS ct1 ("id_geom_site" bigint, "taux" text, "recouvrement" text, "embrouss" text, "pratique" text, "statut" text) ON s6.id = ct1.id_geom_site + ) ON s6.id_site = s.id +WHERE s.date_fin IS NULL AND s3.nom_court::text = 'Pelouses sèches'::text +ORDER BY s.id, s6.date DESC NULLS LAST +;""" +grant = """ +GRANT ALL ON TABLE ps.v_pelouseseches TO grp_admin; +GRANT SELECT ON TABLE ps.v_pelouseseches TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(v_pelouseseche) + cnx.execute(grant) + + + +########################### +######## TEST CODE ######## +########################### + +select_import = """ +WITH temp1 as ( + SELECT DISTINCT ON (s.id) + s.id, + max(s6.date) "date" + FROM sites.sites s + JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id + JOIN sites.r_sites_geom s6 ON s.id::text = s6.id_site::text + WHERE s.date_fin IS NULL + AND s3.nom_court::text = 'Pelouses sèches'::text + GROUP BY s.id +), tmp_auteur_site as ( + SELECT + s.id, + string_agg(btrim(concat(s11.nom, ' ', s11.prenom,' (',COALESCE(s111.abbrev, s111.nom),')')),';') AS auteur_site + FROM sites.sites s + JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id + LEFT JOIN (sites.r_sites_auteur s1 JOIN (personnes.personne s11 + JOIN personnes.organisme s111 ON s11.id_organisme = s111.id) ON s1.id_auteur = s11.id + ) ON s.id::text = s1.id_site::text + WHERE s.date_fin IS NULL + AND s3.nom_court::text = 'Pelouses sèches'::text + GROUP BY s.id +), tmp_auteur_geom as ( + SELECT + s6.id, + string_agg(btrim(concat(s611.nom, ' ', s611.prenom,' (',COALESCE(s6111.abbrev, s6111.nom),')')),';') AS auteur_geom + FROM sites.sites s + JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id + LEFT JOIN (sites.r_sites_geom s6 + LEFT JOIN (sites.r_geomsites_auteur s61 JOIN (personnes.personne s611 + JOIN personnes.organisme s6111 ON s611.id_organisme = s6111.id) ON s61.id_auteur = s611.id + ) ON s6.id = s61.id_geom_site + ) ON s.id = s6.id_site + WHERE s.date_fin IS NULL + AND s3.nom_court::text = 'Pelouses sèches'::text + GROUP BY s6.id +) +SELECT DISTINCT ON (s.id) + s.id AS site_code, + s.nom, + s1.auteur_site, + s61.auteur_geom, + s6.geom, + s.date_deb AS date_site, + s6.date AS date_geom, + s3.nom_court AS type_milieu, + s4.nom AS type_site, + s.remarques AS rmq_site, + s6.rmq_fct_majeur, + s6.rmq_interet_patri, + s6.rmq_bilan_menace, + s6.rmq_orient_act, + s6.rmq_usage_process +FROM sites.sites s + JOIN temp1 w USING (id) + LEFT JOIN tmp_auteur_site s1 ON s.id = s1.id + LEFT JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id + LEFT JOIN sites.type_site s4 ON s.id_type_site::text = s4.id::text + LEFT JOIN (sites.r_sites_geom s6 + LEFT JOIN 
tmp_auteur_geom s61 ON s6.id = s61.id + LEFT JOIN crosstab( + 'SELECT + s63.id_geom_site::bigint, + s63.taux tx_embrouss, + s632.nom type_param, + s631.description param + FROM ps.r_site_param s63 + JOIN (ps.param s631 + JOIN ps.type_param s632 ON s631.id_type = s632.id + ) ON s63.id_param = s631.id + --GROUP BY 1,2 + ORDER BY 1,2,3', + 'SELECT nom FROM ps.type_param' + ) AS ct1 ("id_geom_site" bigint, "taux" text, "statut" text, "embrouss" text, "pratique" text, "recouvremt" text) ON s6.id = ct1.id_geom_site + ) ON s6.id_site = s.id +WHERE s.date_fin IS NULL AND s3.nom_court::text = 'Pelouses sèches'::text +ORDER BY s.id, s6.date DESC NULLS LAST +; +""" + +select_import = """ +SELECT * FROM zones_humides.v_zoneshumides +; +""" +data = gpd.read_postgis( + sql = text(select_import), + con = con, + geom_col='geom') \ No newline at end of file diff --git a/3_AZALEE/create_view_pers.py b/3_AZALEE/create_view_pers.py new file mode 100644 index 0000000..86a12b1 --- /dev/null +++ b/3_AZALEE/create_view_pers.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*-. + +from sqlalchemy import create_engine, text +from sqlalchemy.engine import URL +from datetime import datetime as dt +import pandas as pd +import geopandas as gpd + + +# Parametres bdd +user = 'cen_admin' +pwd = '#CEN38@venir' +adr = '91.134.194.221' +port = '5432' +base = 'azalee' + +url = URL.create('postgresql+psycopg2', + username=user, + password=pwd, + host=adr, + database=base, +) +con = create_engine(url) + +v_perso = """ +DROP VIEW IF EXISTS personnes.v_personne CASCADE; +CREATE OR REPLACE VIEW personnes.v_personne +AS +SELECT + a.id, + CASE WHEN btrim(concat(a.nom, ' ', a.prenom)) = COALESCE(b.abbrev, b.nom) + then btrim(concat(a.nom, ' ', a.prenom)) + else string_agg(btrim(concat(a.nom, ' ', a.prenom,' (',COALESCE(b.abbrev, b.nom),')')),';') + end AS auteur +FROM personnes.personne a + JOIN personnes.organisme b ON a.id_organisme = b.id +GROUP BY a.id,b.abbrev, b.nom; +""" +grant = """ +GRANT ALL ON TABLE personnes.v_personne TO grp_admin; +GRANT SELECT ON TABLE personnes.v_personne TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(text(v_perso)) + cnx.execute(grant) diff --git a/3_AZALEE/create_view_ps.py b/3_AZALEE/create_view_ps.py new file mode 100644 index 0000000..91fc49c --- /dev/null +++ b/3_AZALEE/create_view_ps.py @@ -0,0 +1,513 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*-. 
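+# Builds the ps.* views in dependency order: v_ps_habcb aggregates CORINE
+# Biotope codes per habitat record, v_ps_sitehab_cross / v_ps_params_cross
+# pivot habitats and parameters per site, and v_pelouseseches_all joins them
+# all; the v_termophile variant is then derived from the same SQL by string
+# substitution, so the two definitions stay in lockstep.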
+ +from sqlalchemy import create_engine, text +from sqlalchemy.engine import URL +from datetime import datetime as dt +import pandas as pd +import geopandas as gpd + + +# Parametres bdd +user = 'cen_admin' +pwd = '#CEN38@venir' +adr = '91.134.194.221' +port = '5432' +base = 'azalee' + +url = URL.create('postgresql+psycopg2', + username=user, + password=pwd, + host=adr, + database=base, +) +con = create_engine(url) + +drop_v_ps = ''' + DROP VIEW IF EXISTS ps.v_pelouseseches_all CASCADE; + DROP VIEW IF EXISTS ps.v_termophile CASCADE; +''' +with con.begin() as cnx: + cnx.execute(drop_v_ps) + + + +v_ps_habcb = """ +DROP VIEW IF EXISTS ps.v_ps_habcb; +CREATE OR REPLACE VIEW ps.v_ps_habcb +AS +SELECT + b.id_sitehab, + string_agg( + CASE + WHEN b.suspect is not null AND b.sep is not null THEN CONCAT(b.code_hab,' (',b.suspect,')',b.sep) + WHEN b.suspect is not null AND b.sep is null THEN CONCAT(b.code_hab,' (',b.suspect,')') + WHEN b.suspect is null AND b.sep is not null THEN CONCAT(b.code_hab,b.sep) + WHEN b.suspect is null AND b.sep is null THEN b.code_hab + ELSE b.code_hab + END,'' ORDER BY b.ordre) code_hab, + string_agg(b1.lb_hab_fr,';') lb_hab_fr +FROM ps.r_hab_cb b JOIN ref_habitats.corine_biotope b1 ON b.code_hab = b1.id +GROUP BY 1 +ORDER BY 1 +; +""" +grant = """ +GRANT ALL ON TABLE ps.v_ps_habcb TO grp_admin; +GRANT SELECT ON TABLE ps.v_ps_habcb TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(text(v_ps_habcb)) + cnx.execute(grant) + + + +v_ps_sitehab_cross = """ +DROP VIEW IF EXISTS ps.v_ps_sitehab_cross; +CREATE OR REPLACE VIEW ps.v_ps_sitehab_cross +AS +SELECT + id_site, + id_geom_site, + auteur, + date, + date_max, + (string_to_array(hab1,'//'))[1] n_hab1, + (string_to_array(hab1,'//'))[2] cb_hab1, + (string_to_array(hab1,'//'))[3] lb_hab1, + (string_to_array(hab2,'//'))[1] n_hab2, + (string_to_array(hab2,'//'))[2] cb_hab2, + (string_to_array(hab2,'//'))[3] lb_hab2, + (string_to_array(hab3,'//'))[1] n_hab3, + (string_to_array(hab3,'//'))[2] cb_hab3, + (string_to_array(hab3,'//'))[3] lb_hab3, + (string_to_array(hab4,'//'))[1] n_hab4, + (string_to_array(hab4,'//'))[2] cb_hab4, + (string_to_array(hab4,'//'))[3] lb_hab4 +FROM crosstab( + $$ + WITH auteur AS ( + SELECT DISTINCT ON (id_sitehab) + c.id_sitehab, + string_agg(c1.auteur,';' ORDER BY c1.auteur) auteur + FROM ps.r_sitehab_auteur c + JOIN personnes.v_personne c1 ON c1.id = c.id_auteur + GROUP BY c.id_sitehab + ) + SELECT + a.id_site, + a.id_geom_site, + c.auteur, + a.date, + (SELECT MAX(date) FROM ps.r_site_habitat WHERE id_geom_site = a.id_geom_site ) date_max, + a.index, + CONCAT(a.n_hab,'//', + b.code_hab,'//', + b.lb_hab_fr + ) hab + FROM ps.r_site_habitat a + LEFT JOIN ps.v_ps_habcb b ON a.id = b.id_sitehab + LEFT JOIN auteur c ON c.id_sitehab = a.id + WHERE + a.date = (SELECT MAX(date) FROM ps.r_site_habitat WHERE id_geom_site = a.id_geom_site) + ORDER BY a.id_site,a.id_geom_site,a.index + ;$$, + 'SELECT DISTINCT index FROM ps.r_site_habitat ORDER BY index LIMIT 4 ;' +) AS ct ( + "id_site" varchar(10), + "id_geom_site" bigint, + "auteur" text, + "date" date, + "date_max" date, + "hab1" text, + "hab2" text, + "hab3" text, + "hab4" text) +ORDER BY 1,2 +""" +grant = """ +GRANT ALL ON TABLE ps.v_ps_sitehab_cross TO grp_admin; +GRANT SELECT ON TABLE ps.v_ps_sitehab_cross TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(text(v_ps_sitehab_cross)) + cnx.execute(grant) + + +# PB a.id_geom_site in (52716) +v_ps_params_cross = """ +DROP VIEW IF EXISTS ps.v_ps_params_cross; +CREATE OR REPLACE VIEW 
ps.v_ps_params_cross +AS +SELECT --ct.* + ct.id_site, + ct.id_geom_site, + ct.auteur, + ct.date, + ct.date_max, + ct.statut, + ct.pratique, + ct.recouvrement, + ct.embrouss, + ct.taux tx_embrouss +FROM crosstab( + $$ + WITH auteur AS ( + SELECT DISTINCT ON (id_siteparam) + c.id_siteparam, + string_agg(c1.auteur,';' ORDER BY c1.auteur) auteur + FROM ps.r_siteparam_auteur c + JOIN personnes.v_personne c1 ON c1.id = c.id_auteur + GROUP BY c.id_siteparam + ) + SELECT DISTINCT ON (a.id_site,a.id_geom_site, a2.nom,a.date) + a.id_site, + a.id_geom_site::bigint, + a.date, + (SELECT MAX(date) FROM ps.r_site_param WHERE id_geom_site = a.id_geom_site ) date_max, + c.auteur, + a.taux, + a2.nom, + a1.description + FROM ps.r_site_param a + JOIN (ps.param a1 + JOIN ps.type_param a2 ON a1.id_type = a2.id + ) ON a.id_param = a1.id + JOIN auteur c ON c.id_siteparam = a.id + --WHERE + -- a.id_geom_site in (52716) + --GROUP BY 1 + ORDER BY 1,2,7,3 DESC + $$, +$$SELECT nom FROM ps.type_param ORDER BY nom;$$ + +) AS ct ("id_site" varchar(10),"id_geom_site" bigint, "date" date, "date_max" date, "auteur" text, "taux" text, "embrouss" text, "pratique" text, "recouvrement" text, "statut" text) +--GROUP BY 1,2,4,5,6,7,8 +ORDER BY 1,2 +""" +grant = """ +GRANT ALL ON TABLE ps.v_ps_params_cross TO grp_admin; +GRANT SELECT ON TABLE ps.v_ps_params_cross TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(text(v_ps_params_cross)) + cnx.execute(grant) + + + +v_pelouseseches_all = drop_v_ps + """ +CREATE OR REPLACE VIEW ps.v_pelouseseches_all +AS WITH tmp_auteur_site as ( + SELECT + s.id, + string_agg(s11.auteur,';' ORDER BY s11.auteur) AS auteur_site + FROM sites.sites s + JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id + LEFT JOIN (sites.r_sites_auteur s1 + JOIN personnes.v_personne s11 ON s1.id_auteur = s11.id + ) ON s.id::text = s1.id_site::text + WHERE s.date_fin IS NULL + AND s3.nom_court::text = 'Pelouses sèches'::text + GROUP BY s.id + ORDER BY s.id +), tmp_auteur_geom as ( + SELECT + s6.id, + string_agg(s611.auteur,';' ORDER BY s611.auteur) AS auteur_geom + FROM sites.sites s + JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id + LEFT JOIN (sites.r_sites_geom s6 + LEFT JOIN (sites.r_geomsites_auteur s61 + JOIN personnes.v_personne s611 ON s61.id_auteur = s611.id + ) ON s6.id = s61.id_geom_site + ) ON s.id = s6.id_site + WHERE s.date_fin IS NULL + AND s3.nom_court::text = 'Pelouses sèches'::text + GROUP BY s6.id + ORDER BY s6.id +) +SELECT DISTINCT ON (s.id) + s.id AS site_code, + s6.geom, + s.nom, + s5.nom district_nat, + s6.libelle source, + s6.id_origine, + s1.auteur_site, + s6.auteur_geom, + ( + SELECT DISTINCT ON ("date") auteur + FROM (values (param.date_max,param.auteur),(hab.date_max,hab.auteur)) AS value("date",auteur) + WHERE "date" = (SELECT MAX("date") FROM (values (param.date_max),(hab.date_max)) AS value("date")) + ) as auteur_attrs, + s.date_deb AS date_site, + s6."date" AS date_geom, + ( + SELECT MAX("date") FROM (values (param.date_max),(hab.date_max)) AS value("date") + ) as date_attrs, + s3.nom_court AS type_milieu, + s4.nom AS type_site, + s.remarques AS rmq_site, + s6.rmq_fct_majeur, + s6.rmq_interet_patri, + s6.rmq_bilan_menace, + s6.rmq_orient_act, + s6.rmq_usage_process, + param.statut, + param.pratique, + param.recouvrement, + param.embrouss, + param.tx_embrouss, + hab.n_hab1, + hab.cb_hab1, + hab.lb_hab1, + hab.n_hab2, + hab.cb_hab2, + hab.lb_hab2, + hab.n_hab3, + hab.cb_hab3, + hab.lb_hab3, + hab.n_hab4, + hab.cb_hab4, + hab.lb_hab4 + +FROM sites.sites s + JOIN 
tmp_auteur_site s1 ON s.id = s1.id + JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id + LEFT JOIN sites.type_site s4 ON s.id_type_site::text = s4.id::text + JOIN ref_territoire.districts_naturels s5 ON s.org = s5.abrev + JOIN ( SELECT DISTINCT ON (id_site) * + FROM sites.r_sites_geom sg + JOIN tmp_auteur_geom s61 ON sg.id = s61.id + LEFT JOIN sites.lots s62 USING (id_lot) + ORDER BY id_site, "date" DESC, date_insert DESC + ) s6 ON s6.id_site = s.id + LEFT JOIN ps.v_ps_sitehab_cross hab ON s.id = hab.id_site + LEFT JOIN ps.v_ps_params_cross param ON s.id = param.id_site + ----LEFT JOIN (sites.r_sites_geom s6 + ---- LEFT JOIN tmp_auteur_geom s61 ON s6.id = s61.id + ---- LEFT JOIN ps.v_ps_sitehab_cross hab ON s6.id = hab.id_geom_site + ---- LEFT JOIN ps.v_ps_params_cross param ON s6.id = param.id_geom_site + ----) ON s6.id_site = s.id + --LEFT JOIN ps.r_infeq_1200m s7 ON s.id = s7.id_site +WHERE s.date_fin IS NULL + AND s3.nom_court::text = 'Pelouses sèches'::text + --AND s3.id = 2 + --AND s6.date = (SELECT MAX(date) FROM sites.r_sites_geom WHERE id_site = s.id) + --AND s6.date_insert = ( + -- SELECT MAX(date_insert) FROM sites.r_sites_geom WHERE id_site = s.id AND date = ( + -- SELECT MAX(date) FROM sites.r_sites_geom WHERE id_site = s.id)) + --AND s7.infeq_1200 IS True +ORDER BY s.id, s6."date" DESC NULLS LAST +;""" +grant = """ +GRANT ALL ON TABLE ps.v_pelouseseches_all TO grp_admin; +GRANT SELECT ON TABLE ps.v_pelouseseches_all TO grp_consult; +""" +comment = """ +COMMENT ON VIEW ps.v_pelouseseches_all IS 'Vue des sites à pelouses sèches et milieux thermophiles'; +COMMENT ON COLUMN ps.v_pelouseseches_all.site_code IS 'Identifiant de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.nom IS 'Nom de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.district_nat IS 'District naturel sur lequel est positionner la majeur partie de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.source IS 'Nom de la couche source de la pelouse sèche au sein du CEN Isère.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.id_origine IS 'Identifiant d''origine de la pelouse sèche au sein de la couche source.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.auteur_site IS 'Auteur ayant caractérisé la pelouse sèche pour la première fois.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.auteur_geom IS 'Auteur ayant définis la géometrie actuelle de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.auteur_attrs IS 'Auteur le plus récent ayant défini les attributs de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.date_site IS 'Date de la première description de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.date_geom IS 'Date de la géométrie.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.date_attrs IS 'Date des attributs les plus récents.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.type_milieu IS 'Caractérisation du milieu.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.type_site IS ''; +COMMENT ON COLUMN ps.v_pelouseseches_all.rmq_site IS 'Remarques générale concernant la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.rmq_fct_majeur IS 'Remarques sur les fonctions majeurs de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.rmq_interet_patri IS 'Remarques sur les intérêts patrimoniaux de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.rmq_bilan_menace IS 'Remarques globales sur les menaces qui concernent la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.rmq_orient_act IS 'Remarques sur les orientations et 
les actes de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.rmq_usage_process IS 'Remarques concernant les usages et les processus naturels de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.statut IS 'Statut (Communautaire, Prioritaire ou non défini) de la pelouse sèche définit suivant la Directive Habitat.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.pratique IS 'Pratique agricole réalisée sur la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.recouvrement IS 'Niveau de recouvrement du sol herbacé par rapport au sol nu.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.embrouss IS 'Niveau d''embroussaillement de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.tx_embrouss IS 'Taux d''embroussaillement de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.n_hab1 IS 'Pourcentage de présence de l''habitat 1 sur 4 de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.cb_hab1 IS 'Identifiants CORINE Biotopes constituant l''habitat 1 sur 4 de la pelouse sèche.\n- & : Entremêlement d''habitat\n- / : Habitat en transition dans le sens de lecture\n- (..) : Habitat suspecté.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.lb_hab1 IS 'Libellé(s) CORINE Biotopes constituant(s) l''habitat 1 sur 4 de la pelouse sèche. Chaque élément de la liste est séparé par un '';''.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.n_hab2 IS 'Pourcentage de présence de l''habitat 2 sur 4 de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.cb_hab2 IS 'Identifiants CORINE Biotopes constituant l''habitat 2 sur 4 de la pelouse sèche.\n- & : Entremêlement d''habitat\n- / : Habitat en transition dans le sens de lecture\n- (..) : Habitat suspecté.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.lb_hab2 IS 'Libellé(s) CORINE Biotopes constituant(s) l''habitat 2 sur 4 de la pelouse sèche. Chaque élément de la liste est séparé par un '';''.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.n_hab3 IS 'Pourcentage de présence de l''habitat tertiaire sur la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.cb_hab3 IS 'Identifiants CORINE Biotopes constituant l''habitat 3 sur 4 de la pelouse sèche.\n- & : Entremêlement d''habitat\n- / : Habitat en transition dans le sens de lecture\n- (..) : Habitat suspecté.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.lb_hab3 IS 'Libellé(s) CORINE Biotopes constituant(s) l''habitat 3 sur 4 de la pelouse sèche. Chaque élément de la liste est séparé par un '';''.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.n_hab4 IS 'Pourcentage de présence de l''habitat majoritaire sur la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.cb_hab4 IS 'Identifiants CORINE Biotopes constituant l''habitat 4 sur 4 de la pelouse sèche.\n- & : Entremêlement d''habitat\n- / : Habitat en transition dans le sens de lecture\n- (..) : Habitat suspecté.'; +COMMENT ON COLUMN ps.v_pelouseseches_all.lb_hab4 IS 'Libellé(s) CORINE Biotopes constituant(s) l''habitat 4 sur 4 de la pelouse sèche. 
Chaque élément de la liste est séparé par un '';''.'; +""" +with con.begin() as cnx: + cnx.execute(v_pelouseseches_all) + cnx.execute(grant) + cnx.execute(comment) + +v_termophile = (v_pelouseseches_all + .replace('v_pelouseseches_all','v_termophile') + .replace('Pelouses sèches','Milieux thermophile')) +grant = grant.replace('v_pelouseseches_all','v_termophile') +comment = (comment + .replace('v_pelouseseches_all','v_termophile') + .replace('de la pelouse sèche','du milieux thermophile')) +with con.begin() as cnx: + cnx.execute(v_termophile) + cnx.execute(grant) + cnx.execute(comment) + + + +v_pelouseseches = """ +CREATE OR REPLACE VIEW ps.v_pelouseseches +AS +SELECT + * +FROM ps.v_pelouseseches_all s + LEFT JOIN ps.r_infeq_1200m s7 ON s.site_code = s7.id_site +WHERE + s7.infeq_1200 IS True +ORDER BY s.site_code DESC NULLS LAST +""" +grant = """ +GRANT ALL ON TABLE ps.v_pelouseseches TO grp_admin; +GRANT SELECT ON TABLE ps.v_pelouseseches TO grp_consult; +""" +comment = """ +COMMENT ON VIEW ps.v_pelouseseches IS 'Vue des sites à pelouses sèches strictes, situées à niveau ou sous 1200 mètre'; +COMMENT ON COLUMN ps.v_pelouseseches.site_code IS 'Identifiant de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches.nom IS 'Nom de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches.district_nat IS 'District naturel sur lequel est positionner la majeur partie de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches.source IS 'Nom de la couche source de la pelouse sèche au sein du CEN Isère.'; +COMMENT ON COLUMN ps.v_pelouseseches.id_origine IS 'Identifiant d''origine de la pelouse sèche au sein de la couche source.'; +COMMENT ON COLUMN ps.v_pelouseseches.auteur_site IS 'Auteur ayant caractérisé la pelouse sèche pour la première fois.'; +COMMENT ON COLUMN ps.v_pelouseseches.auteur_geom IS 'Auteur ayant définis la géometrie actuelle de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches.auteur_attrs IS 'Auteur le plus récent ayant défini les attributs de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches.date_site IS 'Date de la première description de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches.date_geom IS 'Date de la géométrie.'; +COMMENT ON COLUMN ps.v_pelouseseches.date_attrs IS 'Date des attributs les plus récents.'; +COMMENT ON COLUMN ps.v_pelouseseches.type_milieu IS 'Caractérisation du milieu.'; +COMMENT ON COLUMN ps.v_pelouseseches.type_site IS ''; +COMMENT ON COLUMN ps.v_pelouseseches.rmq_site IS 'Remarques générale concernant la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches.rmq_fct_majeur IS 'Remarques sur les fonctions majeurs de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches.rmq_interet_patri IS 'Remarques sur les intérêts patrimoniaux de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches.rmq_bilan_menace IS 'Remarques globales sur les menaces qui concernent la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches.rmq_orient_act IS 'Remarques sur les orientations et les actes de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches.rmq_usage_process IS 'Remarques concernant les usages et les processus naturels de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches.statut IS 'Statut (Communautaire, Prioritaire ou non défini) de la pelouse sèche définit suivant la Directive Habitat.'; +COMMENT ON COLUMN ps.v_pelouseseches.pratique IS 'Pratique agricole réalisée sur la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches.recouvrement IS 'Niveau de recouvrement du sol herbacé par rapport au sol nu.'; 
+COMMENT ON COLUMN ps.v_pelouseseches.embrouss IS 'Niveau d''embroussaillement de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches.tx_embrouss IS 'Taux d''embroussaillement de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches.n_hab1 IS 'Pourcentage de présence de l''habitat 1 sur 4 de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches.cb_hab1 IS 'Identifiants CORINE Biotopes constituant l''habitat 1 sur 4 de la pelouse sèche.\n- & : Entremêlement d''habitat\n- / : Habitat en transition dans le sens de lecture\n- (..) : Habitat suspecté.'; +COMMENT ON COLUMN ps.v_pelouseseches.lb_hab1 IS 'Libellé(s) CORINE Biotopes constituant(s) l''habitat 1 sur 4 de la pelouse sèche. Chaque élément de la liste est séparé par un '';''.'; +COMMENT ON COLUMN ps.v_pelouseseches.n_hab2 IS 'Pourcentage de présence de l''habitat 2 sur 4 de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches.cb_hab2 IS 'Identifiants CORINE Biotopes constituant l''habitat 2 sur 4 de la pelouse sèche.\n- & : Entremêlement d''habitat\n- / : Habitat en transition dans le sens de lecture\n- (..) : Habitat suspecté.'; +COMMENT ON COLUMN ps.v_pelouseseches.lb_hab2 IS 'Libellé(s) CORINE Biotopes constituant(s) l''habitat 2 sur 4 de la pelouse sèche. Chaque élément de la liste est séparé par un '';''.'; +COMMENT ON COLUMN ps.v_pelouseseches.n_hab3 IS 'Pourcentage de présence de l''habitat tertiaire sur la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches.cb_hab3 IS 'Identifiants CORINE Biotopes constituant l''habitat 3 sur 4 de la pelouse sèche.\n- & : Entremêlement d''habitat\n- / : Habitat en transition dans le sens de lecture\n- (..) : Habitat suspecté.'; +COMMENT ON COLUMN ps.v_pelouseseches.lb_hab3 IS 'Libellé(s) CORINE Biotopes constituant(s) l''habitat 3 sur 4 de la pelouse sèche. Chaque élément de la liste est séparé par un '';''.'; +COMMENT ON COLUMN ps.v_pelouseseches.n_hab4 IS 'Pourcentage de présence de l''habitat majoritaire sur la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches.cb_hab4 IS 'Identifiants CORINE Biotopes constituant l''habitat 4 sur 4 de la pelouse sèche.\n- & : Entremêlement d''habitat\n- / : Habitat en transition dans le sens de lecture\n- (..) : Habitat suspecté.'; +COMMENT ON COLUMN ps.v_pelouseseches.lb_hab4 IS 'Libellé(s) CORINE Biotopes constituant(s) l''habitat 4 sur 4 de la pelouse sèche. 
Chaque élément de la liste est séparé par un '';''.';
+"""
+with con.begin() as cnx:
+    cnx.execute(v_pelouseseches)
+    cnx.execute(grant)
+    cnx.execute(comment)
+
+
+
+v_pelouseseches_sup1200 = """
+CREATE OR REPLACE VIEW ps.v_pelouseseches_sup1200
+AS
+SELECT
+    *
+FROM ps.v_pelouseseches_all s
+    LEFT JOIN ps.r_infeq_1200m s7 ON s.site_code = s7.id_site
+WHERE
+    s7.infeq_1200 IS False
+ORDER BY s.site_code DESC NULLS LAST
+"""
+grant = """
+GRANT ALL ON TABLE ps.v_pelouseseches_sup1200 TO grp_admin;
+GRANT SELECT ON TABLE ps.v_pelouseseches_sup1200 TO grp_consult;
+"""
+comment = """
+COMMENT ON VIEW ps.v_pelouseseches_sup1200 IS 'Vue des sites à pelouses sèches strictes, situées au dessus de 1200 mètre';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.site_code IS 'Identifiant de la pelouse sèche.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.nom IS 'Nom de la pelouse sèche.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.district_nat IS 'District naturel sur lequel est positionner la majeur partie de la pelouse sèche.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.source IS 'Nom de la couche source de la pelouse sèche au sein du CEN Isère.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.id_origine IS 'Identifiant d''origine de la pelouse sèche au sein de la couche source.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.auteur_site IS 'Auteur ayant caractérisé la pelouse sèche pour la première fois.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.auteur_geom IS 'Auteur ayant définis la géometrie actuelle de la pelouse sèche.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.auteur_attrs IS 'Auteur le plus récent ayant défini les attributs de la pelouse sèche.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.date_site IS 'Date de la première description de la pelouse sèche.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.date_geom IS 'Date de la géométrie.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.date_attrs IS 'Date des attributs les plus récents.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.type_milieu IS 'Caractérisation du milieu.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.type_site IS '';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.rmq_site IS 'Remarques générale concernant la pelouse sèche.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.rmq_fct_majeur IS 'Remarques sur les fonctions majeurs de la pelouse sèche.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.rmq_interet_patri IS 'Remarques sur les intérêts patrimoniaux de la pelouse sèche.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.rmq_bilan_menace IS 'Remarques globales sur les menaces qui concernent la pelouse sèche.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.rmq_orient_act IS 'Remarques sur les orientations et les actes de la pelouse sèche.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.rmq_usage_process IS 'Remarques concernant les usages et les processus naturels de la pelouse sèche.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.statut IS 'Statut (Communautaire, Prioritaire ou non défini) de la pelouse sèche définit suivant la Directive Habitat.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.pratique IS 'Pratique agricole réalisée sur la pelouse sèche.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.recouvrement IS 'Niveau de recouvrement du sol herbacé par rapport au sol nu.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.embrouss IS 'Niveau d''embroussaillement de la pelouse sèche.';
+COMMENT ON COLUMN ps.v_pelouseseches_sup1200.tx_embrouss IS 'Taux d''embroussaillement de la pelouse sèche.';
+COMMENT ON 
COLUMN ps.v_pelouseseches_sup1200.n_hab1 IS 'Pourcentage de présence de l''habitat 1 sur 4 de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_sup1200.cb_hab1 IS 'Identifiants CORINE Biotopes constituant l''habitat 1 sur 4 de la pelouse sèche.\n- & : Entremêlement d''habitat\n- / : Habitat en transition dans le sens de lecture\n- (..) : Habitat suspecté.'; +COMMENT ON COLUMN ps.v_pelouseseches_sup1200.lb_hab1 IS 'Libellé(s) CORINE Biotopes constituant(s) l''habitat 1 sur 4 de la pelouse sèche. Chaque élément de la liste est séparé par un '';''.'; +COMMENT ON COLUMN ps.v_pelouseseches_sup1200.n_hab2 IS 'Pourcentage de présence de l''habitat 2 sur 4 de la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_sup1200.cb_hab2 IS 'Identifiants CORINE Biotopes constituant l''habitat 2 sur 4 de la pelouse sèche.\n- & : Entremêlement d''habitat\n- / : Habitat en transition dans le sens de lecture\n- (..) : Habitat suspecté.'; +COMMENT ON COLUMN ps.v_pelouseseches_sup1200.lb_hab2 IS 'Libellé(s) CORINE Biotopes constituant(s) l''habitat 2 sur 4 de la pelouse sèche. Chaque élément de la liste est séparé par un '';''.'; +COMMENT ON COLUMN ps.v_pelouseseches_sup1200.n_hab3 IS 'Pourcentage de présence de l''habitat tertiaire sur la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_sup1200.cb_hab3 IS 'Identifiants CORINE Biotopes constituant l''habitat 3 sur 4 de la pelouse sèche.\n- & : Entremêlement d''habitat\n- / : Habitat en transition dans le sens de lecture\n- (..) : Habitat suspecté.'; +COMMENT ON COLUMN ps.v_pelouseseches_sup1200.lb_hab3 IS 'Libellé(s) CORINE Biotopes constituant(s) l''habitat 3 sur 4 de la pelouse sèche. Chaque élément de la liste est séparé par un '';''.'; +COMMENT ON COLUMN ps.v_pelouseseches_sup1200.n_hab4 IS 'Pourcentage de présence de l''habitat majoritaire sur la pelouse sèche.'; +COMMENT ON COLUMN ps.v_pelouseseches_sup1200.cb_hab4 IS 'Identifiants CORINE Biotopes constituant l''habitat 4 sur 4 de la pelouse sèche.\n- & : Entremêlement d''habitat\n- / : Habitat en transition dans le sens de lecture\n- (..) : Habitat suspecté.'; +COMMENT ON COLUMN ps.v_pelouseseches_sup1200.lb_hab4 IS 'Libellé(s) CORINE Biotopes constituant(s) l''habitat 4 sur 4 de la pelouse sèche. Chaque élément de la liste est séparé par un '';''.'; +""" +with con.begin() as cnx: + cnx.execute(v_pelouseseches_sup1200) + cnx.execute(grant) + cnx.execute(comment) + + +v_pelouseseches_noalti = """ +CREATE OR REPLACE VIEW ps.v_pelouseseches_noalti +AS +SELECT + * +FROM ps.v_pelouseseches_all s + LEFT JOIN ps.r_infeq_1200m s7 ON s.site_code = s7.id_site +WHERE + s7.infeq_1200 IS NULL +ORDER BY s.site_code DESC NULLS LAST +""" +grant = """ +GRANT ALL ON TABLE ps.v_pelouseseches_noalti TO grp_admin; +""" +comment = """ +COMMENT ON VIEW ps.v_pelouseseches_noalti IS 'Vue des sites à pelouses sèches strictes, ne possèdant pas de critères ''infeq_1200'''; +""" +with con.begin() as cnx: + cnx.execute(v_pelouseseches_noalti) + cnx.execute(grant) + cnx.execute(comment) + +# test = """ +# SELECT * FROM ps.v_pelouseseches_all; +# """ +# data = gpd.read_postgis( +# sql = test, +# con = con) diff --git a/3_AZALEE/create_view_ref.py b/3_AZALEE/create_view_ref.py new file mode 100644 index 0000000..198a6cf --- /dev/null +++ b/3_AZALEE/create_view_ref.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*-. 
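+# Builds Isère-clipped views of the RPG layers: the 2021 "ilots" view is written
+# out in full, then the other layer/year variants are derived from it with plain
+# str.replace() on the view and source-table names (see below). Minimal sketch of
+# the pattern, assuming the replaced substrings occur only in those names:
+#
+#     sql_2020 = sql_2021.replace('2021', '2020')  # swaps view name and source table
+#     with con.begin() as cnx:
+#         cnx.execute(sql_2020)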
+
+from sqlalchemy import create_engine, text
+from sqlalchemy.engine import URL
+from datetime import datetime as dt
+import pandas as pd
+import geopandas as gpd
+
+
+# Parametres bdd
+user = 'cen_admin'
+pwd = '#CEN38@venir'
+adr = '91.134.194.221'
+port = '5432'
+base = 'azalee'
+
+url = URL.create('postgresql+psycopg2',
+    username=user,
+    password=pwd,
+    host=adr,
+    port=port,
+    database=base,
+)
+con = create_engine(url)
+
+v_rpg2021_ilots_anonymes_isere = """
+DROP VIEW IF EXISTS ref_territoire.v_rpg2021_ilots_anonymes_isere;
+CREATE OR REPLACE VIEW ref_territoire.v_rpg2021_ilots_anonymes_isere
+AS SELECT --s.id,
+    s.*
+  FROM ref_territoire.rpg2021_ilots_anonymes_reg s,
+    ref_territoire.dept_isere reg
+  WHERE st_intersects(s.geom, reg.geom);
+"""
+grant_rpg2021_ilots = """
+ALTER TABLE ref_territoire.v_rpg2021_ilots_anonymes_isere OWNER TO cen_admin;
+GRANT ALL ON TABLE ref_territoire.v_rpg2021_ilots_anonymes_isere TO cen_admin;
+GRANT ALL ON TABLE ref_territoire.v_rpg2021_ilots_anonymes_isere TO grp_admin;
+GRANT SELECT ON TABLE ref_territoire.v_rpg2021_ilots_anonymes_isere TO grp_consult;
+"""
+with con.begin() as cnx:
+    cnx.execute(v_rpg2021_ilots_anonymes_isere)
+    cnx.execute(grant_rpg2021_ilots)
+
+
+v_rpg2021_parcelles_graphiques_isere = (v_rpg2021_ilots_anonymes_isere
+    .replace('v_rpg2021_ilots_anonymes_isere','v_rpg2021_parcelles_graphiques_isere')
+    .replace('rpg2021_ilots_anonymes_reg','rpg2021_parcelles_graphiques_reg'))
+grant = grant_rpg2021_ilots.replace('v_rpg2021_ilots_anonymes_isere','v_rpg2021_parcelles_graphiques_isere')
+with con.begin() as cnx:
+    cnx.execute(v_rpg2021_parcelles_graphiques_isere)
+    cnx.execute(grant)
+
+
+v_rpg2020_ilots_anonymes_isere = v_rpg2021_ilots_anonymes_isere.replace('2021','2020')
+grant = grant_rpg2021_ilots.replace('2021','2020')
+with con.begin() as cnx:
+    cnx.execute(v_rpg2020_ilots_anonymes_isere)
+    cnx.execute(grant)
+
+
+# the 2020 parcelles view must also swap the source table, otherwise it would
+# still read from the 2021 ilots layer
+v_rpg2020_parcelles_graphiques_isere = (v_rpg2021_ilots_anonymes_isere
+    .replace('v_rpg2021_ilots_anonymes_isere','v_rpg2020_parcelles_graphiques_isere')
+    .replace('rpg2021_ilots_anonymes_reg','rpg2020_parcelles_graphiques_reg'))
+grant = grant_rpg2021_ilots.replace('v_rpg2021_ilots_anonymes_isere','v_rpg2020_parcelles_graphiques_isere')
+with con.begin() as cnx:
+    cnx.execute(v_rpg2020_parcelles_graphiques_isere)
+    cnx.execute(grant)
+
+
+
diff --git a/3_AZALEE/create_view_site.py b/3_AZALEE/create_view_site.py
new file mode 100644
index 0000000..9d573eb
--- /dev/null
+++ b/3_AZALEE/create_view_site.py
@@ -0,0 +1,76 @@
+from pycen import con
+from sqlalchemy import text
+
+v_sites = '''
+DROP VIEW IF EXISTS sites.v_sites;
+CREATE OR REPLACE VIEW sites.v_sites
+AS
+WITH tmp_auteur_site as (
+    SELECT
+        s.id,
+        string_agg(s11.auteur,';' ORDER BY s11.auteur) AS auteur_site
+    FROM sites.sites s
+        JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id
+        LEFT JOIN (sites.r_sites_auteur s1
+            JOIN personnes.v_personne s11 ON s1.id_auteur = s11.id
+            ) ON s.id::text = s1.id_site::text
+    WHERE s.date_fin IS NULL
+    GROUP BY s.id
+    ORDER BY s.id
+), tmp_auteur_geom as (
+    SELECT
+        s6.id,
+        string_agg(s611.auteur,';' ORDER BY s611.auteur) AS auteur_geom
+    FROM sites.sites s
+        JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id
+        LEFT JOIN (sites.r_sites_geom s6
+            LEFT JOIN (sites.r_geomsites_auteur s61
+                JOIN personnes.v_personne s611 ON s61.id_auteur = s611.id
+                ) ON s6.id = s61.id_geom_site
+            ) ON s.id = s6.id_site
+    WHERE s.date_fin IS NULL
+    GROUP BY s6.id
+    ORDER BY s6.id
+)
+SELECT
+    g.id id_geom_site,
+    s.id code_site,
+    s.nom,
+    s1.auteur_site,
+    g1.auteur_geom,
+    s.date_deb date,
+    s.date_fin,
s.id_old_site, + s3.nom_court AS type_milieu, + s4.nom AS type_site, + s5.id||' - '||s5.nom AS typo_sdage, + s.dept, + s.org, + s.num, + s.remarques, + s.nom_com, + s.nom_com_json, + g.geom, + g.date date_geom, + g.link_pdf, + g.rmq_fct_majeur, + g.rmq_interet_patri, + g.rmq_bilan_menace, + g.rmq_orient_act, + g.rmq_usage_process, + g2.libelle lot, + g.id_origine, + g.date_insert +FROM sites.sites s + LEFT JOIN tmp_auteur_site s1 ON s.id = s1.id + LEFT JOIN sites.autre_nom s2 ON s.id = s2.id_site + LEFT JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id + LEFT JOIN sites.type_site s4 ON s.id_type_site::text = s4.id::text + LEFT JOIN sites.typo_sdage s5 ON s.id_typo_sdage::text = s5.id::text + LEFT JOIN sites.r_sites_geom g ON s.id = g.id_site + LEFT JOIN tmp_auteur_geom g1 ON g.id = g1.id + LEFT JOIN sites.lots g2 USING (id_lot) +; +''' +with con.begin() as cnx: + cnx.execute(text(v_sites)) \ No newline at end of file diff --git a/3_AZALEE/create_view_zh.py b/3_AZALEE/create_view_zh.py new file mode 100644 index 0000000..eb17bef --- /dev/null +++ b/3_AZALEE/create_view_zh.py @@ -0,0 +1,567 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*-. + +from sqlalchemy import create_engine, text +from sqlalchemy.engine import URL +from datetime import datetime as dt +import pandas as pd +import geopandas as gpd + + +# Parametres bdd +user = 'cen_admin' +pwd = '#CEN38@venir' +adr = '91.134.194.221' +port = '5432' +base = 'azalee' + +url = URL.create('postgresql+psycopg2', + username=user, + password=pwd, + host=adr, + database=base, +) +con = create_engine(url) + +drop_v_zh = 'DROP VIEW IF EXISTS zones_humides.v_zoneshumides CASCADE;' +with con.begin() as cnx: + cnx.execute(drop_v_zh) + + +v_zh_hab = """ +DROP VIEW IF EXISTS zones_humides.v_zh_hab; +CREATE OR REPLACE VIEW zones_humides.v_zh_hab +AS +WITH auteur AS ( + SELECT DISTINCT ON (id_sitehab) + c.id_sitehab, + string_agg(c1.auteur,';' ORDER BY c1.auteur) auteur + FROM zones_humides.r_rsitehab_auteur c + JOIN personnes.v_personne c1 ON c1.id = c.id_auteur + GROUP BY c.id_sitehab + ORDER BY 1 +) +SELECT DISTINCT ON (a.id_geom_site) + a.id_geom_site, + c.auteur, + MAX(a.date) date, + string_agg(a.id_cb,';') code_cb, + string_agg(b.lb_hab_fr,';') lib_cb +FROM zones_humides.r_site_habitat a + JOIN ref_habitats.corine_biotope b ON a.id_cb = b.id + JOIN auteur c ON c.id_sitehab = a.id +WHERE a."valid" +GROUP BY 1,2 +ORDER BY a.id_geom_site +""" +grant = """ +GRANT ALL ON TABLE zones_humides.v_zh_hab TO grp_admin; +GRANT SELECT ON TABLE zones_humides.v_zh_hab TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(text(v_zh_hab)) + cnx.execute(grant) + + +v_zh_usgprocess = """ +DROP VIEW IF EXISTS zones_humides.v_zh_usgprocess; +CREATE OR REPLACE VIEW zones_humides.v_zh_usgprocess +AS +WITH auteur AS ( + SELECT DISTINCT ON (id_siteusage) + c.id_siteusage, + string_agg(c1.auteur,';' ORDER BY c1.auteur) auteur + FROM zones_humides.r_rsiteusage_auteur c + JOIN personnes.v_personne c1 ON c1.id = c.id_auteur + GROUP BY c.id_siteusage + ORDER BY 1 +), usgprocess as ( + SELECT distinct on (a.id_geom_site,a1.nom,a2.nom) + a.id_geom_site, + c.auteur, + MAX(a.date) date, + a1.nom activ_hum, + a.activ_hum_autre, + a2.nom impact, + a3.nom "position", + a.remarques rmq_activ_hum + FROM zones_humides.r_site_usageprocess a + JOIN zones_humides.param_activ_hum a1 ON a.id_activ_hum = a1.id + JOIN zones_humides.param_impact a2 ON a.id_impact = a2.id + JOIN zones_humides.param_position a3 ON a.id_position = a3.id + JOIN auteur c ON c.id_siteusage = a.id + WHERE 
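+    -- "valid" flags the record as validated; only validated usage/process
+    -- rows feed the aggregated view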
a."valid" + --AND date = a.date + GROUP BY a.id_geom_site,c.auteur,a1.nom,a.activ_hum_autre,a2.nom,a3.nom,a.remarques + ORDER BY 1 + ) +SELECT DISTINCT ON (id_geom_site) + id_geom_site, + auteur, + MAX(date) date, + string_agg( + CASE WHEN activ_hum = 'Autre (préciser dans l''encart réservé aux remarques)' THEN activ_hum_autre + ELSE activ_hum END,';') activ_hum, + string_agg( + CASE WHEN impact = 'Autre (préciser dans l''encart réservé aux remarques)' THEN rmq_activ_hum + ELSE impact END,';') impact, + string_agg("position",';') "position", + string_agg( + CASE WHEN impact = 'Autre (préciser dans l''encart réservé aux remarques)' THEN NULL + ELSE rmq_activ_hum END,';') rmq_activ_hum +FROM usgprocess +GROUP BY 1,2 +ORDER BY 1 +""" +grant = """ +GRANT ALL ON TABLE zones_humides.v_zh_usgprocess TO grp_admin; +GRANT SELECT ON TABLE zones_humides.v_zh_usgprocess TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(v_zh_usgprocess) + cnx.execute(grant) + + +v_zh_connex = """ +DROP VIEW IF EXISTS zones_humides.v_zh_connex; +CREATE OR REPLACE VIEW zones_humides.v_zh_connex +AS +WITH auteur AS ( + SELECT DISTINCT ON (id_siteconnect) + c.id_siteconnect, + string_agg(c1.auteur,';' ORDER BY c1.auteur) auteur + FROM zones_humides.r_rsiteconnect_auteur c + JOIN personnes.v_personne c1 ON c1.id = c.id_auteur + GROUP BY c.id_siteconnect + ORDER BY 1 +) +SELECT + a.id_geom_site, + c.auteur, + a.date, + b.nom connexion +FROM zones_humides.r_site_type_connect a + JOIN zones_humides.param_type_connect b ON a.id_param_connect = b.id + JOIN auteur c ON c.id_siteconnect = a.id +WHERE a."valid" + AND a.date = (SELECT MAX(date) FROM zones_humides.r_site_type_connect WHERE id_geom_site = a.id_geom_site) +ORDER BY 1 +""" +grant = """ +GRANT ALL ON TABLE zones_humides.v_zh_connex TO grp_admin; +GRANT SELECT ON TABLE zones_humides.v_zh_connex TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(v_zh_connex) + cnx.execute(grant) + + +v_zh_submertion = """ +DROP VIEW IF EXISTS zones_humides.v_zh_submertion; +CREATE OR REPLACE VIEW zones_humides.v_zh_submertion +AS +WITH auteur AS ( + SELECT DISTINCT ON (id_sitesub) + c.id_sitesub, + string_agg(c1.auteur,';' ORDER BY c1.auteur) auteur + FROM zones_humides.r_rsitesub_auteur c + JOIN personnes.v_personne c1 ON c1.id = c.id_auteur + GROUP BY c.id_sitesub + ORDER BY 1 +) +SELECT + a.id_geom_site, + c.auteur, + a.date, + a1.nom subm_freq, + a2.nom subm_etend, + a.id_origsub subm_orig +FROM zones_humides.r_site_sub a + JOIN (zones_humides.param_sub a1 JOIN zones_humides.type_param_sub s6411 ON s6411.id = a1.id_type) ON a.id_freqsub = a1.id + JOIN (zones_humides.param_sub a2 JOIN zones_humides.type_param_sub s6421 ON s6421.id = a2.id_type) ON a.id_etendsub = a2.id + JOIN auteur c ON c.id_sitesub = a.id +WHERE a."valid" + AND a.date = (SELECT MAX(date) FROM zones_humides.r_site_sub WHERE id_geom_site = a.id_geom_site) + AND s6411.nom = 'Submersion fréquente' + AND s6421.nom = 'Submersion étendue' +ORDER BY 1 +""" +grant = """ +GRANT ALL ON TABLE zones_humides.v_zh_submertion TO grp_admin; +GRANT SELECT ON TABLE zones_humides.v_zh_submertion TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(v_zh_submertion) + cnx.execute(grant) + + +v_zh_fctecosociopatri_cross = """ +DROP VIEW IF EXISTS zones_humides.v_zh_fctecosociopatri_cross; +CREATE OR REPLACE VIEW zones_humides.v_zh_fctecosociopatri_cross +AS +SELECT + id_geom_site, + auteur, + MAX(date) date, + fct_bio, + fct_hydro, + int_patri, + "val_socioEco" +FROM crosstab( + $$WITH auteur AS ( + SELECT 
DISTINCT ON (id_sitefct) + c.id_sitefct, + string_agg(c1.auteur,';' ORDER BY c1.auteur) auteur + FROM zones_humides.r_rsitefct_auteur c + JOIN personnes.v_personne c1 ON c1.id = c.id_auteur + GROUP BY c.id_sitefct + ORDER BY 1 + ) + SELECT + a.id_geom_site::bigint, + d.auteur, + MAX(a.date) date, + c.nom_court type_param, + string_agg( + CASE WHEN a.description IS NULL THEN b.nom + WHEN a.description = '' THEN b.nom + ELSE CONCAT(b.nom,' (',a.description,')') END, + ';') fct + FROM zones_humides.r_site_fctecosociopatri a + JOIN (zones_humides.param_fct_eco_socio_patri b + JOIN zones_humides.type_param_fct c ON b.id_type = c.id + ) ON a.id_fct = b.id + JOIN auteur d ON d.id_sitefct = a.id + WHERE a."valid" + GROUP BY a.id_geom_site,d.auteur,c.nom_court + ORDER BY 1,4;$$, + $$SELECT DISTINCT nom_court FROM zones_humides.type_param_fct ORDER BY 1 ASC;$$ +) AS ct ( + "id_geom_site" bigint, + "auteur" text, + "date" date, + "fct_bio" text, + "fct_hydro" text, + "int_patri" text, + "val_socioEco" text) +GROUP BY id_geom_site,auteur,fct_bio,fct_hydro,int_patri,"val_socioEco" +ORDER BY 1,3,2; +""" +grant = """ +GRANT ALL ON TABLE zones_humides.v_zh_fctecosociopatri_cross TO grp_admin; +GRANT SELECT ON TABLE zones_humides.v_zh_fctecosociopatri_cross TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(v_zh_fctecosociopatri_cross) + cnx.execute(grant) + + +v_zh_critdelim_cross = """ +DROP VIEW IF EXISTS zones_humides.v_zh_critdelim_cross; +CREATE OR REPLACE VIEW zones_humides.v_zh_critdelim_cross +AS +SELECT DISTINCT ON (id_geom_site) + id_geom_site, + auteur, + MAX(date) date, + crit_delim, + crit_def_esp +FROM crosstab( + $$WITH auteur AS ( + SELECT DISTINCT ON (id_sitedelim) + c.id_sitedelim, + string_agg(c1.auteur,';' ORDER BY c1.auteur) auteur + FROM zones_humides.r_rsitedelim_auteur c + JOIN personnes.v_personne c1 ON c1.id = c.id_auteur + GROUP BY c.id_sitedelim + ORDER BY 1 + ) + SELECT + a.id_geom_site::bigint, + d.auteur, + MAX(a.date) date, + c.nom_court type_param, + string_agg( + CASE WHEN a.description IS NULL THEN b.nom + WHEN a.description = '' THEN b.nom + ELSE CONCAT(b.nom,' (',a.description,')') END, + ';') fct + FROM zones_humides.r_site_critdelim a + JOIN (zones_humides.param_delim_fct b + JOIN zones_humides.type_param_delim_fct c ON b.id_type = c.id + ) ON a.id_crit_delim = b.id + JOIN auteur d ON d.id_sitedelim = a.id + WHERE a."valid" + GROUP BY a.id_geom_site,d.auteur,c.nom_court + ORDER BY 1,2,3$$, + $$SELECT DISTINCT nom_court FROM zones_humides.type_param_delim_fct ORDER BY 1 DESC;$$ +) AS ct ( + "id_geom_site" bigint, + "auteur" text, + "date" date, + "crit_delim" text, + "crit_def_esp" text) +GROUP BY id_geom_site,auteur,crit_delim,crit_def_esp +ORDER BY 1,3,2; +""" +grant = """ +GRANT ALL ON TABLE zones_humides.v_zh_critdelim_cross TO grp_admin; +GRANT SELECT ON TABLE zones_humides.v_zh_critdelim_cross TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(v_zh_critdelim_cross) + cnx.execute(grant) + + +v_zh_reghydro_cross = """ +DROP VIEW IF EXISTS zones_humides.v_zh_reghydro_cross; +CREATE OR REPLACE VIEW zones_humides.v_zh_reghydro_cross +AS +SELECT DISTINCT ON (id_geom_site) + id_geom_site, + auteur, + MAX(date) date, + (string_to_array(ct.entree_eau,'//'))[1] entree_eau_reg, + (string_to_array(ct.entree_eau,'//'))[2] entree_eau_perm, + (string_to_array(ct.entree_eau,'//'))[3] entree_eau_topo, + (string_to_array(ct.sortie_eau,'//'))[1] sortie_eau_reg, + (string_to_array(ct.sortie_eau,'//'))[2] sortie_eau_perm, + 
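+    -- crosstab() returns a single value column per in_out category, so regime,
+    -- permanence and toponymie are packed with '//' (CONCAT in the source query)
+    -- and unpacked here with string_to_array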
(string_to_array(ct.sortie_eau,'//'))[3] sortie_eau_topo +FROM crosstab( + $$WITH auteur AS ( + SELECT DISTINCT ON (id_sitehydro) + c.id_sitehydro, + string_agg(c1.auteur,';' ORDER BY c1.auteur) auteur + FROM zones_humides.r_rsitehydro_auteur c + JOIN personnes.v_personne c1 ON c1.id = c.id_auteur + GROUP BY c.id_sitehydro + ORDER BY 1 + ) + SELECT + a.id_geom_site, + d.auteur, + MAX(a.date) date, + a.in_out, + CONCAT( + string_agg(a1.nom,';'),'//', -- reg_hydro + string_agg(a2.nom,';'),'//', -- permanence + string_agg(a.rmq_toponymie,';') -- rmq_toponymie + ) hydro + FROM zones_humides.r_site_reghydro a + LEFT JOIN zones_humides.param_reg_hydro a1 ON a.id_reg_hydro = a1.id + LEFT JOIN zones_humides.param_permanence a2 ON a.id_permanence = a2.id + JOIN auteur d ON d.id_sitehydro = a.id + WHERE a."valid" + GROUP BY a.id_geom_site,d.auteur,a.in_out + ORDER BY 1,2$$, + $$SELECT DISTINCT in_out FROM zones_humides.r_site_reghydro ORDER BY 1 DESC;$$ +) AS ct ( + "id_geom_site" bigint, + "auteur" text, + "date" date, + "entree_eau" text, + "sortie_eau" text) +GROUP BY id_geom_site,auteur,entree_eau,sortie_eau +ORDER BY 1,3,2; +""" +grant = """ +GRANT ALL ON TABLE zones_humides.v_zh_reghydro_cross TO grp_admin; +GRANT SELECT ON TABLE zones_humides.v_zh_reghydro_cross TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(v_zh_reghydro_cross) + cnx.execute(grant) + + + +v_zoneshumides = """ +DROP VIEW IF EXISTS zones_humides.v_zoneshumides CASCADE; +CREATE OR REPLACE VIEW zones_humides.v_zoneshumides +AS +WITH tmp_auteur_site as ( + SELECT + s.id, + string_agg(s11.auteur,';' ORDER BY s11.auteur) AS auteur_site + FROM sites.sites s + JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id + LEFT JOIN (sites.r_sites_auteur s1 + JOIN personnes.v_personne s11 ON s1.id_auteur = s11.id + ) ON s.id::text = s1.id_site::text + WHERE s.date_fin IS NULL + AND s3.nom_court::text = 'Milieux humides'::text + GROUP BY s.id + ORDER BY s.id +), tmp_auteur_geom as ( + SELECT + s6.id, + string_agg(s611.auteur,';' ORDER BY s611.auteur) AS auteur_geom + FROM sites.sites s + JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id + LEFT JOIN (sites.r_sites_geom s6 + LEFT JOIN (sites.r_geomsites_auteur s61 + JOIN personnes.v_personne s611 ON s61.id_auteur = s611.id + ) ON s6.id = s61.id_geom_site + ) ON s.id = s6.id_site + WHERE s.date_fin IS NULL + AND s3.nom_court::text = 'Milieux humides'::text + GROUP BY s6.id + ORDER BY s6.id +) +SELECT + s.id AS site_code, + (SELECT ARRAY_AGG(id_site_old) FROM (values (s7.id_site_old)) AS value(id_site_old) WHERE id_site_old IS NOT NULL) old_code, + s.nom, + s2.autre_nom, + s1.auteur_site, + s61.auteur_geom, + ( + SELECT DISTINCT ON (date) auteur + FROM (values (s6.date,s61.auteur_geom),(s62.date,s62.auteur),(s63.date,s63.auteur),(s64.date,s64.auteur),(s65.date,s65.auteur), + (s66.date,s66.auteur),(s67.date,s67.auteur),(s68.date,s68.auteur)) AS value(date,auteur) + WHERE date = (SELECT MAX(date) FROM (values (s6.date),(s62.date),(s63.date),(s64.date),(s65.date),(s66.date),(s67.date),(s68.date)) AS value(date)) + ) as auteur_last_maj, + s.date_deb AS date_site, + s6.date AS date_geom, + (SELECT MAX(date) FROM (values (s6.date),(s62.date),(s63.date),(s64.date),(s65.date),(s66.date),(s67.date),(s68.date)) AS value(date)) as date_last_maj, + s3.nom_court AS type_milieu, + s4.nom AS type_site, + s5.id||' - '||s5.nom AS typo_sdage, + s5.mnemo mnemo_sdage, + s.remarques AS rmq_site, + s6.rmq_fct_majeur, + s6.rmq_interet_patri, + s6.rmq_bilan_menace, + s6.rmq_orient_act, + 
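+    -- the s62..s68 columns below come from the thematic sub-views (habitats,
+    -- usages, connexion, submersion, fonctions, délimitation, régime hydrique),
+    -- joined on id_geom_site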
s6.rmq_usage_process, + s62.code_cb, + s62.lib_cb, + s63.activ_hum, + s63.impact, + s63."position", + s63.rmq_activ_hum, + s64.connexion, + s65.subm_orig, + s65.subm_freq, + s65.subm_etend, + s66.fct_bio, + s66.fct_hydro, + s66.int_patri, + s66."val_socioEco", + s67.crit_delim, + s67.crit_def_esp, + s68.entree_eau_reg, + s68.entree_eau_perm, + s68.entree_eau_topo, + s68.sortie_eau_reg, + s68.sortie_eau_perm, + s68.sortie_eau_topo, + s6.geom +FROM sites.sites s + LEFT JOIN tmp_auteur_site s1 ON s.id = s1.id + LEFT JOIN sites.autre_nom s2 ON s.id = s2.id_site + LEFT JOIN sites.type_milieu s3 ON s.id_type_milieu = s3.id + LEFT JOIN sites.type_site s4 ON s.id_type_site::text = s4.id::text + LEFT JOIN sites.typo_sdage s5 ON s.id_typo_sdage::text = s5.id::text + LEFT JOIN (sites.r_sites_geom s6 + LEFT JOIN tmp_auteur_geom s61 ON s6.id = s61.id + LEFT JOIN zones_humides.v_zh_hab s62 ON s6.id = s62.id_geom_site + LEFT JOIN zones_humides.v_zh_usgprocess s63 ON s6.id = s63.id_geom_site + LEFT JOIN zones_humides.v_zh_connex s64 ON s6.id = s64.id_geom_site + LEFT JOIN zones_humides.v_zh_submertion s65 ON s6.id = s65.id_geom_site + LEFT JOIN zones_humides.v_zh_fctecosociopatri_cross s66 ON s6.id = s66.id_geom_site + LEFT JOIN zones_humides.v_zh_critdelim_cross s67 ON s6.id = s67.id_geom_site + LEFT JOIN zones_humides.v_zh_reghydro_cross s68 ON s6.id = s68.id_geom_site + ) ON s.id::text = s6.id_site::text + LEFT JOIN sites.hist_id_site s7 ON s.id = s7.id_site_new +WHERE s.date_fin IS NULL + AND s3.nom_court::text = 'Milieux humides'::text + AND s6.date = (SELECT MAX(date) FROM sites.r_sites_geom WHERE id_site = s.id) +ORDER BY s.id ASC NULLS FIRST; +""" +grant = """ +GRANT ALL ON TABLE zones_humides.v_zoneshumides TO grp_admin; +GRANT SELECT ON TABLE zones_humides.v_zoneshumides TO grp_consult; +""" +comment = """ +COMMENT ON COLUMN zones_humides.v_zoneshumides.site_code IS 'Identifiant de la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.nom IS 'Nom de la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.auteur_site IS 'Auteur ayant caractérisé la zone humide pour la première fois.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.auteur_geom IS 'Auteur ayant définis la géometrie actuelle de la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.auteur_last_maj IS 'Auteur le plus récent ayant défini les attributs de la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.date_site IS 'Date de la première description de la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.date_geom IS 'Date de la géométrie.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.date_last_maj IS 'Date des attributs les plus récents.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.type_milieu IS 'Caractérisation du milieu.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.type_site IS ''; +COMMENT ON COLUMN zones_humides.v_zoneshumides.typo_sdage IS 'Typologie sdage de la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.rmq_site IS 'Remarques générale concernant la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.rmq_fct_majeur IS 'Remarques sur les fonctions majeurs de la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.rmq_interet_patri IS 'Remarques sur les intérêts patrimoniaux de la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.rmq_bilan_menace IS 'Remarques globales sur les menaces qui concernent la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.rmq_orient_act IS 'Remarques sur les 
orientations et les actes de la zone humide.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.rmq_usage_process IS 'Remarques concernant les usages et les processus naturels de la zone humide.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.code_cb IS 'Liste des codes CORINE Biotopes identifiés sur la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.lib_cb IS 'Liste des habitats CORINE Biotopes identifiés sur la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.activ_hum IS 'Liste des activités humaines identifiées sur la zone humide. Chaque élément de la liste est séparé par un '';'' et est lié aux colonnes ''impact'' et ''position''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.impact IS 'Liste des impacts identifiés et engendrés par les activités humaines sur la zone humide. Chaque élément de la liste est séparé par un '';'' et est lié aux colonnes ''activ_hum'' et ''position''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides."position" IS 'Liste des positionnements des différentes activités humaines sur la zone humide. Chaque élément de la liste est séparé par un '';'' et est lié aux colonnes ''activ_hum'' et ''impact''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.rmq_activ_hum IS 'Remarques concernant les activités humaines identifiées sur la zone humide.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.connexion IS 'Connexion de la zone humide dans son environnement.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.subm_orig IS 'Origine de l''eau submersive.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.subm_freq IS 'Fréquence de la submersion de la zone humide.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.subm_etend IS 'Étendue de la submersion de la zone humide.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.fct_bio IS 'Liste des fonctions biologiques de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.fct_hydro IS 'Liste des fonctions hydrologiques de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.int_patri IS 'Liste des intérêts patrimoniaux de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides."val_socioEco" IS 'Liste des valeurs socio-économiques de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.crit_delim IS 'Liste des critères de délimitation de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.crit_def_esp IS 'Liste des critères de l''espace de fonctionnalité de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.entree_eau_reg IS 'Liste des entrées d''eau du régime hydrique de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.entree_eau_perm IS 'Liste des permanences respectivement de chaque entrée d''eau de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.entree_eau_topo IS 'Liste de la toponymie respectivement de chaque entrée d''eau de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.sortie_eau_reg IS 'Liste des sorties d''eau du régime hydrique de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.sortie_eau_perm IS 'Liste des permanences respectivement de chaque sortie d''eau de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.sortie_eau_topo IS 'Liste de la toponymie respectivement de chaque sortie d''eau de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+"""
+with con.begin() as cnx:
+    cnx.execute(text(v_zoneshumides))
+    cnx.execute(grant)
+    cnx.execute(comment)
+
+
+v_rhomeosite = """
+DROP VIEW IF EXISTS zones_humides.v_rhomeosite;
+CREATE OR REPLACE VIEW zones_humides.v_rhomeosite
+AS
+SELECT
+    v.site_code||' - '||v.nom "NAME",
+    SPLIT_PART(v.auteur_geom,' (',1) "REFERENT",
+    REPLACE(SPLIT_PART(v.auteur_geom,' (',2),')','') "ORG",
+    SPLIT_PART(v.typo_sdage,' - ',1) "TYPE",
+    CASE WHEN r.nom::text = 'alpin' THEN '1'
+         WHEN r.nom::text = 'continental' THEN '2'
+         WHEN r.nom::text = 'mediterraneen' THEN '4'
+    END "ODONATE",
+    v.geom
+FROM zones_humides.v_zoneshumides v, ref_territoire.ref_biogeo r
+WHERE st_intersects(v.geom, r.geom)
+"""
+grant = """
+GRANT ALL ON TABLE zones_humides.v_rhomeosite TO grp_admin;
+GRANT SELECT ON TABLE zones_humides.v_rhomeosite TO grp_consult;
+"""
+with con.begin() as cnx:
+    cnx.execute(text(v_rhomeosite))
+    cnx.execute(grant)
+
+
+test = """
+SELECT * FROM zones_humides.v_zoneshumides;
+"""
+data = pd.read_sql_query(
+    sql = text(test),
+    con = con)
+
+
+# two equivalent checks: rows whose site_code appears more than once
+data[~data.index.isin(data.drop_duplicates(subset='site_code',keep=False).index)]
+data[data.duplicated(subset='site_code',keep=False)]
\ No newline at end of file
diff --git a/3_AZALEE/create_view_zh2.py b/3_AZALEE/create_view_zh2.py
new file mode 100644
index 0000000..4b8662d
--- /dev/null
+++ b/3_AZALEE/create_view_zh2.py
@@ -0,0 +1,592 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
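+# Variant of create_view_zh.py: the thematic sub-views are keyed on id_site
+# rather than id_geom_site, and each one keeps a single, most recent validated
+# record per site. Minimal sketch of the DISTINCT ON pattern used throughout
+# this file:
+#
+#     SELECT DISTINCT ON (id_site) id_site, "date", ...
+#     FROM ...
+#     ORDER BY id_site, "date" DESC;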
+ +from sqlalchemy import create_engine, text +from sqlalchemy.engine import URL +from datetime import datetime as dt +import pandas as pd +import geopandas as gpd + + +# Parametres bdd +user = 'cen_admin' +pwd = '#CEN38@venir' +adr = '91.134.194.221' +port = '5432' +base = 'azalee' + +url = URL.create('postgresql+psycopg2', + username=user, + password=pwd, + host=adr, + database=base, +) +con = create_engine(url) + +drop_v_zh = 'DROP VIEW IF EXISTS zones_humides.v_zoneshumides CASCADE;' +with con.begin() as cnx: + cnx.execute(drop_v_zh) + + +v_zh_hab = """ +DROP VIEW IF EXISTS zones_humides.v_zh_hab; +CREATE OR REPLACE VIEW zones_humides.v_zh_hab +AS +WITH auteur AS ( + SELECT DISTINCT ON (id_sitehab) + c.id_sitehab, + string_agg(c1.auteur,';' ORDER BY c1.auteur) auteur + FROM zones_humides.r_rsitehab_auteur c + JOIN personnes.v_personne c1 ON c1.id = c.id_auteur + GROUP BY c.id_sitehab + ORDER BY 1 +) +SELECT DISTINCT ON (a.id_site) + --a.id_geom_site, + a.id_site, + c.auteur, + --MAX(a."date") "date", + a."date", + string_agg(a.id_cb,';' order by a.id_cb) code_cb, + string_agg(b.lb_hab_fr,';' order by a.id_cb) lib_cb +FROM zones_humides.r_site_habitat a + JOIN ref_habitats.corine_biotope b ON a.id_cb = b.id + JOIN auteur c ON c.id_sitehab = a.id +WHERE a."valid" +GROUP BY 1,2,3 +ORDER BY a.id_site, a."date" desc,row_number() OVER (ORDER BY a.id_site) desc +;""" +grant = """ +GRANT ALL ON TABLE zones_humides.v_zh_hab TO grp_admin; +GRANT SELECT ON TABLE zones_humides.v_zh_hab TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(text(v_zh_hab)) + cnx.execute(grant) + + +v_zh_usgprocess = """ +DROP VIEW IF EXISTS zones_humides.v_zh_usgprocess; +CREATE OR REPLACE VIEW zones_humides.v_zh_usgprocess +AS +WITH auteur AS ( + SELECT DISTINCT ON (id_siteusage) + c.id_siteusage, + string_agg(c1.auteur,';' ORDER BY c1.auteur) auteur + FROM zones_humides.r_rsiteusage_auteur c + JOIN personnes.v_personne c1 ON c1.id = c.id_auteur + GROUP BY c.id_siteusage + ORDER BY 1 +), usgprocess as ( + SELECT distinct on (a.id_site,a1.nom,a2.nom) + --a.id_geom_site, + a.id_site, + c.auteur, + a."date", + a1.nom activ_hum, + a.activ_hum_autre, + a2.nom impact, + a3.nom "position", + a.remarques rmq_activ_hum + FROM zones_humides.r_site_usageprocess a + JOIN zones_humides.param_activ_hum a1 ON a.id_activ_hum = a1.id + JOIN zones_humides.param_impact a2 ON a.id_impact = a2.id + JOIN zones_humides.param_position a3 ON a.id_position = a3.id + JOIN auteur c ON c.id_siteusage = a.id + WHERE a."valid" + --AND date = a.date + --GROUP BY a.id_geom_site,c.auteur,a1.nom,a.activ_hum_autre,a2.nom,a3.nom,a.remarques + --GROUP BY a.id_site,c.auteur,a1.nom,a.activ_hum_autre,a2.nom,a3.nom,a.remarques + ORDER BY a.id_site,a1.nom,a2.nom, a."date" desc,row_number() OVER (ORDER BY a.id_site) desc + ) +SELECT DISTINCT ON (id_site) + --id_geom_site, + id_site, + auteur, + "date", + string_agg( + CASE WHEN activ_hum = 'Autre (préciser dans l''encart réservé aux remarques)' THEN activ_hum_autre + ELSE activ_hum END,';') activ_hum, + string_agg( + CASE WHEN impact = 'Autre (préciser dans l''encart réservé aux remarques)' THEN rmq_activ_hum + ELSE impact END,';') impact, + string_agg("position",';') "position", + string_agg( + CASE WHEN impact = 'Autre (préciser dans l''encart réservé aux remarques)' THEN NULL + ELSE rmq_activ_hum END,';') rmq_activ_hum +FROM usgprocess +GROUP BY 1,2,3 +ORDER BY id_site, "date" desc,row_number() OVER (ORDER BY id_site) desc +;""" +grant = """ +GRANT ALL ON TABLE zones_humides.v_zh_usgprocess TO 
grp_admin; +GRANT SELECT ON TABLE zones_humides.v_zh_usgprocess TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(v_zh_usgprocess) + cnx.execute(grant) + + +v_zh_connex = """ +DROP VIEW IF EXISTS zones_humides.v_zh_connex; +CREATE OR REPLACE VIEW zones_humides.v_zh_connex +AS +WITH auteur AS ( + SELECT DISTINCT ON (id_siteconnect) + c.id_siteconnect, + string_agg(c1.auteur,';' ORDER BY c1.auteur) auteur + FROM zones_humides.r_rsiteconnect_auteur c + JOIN personnes.v_personne c1 ON c1.id = c.id_auteur + GROUP BY c.id_siteconnect + ORDER BY 1 +) +SELECT DISTINCT ON (id_site) + --a.id_geom_site, + a.id_site, + c.auteur, + a."date", + b.nom connexion +FROM zones_humides.r_site_type_connect a + JOIN zones_humides.param_type_connect b ON a.id_param_connect = b.id + JOIN auteur c ON c.id_siteconnect = a.id +WHERE a."valid" + --AND a.date = (SELECT MAX(date) FROM zones_humides.r_site_type_connect WHERE id_site = a.id_site) +ORDER BY a.id_site,"date" desc,row_number() OVER (ORDER BY a.id_site) desc +;""" +grant = """ +GRANT ALL ON TABLE zones_humides.v_zh_connex TO grp_admin; +GRANT SELECT ON TABLE zones_humides.v_zh_connex TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(v_zh_connex) + cnx.execute(grant) + + +v_zh_submertion = """ +DROP VIEW IF EXISTS zones_humides.v_zh_submertion; +CREATE OR REPLACE VIEW zones_humides.v_zh_submertion +AS +WITH auteur AS ( + SELECT DISTINCT ON (id_sitesub) + c.id_sitesub, + string_agg(c1.auteur,';' ORDER BY c1.auteur) auteur + FROM zones_humides.r_rsitesub_auteur c + JOIN personnes.v_personne c1 ON c1.id = c.id_auteur + GROUP BY c.id_sitesub + ORDER BY 1 +) +SELECT DISTINCT ON (a.id_site) + --a.id_geom_site, + a.id_site, + c.auteur, + a."date", + a1.nom subm_freq, + a2.nom subm_etend, + a.id_origsub subm_orig +FROM zones_humides.r_site_sub a + JOIN (zones_humides.param_sub a1 JOIN zones_humides.type_param_sub s6411 ON s6411.id = a1.id_type) ON a.id_freqsub = a1.id + JOIN (zones_humides.param_sub a2 JOIN zones_humides.type_param_sub s6421 ON s6421.id = a2.id_type) ON a.id_etendsub = a2.id + JOIN auteur c ON c.id_sitesub = a.id +WHERE a."valid" + --AND a.date = (SELECT MAX(date) FROM zones_humides.r_site_sub WHERE id_site = a.id_site) + AND s6411.nom = 'Submersion fréquente' + AND s6421.nom = 'Submersion étendue' +ORDER BY a.id_site,"date" desc,row_number() OVER (ORDER BY a.id_site) desc +;""" +grant = """ +GRANT ALL ON TABLE zones_humides.v_zh_submertion TO grp_admin; +GRANT SELECT ON TABLE zones_humides.v_zh_submertion TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(v_zh_submertion) + cnx.execute(grant) + + +v_zh_fctecosociopatri_cross = """ +DROP VIEW IF EXISTS zones_humides.v_zh_fctecosociopatri_cross; +CREATE OR REPLACE VIEW zones_humides.v_zh_fctecosociopatri_cross +AS +SELECT DISTINCT ON (id_site) + --id_geom_site, + id_site, + auteur, + "date", + fct_bio, + fct_hydro, + int_patri, + "val_socioEco" +FROM crosstab( + $$WITH auteur AS ( + SELECT DISTINCT ON (id_sitefct) + c.id_sitefct, + string_agg(c1.auteur,';' ORDER BY c1.auteur) auteur + FROM zones_humides.r_rsitefct_auteur c + JOIN personnes.v_personne c1 ON c1.id = c.id_auteur + GROUP BY c.id_sitefct + ORDER BY 1 + ) + SELECT + --a.id_geom_site::bigint, + id_site, + d.auteur, + MAX(a.date) date, + c.nom_court type_param, + string_agg( + CASE WHEN a.description IS NULL THEN b.nom + WHEN a.description = '' THEN b.nom + ELSE CONCAT(b.nom,' (',a.description,')') END, + ';') fct + FROM zones_humides.r_site_fctecosociopatri a + JOIN (zones_humides.param_fct_eco_socio_patri b + 
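+            -- parenthesized join: b is matched to its type c first, and the
+            -- pair is then attached to the site link table via a.id_fct = b.id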
JOIN zones_humides.type_param_fct c ON b.id_type = c.id + ) ON a.id_fct = b.id + JOIN auteur d ON d.id_sitefct = a.id + WHERE a."valid" + GROUP BY a.id_site,d.auteur,c.nom_court + ORDER BY 1,4;$$, + $$SELECT DISTINCT nom_court FROM zones_humides.type_param_fct ORDER BY 1 ASC;$$ +) AS ct ( + "id_site" text, + "auteur" text, + "date" date, + "fct_bio" text, + "fct_hydro" text, + "int_patri" text, + "val_socioEco" text) +--GROUP BY id_site,auteur,fct_bio,fct_hydro,int_patri,"val_socioEco" +ORDER BY id_site,"date" desc,row_number() OVER (ORDER BY id_site) desc +;""" +grant = """ +GRANT ALL ON TABLE zones_humides.v_zh_fctecosociopatri_cross TO grp_admin; +GRANT SELECT ON TABLE zones_humides.v_zh_fctecosociopatri_cross TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(v_zh_fctecosociopatri_cross) + cnx.execute(grant) + + +v_zh_critdelim_cross = """ +DROP VIEW IF EXISTS zones_humides.v_zh_critdelim_cross; +CREATE OR REPLACE VIEW zones_humides.v_zh_critdelim_cross +AS +SELECT DISTINCT ON (id_site) + --id_geom_site, + id_site, + auteur, + "date", + crit_delim, + crit_def_esp +FROM crosstab( + $$WITH auteur AS ( + SELECT DISTINCT ON (id_sitedelim) + c.id_sitedelim, + string_agg(c1.auteur,';' ORDER BY c1.auteur) auteur + FROM zones_humides.r_rsitedelim_auteur c + JOIN personnes.v_personne c1 ON c1.id = c.id_auteur + GROUP BY c.id_sitedelim + ORDER BY 1 + ) + SELECT + --a.id_geom_site::bigint, + id_site, + d.auteur, + MAX(a.date) date, + c.nom_court type_param, + string_agg( + CASE WHEN a.description IS NULL THEN b.nom + WHEN a.description = '' THEN b.nom + ELSE CONCAT(b.nom,' (',a.description,')') END, + ';') fct + FROM zones_humides.r_site_critdelim a + JOIN (zones_humides.param_delim_fct b + JOIN zones_humides.type_param_delim_fct c ON b.id_type = c.id + ) ON a.id_crit_delim = b.id + JOIN auteur d ON d.id_sitedelim = a.id + WHERE a."valid" + GROUP BY a.id_site,d.auteur,c.nom_court + ORDER BY 1,2,3$$, + $$SELECT DISTINCT nom_court FROM zones_humides.type_param_delim_fct ORDER BY 1 DESC;$$ +) AS ct ( + "id_site" text, + "auteur" text, + "date" date, + "crit_delim" text, + "crit_def_esp" text) +--GROUP BY id_site,auteur,crit_delim,crit_def_esp +ORDER BY id_site,"date" desc,row_number() OVER (ORDER BY id_site) desc +;""" +grant = """ +GRANT ALL ON TABLE zones_humides.v_zh_critdelim_cross TO grp_admin; +GRANT SELECT ON TABLE zones_humides.v_zh_critdelim_cross TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(v_zh_critdelim_cross) + cnx.execute(grant) + + +v_zh_reghydro_cross = """ +DROP VIEW IF EXISTS zones_humides.v_zh_reghydro_cross; +CREATE OR REPLACE VIEW zones_humides.v_zh_reghydro_cross +AS +SELECT DISTINCT ON (id_site) + --id_geom_site, + id_site, + auteur, + "date", + (string_to_array(ct.entree_eau,'//'))[1] entree_eau_reg, + (string_to_array(ct.entree_eau,'//'))[2] entree_eau_perm, + (string_to_array(ct.entree_eau,'//'))[3] entree_eau_topo, + (string_to_array(ct.sortie_eau,'//'))[1] sortie_eau_reg, + (string_to_array(ct.sortie_eau,'//'))[2] sortie_eau_perm, + (string_to_array(ct.sortie_eau,'//'))[3] sortie_eau_topo +FROM crosstab( + $$WITH auteur AS ( + SELECT DISTINCT ON (id_sitehydro) + c.id_sitehydro, + string_agg(c1.auteur,';' ORDER BY c1.auteur) auteur + FROM zones_humides.r_rsitehydro_auteur c + JOIN personnes.v_personne c1 ON c1.id = c.id_auteur + GROUP BY c.id_sitehydro + ORDER BY 1 + ) + SELECT + --a.id_geom_site, + id_site, + d.auteur, + MAX(a.date) date, + a.in_out, + CONCAT( + string_agg(a1.nom,';'),'//', -- reg_hydro + string_agg(a2.nom,';'),'//', -- 
permanence + string_agg(a.rmq_toponymie,';') -- rmq_toponymie + ) hydro + FROM zones_humides.r_site_reghydro a + LEFT JOIN zones_humides.param_reg_hydro a1 ON a.id_reg_hydro = a1.id + LEFT JOIN zones_humides.param_permanence a2 ON a.id_permanence = a2.id + JOIN auteur d ON d.id_sitehydro = a.id + WHERE a."valid" + GROUP BY a.id_site,d.auteur,a.in_out + ORDER BY 1,2$$, + $$SELECT DISTINCT in_out FROM zones_humides.r_site_reghydro ORDER BY 1 DESC;$$ +) AS ct ( + "id_site" text, + "auteur" text, + "date" date, + "entree_eau" text, + "sortie_eau" text) +--GROUP BY id_site,auteur,entree_eau,sortie_eau +ORDER BY id_site,"date" desc,row_number() OVER (ORDER BY id_site) desc +;""" +grant = """ +GRANT ALL ON TABLE zones_humides.v_zh_reghydro_cross TO grp_admin; +GRANT SELECT ON TABLE zones_humides.v_zh_reghydro_cross TO grp_consult; +""" +with con.begin() as cnx: + cnx.execute(v_zh_reghydro_cross) + cnx.execute(grant) + + +v_zoneshumides = """ +DROP VIEW IF EXISTS zones_humides.v_zoneshumides CASCADE; +CREATE OR REPLACE VIEW zones_humides.v_zoneshumides +AS +WITH tmp_auteur_site as ( + SELECT + s0.id, + string_agg(s11.auteur,';' ORDER BY s11.auteur) AS auteur_site + FROM sites.sites s0 + JOIN sites.type_milieu s3 ON s0.id_type_milieu = s3.id + LEFT JOIN (sites.r_sites_auteur s1 + JOIN personnes.v_personne s11 ON s1.id_auteur = s11.id + ) ON s0.id::text = s1.id_site::text + WHERE s0.date_fin IS NULL + AND s3.nom_court::text = 'Milieux humides'::text + GROUP BY s0.id + ORDER BY s0.id +), tmp_auteur_geom as ( + SELECT + s6.id, + string_agg(s611.auteur,';' ORDER BY s611.auteur) AS auteur_geom + FROM sites.sites s0 + JOIN sites.type_milieu s3 ON s0.id_type_milieu = s3.id + LEFT JOIN (sites.r_sites_geom s6 + LEFT JOIN (sites.r_geomsites_auteur s61 + JOIN personnes.v_personne s611 ON s61.id_auteur = s611.id + ) ON s6.id = s61.id_geom_site + ) ON s0.id = s6.id_site + WHERE s0.date_fin IS NULL + AND s3.nom_court::text = 'Milieux humides'::text + GROUP BY s6.id + ORDER BY s6.id +), +tmp_other_name as ( + SELECT + id_site_new, + ARRAY_AGG(id_site_old) id_site_old + FROM sites.r_site_maj + WHERE id_site_old IS NOT NULL + GROUP BY id_site_new +) +SELECT + s0.id AS site_code, + s7.id_site_old old_code, + s0.nom, + s2.autre_nom, + s1.auteur_site, + s6.auteur_geom, + ( + SELECT DISTINCT ON (date) auteur + FROM (values (s6.date,s6.auteur_geom),(s62.date,s62.auteur),(s63.date,s63.auteur),(s64.date,s64.auteur),(s65.date,s65.auteur), + (s66.date,s66.auteur),(s67.date,s67.auteur),(s68.date,s68.auteur)) AS value(date,auteur) + WHERE date = (SELECT MAX(date) FROM (values (s6.date),(s62.date),(s63.date),(s64.date),(s65.date),(s66.date),(s67.date),(s68.date)) AS value(date)) + ) as auteur_last_maj, + s0.date_deb AS date_site, + s6.date AS date_geom, + (SELECT MAX(date) FROM (values (s6.date),(s62.date),(s63.date),(s64.date),(s65.date),(s66.date),(s67.date),(s68.date)) AS value(date)) as date_last_maj, + s3.nom_court AS type_milieu, + s4.nom AS type_site, + s5.id||' - '||s5.nom AS typo_sdage, + s5.mnemo mnemo_sdage, + s0.remarques AS rmq_site, + s6.rmq_fct_majeur, + s6.rmq_interet_patri, + s6.rmq_bilan_menace, + s6.rmq_orient_act, + s6.rmq_usage_process, + s62.code_cb, + s62.lib_cb, + s63.activ_hum, + s63.impact, + s63."position", + s63.rmq_activ_hum, + s64.connexion, + s65.subm_orig, + s65.subm_freq, + s65.subm_etend, + s66.fct_bio, + s66.fct_hydro, + s66.int_patri, + s66."val_socioEco", + s67.crit_delim, + s67.crit_def_esp, + s68.entree_eau_reg, + s68.entree_eau_perm, + s68.entree_eau_topo, + s68.sortie_eau_reg, + 
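+    -- s6 below is pre-deduplicated: the FROM clause keeps one geometry per site,
+    -- the most recent by "date" then date_insert (see the DISTINCT ON subquery)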
s68.sortie_eau_perm, + s68.sortie_eau_topo, + s6.id_origine, + s6.geom +FROM sites.sites s0 + LEFT JOIN tmp_auteur_site s1 ON s0.id = s1.id + LEFT JOIN sites.autre_nom s2 ON s0.id = s2.id_site + LEFT JOIN sites.type_milieu s3 ON s0.id_type_milieu = s3.id + LEFT JOIN sites.type_site s4 ON s0.id_type_site::text = s4.id::text + LEFT JOIN sites.typo_sdage s5 ON s0.id_typo_sdage::text = s5.id::text + LEFT JOIN (SELECT DISTINCT ON (sg.id_site) * + FROM sites.r_sites_geom sg + LEFT JOIN tmp_auteur_geom s61 USING (id) --ON s6.id = s61.id + ORDER BY sg.id_site, "date" DESC, date_insert DESC + ) s6 ON s0.id::text = s6.id_site::text + LEFT JOIN zones_humides.v_zh_hab s62 ON s0.id = s62.id_site + LEFT JOIN zones_humides.v_zh_usgprocess s63 ON s0.id = s63.id_site + LEFT JOIN zones_humides.v_zh_connex s64 ON s0.id = s64.id_site + LEFT JOIN zones_humides.v_zh_submertion s65 ON s0.id = s65.id_site + LEFT JOIN zones_humides.v_zh_fctecosociopatri_cross s66 ON s0.id = s66.id_site + LEFT JOIN zones_humides.v_zh_critdelim_cross s67 ON s0.id = s67.id_site + LEFT JOIN zones_humides.v_zh_reghydro_cross s68 ON s0.id = s68.id_site + LEFT JOIN tmp_other_name s7 ON s0.id = s7.id_site_new +WHERE s0.date_fin IS NULL + AND s3.nom_court::text = 'Milieux humides'::text + AND s6.date = (SELECT MAX(date) FROM sites.r_sites_geom WHERE r_sites_geom.id_site = s0.id) +ORDER BY s0.id ASC NULLS FIRST; +""" +grant = """ +GRANT ALL ON TABLE zones_humides.v_zoneshumides TO grp_admin; +GRANT SELECT ON TABLE zones_humides.v_zoneshumides TO grp_consult; +""" +comment = """ +COMMENT ON COLUMN zones_humides.v_zoneshumides.site_code IS 'Identifiant de la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.old_code IS 'Ancien identifiant de la zone humide pouvant se retrouver dans des données historiques.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.nom IS 'Nom de la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.autre_nom IS 'Autre nom de la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.auteur_site IS 'Auteur ayant caractérisé la zone humide pour la première fois.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.auteur_geom IS 'Auteur ayant définis la géometrie actuelle de la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.auteur_last_maj IS 'Auteur le plus récent ayant défini les attributs de la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.date_site IS 'Date de la première description de la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.date_geom IS 'Date de la géométrie.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.date_last_maj IS 'Date des attributs les plus récents.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.type_milieu IS 'Caractérisation du milieu.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.type_site IS ''; +COMMENT ON COLUMN zones_humides.v_zoneshumides.typo_sdage IS 'Typologie sdage de la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.rmq_site IS 'Remarques générale concernant la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.rmq_fct_majeur IS 'Remarques sur les fonctions majeurs de la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.rmq_interet_patri IS 'Remarques sur les intérêts patrimoniaux de la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.rmq_bilan_menace IS 'Remarques globales sur les menaces qui concernent la zone humide.'; +COMMENT ON COLUMN zones_humides.v_zoneshumides.rmq_orient_act IS 'Remarques sur les orientations et les actes de la zone 
humide.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.rmq_usage_process IS 'Remarques concernant les usages et les processus naturels de la zone humide.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.code_cb IS 'Liste des codes CORINE Biotopes identifiés sur la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.lib_cb IS 'Liste des habitats CORINE Biotopes identifiés sur la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.activ_hum IS 'Liste des activités humaines identifiées sur la zone humide. Chaque élément de la liste est séparé par un '';'' et est lié aux colonnes ''impact'' et ''position''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.impact IS 'Liste des impacts identifiés et engendrés par les activités humaines sur la zone humide. Chaque élément de la liste est séparé par un '';'' et est lié aux colonnes ''activ_hum'' et ''position''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides."position" IS 'Liste des positionnements des différentes activités humaines sur la zone humide. Chaque élément de la liste est séparé par un '';'' et est lié aux colonnes ''activ_hum'' et ''impact''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.rmq_activ_hum IS 'Remarques concernant les activités humaines identifiées sur la zone humide.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.connexion IS 'Connexion de la zone humide dans son environnement.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.subm_orig IS 'Origine de l''eau submersive.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.subm_freq IS 'Fréquence de la submersion de la zone humide.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.subm_etend IS 'Étendue de la submersion de la zone humide.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.fct_bio IS 'Liste des fonctions biologiques de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.fct_hydro IS 'Liste des fonctions hydrologiques de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.int_patri IS 'Liste des intérêts patrimoniaux de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides."val_socioEco" IS 'Liste des valeurs socio-économiques de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.crit_delim IS 'Liste des critères de délimitation de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.crit_def_esp IS 'Liste des critères de l''espace de fonctionnalité de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.entree_eau_reg IS 'Liste des entrées d''eau du régime hydrique de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.entree_eau_perm IS 'Liste des permanences respectivement de chaque entrée d''eau de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.entree_eau_topo IS 'Liste de la toponymie respectivement de chaque entrée d''eau de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.sortie_eau_reg IS 'Liste des sorties d''eau du régime hydrique de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.sortie_eau_perm IS 'Liste des permanences respectivement de chaque sortie d''eau de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+COMMENT ON COLUMN zones_humides.v_zoneshumides.sortie_eau_topo IS 'Liste de la toponymie respectivement de chaque sortie d''eau de la zone humide. Chaque élément de la liste est séparé par un '';''.';
+"""
+with con.begin() as cnx:
+    cnx.execute(text(v_zoneshumides))
+    cnx.execute(grant)
+    cnx.execute(comment)
+
+
+v_rhomeosite = """
+DROP VIEW IF EXISTS zones_humides.v_rhomeosite;
+CREATE OR REPLACE VIEW zones_humides.v_rhomeosite
+AS
+SELECT
+    v.site_code||' - '||v.nom "NAME",
+    SPLIT_PART(v.auteur_geom,' (',1) "REFERENT",
+    REPLACE(SPLIT_PART(v.auteur_geom,' (',2),')','') "ORG",
+    SPLIT_PART(v.typo_sdage,' - ',1) "TYPE",
+    CASE WHEN r.nom::text = 'alpin' THEN '1'
+         WHEN r.nom::text = 'continental' THEN '2'
+         WHEN r.nom::text = 'mediterraneen' THEN '4'
+    END "ODONATE",
+    v.geom
+FROM zones_humides.v_zoneshumides v, ref_territoire.ref_biogeo r
+WHERE st_intersects(v.geom, r.geom)
+"""
+grant = """
+GRANT ALL ON TABLE zones_humides.v_rhomeosite TO grp_admin;
+GRANT SELECT ON TABLE zones_humides.v_rhomeosite TO grp_consult;
+"""
+with con.begin() as cnx:
+    cnx.execute(text(v_rhomeosite))
+    cnx.execute(grant)
+
+
+test = """
+SELECT * FROM zones_humides.v_zoneshumides;
+"""
+data = pd.read_sql_query(
+    sql = text(test),
+    con = con)
+
+
+# two equivalent checks: rows whose site_code appears more than once
+data[~data.index.isin(data.drop_duplicates(subset='site_code',keep=False).index)]
+data[data.duplicated(subset='site_code',keep=False)]
\ No newline at end of file
diff --git a/3_AZALEE/del_psduplicated.py b/3_AZALEE/del_psduplicated.py
new file mode 100644
index 0000000..6a5bf25
--- /dev/null
+++ b/3_AZALEE/del_psduplicated.py
@@ -0,0 +1,187 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+#Nom : del_psduplicated.py
+#Description : Suppression des géométries en doublon sous un nom différent et avec des erreurs d'intégration des données
+#Copyright : 2022-07-20, CEN38
+#Auteur : Colas Geier
+#Version : 1.0
+
+import geopandas as gpd
+import pycen
+from pycen import con,update_to_sql
+
+df1 = gpd.read_postgis('SELECT * FROM ps.v_covered_ps ORDER BY 2',con,geom_col='geom_old')
+df2 = gpd.read_postgis('SELECT * FROM ps.v_covered_ps ORDER BY 2',con,geom_col='geom_new')
+# df.reset_index(inplace=True, drop=False)
+df1.to_postgis('covered_ps_old',con,'ps','replace',False,geom_col='geom_old')
+df2.to_postgis('covered_ps_new',con,'ps','replace',False,geom_col='geom_new')
+
+df = df1.copy()
+
+# 
('38BDAU0907','38BIEV0032','38BIEV0033','38BDAU0905','38PCHA1088','38PCHA1089','38PCHA1002','38PCHA0908','38VERC3112','38PCHA0316','38VERC3171','38PCHA0325','38VERC2662','38PCHA0232','38PCHA0244','38VERC2732','38PCHA1086','38PCHA1087','38VERC3206','38PCHA0332','38PCHA0297','38VERC3013','38VERC2983','38PCHA0291','38VERC2961','38PCHA0286','38VERC3212','38PCHA0333','38PCHA0326','38VERC3175','38PCHA1005','38PCHA0018','38PCHA0474','38VERC3681','38PCHA0044','38PCHA1007','38PCHA0910','38PCHA1003','38PCHA0274','38VERC2902','38PCHA0032','38PCHA1006','38PCHA0121','38VERC1824','38VERC1743','38PCHA0111','38PCHA1004','38PCHA0928','38VERC1848','38PCHA0123','38PCHA0057','38PCHA1008','38PCHA0065','38PCHA1009','38BDAU0908','38VERC2008','38VERC2495','38BELL0154','38GRES0637','38VERC3451','38VERC0383','38VERC3222','38VERC3348','38GRES0629','38VERC2912','38CHAR0502','38CHAR0045','38VERC2058','38VERC1931','38MATH0010','38VERC1924','38CHAR0040','38GRES0006','38CHAR0900','38BELL0030','38VERC2163','38GRES0066','38VERC2100','38CHAR0049','38VERC2187','38GRES0077','38VERC2272','38VERC2807','38CHAR0079','38VERC2658','38CHAR0066','38CHAR0100','38VERC3008','38CHAR0903','38BELL0005','38CHAR0901','38CHAR0008','38VERC2414','38VERC0219','38VERC2514','38GRES0084','38VERC3076','38GRES0119','38VERC3311','38GRES0132','38VERC2718','38CHAR0072','38VERC3205','38CHAR0115','38VERC3108','38BELL0049','38VERC2377','38VERC0215','38VERC2387','38VERC0216','38VERC2431','38BELL0035','38VERC2408','38GRES0080','38GRES0686','38VERC3623','38CHAR0468','38VERC2384','38MATH0111','38VERC2649','38BELL0198','38VERC2661','38GRES0437','38VERC1748','38VERC2614','38GRES0472','38GRES0462','38VERC2446','38TRIE0215','38VERC2356','38VERC2329','38TRIE0211','38VERC2315','38TRIE0210','38TRIE0222','38VERC2405','38VERC2439','38TRIE0227','38TRIE0228','38VERC2443','38TRIE0229','38VERC2455','38CHAR0902','38TRIE0022','38GRES0141','38VERC3408','38VERC3159','38CHAR0113','38CHAR0001','38CHAR0899','38CHAR0905','38GRES0028','38CHAR0906','38CHAR0012','38CHAR0017','38CHAR0908','38GRES0136','38VERC3376','38VERC1668','38GRES0053','38GRES0127','38VERC3197','38VERC1549','38GRES0045','38GRES0030','38CHAR0907','38GRES1039','38GRES0031','38BIEV0004','38VERC3399','38CHAR0904','38GRES0025','38CHAR0029','38VERC1533','38CHAR0133','38VERC3354','38CHAR0031','38VERC1618','38VERC3369','38CHAR0135','38VERC3464','38GRES0143','38GRES0016','38GRES1038','38BELL0192','38VERC2556','38VERC2513','38CHAR0402','38BELL0164','38BELL0492','38MATH0024','38VERC2487','38VERC2646','38TRIE0250','38VERC3523','38TRIE0377','38BELL0489','38BELL0155','38BELL0491','38MCHA0009','38BELL0490','38BELL0162','38VERC1687','38CHAR0034','38VERC1721','38BELL0025','38BELL0016','38CHAR0909','38MATH0005','38VERC1756','38BELL0043','38VERC2641','38CHAR0027','38CHAR0910','38GRES0089','38VERC2601','38GRES0092','38VERC2629') +# 
('38BIEV0032','38BDAU0907','38BDAU0905','38BIEV0033','38PCHA1089','38PCHA1088','38PCHA1002','38PCHA0908','38VERC3112','38PCHA0316','38VERC3171','38PCHA0325','38VERC2662','38PCHA0232','38VERC2732','38PCHA0244','38PCHA1086','38PCHA1087','38VERC3206','38PCHA0332','38VERC3013','38PCHA0297','38PCHA0291','38VERC2983','38VERC2961','38PCHA0286','38PCHA0333','38VERC3212','38PCHA0326','38VERC3175','38PCHA1005','38PCHA0018','38VERC3681','38PCHA0474','38PCHA1007','38PCHA0044','38PCHA0910','38PCHA1003','38VERC2902','38PCHA0274','38PCHA1006','38PCHA0032','38VERC1824','38PCHA0121','38PCHA0111','38VERC1743','38PCHA0928','38PCHA1004','38VERC1848','38PCHA0123','38PCHA0057','38PCHA1008','38PCHA1009','38PCHA0065','38BDAU0908','38VERC2008','38VERC2495','38BELL0154','38GRES0637','38VERC3451','38VERC0383','38VERC3222','38GRES0629','38VERC3348','38CHAR0502','38VERC2912','38VERC3623','38GRES0686','38CHAR0468','38VERC2384','38TRIE0215','38VERC2356','38VERC2329','38TRIE0211','38VERC2315','38TRIE0210','38VERC2405','38TRIE0222','38VERC2439','38TRIE0227','38VERC2443','38TRIE0228','38VERC2455','38TRIE0229','38VERC2487','38MATH0024','38VERC2646','38TRIE0250','38VERC3523','38TRIE0377') +# ('38BDAU0907','38BIEV0032','38BDAU0905','38BIEV0033','38PCHA1089','38PCHA1088','38PCHA1002','38PCHA0908','38PCHA0316','38VERC3112','38VERC3171','38PCHA0325','38PCHA0232','38VERC2662','38VERC2732','38PCHA0244','38PCHA1086','38PCHA1087','38VERC3206','38PCHA0332','38VERC3013','38PCHA0297','38PCHA0291','38VERC2983','38PCHA0286','38VERC2961','38PCHA0333','38VERC3212','38PCHA0326','38VERC3175','38PCHA0018','38PCHA1005','38PCHA0474','38VERC3681','38PCHA0044','38PCHA1007','38PCHA0910','38PCHA1003','38VERC2902','38PCHA0274','38PCHA0032','38PCHA1006','38VERC1824','38PCHA0121','38VERC1743','38PCHA0111','38PCHA0928','38PCHA1004','38PCHA0123','38VERC1848','38PCHA0057','38PCHA1008','38PCHA0065','38PCHA1009') +# ('38BDAU0907','38BIEV0032','38BDAU0905','38BIEV0033','38PCHA1088','38PCHA1089','38PCHA0316','38VERC3112','38PCHA0325','38VERC3171','38PCHA0232','38VERC2662','38PCHA0244','38VERC2732','38PCHA1087','38PCHA1086','38VERC3206','38PCHA0332','38VERC3013','38PCHA0297','38PCHA0291','38VERC2983','38PCHA0286','38VERC2961','38PCHA0333','38VERC3212','38VERC3175','38PCHA0326','38PCHA0018','38PCHA1005','38VERC3681','38PCHA0474','38PCHA0044','38PCHA1007','38PCHA0274','38VERC2902','38PCHA1006','38PCHA0032','38VERC1824','38PCHA0121','38VERC1743','38PCHA0111','38VERC1848','38PCHA0123','38PCHA0057','38PCHA1008','38PCHA0065','38PCHA1009') +# ('38BDAU0907','38BIEV0032','38BDAU0905','38BIEV0033','38PCHA1089','38PCHA1088','38PCHA0316','38VERC3112','38PCHA0325','38VERC3171','38PCHA0232','38VERC2662','38VERC2732','38PCHA0244','38PCHA1086','38PCHA1087','38VERC3206','38PCHA0332','38VERC3013','38PCHA0297','38VERC2983','38PCHA0291','38VERC2961','38PCHA0286','38PCHA0333','38VERC3212','38VERC3175','38PCHA0326','38PCHA0018','38PCHA1005','38PCHA0274','38VERC2902','38PCHA0121','38VERC1824','38PCHA0111','38VERC1743','38VERC1848','38PCHA0123','38PCHA1008','38PCHA0057','38PCHA1009','38PCHA0065') +# ('38BIEV0032','38BDAU0907','38BIEV0033','38BDAU0905','38PCHA1088','38PCHA1089','38PCHA1087','38PCHA1086') + +v_ps = gpd.read_postgis( + "SELECT * FROM ps.v_pelouseseches WHERE site_code in ('%s') ORDER BY source,geom"%"','".join(df.id_site.tolist()) + ,pycen.con) +v_ps.set_index(['site_code','district_nat'],inplace=True) +v_ps.dropna(axis=1,how='all', inplace=True) +v_ps.fillna('NA', inplace=True) + +for col in v_ps.columns[v_ps.columns.str.startswith('auteur')]: + tmp = 
v_ps[col].str.split(';') + for x in tmp: x.sort() + v_ps[col] = [';'.join(x) for x in tmp] + +vp_clean = v_ps.drop_duplicates() + + +v_ps.loc[~v_ps.index.isin(vp_clean.index)] +v_ps.loc[~v_ps.index.isin(vp_clean.index)].droplevel(-1).index.tolist() +# ('38CHAR0915','38CHAR0919','38CHAR0922','38CHAR0936','38CHAR0938','38CHAR0940','38CHAR1195','38CHAR1197','38CHAR1203','38CHAR1220','38CHAR1221','38CHAR1222','38CHAR1223','38CHAR1224','38CHAR1225','38CHAR1226','38CHAR1227','38CHAR1228','38CHAR1229','38CHAR1230','38CHAR1231','38CHAR1232','38CHAR1233','38CHAR1234','38CHAR1235','38CHAR1236','38CHAR1237','38CHAR1238','38CHAR1239','38CHAR1240','38CHAR1241','38CHAR1242','38CHAR1243','38CHAR1244','38CHAR1245','38GRES1073','38GRES1074','38GRES1075','38MCHA0049','38PCHA1083','38PCHA1208','38PCHA1210','38PCHA1215','38PCHA1221','38TRIE1173','38TRIE2416','38TRIE2481','38TRIE2548','38TRIE2563','38TRIE2590','38TRIE2592','38TRIE2594','38TRIE2595','38TRIE2596','38TRIE2597','38TRIE2598','38TRIE2599','38TRIE2602','38TRIE2604','38TRIE2605','38TRIE2607','38TRIE2608','38TRIE2610','38TRIE2611','38TRIE2614','38TRIE2615','38TRIE2616','38TRIE2618','38TRIE2619','38TRIE2620','38TRIE2621','38TRIE2622','38TRIE2623','38TRIE2624','38TRIE2625','38TRIE2626','38TRIE2627','38TRIE2628','38TRIE2629','38TRIE2631','38TRIE2632','38TRIE2633','38TRIE2634','38TRIE2635','38TRIE2636','38TRIE2637','38TRIE2638','38TRIE2639','38TRIE2640','38TRIE2641','38TRIE2644','38TRIE2645','38TRIE2646','38TRIE2647','38TRIE2648','38TRIE2650','38TRIE2651','38TRIE2652','38TRIE2653','38TRIE2654','38TRIE2656','38TRIE2657','38TRIE2658','38TRIE2659','38TRIE2662','38TRIE2663','38TRIE2664','38TRIE2665','38TRIE2666','38TRIE2667','38TRIE2668','38TRIE2669','38TRIE2670','38TRIE2671','38TRIE2672','38TRIE2674','38TRIE2675','38TRIE2676','38TRIE2677','38TRIE2678','38TRIE2679','38TRIE2680','38TRIE2682','38TRIE2683','38TRIE2688','38TRIE2689','38TRIE2690','38TRIE2693','38TRIE2695','38TRIE2696','38TRIE2697','38TRIE2698','38TRIE2699','38TRIE2700','38TRIE2701','38TRIE2702','38TRIE2703','38TRIE2704','38TRIE2705','38TRIE2707','38TRIE2708','38TRIE2710','38TRIE2712','38TRIE2713','38TRIE2714','38TRIE2715','38TRIE2720','38TRIE2721','38TRIE2722','38TRIE2723','38TRIE2724','38TRIE2727','38TRIE2731','38TRIE2735','38TRIE2736','38TRIE2739','38TRIE2741','38TRIE2743','38TRIE2744','38TRIE2749','38TRIE2750','38TRIE2758','38TRIE2760','38TRIE2762','38TRIE2764','38TRIE2765','38TRIE2766','38TRIE2767','38TRIE2768','38TRIE2769','38TRIE2771','38TRIE2776','38TRIE2780','38TRIE2788','38VERC3691','38VERC3692','38VERC3698','38VERC3701','38VERC3903','38VERC3904','38VERC3906','38VERC3921','38VERC3924','38VERC3936','38VERC3937','38VERC3940','38VERC3941','38VERC3943','38VERC3945','38VERC3946','38VERC3948','38VERC3949','38VERC3950','38VERC3953','38VERC4017','38VERC4104','38VERC4123','38VERC4124','38VERC4180','38VERC4187','38VERC4188','38VERC4197','38VERC4205','38VERC4218','38VERC4290','38VERC4294','38VERC4364','38VERC4392','38VERC4396','38VERC4405','38VERC4406','38VERC4407','38VERC4408','38VERC4409','38VERC4446','38VERC4447','38VERC4448','38VERC4453','38VERC4462','38VERC4464','38VERC4469','38VERC4484','38VERC4485','38VERC4492','38VERC4498','38VERC4512','38VERC4526','38VERC4527','38VERC4530','38VERC4544','38VERC4558','38VERC4576','38VERC4577','38VERC4578','38VERC4579','38VERC4585','38VERC4588','38VERC4590','38VERC4627','38VERC4628','38VERC4636','38VERC4638','38VERC4639','38VERC4640','38VERC4643','38VERC4649','38VERC4650','38VERC4654','38VERC4655','38VERC4656','38VERC4657','38VERC4658','38VERC4659','38VERC4660',
'38VERC4661','38VERC4662','38VERC4663','38VERC4664','38VERC4665','38VERC4666','38VERC4667','38VERC4668','38VERC4669','38VERC4670','38VERC4671','38VERC4672','38VERC4673','38VERC4674','38VERC4675','38VERC4676','38VERC4678','38VERC4679','38VERC4680','38VERC4681','38VERC4685','38VERC4686','38VERC4692') + + +'38GRES1029','38GRES1031','38VERC0811','38VERC0861','38VERC1045','38VERC1427','38VERC1433','38VERC1453','38VERC1456','38VERC1465','38VERC1489','38VERC1494','38VERC3704','38VERC3708','38VERC4129','38VERC4134' +'38BIEV0032','38BDAU0907','38BDAU0905','38BIEV0033','38PCHA1089','38PCHA1088','38PCHA1002','38PCHA0908','38VERC3112','38PCHA0316','38PCHA0906','38PCHA1001','38PCHA0325','38VERC3171','38PCHA0232','38VERC2662','38VERC2732','38PCHA0244','38PCHA1086','38PCHA1087','38PCHA0332','38VERC3206','38VERC3013','38PCHA0297','38VERC2983','38PCHA0291','38VERC2961','38PCHA0286','38PCHA0333','38VERC3212','38VERC3175','38PCHA0326','38PCHA0018','38PCHA1005','38PCHA0474','38VERC3681','38PCHA0044','38PCHA1007','38PCHA0910','38PCHA1003','38PCHA0274','38VERC2902','38PCHA1006','38PCHA0032','38PCHA0121','38VERC1824','38PCHA0111','38VERC1743','38PCHA1004','38PCHA0928','38PCHA0123','38VERC1848','38PCHA1008','38PCHA0057','38PCHA0065','38PCHA1009','38VERC2008','38BDAU0908','38BELL0154','38VERC2495','38GRES0637','38VERC3451','38VERC0383','38VERC3222','38VERC3348','38GRES0629','38VERC2912','38CHAR0502','38CHAR0045','38VERC2058','38VERC1931','38MATH0010','38CHAR0040','38VERC1924','38GRES0006','38CHAR0900','38VERC2163','38BELL0030','38VERC2100','38GRES0066','38VERC2187','38CHAR0049','38VERC2272','38GRES0077','38VERC2807','38CHAR0079','38VERC2658','38CHAR0066','38CHAR0100','38VERC3008','38BELL0005','38CHAR0903','38CHAR0008','38CHAR0901','38VERC0219','38VERC2414','38VERC2514','38GRES0084','38VERC3076','38GRES0119','38GRES0132','38VERC3311','38CHAR0072','38VERC2718','38CHAR0115','38VERC3205','38VERC3108','38BELL0049','38VERC0215','38VERC2377','38VERC2387','38VERC0216','38BELL0035','38VERC2431','38GRES0080','38VERC2408','38GRES0686','38VERC3623','38VERC2384','38CHAR0468','38MATH0111','38VERC2649','38VERC2661','38BELL0198','38VERC1748','38GRES0437','38GRES0472','38VERC2614','38GRES0462','38VERC2446','38TRIE0215','38VERC2356','38TRIE0211','38VERC2329','38VERC2315','38TRIE0210','38TRIE0222','38VERC2405','38VERC2439','38TRIE0227','38VERC2443','38TRIE0228','38VERC2455','38TRIE0229','38TRIE0022','38CHAR0902','38GRES0141','38VERC3408','38CHAR0113','38VERC3159','38CHAR0001','38CHAR0899','38CHAR0905','38GRES0028','38CHAR0012','38CHAR0906','38CHAR0017','38CHAR0908','38GRES0214','38VERC2141','38VERC3072','38VERC0290','38VERC3376','38GRES0136','38GRES0053','38VERC1668','38GRES0127','38VERC3197','38GRES0183','38GRES1040','38VERC1549','38GRES0045','38GRES0030','38CHAR0907','38GRES1039','38GRES0031','38VERC3399','38BIEV0004','38CHAR0904','38GRES0025','38VERC1533','38CHAR0029','38CHAR0133','38VERC3354','38CHAR0031','38VERC1618','38MATH0083','38VERC1830','38GRES0203','38VERC1858','38MATH0082','38VERC1820','38CHAR0176','38GRES1041','38GRES0251','38VERC2935','38VERC0254','38VERC1926','38VERC3369','38CHAR0135','38VERC3464','38GRES0143','38GRES1038','38GRES0016','38VERC3139','38GRES0278','38VERC2556','38BELL0192','38VERC2513','38CHAR0402','38BELL0164','38BELL0492','38MATH0024','38VERC2487','38TRIE0250','38VERC2646','38TRIE0377','38VERC3523','38BELL0489','38BELL0155','38BELL0491','38MCHA0009','38BELL0162','38BELL0490','38CHAR0034','38VERC1687','38VERC1721','38BELL0025','38BELL0016','38CHAR0909','38MATH0005','38VERC1756','38VERC2641','38BELL0043'
,'38CHAR0027','38CHAR0910','38VERC2601','38GRES0089','38GRES0092','38VERC2629' + +s = gpd.pd.Series([ +'CHA01','CHA01','CHA17','CHA17','STAP01','STAP01','CHE30','CHE30','STBO03','STBO03','STAN26','STAN26','STAN36','STAN36','STAN34','STAN34','STBO05','STBO05','STBO06','STBO06','STLA039','STLA039','STAN22','STAN22','MON50','MON50','MON56','MON56','MON55','MON55','STLA063','STLA063','STLA070','STLA070', +]) + +sql = ''' +SELECT + v1.site_code, + v1.geom, + v1."source", + v1.rmq_usage_process, + CASE WHEN v1.id_origine IS NULL THEN v2.id_origine ELSE v1.id_origine END id_origine + --v1.id_origine --, v2.geom geom_new +FROM ps.v_pelouseseches v1, ps.v_pelouseseches v2 +WHERE v1.geom = v2.geom + AND v1.site_code <> v2.site_code +ORDER BY 3,2; +''' +upd = gpd.read_postgis(sql,con) +upd.drop_duplicates(inplace=True) +upd.rename(columns={'site_code':'id_site'},inplace=True) +upd.loc[upd.source=='PS_CHAMBARAN_CEN38_2013','rmq_usage_process'] = \ + upd.loc[upd.source=='PS_CHAMBARAN_CEN38_2013','rmq_usage_process'].replace(["\|",'\/','fasciès'],[" x "," x ",'faciès'],regex=True) +upd = upd[['id_site','rmq_usage_process']] +update_to_sql(upd,con,'r_sites_geom','sites','id_site') + +'38MCHA0009','38BELL0491','38BELL0164','38BELL0492','38BELL0154','38VERC2495','38GRES1041','38CHAR0176','38VERC2614','38GRES0472','38VERC2649','38MATH0111','38BELL0198','38VERC2661','38GRES0437','38VERC1748','38MATH0082','38VERC1820','38VERC1830','38MATH0083','38PCHA1087','38PCHA1088','38PCHA1086','38PCHA1089','38VERC1858','38GRES0203','38BDAU0908','38VERC2008','38VERC1926','38VERC0254','38GRES0462','38VERC2446','38VERC2513','38CHAR0402','38BELL0192','38VERC2556','38VERC2384','38CHAR0468','38CHAR0502','38VERC2912','38GRES0214','38VERC2141','38VERC2935','38GRES0251','38VERC3222','38VERC0383','38VERC3072','38VERC0290','38GRES0637','38VERC3451','38VERC3348','38GRES0629','38GRES0278','38VERC3139','38VERC3623','38GRES0686','38BELL0489','38BELL0155','38BDAU0905','38BDAU0907','38BIEV0032','38BIEV0033','38GRES0183','38GRES1040','38BELL0490','38BELL0162','38VERC2629','38GRES0092','38VERC2058','38CHAR0045','38CHAR0115','38VERC3205','38CHAR0901','38CHAR0008','38VERC2431','38BELL0035','38GRES0080','38VERC2408','38VERC0216','38VERC2387','38VERC0215','38VERC2377','38BELL0043','38VERC2641','38CHAR0072','38VERC2718','38CHAR0066','38VERC2658','38CHAR0100','38VERC3008','38BELL0049','38VERC3108','38BELL0005','38CHAR0903','38GRES0119','38VERC3076','38GRES0084','38VERC2514','38CHAR0909','38BELL0016','38VERC3311','38GRES0132','38VERC1687','38CHAR0034','38VERC1756','38MATH0005','38VERC2807','38CHAR0079','38VERC2414','38VERC0219','38BELL0025','38VERC1721','38CHAR0910','38CHAR0027','38GRES0077','38VERC2272','38VERC2187','38CHAR0049','38GRES0066','38VERC2100','38VERC2163','38BELL0030','38GRES0089','38VERC2601','38GRES0006','38CHAR0900','38CHAR0040','38VERC1924','38MATH0010','38VERC1931','38VERC3112','38PCHA0316','38VERC3171','38PCHA0325','38PCHA0906','38PCHA1001','38VERC2732','38PCHA0244','38TRIE0250','38VERC2646','38VERC3399','38BIEV0004','38CHAR0904','38GRES0025','38CHAR0031','38VERC1618','38CHAR0908','38CHAR0017','38GRES0028','38CHAR0905','38TRIE0022','38CHAR0902','38GRES0141','38VERC3408','38CHAR0029','38VERC1533','38VERC3354','38CHAR0133','38GRES0143','38VERC3464','38GRES1038','38GRES0016','38VERC3369','38CHAR0135','38VERC1668','38GRES0053','38VERC3376','38GRES0136','38CHAR0001','38CHAR0899','38CHAR0113','38VERC3159','38VERC1549','38GRES0045','38GRES0127','38VERC3197','38CHAR0907','38GRES0030','38GRES1039','38GRES0031','38CHAR0012','38CHAR0906','
38VERC1824','38PCHA0121','38PCHA0928','38PCHA1004','38VERC1848','38PCHA0123','38PCHA0111','38VERC1743','38VERC2487','38MATH0024','38VERC2315','38TRIE0210','38TRIE0211','38VERC2329','38TRIE0215','38VERC2356','38TRIE0222','38VERC2405','38TRIE0377','38VERC3523','38VERC2439','38TRIE0227','38TRIE0229','38VERC2455','38TRIE0228','38VERC2443','38VERC2902','38PCHA0274','38VERC3013','38PCHA0297','38PCHA0286','38VERC2961','38VERC2983','38PCHA0291','38VERC2662','38PCHA0232','38VERC3206','38PCHA0332','38VERC3212','38PCHA0333','38PCHA0326','38VERC3175','38PCHA1003','38PCHA0910','38PCHA1007','38PCHA0044','38VERC3681','38PCHA0474','38PCHA1006','38PCHA0032','38PCHA0018','38PCHA1005','38PCHA0057','38PCHA1008','38PCHA1009','38PCHA0065','38PCHA0908','38PCHA1002' + +drop_site = [ + '38VERC2141','38VERC3072','38GRES1040','38VERC1830','38VERC1858','' +] +'38VERC2141','38VERC3072','38GRES1040','38VERC1830','38VERC1858','38VERC1820','38GRES1041','38VERC2935','38VERC1926','38VERC3139' +('38VERC2058','38VERC1931','38VERC1924','38CHAR0900','38VERC2163','38VERC2100','38VERC2187','38VERC2272','38VERC2807','38VERC2658','38VERC3008','38CHAR0903','38CHAR0901','38VERC2414','38VERC2514','38VERC3076','38VERC3311','38VERC2718','38VERC3205','38VERC3108','38VERC2377','38VERC2387','38VERC2431','38VERC2408','38CHAR0902','38VERC3408','38VERC3159','38CHAR0899','38CHAR0905','38CHAR0906','38CHAR0908','38VERC3376','38VERC1668','38VERC3197','38VERC1549','38CHAR0907','38GRES1039','38VERC3399','38CHAR0904','38VERC1533','38VERC3354','38VERC1618','38VERC3369','38VERC3464','38GRES1038','38VERC1687','38VERC1721','38CHAR0909','38VERC1756','38VERC2641','38CHAR0910','38VERC2601','38VERC2629') +('38VERC2649','38VERC2661','38VERC1748','38VERC2614','38VERC2446','38VERC2556','38VERC2513','38BELL0492','38BELL0489','38BELL0491','38BELL0490') +'38VERC2495','38VERC2008' +('38VERC2356','38VERC2329','38VERC2315','38VERC2405','38VERC2439','38VERC2443','38VERC2455','38VERC2487','38VERC2646','38VERC3523') +('38VERC3451','38VERC3222','38VERC3348','38VERC2912','38VERC3623','38VERC2384') +'38VERC3681','38PCHA1007','38PCHA1006' +('38VERC3112','38VERC3171','38VERC2662','38VERC2732','38VERC3206','38VERC3013','38VERC2983','38VERC2961','38VERC3212','38VERC3175','38PCHA1005','38VERC2902','38VERC1824','38VERC1743','38VERC1848','38PCHA1008','38PCHA1009') +('38PCHA1087','38PCHA1088') + + + +v_ps[(v_ps.source=='PS_CHAMBARAN_CEN38_2013')& ~(v_ps.auteur_site.isin([ + 'COSQUER Mélanie (CEN Isère)' + ]))].reset_index().site_code.tolist() + + + + + + +path='/home/colas/Documents/9_PROJETS/2_PS/CBNA/Trieves_Vercors/' +file = 'ps_aInserer_for INS1.gpkg' +df = gpd.read_file(path+file) +df.rename_geometry('geom', inplace=True) + +df['date'] = '2014-01-01' +df['date'] = gpd.pd.to_datetime(df['date']) +df['structure'] = 'CBNA' +# df.rename(columns={"obs":'auteur'}, inplace=True) +df['auteur'] = 'CBNA' +df.loc[df.statut=="Habitat d'intérêt communautaire",'statut'] = \ +"Communautaire" +df.loc[df.statut=="Habitat d'intérêt communautaire retenu prioritaire",'statut'] = \ +"Prioritaire" +df.loc[df.statut=="Habitat d'intérêt communautaire, retenu prioritaire",'statut'] = \ +"Prioritaire" +df.loc[ +df.statut=="""Habitat d'intérêt communautaire, retenu prioritaire pour les sites riches en orchidées""", +'statut'] = "Prioritaire" +df.loc[ +df.statut=="""Habitat d'intérêt communautaire retenu prioritaire pour les sites riches en orchidées""", +'statut'] = "Prioritaire" +df.loc[ +df.statut=="""Habitat communautaire, retenu prioritaire pour les sites riches en orchidées""", +'statut'] = 
"Prioritaire" +df.loc[df.statut=="Habitat d'intérêt communautaire retenu prioritaire",'statut'] = \ +"Prioritaire" +df.to_file(path+file,driver='GPKG') + +'38TRIE1043', '38TRIE1058', '38VERC1189', '38VERC1190','38VERC1191', '38VERC1196', '38VERC1199', '38VERC1200','38VERC1201', '38VERC1202', '38VERC1203', '38VERC1204','38VERC1205', '38VERC1209', '38VERC1210', '38VERC1211','38VERC1212', '38VERC1213', '38VERC1214', '38VERC1216','38VERC1218', '38VERC1227', '38VERC3738', '38VERC3952','38VERC4550', '38VERC4551', '38VERC4552', '38VERC4553','38VERC4554', '38VERC4555', '38VERC4556', '38VERC4557','38VERC4559', '38VERC4560', '38VERC4563', '38VERC4564','38VERC4565', '38VERC4566', '38VERC4568', '38VERC4569','38VERC4570', '38VERC4571', '38VERC4572', '38VERC4573','38VERC4574' +df_maj.loc[df_maj.id_site.isin(['38TRIE0788']),'code_hab2'] = '41.13 (43)' + + + + +df = gpd.read_postgis('SELECT * FROM pelouse_seche."PB_codehab_nonPresent_dans_corineBiotope"',pycen.con_bdcen) +df = df.replace(['38.81','31.8 t'],['31.81','31.8122'],regex=True) +df.date = gpd.pd.to_datetime(df.date) +df = df[~df.id_site.isin(lst_error2)] +'38PCHA0291','38PCHA0333','38PCHA0326','38MATH0083','38PCHA0910','38GRES0025','38CHAR0029','38PCHA0111','38GRES0030','38CHAR0012','38GRES0028','38CHAR0027','38GRES0053','38MATH0005','38BELL0030','38BELL0025','38CHAR0049','38MATH0010','38CHAR0079','38GRES0119','38GRES0127','38BELL0049','38BIEV0004','38GRES0006','38CHAR0176','38GRES0031','38GRES0183','38VERC0254','38GRES0214','38VERC0290','38GRES0278','38MATH0082','38GRES0203','38GRES0251','38CHAR0031','38VERC0383','38GRES0136','38GRES0084','38CHAR0135','38CHAR0133','38GRES0045','38GRES0141','38BELL0192','38BDAU0847','38CHAR0402' +update_to_sql( + df[['id_site','remarques']].rename(columns={'remarques':'rmq_interet_patri'}), + con,'r_sites_geom','sites','id_site' +) + + +df = gpd.read_postgis('SELECT * FROM pelouse_seche."PB_codehabCBNA_nonPresent_dans_corineBiotope"',pycen.con_bdcen) +df = df.replace(['43.171','43.13','31.4B','37.831','52 à 54'],['43','43','31.4','31.831','54'],regex=True) +'38CHAR1238','38CHAR1091','38TRIE2732','38OISA0273','38OISA0272','38GROU0015','38OISA0274','38VERC3785','38TRIE1399','38TRIE1402','38TRIE2443','38TRIE2444','38VERC1467','38VERC1382','38VERC1059','38TRIE0942','38TRIE0788','38OISA0190','38OISA0188','38OISA0189','38VERC0918','38TRIE1010','38TRIE0943','38BVIS0042','38VERC1253','38VERC1322','38VERC1323','38VERC3787' + + + + +gpd.read_postgis("SELECT * FROM sites.r_sites_geom WHERE id_site IN ('%s')"%"','".join(DF.id_site),pycen.con) + + +'38OISA0245','38TRIE1045','38TRIE1165','38TRIE2421','38VERC1401','38VERC1447','38VERC3718','38VERC3766','38VERC4003' +'38VERC4697','38VERC4698','38VERC4699','38VERC4700','38VERC4701','38VERC4704','38VERC4705','38VERC4706','38VERC4707','38VERC4708','38VERC4709','38VERC4710','38VERC4711','38VERC4712','38VERC4713','38VERC4714','38VERC4715','38VERC4716','38VERC4717','38VERC4718','38VERC4719','38VERC4720','38VERC4721','38VERC4722','38VERC4723','38VERC4724','38VERC4726','38VERC4727','38VERC4729','38VERC4730','38VERC4731','38VERC4732','38VERC4733','38VERC4734','38VERC4735','38VERC4736','38VERC4737','38VERC4738','38VERC4739','38VERC4741','38VERC4743','38VERC4746','38VERC4747','38VERC4748' + + +cvr = gpd.read_postgis("SELECT * FROM ps.v_pelouseseches WHERE site_code IN ('38VERC4740','38VERC4725','38VERC1271','38VERC4739')",pycen.con) +df = gpd.read_postgis("SELECT * FROM ps.v_pelouseseches WHERE site_code IN ('38VERC1249','38VERC1251','38VERC1272')",pycen.con) + +tt = 
gpd.overlay(df, gpd.GeoDataFrame(geometry=[cvr.unary_union], crs=cvr.crs), how='difference') +# tt = df.copy() +tt = pycen.tools.Polygons_to_MultiPolygon(tt) +tt.drop(columns=['auteur_geom', 'auteur_attrs'],inplace=True) +tt.drop(columns=['date_geom', 'date_attrs'],inplace=True) +tt.rename(columns={'auteur_site':'auteur'},inplace=True) +tt.rename(columns={'date_site':'date'},inplace=True) +tt.columns = tt.columns.str.replace('cb_hab','code_hab',regex=True) +tt.rename(columns={'site_code':'id_site'},inplace=True) +tt[['auteur','structure']] = tt.auteur.str.split('(',expand=True).sort_index() +tt.auteur = tt.auteur.str.strip() +tt.structure = tt.structure.str.strip().str.replace(r'\)','',regex=True) +# tt.auteur = 'GRANGE Benjamin' +# tt['structure'] = 'CEN Isère' + + +'38VERC4697','38VERC4698','38VERC4699','38VERC4700','38VERC4701','38VERC4704','38VERC4705','38VERC4706','38VERC4707','38VERC4708','38VERC4709','38VERC4710','38VERC4711','38VERC4712','38VERC4713','38VERC4714','38VERC4715','38VERC4716','38VERC4717','38VERC4718','38VERC4719','38VERC4720','38VERC4721','38VERC4722','38VERC4723','38VERC4724','38VERC4726','38VERC4727','38VERC4729','38VERC4730','38VERC4731','38VERC4732','38VERC4733','38VERC4734','38VERC4735','38VERC4736','38VERC4737','38VERC4738','38VERC4739','38VERC4741','38VERC4743','38VERC4746','38VERC4747','38VERC4748' + + +'38VERC4702','38VERC4703','38VERC4725','38VERC4728','38VERC4740','38VERC4742','38VERC4744','38VERC4745' +'38BVIS0047','38VERC4082','38VERC4083','38VERC0618','38VERC1266','38VERC1267','38VERC1232','38VERC1240','38VERC4158','38VERC4158','38VERC4068','38VERC4086','38VERC4083' + +'38BVIS0049', '38VERC1231', '38VERC1234', '38VERC1237','38VERC1241', '38VERC1246', '38VERC1247', '38VERC1248','38VERC1250', '38VERC1255', '38VERC1256', '38VERC1257','38VERC1261', '38VERC1262', '38VERC1263', '38VERC1265','38VERC1268', '38VERC1269', '38VERC1271', '38VERC1306','38VERC1322', '38VERC4051', '38VERC4052', '38VERC4056','38VERC4057', '38VERC4084' \ No newline at end of file diff --git a/3_AZALEE/get_ps_byhab.py b/3_AZALEE/get_ps_byhab.py new file mode 100644 index 0000000..ebbbc17 --- /dev/null +++ b/3_AZALEE/get_ps_byhab.py @@ -0,0 +1,67 @@ +from pycen import ps,con_bdcen +import geopandas as gpd + +# List of target habitat codes +list_habs = ['35.21','35.23','34.12','64.11','64.12'] +habs = '|'.join(list_habs) + +# v_pelouseseches +dfps = ps.v_pelouseseches() +cb_col = dfps.columns[dfps.columns.str.startswith('cb_hab')] +cdate = dfps.columns[dfps.columns.str.contains('date')] +for c in cdate: + dfps[c] = gpd.pd.to_datetime(dfps[c]) +dfps['date'] = dfps[cdate].max(axis=1) + +dfps['auteur'] = None +for c in cdate: + typ = c.removeprefix('date') + dfps.loc[dfps[c].eq(dfps.date),'auteur'] = dfps[dfps[c].eq(dfps.date)]['auteur'+typ] + +dfps.drop( + columns=[*cdate,*'auteur' + cdate.str.removeprefix('date')], + inplace=True +) + + +df = gpd.GeoDataFrame() +for c in cb_col: + res = dfps[dfps[c].str.contains(habs,na=False)].copy() + df = gpd.pd.concat([df,res]) +df.drop_duplicates(inplace=True) +print('Extract %s unités parcellaires ...'%df.shape[0]) + +# PB_codehab_nonPresent_dans_corineBiotope +sql = 'SELECT * FROM pelouse_seche."PB_codehab_nonPresent_dans_corineBiotope"' +dfpb = gpd.read_postgis(sql,con_bdcen) +dfps['date'] = gpd.pd.to_datetime(dfps['date'])
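+# The rename below maps the PB table's columns onto the v_pelouseseches layout +# (id_site -> site_code, code_habN -> cb_habN, ...) so its rows can be appended to +# `df` and filtered through the same cb_hab columns; e.g., assuming a hypothetical +# row with id_site='38XXXX0001' and code_hab1='PS sur sable', after the rename it is +# caught by the dfpb[c].str.contains(habspb, na=False) test further down.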
+dfpb.rename(columns={ + 'id_site' : 'site_code', + 'pratiques':'pratique', + 'niv_embrous':'embrouss', + 'recouvmnt':'recouvrement', + 'remarques':'rmq_site', + 'code_hab1':'cb_hab1', + 'code_hab2':'cb_hab2', + 'code_hab3':'cb_hab3', + 'code_hab4':'cb_hab4', + 'code_hab5':'cb_hab5', +}, inplace=True) +samecol = dfpb.columns[dfpb.columns.isin(dfps.columns)] +diffcol = dfpb.columns[~dfpb.columns.isin(dfps.columns)] +dfpb = dfpb[samecol] +cb_colpb = dfpb.columns[dfpb.columns.str.startswith('cb_hab')] + + +list_habspb = ['PS sur sable','Friche sableuse'] +habspb = '|'.join(list_habspb) +for c in cb_colpb: + res = dfpb[dfpb[c].str.contains(habspb,na=False)].copy() + df = gpd.pd.concat([df,res]) +df.drop_duplicates(inplace=True) +df['date'] = gpd.pd.to_datetime(df['date']).astype(str) +# df['date'] = gpd.pd.to_datetime(df['date'], unit='D') +print('Extract %s unités parcellaires ...'%df.shape[0]) + +df.to_file('/home/colas/Documents/9_PROJETS/2_PS/EXPORT/PNA_pelouse_sableuse.gpkg',driver='GPKG') + diff --git a/3_AZALEE/import_habCBNA_bdcen38.py b/3_AZALEE/import_habCBNA_bdcen38.py new file mode 100644 index 0000000..aa1a39a --- /dev/null +++ b/3_AZALEE/import_habCBNA_bdcen38.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +#Nom : import_habCBNA_bdcen38.py +#Description : +#Copyright : 2021, CEN38 +#Auteur : Colas Geier +#Version : 1.0 + + + +import pandas as pd +import geopandas as gpd +from sqlalchemy.engine import URL +from sqlalchemy import create_engine +import pycen +# Input DB connection parameters +user = 'cen_admin' +pwd = '#CEN38@venir' +adr = '192.168.0.189' +base = 'bd-cen-38' +url = URL.create('postgresql+psycopg2', + username=user, + password=pwd, + host=adr, + database=base, +) + +dic = { + 'Ssp':'IDFINAL', + 'cod_cb':'code_cb', + 'ccorineb':'code_cb', + 'id_orig':'idorig', + 'pourcentag':'part', + 'carte_cb':'type', + 'lib_cb':'libelle', + 'id':'idorig', + 'hectare':'hectares', + 'nbre_hc':'hectares', + 'observat':'obs', + 'surf_m2':'surface', + 'statut_car':'statutcart', +} +# table = '"PS_CHAMBARAN_CEN38_2013"' +con = create_engine(url) +path = '/home/colas/Documents/9_PROJETS/2_PS/CBNA/' +fV = path + 'PNR_Vercors_SHP/PNRV_data_04_2012.dbf' +fgV = path + 'PNR_Vercors_SHP/PNRV_poly_10_2008_L93_region.shp' +nV = 'cr_VERCORS_habitats_CBNA_1999-2007' +fC = path + 'PNR_Chartreuse_SHP/Chartreuse_2012_data.dbf' +fgC = path + 'PNR_Chartreuse_SHP/Chartreuse_2012_poly_region.shp' +nC = 'cr_CHARTREUSE_habitats_CBNA_2000-2012' +fT = path + 'Trieves_Vercors/TRIEVES_DATA.dbf' +fgT = path + 'Trieves_Vercors/TRIEVES_VERCORS.shp' +nT = 'cr_TRIEVES+VERCORS_habitats_CBNA' + +fE = path + 'PNE/DELPHINE_DATA.dbf' +fgE = path + 'PNE/DELPHINE_POLY_L93.shp' +nE = 'cr_ECRIN_habitats_CBNA' + + + +fileG = fgV +file = fV +name = nV + +gdf = gpd.read_file(fileG) +gdf = pycen.tools.dropZ(gdf,geom_col='geometry') +dbf = gpd.read_file(file) +dbf.rename(columns=dic,inplace=True) +gdf.rename(columns=dic,inplace=True) +if gdf.columns[0] != 'IDFINAL': + gdf.rename(columns={gdf.columns[0]:'IDFINAL'},inplace=True) +# if dbf.columns[0] != 'IDFINAL': +# dbf.rename(columns={dbf.columns[0]:'IDFINAL'},inplace=True) +dbf.sort_values('IDFINAL', inplace=True) +gdf.sort_values('IDFINAL', inplace=True) +dbf.drop(columns=['geometry'], inplace=True) +same_cols = gdf.columns[gdf.columns.isin(dbf.columns)] +gdf.rename_geometry('geom',inplace=True) +dbf.drop_duplicates(inplace=True) +dbf.reset_index(drop=True,inplace=True) +dbf.columns = dbf.columns.str.lower() +gdf.columns = gdf.columns.str.lower() +dbf.rename(columns=dic,inplace=True) +gdf.rename(columns=dic,inplace=True) +dbf.drop(columns=[ + 'clemax','cle_code','codevegfin','code_final','lettre','hectares','lib_delphi','lien_habit','lien_typol','evolutif_m','codedelphi', + 
'cb_carte','cb_2','ch_eur25','ch_stat','cb_choix','choix','codedelphine',], inplace=True, errors='ignore') +gdf.drop(columns=[ + 'surface','hectares','support','echelle','perimetr', + 'part','lettre','phyto','code_cb','cd_eur15','statut','type1','type2','cod_mil',], inplace=True, errors='ignore') +dbf.loc[dbf.statut=='Habitat non désigné','statut'] = None +dbf.loc[dbf.statut=='A préciser','statut'] = None +dbf.loc[dbf.code_cb=='A préciser','code_cb'] = None +dbf.loc[dbf.phyto=='A préciser','phyto'] = None +dbf.loc[dbf.phyto=='Néant','phyto'] = None +#### PNE #### +# dbf.loc[dbf.eur25=='Néant','eur25'] = None +# dbf.loc[dbf.l_eur25=='Néant','l_eur25'] = None +# dbf.loc[dbf.code_cb=='Néant','code_cb'] = None +# dbf.part = dbf.part.astype(int).astype(str) +# dbf.sort_values(['idfinal','part'], ascending=[True,False], inplace=True) + +# dbf.dropna(axis=0, subset=['code_cb'], inplace=True) + +dbf.sort_values(['idfinal','part'],ascending=[True,False],inplace=True) +tt = [list(range(1,dbf[dbf.idfinal==i].shape[0]+1)) for i in dbf.idfinal.unique() ] +dbf['num'] = [item for sublist in tt for item in sublist] + +# dbf.rename(columns={'code_eur27':'code_eur27.'}, inplace=True) +dbf.rename(columns={'code_eur15':'code_eur15.'}, inplace=True) +# dbf.rename(columns={'eur25':'code_eur25.'}, inplace=True) +# dbf.rename(columns={'l_eur25':'l_eur25.'}, inplace=True) + +gdf.set_index(['idfinal','idorig','secteur'], inplace=True) +dbf.set_index(['idfinal','idorig','secteur'], inplace=True) +# gdf.set_index(['idfinal','idorig'], inplace=True) +# dbf.set_index(['idfinal','idorig'], inplace=True) +dbf.set_index('num',append=True, inplace=True) +dbf2 = dbf.unstack().copy() +dbf2.dropna(axis=1,how='all', inplace=True) +lst_col = dbf2.columns.get_level_values(0).unique() +stt = ['statut%s'%i for i in range(1,dbf2.iloc[:,dbf2.columns.get_level_values(0)=='statut'].shape[1]+1)] +stc = ['statutcart%s'%i for i in range(1,dbf2.iloc[:,dbf2.columns.get_level_values(0)=='statutcart'].shape[1]+1)] +ste = ['site%s'%i for i in range(1,dbf2.iloc[:,dbf2.columns.get_level_values(0)=='site'].shape[1]+1)] +dbf2.columns = [x+str(j) for x,j in dbf2.columns] +for s in stt: + dbf2.loc[dbf2.statut1.isna(),'statut1'] = dbf2.loc[dbf2.statut1.isna(),s] +for s in stc: + dbf2.loc[dbf2.statutcart1.isna(),'statutcart1'] = dbf2.loc[dbf2.statutcart1.isna(),s] +dbf2.drop(columns=[ + *stt[1:],*ste[1:],*stc[1:] + ],inplace=True) +dbf2.rename(columns={ + 'statut1':'statut', + 'statutcart1':'statutcart', + 'site1':'site' + }, inplace=True) +dbf2.loc[dbf2.statut.isna(),'statut'] = 'N.D' +dbf2.part1 = dbf2.part1.astype(int) +dbf2.columns = dbf2.columns.str.replace('code_cb','code_hab',regex=True) +dbf2.columns = dbf2.columns.str.replace('part','n_hab',regex=True) +df = pd.merge(gdf,dbf2,how='inner',right_index=True,left_index=True) +df.geom = df.buffer(0) +df = pycen.tools.Polygons_to_MultiPolygon(df) +df.to_postgis( + name=name, + con=con, + schema='habitat', + if_exists='replace', + index=True, + geom_col='geom', +) + + + + +# dbf2[dbf2.columns[dbf2.columns.str.contains('type')]] +# dbf2[dbf2.columns[dbf2.columns.str.contains('phyto')]] +# dbf2[dbf2.columns[dbf2.columns.str.contains('phyto')]][~dbf2.phyto12.isna()] +# dbf2[dbf2.columns[dbf2.columns.str.contains('libelle')]] +# dbf2[dbf2.columns[dbf2.columns.str.contains('libelle')]][~dbf2.libelle13.isna()] +# dbf2[dbf2.columns[dbf2.columns.str.contains('codeveg')]] +# dbf2[dbf2.columns[dbf2.columns.str.contains('codeveg')]][~dbf2.codeveg13.isna()] +# 
dbf2.iloc[:,dbf2.columns.get_level_values(0)=='type'] +# dbf2.iloc[:,dbf2.columns.get_level_values(0)=='statut'] + + +# dbf2.columns = [x+str(j) for x,j in dbf2.columns] +# dbf2.part1 = dbf2.part1.astype(int) + diff --git a/3_AZALEE/insert_ps.py b/3_AZALEE/insert_ps.py new file mode 100644 index 0000000..b18b84d --- /dev/null +++ b/3_AZALEE/insert_ps.py @@ -0,0 +1,1479 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +#Nom : insert_ps.py +#Description : +#Copyright : 2021, CEN38 +#Auteur : Colas Geier +#Version : 1.0 + +# import re +import pandas as pd +import geopandas as gpd +from shapely import wkb +import numpy as np +# from pycen import bdd +from sys import exit +import pycen +# from sqlalchemy import create_engine +# from sqlalchemy.engine import URL +# from geoalchemy2 import Geometry + + + +# Input DB connection parameters +# user = 'cen_admin' +# pwd = '#CEN38@venir' +# adr = '192.168.0.189' +# base = 'bd-cen-38' +# schema = 'pelouse_seche' +# # schema = 'habitat' +# url = URL.create('postgresql+psycopg2', +# username=user, +# password=pwd, +# host=adr, +# database=base, +# ) +# con = create_engine(url) +con = pycen.con_bdcen +con_ps = pycen.con + +def get_pers(): + sql = ('SELECT t1.id id, t1.nom nom, t1.prenom prenom, t2.nom organisme FROM {sch}.{tab1} t1' + ' JOIN {sch}.{tab2} t2 ON t1.id_organisme = t2.id ORDER BY id').format(sch='personnes', tab1='personne', tab2='organisme') + df_pers = pd.read_sql( + sql = sql, + con = pycen.con) + df_pers.replace([None],'',inplace=True) + df_pers['auteur'] = df_pers.prenom + ' ' + df_pers.nom + df_pers['auteur'] = df_pers['auteur'].str.strip() + df_pers['organisme'] = df_pers['organisme'].str.strip() + return pycen.pers.get_auteur() + + +# def get_idgeomsite(lst_site): +# ''' +# :lst_site: tuple. 
+# ''' +# sql = ('SELECT id, id_site, date FROM {sch}.{tab} t0 ' +# 'JOIN {sch}.type_milieu t1 WITH t0.id_type_milieu = t1.id ' +# 'WHERE t1.nom = "Pelouses sèches" AND t0.id_site IN {lst} ORDER BY id').format(sch='sites', tab='r_sites_geom', lst=lst_site) +# df = pd.read_sql( +# sql = sql, +# con = pycen.con) +# return df + +def format_legende(leg): + dic = { + + } + df = pd.DataFrame({'leg':leg}) + df.replace( + ['/',' x ','X',' x','fasciès','faciés', ' ','erigé','érigéé','Troêne','Troëne','brachypode','dominée','dominé','érigé_et_brachypode','cyperacées'], + ['|','|','|','|','faciès','faciès',' ','érigé','érigé','Troène','Troène','Brachypode','dominé','dominée','érigé et brachypode','Cyperacées'], + regex=True, inplace=True) + return df + +def ident_format_date(col): + return + + +# "%d/%m/%Y" +def format_date(df): + df = df.copy() + + check_date = [x.startswith('date') or x.endswith('date') for x in df.columns] + cols_date = df.columns[check_date] + if 'date' not in df.columns and not cols_date.empty: + # df[cols_date].replace({'/':None},inplace=True) + for col in cols_date: + df[col].replace({'/':None},inplace=True) + df[col] = pd.to_datetime(df[col]) + # df.loc[df[col].notna(),col] = pd.to_datetime(df[df[col].notna()][col]) + + df['date'] = df[cols_date].max(axis=1) + + if table in ['"PS_4MONTAGNESNE_CEN38_2014"','"PS_BIEVRE_CEN38_2014"', + '"PS_CHAMBARAN_CEN38_2013"']: + df.date = pd.to_datetime(df.date,format='%d/%m/%Y') + + if table == '"PS_BELLEDONNE_CEN38_2014"': + df.date = pd.to_datetime(df.date,format='%d%b%Y') + + if table == '"PS_BIEVRE_CEN38_2016"': + df.date = df.date.astype(str).replace('\.','',regex=True) + df.date = df.date.str.rsplit(' ',1).str[0]\ + .replace(['avr','mai','juin','juil'],['apr','may','jun','jul'],regex=True) + df.date = pd.to_datetime(df.date,format='%d %b %Y') + + if table == '"PS_SUD-GRENOBLOIS_CEN38_2009"': + # df.loc[df.date.str.len() < 6,'date'] = df.loc[df.date.str.len() < 6,'date'].astype(int) + df.loc[df.date.str.len() < 6,'date'] = '01/06/' + df.loc[df.date.str.len() < 6,'date'].astype(int).max().astype(str) + df.loc[df.date.str.len() > 6,'date'] = pd.to_datetime(df.loc[df.date.str.len() > 6,'date']) + + if 'annee' in df.columns and 'date' not in df.columns: + # Si pas de date mais année précisée + df['annee'] = df['annee'].astype(int) + df['date'] = df['annee'].astype(str) + '-06-01' + df.date = pd.to_datetime(df.date,format='%Y-%m-%d') + elif any(df.date.astype(str).str.len() <= 4): + # Si dates non-homogènes, cohexistance date&année + d = df.loc[df.date.astype(str).str.len() <= 4,'date'].unique() + if None in d: + df.loc[df.date.astype(str).str.len() <= 4,'date'] = df.loc[df.date.astype(str).str.len() <= 4,'annee'].astype(str) + '-06-01' + else: + df.loc[df.date.astype(str).str.len() <= 4,'date'] = df.loc[df.date.astype(str).str.len() <= 4,'date'].astype(str) + '-06-01' + df.date = pd.to_datetime(df.date,format='%Y-%m-%d') + else: + df.date = pd.to_datetime(df.date) + + return df + + +def normalize_auteur(lst_auteur): + ''' + lst_auteur : pd.Series + ''' + dic = { + 'Lise Duconte':'DUCONTE Lise', + 'COSQUER Mélanie (AVENIR)':'COSQUER Mélanie', + 'CHABERT Chloé (AVENIR)':'CHABERT Chloé', + 'CHABERT Chloé (AVNEIR)':'CHABERT Chloé', + 'PACHE Gilles (CBNA)':'PACHE Gilles', + 'Gilles PACHE':'PACHE Gilles', + 'JAMEAU Laura (AVENIR)':'JAMEAU Laura', + 'MARCIAU Roger (AVENIR)':'MARCIAU Roger', + 'Roger MARCIAU':'MARCIAU Roger', + 'Aude Massa':'MASSA Aude', + 'Aude MASSA':'MASSA Aude', + 'Bilkisse ABOUDOU AVENIR':'ABOUDOU Bilkisse', + 'Romain 
BARTHELD':'BARTHELD Romain', + 'VILLARET J.C.':'VILLARET Jean-charles', + 'Jean-Charles VILLARET':'VILLARET Jean-charles', + 'Fabien ANTHELME':'ANTHELME Fabien', + 'Jean-Christophe GATTUS':'GATTUS Jean-christophe', + 'Alexis MIKOLAJCZAK':'MIKOLAJCZAK Alexis', + 'Jonathan MALINEAU':'MALINEAU Jonathan', + 'Aurélien DAUTREY':'DAUTREY Aurélien', + 'Etienne MARY':'MARY Etienne', + 'Alix GUEDOU': 'GUEDOU Alix', + 'BEGUIN Lucile': 'BÉGUIN Lucile', + } + lst_aut = lst_auteur.copy() + lst_aut.replace(' & | - ',',',regex=True,inplace=True) + + if any(lst_aut.str.contains(',')): + lst_aut = lst_aut.str.split(',',expand=True).stack().str.strip().droplevel(-1) + lst_aut.replace(dic,inplace=True) + + df_aut = lst_aut.str.rsplit(' ',n=1,expand=True) + if df_aut.shape[1]==2: + df_aut[1] = df_aut[1].str[0].str.upper() + df_aut[1].str[1:].str.lower() + df_aut[0] = df_aut[0].str.upper() + df_aut[2] = df_aut[0] + df_aut.loc[~df_aut[1].isna(),2] = df_aut.loc[~df_aut[1].isna(),0] + ' ' + df_aut.loc[~df_aut[1].isna(),1] + lst_aut = df_aut[2].copy() + else : lst_aut = df_aut[0].copy() + aut = lst_aut.unique() + res = [x for x in aut if x not in [*get_pers().nom_prenom]] + if res: + print('Les auteurs suivant ne figurent pas dans la BDD contact : %s'%res) + new = new_auteur(res) + lst_aut.replace([*new['aut'].keys()],[*new['aut'].values()], inplace=True) + lst_aut = lst_aut.groupby(axis=0,level=0).apply(lambda x: "%s" % ','.join(x)) + return lst_aut + + +def new_auteur(new): + ''' + new : list + ''' + df = pd.DataFrame({'auteur': new}) + dic = {'aut':{}, 'org':[]} + + # Pour chaque auteur + for pers in df.auteur: + print(pers) + + while True: + newaut = input("Insérer un nouvel auteur ? (Y/N)\n") + if not newaut[0].upper() in ['Y','N']: + continue + else: break + # Si : pas d'insertion d'un nouvel auteur + if newaut[0].upper() == 'N': + while True: + rep = input("Remplacer l'auteur par un auteur existant ? (Y/N)\n") + if not rep[0].upper() in ['Y','N']: + continue + else: break + if rep[0].upper() == 'Y': + while True: + org = pycen.pers.get_organisme().sort_index().to_dict()['nom'] + reporg = int(input("Sélectionner le numéro de l'organisme :\n %s \n"%org)) + if not reporg in [*org.keys()]: + continue + else: break + while True: + aut = get_pers().sort_values('id') + aut = aut[aut.id_organisme == reporg] + aut = aut.nom_prenom.to_dict() + repaut = int(input("Sélectionner le numéro de l'auteur :\n %s \n"%aut)) + if not repaut in [*aut.keys()]: + continue + else: break + dic['aut'] = {**dic['aut'],pers:aut[repaut]} + dic['org'] = [*dic['org'],org[reporg]] + # Autrement : insertion d'un nouvel auteur + else: + while True: + print("Auteur : %s"%pers) + rep = input("L'auteur doit être sous le format NOM Prénom.\nRenommer l'auteur ? 
(Y/N)\n") + if not rep[0].upper() in ['Y','N']: + continue + elif rep[0].upper() == 'Y': + repaut = input("Saisisser un nouveau nom :\n") + dic['aut'] = {**dic['aut'],pers:repaut} + pers = repaut + break + else: + dic['aut'] = {**dic['aut'],pers:pers} + break + + while True: + org = pycen.pers.get_organisme().sort_index().to_dict()['nom'] + org = {-1: 'Autre', **org} + reporg = int(input("Sélectionner le numéro de l'organisme :\n %s \n"%org)) + if not reporg in [*org.keys()]: + continue + elif reporg == -1: + reporg = input("Saisisser un nouvel organisme :\n") + dic['org'] = [*dic['org'],reporg] + insert_newOrganisme(reporg) + org = pycen.pers.get_organisme() + idorg = org[org.nom==reporg].index[0] + insert_newAuteur(pers,idorg) + break + else: + dic['org'] = [*dic['org'],org[reporg]] + insert_newAuteur(pers,reporg) + break + + return dic + + +def insert_newAuteur(aut,idorg): + aut = pd.Series(aut) + idorg = pd.Series(idorg) + df = pd.DataFrame({'aut':aut,'id_organisme':idorg}) + # print(df) + tmp = df.aut.str.rsplit(' ',1,expand=True) + if tmp.shape[1] == 1: + df[['nom']] = tmp + elif tmp.shape[1] == 2: + df[['nom', 'prenom']] = tmp + else: + raise ValueError('NOM Prénom est de longueur inconnu :\ntmp') + del df['aut'] + try: + df.to_sql(name='personne',con=pycen.con,schema=pycen.pers.schema, + if_exists='append', index=False) + print("Nouvel auteur inséré : '%s'"%aut) + except: + print("ERROR : impossible d'insérer le nouvel auteur :\n '%s'"%aut) + + + +def insert_newOrganisme(nom): + nom = pd.Series(nom) + df = pd.DataFrame({'nom':nom}) + try: + df.to_sql(name='organisme',con=pycen.con,schema=pycen.pers.schema, + if_exists='append', index=False) + print("Nouvel organisme inséré : '%s'"%nom) + except: + print("ERROR : impossible d'insérer le nouvel organisme :\n '%s'"%nom) + + + +def normalize_colname(df): + df.columns = df.columns.str.lower() + df.columns = df.columns.str.replace('__','_',regex=True) + if df.columns.str.contains('cb_hab').any(): + df.columns = df.columns.str.replace('cb_hab','code_hab',regex=True) + if df.columns.str.contains('cod_hab').any(): + df.columns = df.columns.str.replace('cod_hab','code_hab',regex=True) + return df.rename(columns={ + 'n_polygone':'ident', + # 'id_site':'ident', + 'code-ug':'ident', + 'ident_':'ident', + 'id':'ident', + 'idfinal':'id_origine', + 'date_':'date', + 'obs':'auteur', + 'structur':'structure', + 'pratiq_' :'pratiques', + 'pratique' :'pratiques', + 's_p_brous' :'%_embrous', + 's_p_brouss':'%_embrous', + 'taux_embrou':'%_embrous', + 'niv__embro':'niv_embrous', + 'niv_embro' :'niv_embrous', + 'niv_embrou' :'niv_embrous', + 'niv_emb' :'niv_embrous', + 'embroussaillement' :'niv_embrous', + 'taux_recvmt':'%_recouvmnt', + 'recouvrement':'recouvmnt', + 'recouvreme':'recouvmnt', + 'recouvr':'recouvmnt', + 'recouv' :'recouvmnt', + 'recouvr_' :'recouvmnt', + 'remarque' :'remarques', + 'remarq_' :'remarques', + 'legendes' :'legende', + 'legend' :'legende', + 'sources' :'source', + 'surf' :'surface', + 'geometry' :'geom', + }) + + +def get_id_auteur(lst_author): + sch = 'personnes' + tabP = 'personne' + tabO = 'organisme' + # sql = """ + # WITH str as + # (SELECT id FROM {sch}.{tabO} + # WHERE lower(nom) = lower(%(structure)s) OR lower(abbrev) = lower(%(structure)s)), + # tmp as + # (SELECT id, TRIM(CONCAT(nom,' ',prenom)) as auteur, id_organisme FROM {sch}.{tabP}) + # SELECT tmp.* FROM tmp,str WHERE auteur IN %(lst_author)s + # AND id_organisme IN (str.id) """.format(sch=sch,tabP=tabP,tabO=tabO) + + sql = """ + WITH tmp AS ( + SELECT + pers.id, 
+ TRIM(CONCAT(pers.nom,' ',pers.prenom)) AS auteur, + pers.id_organisme, + orga.nom organisme, + orga.abbrev orga_abbrev + FROM {sch}.{tab_auth} pers JOIN {sch}.{tab_orga} orga ON pers.id_organisme = orga.id + ) + SELECT * FROM tmp + WHERE (auteur,organisme) IN %(lst_author)s + OR (auteur,orga_abbrev) IN %(lst_author)s + """.format(sch=sch, tab_auth=tabP, tab_orga=tabO) + + X = lst_author.to_dict('index') + XX = [tuple(X[x].values()) for x in X] + + df = pd.read_sql( + sql = sql, + con = pycen.con, + params = { + 'lst_author': tuple(XX) + }) + return df + + +def existing_ps(df): + gdf = pycen.ps.get_sitesGeom() + gdf['area'] = gdf.area + # ovl = gpd.overlay(df,gdf) + ovl = gpd.overlay(df,gdf,how='intersection') #'intersection', 'union', 'identity', 'symmetric_difference' or 'difference' + ovl['ovl_area'] = ovl.area + ovl['ovl%'] = ovl['ovl_area'] * 100 / ovl['area'] + + return df + +def define_siteOrg(df): + gdf = pycen.get_districtNat()[['abrev','geom']] + tmp = df[['geom']].copy().set_geometry('geom') + tmp.reset_index(0, inplace=True) + tmp = gpd.overlay(tmp,gdf, how='intersection') + tmp['area'] = tmp.area + tmp.set_index('area',inplace=True) + tmp.sort_index(inplace=True) + tmp.drop_duplicates(subset=['index'],keep='last',inplace=True) + tmp.reset_index(drop=True,inplace=True) + del tmp['geometry'] + df = df.merge(tmp,how='left',left_index=True,right_on='index') + del df['index'] + df.rename(columns={'abrev':'org'}, inplace=True) + return df + # tmp[tmp.id_site==334] + +# 'touches', None, 'contains', 'overlaps', 'contains_properly', 'within', 'intersects', 'crosses', 'covers', 'covered_by' +def ident_newsite(df,rcvmt=10): + ''' + Identify new geometries, or updates to existing geometries, by overlap. + Update / MAJ (1:1), replacement (1:n), new (1:0). + ''' + maj = [] + if 'id_site' in df.columns: + maj = df[~df.id_site.isna()] + df = df[df.id_site.isna()] + if df.empty: + return df,df,maj
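+ # Usage sketch (hypothetical frame `gdf`, not taken from real data): for a + # GeoDataFrame with columns ['id_origine', 'geom'], the three outputs are consumed as + # df_new, df_cor, df_maj = ident_newsite(gdf, rcvmt=10) # rcvmt: min % overlap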
+ # Fetch the pelouses_seches layer from the DB. + sql = 'SELECT site_code,geom FROM ps.v_pelouseseches' + v_ps = gpd.read_postgis(sql, pycen.con) + # Identify the new_site/old_site overlaps + df_inters = gpd.sjoin(df,v_ps, how='left') + del df_inters['index_right'] + news1 = df_inters[df_inters.site_code.isna()].id_origine + lst_old_site = df_inters.site_code.unique() + v_ps = v_ps[v_ps.site_code.isin(lst_old_site)].copy() + v_ps.loc[:,'surf'] = v_ps.area + # Re-set the geometry columns + # if not isinstance(df,gpd.GeoDataFrame): + # df.set_geometry('geom', inplace=True, crs=2154) + # if not isinstance(v_ps,gpd.GeoDataFrame): + # v_ps.set_geometry('geom', inplace=True, crs=2154) + # Compute the overlap + tmp = gpd.overlay(v_ps,df[['id_origine','geom']],how='intersection',make_valid=True,keep_geom_type=False) + if isinstance(tmp, gpd.GeoDataFrame) and tmp.geometry.name !='geom': + tmp.rename_geometry('geom',inplace=True) + tmp['perc_rcvmt'] = 100*tmp.area/tmp['surf'] + # Classify the sites: update / MAJ (1:1) / replacement (1:n) / new (1:0) + # Overlap threshold = 10% + tmpp = tmp[tmp.perc_rcvmt > rcvmt] + site_maj = tmpp[ + (~tmpp.id_origine.duplicated(keep=False)) & + (~tmpp.site_code.duplicated(keep=False)) + ].id_origine + # site_repl = tmp[(tmp.perc_rcvmt > 10)&(tmp.id_origine.duplicated(keep=False))].id_origine.unique() + # news2 = tmp[(tmp.perc_rcvmt <= 10)&(~tmp.id_origine.isin([*site_maj,*site_repl]))].id_origine + + # Isolate the sites to update + # maj2 = gpd.sjoin( + # df[df.id_origine.isin(site_maj)], + # v_ps.loc[v_ps.site_code.isin(tmpp.site_code),['site_code','geom']]) \ + # .drop(columns=['index_right']) + maj2 = df[df.id_origine.isin(site_maj)].merge( + tmpp[['site_code','id_origine']], on=['id_origine']) + maj2['id_site'] = maj2['site_code'] + del maj2['site_code'] + if len(maj) : df_maj = pd.concat([maj,maj2]) + else : df_maj = maj2 + # Isolate the new sites + orig_maj_all = df_maj.id_origine + id_maj_all = df_maj.id_site + df_new = df[~df.id_origine.isin(orig_maj_all)] + # Map the new_site / old_site_toClose correspondences + df_cor = tmpp.loc[ + (~tmpp.site_code.isin(id_maj_all))&(~tmpp.id_origine.isin(orig_maj_all)), + ['site_code','id_origine'] + ] + df_cor.rename(columns={'site_code':'id_site_old'}, inplace=True) + + return df_new, df_cor, df_maj + + + +def define_siteName(df,rcvmt=10): + ''' + Return df_new, df_replace, df_maj + ''' + dfnew, dfcor, df_maj = ident_newsite(df,rcvmt) + df2 = df[~df.index.isin(df_maj.index)].copy() + if not df2.empty: + df2 = define_siteOrg(dfnew.copy()) + df2['dept'] = '38' + # df2['num'] = None + if 'org' in df2.columns: + for org in df2.org.unique(): + tmp = pd.concat([ + pycen.sites.get_sitesInfos(milieu=pycen.ps.milieu), + pycen.sites.get_sitesInfos(milieu='Trame thermophiles') + ]) + # tmp = pycen.sites.get_sitesInfos(milieu=pycen.zh().typ_milieux.nom[0]) + tmp = tmp[tmp.org == org] + if not tmp.empty: + num = tmp.num.astype(int).max() + seq = range(num+1, num + df2.loc[df2.org==org].shape[0]+1) + df2.loc[df2.org==org,['num']] = list(seq) + else: + seq = range(1, df2.loc[df2.org==org].shape[0]+1) + df2.loc[df2.org==org,['num']] = list(seq) + else: + tmp = pycen.sites.get_sitesInfos(milieu=pycen.ps.milieu) + + df2.loc[~df2.num.isna(),'num'] = df2[~df2.num.isna()].num.astype(int).astype(str) + df2.loc[df2.num.str.len() == 1,'num'] = '000'+ df2.loc[df2.num.str.len() == 1,'num'] + df2.loc[df2.num.str.len() == 2,'num'] = '00' + df2.loc[df2.num.str.len() == 2,'num']
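+ # The three zero-padding assignments here are equivalent to + # df2['num'] = df2['num'].str.zfill(4) + # (assuming `num` already holds strings, which the astype(int).astype(str) above ensures).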
+ df2.loc[df2.num.str.len() == 3,'num'] = '0' + df2.loc[df2.num.str.len() == 3,'num'] + df2['id_site'] = df2['dept'] + df2['org'] + df2['num'] + + # Join id_site_new / id_site_old + df_new = df2[~df2.id_origine.isin(dfcor.id_origine)] + df_rep = df2[df2.id_origine.isin(dfcor.id_origine)] + df_cor = pd.merge(dfcor,df2[['id_origine','id_site']], on='id_origine') + + + df_cor.drop(columns=['id_origine'], inplace=True) + df_cor.rename(columns={'id_site':'id_site_new'}, inplace=True) + df_replace = { + 'df' : df_rep, + 'cor': df_cor + } + else: + df_new = dfnew + df_replace = {'df' : dfcor} + return df_new, df_replace, df_maj + + + +def normalize_formatIdsite(id_site): + import re + df = pd.DataFrame({'id_site':id_site}) + df['dept'] = '38' + df[['org','num','other']] = [re.split(r'(\d+)',s) for s in [*df.id_site]] + del df['other'] + df.loc[df.num.str.len() == 1,'num'] = '000'+ df.loc[df.num.str.len() == 1,'num'] + df.loc[df.num.str.len() == 2,'num'] = '00' + df.loc[df.num.str.len() == 2,'num'] + df.loc[df.num.str.len() == 3,'num'] = '0' + df.loc[df.num.str.len() == 3,'num'] + df['id_site'] = df['dept'] + df['org'] + df['num'] + return df.id_site + + +def normalize_paturage(pat): + return pat.str.lower().replace( + ['paturage','ânes','vaches','moutons','cheval','chevaux','poneys','sangliers','chèvres'], + ['pâturage','asin', 'bovin','ovin','équin','équin','équin',None,'caprin'],regex=True) + + +def normalize_pratiques(prat): + dic = { + 'pâturage equin':'pâturage équin', + 'pâturage bovin':'pâturage bovin', + 'paturâge cervidé':'pâturage cervidé', + 'paturâge anin':'pâturage asin', + 'broyer': 'broyage', + 'truffier': 'broyage', + 'plantation': 'broyage', + 'culture' : 'labourage', + 'abandon recent': 'abandon', + 'abandon verger': 'abandon', + 'remblais et feu' : 'incertain', + 'remblais, feu' : 'incertain', + 'caprin': 'pâturage caprin', + 'ovin': 'pâturage ovin', + 'équin': 'pâturage équin', + 'equin': 'pâturage équin', + 'bovin': 'pâturage bovin', + 'paturage': 'pâturage', + 'paturâge': 'pâturage', + 'pâturage difficile': 'pâturage', + 'f' : 'fauche', + 'p' : 'pâturage', + '0' : 'N.D', + 'i' : 'incertain', + 'a' : 'abandon', + 'pv': 'pâturage', + 'b' : 'broyage', + 'pc': 'pâturage caprin', + 'po': 'pâturage ovin', + 'pe': 'pâturage équin', + 'pb': 'pâturage bovin', + '-' : 'N.D', + 'ND' : 'N.D', + 'n.d' : 'N.D', + 'chemin carrossable non goudronné' : 'N.D', + 'plantation de châtaigniers' : 'broyage', + } + p = prat.replace(['.et','/'],[',',', '],regex=True).copy() + p = p.str.lower() + p = normalize_paturage(p) + p = p.replace(dic) + p.fillna('N.D', inplace=True) + p[p.str.contains(r'\?| ou ',na=False)] = 'incertain' + pp = p.str.split(',',expand=True) + pp.columns = 'p' + pp.columns.astype(str) + for c in pp.columns: + pp[c] = pp[c].str.strip() + pp[c] = pp[c].replace(dic) + return pp + + +def select_ID(df,sch,tab): + df = df.copy().dropna(axis=1,how='any') + pk = pycen.tools.__get_pkey__(pycen.con,tab,sch) + if pk['constrained_columns']: + pkey = pk['constrained_columns'] + else : pkey = [] + sql = 'SELECT %s FROM %s.%s'%(','.join([*pkey,*df.columns]),sch,tab) + df.replace("'","''",regex=True,inplace=True) + if 'geom' in df.columns: + del df['geom'] + if 'date' in df.columns: + df['date'] = df['date'].astype(str) + elif 'date_deb' in df.columns: + df['date_deb'] = df['date_deb'].astype(str) + if 'id_cb' in df.columns: + df['id_cb'] = df['id_cb'].astype(str) + column = '(%s)'%','.join(df.columns) + df_tuple = tuple(df.itertuples(index=False, name=None)) + sql = sql + ' WHERE %s IN %s'%(column,df_tuple) + if 'nan' in 
sql: sql = sql.replace('nan','NULL') + if ',)' in sql: sql = sql.replace(',)',')') + if '"' in sql: sql = sql.replace('"',"'") + + # print(sql) + return pd.read_sql( + sql=sql, + con=pycen.con) + + +def insertAutorAttrs(df,colname_rsite, sch, tab): + df = df.copy() + df = df[['id','auteur','structure']].copy() + # structure = [df.structure.unique()] + # lst_author = df[['auteur','structure']].drop_duplicates() + if any(df.auteur.str.contains(',')): + tmp = df.auteur.str.split(',',expand=True)\ + .stack().droplevel(-1)\ + .str.strip() + tmp = pd.DataFrame({'auteur':tmp}) + df = pd.merge(df[['id','structure']],tmp,how='left',left_index=True,right_index=True) + lst_author = df[['auteur','structure']]\ + .drop_duplicates()\ + .reset_index(drop=True) + df_autor = get_id_auteur(lst_author=lst_author) + for r,row in df_autor.iterrows(): + df.loc[ + (df.auteur.str.contains(row.auteur)) & + ((df.structure==row.organisme) | (df.structure==row.orga_abbrev)), + 'auteur'] = row.id + del df['structure'] + # df.replace([*df_autor.auteur],[*df_autor.id], inplace=True) + df.columns = [colname_rsite, 'id_auteur'] + df.drop_duplicates(inplace=True) + try : + df.to_sql( + name=tab, + con=pycen.con, + schema=sch, + index=False, + if_exists='append', + method='multi' + ) + print('Insert author sites data OK !') + return 'Insert author sites data OK !' + except Exception as e: + print('ERROR : News author insert impossible !') + print(e) + + +def _altertype(df1,df2,con=None,table_name=None, schema=None): + if con and table_name and schema: + tp = pycen.tools.__get_dtype__(con,table_name, schema) + same_col = df1.columns[df1.columns.isin(df2.columns)] + if 'date' in df2.columns: + df2['date'] = df2['date'].astype(str) + if 'geom' in same_col: + same_col = same_col.drop('geom') + for col in same_col: + if df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == int: + if df1.shape[0] == df2.shape[0]: + df1[col] = df1[col].astype(int) + else : + df2[col] = df2[col].astype(float) + elif df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == float: + df1[col] = df1[col].astype(float) + elif df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == str: + df1[col] = df1[col].astype(str) + elif df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == object: + df1[col] = df1[col].astype(str) + return [df1,df2] + + +def insert_authordata(df_ins,tab_data,id_data,colname_rsite, tab_auteur): + df_ins = df_ins.copy() + col_auth = ['auteur','structure'] + if 'date' in df_ins.columns: + df_ins['date'] = df_ins['date'].astype(str) + elif 'date_deb' in df_ins.columns: + df_ins['date_deb'] = df_ins['date_deb'].astype(str) + + tab_sit = pycen.con.dialect.get_table_names(pycen.con,pycen.sites.schema) + tab_ps = pycen.con.dialect.get_table_names(pycen.con,pycen.ps.schema) + if tab_data in tab_sit: + sch = pycen.sites.schema + elif tab_data in tab_ps: + sch = pycen.ps.schema + + # ids = select_ID(df_ins[df_ins.columns.drop('auteur')],sch,tab_data) + ids = select_ID(df_ins[[id_data]],sch,tab_data) + if 'id_site' in ids.columns: + if tab_data == 'r_sites_geom': col = 'id' + else: col = 'id_geom_site' + tp = ids.groupby(['id_site'])[col].max() + ids = ids[ids[col].isin(tp)] + # join_on = [id_data] + same_col = df_ins.columns[df_ins.columns.isin(ids.columns)] + if len(same_col) > 1 and ids[same_col].duplicated().any(): + raise ValueError('Doublon détecté dans la jointure [df_ins VS ids] !') + if 'geom' in same_col: + same_col = same_col.drop('geom') + join_on = pd.Series([*same_col,id_data]).unique() + # if df_ins.shape[0] != 
ids.shape[0]: + # join_on = same_col + if 'date' in ids.columns: + ids['date'] = ids['date'].astype(str) + df_ins,ids = _altertype(df_ins,ids) + + if tab_data != 'sites': + # df_ins = df_ins[[*same_col,*col_auth]].merge(ids, on=[*same_col], how='left') # A REVOIR + df_ins = df_ins[[*same_col,*col_auth]].merge(ids, on=[*join_on], how='left') # A REVOIR + # df_ins = df_ins[[*same_col,'auteur']].merge(ids, on=['id_site'], how='left') # A REVOIR + insertAutorAttrs(df=df_ins,colname_rsite=colname_rsite, sch=sch, tab=tab_auteur) + + + +def insert_site(df): + df = df.copy() + if 'nom' not in df.columns: + df['nom'] = 'Inconnu' + if 'type_site' not in df.columns: + df['type_site'] = 'N.D.' + if 'type_milieu' not in df.columns: + df['type_milieu'] = 'Pelouses sèches' + # if 'remarques' not in df.columns: + # df['remarques'] = None + + df = df[['id_site','nom','date','auteur','type_site','type_milieu', + 'structure' + # 'remarques' + ]] + drop_col = ['auteur','structure'] + typs = pycen.sites._get_typ_site() + typm = pycen.sites._get_typ_milieux() + + for t in df.type_milieu.unique(): + df.loc[df.type_milieu == t, 'id_type_milieu'] = typm.loc[typm.nom == t, 'id'].values[0] + for s in df.type_site.unique(): + df.loc[df.type_site == s, 'id_type_site'] = typs.loc[typs.nom == s, 'id'].values[0] + + df['id_type_milieu'] = df['id_type_milieu'].astype(int) + # df['id_type_site'] = df['id_type_site'].astype(str) + + df['dept'] = df.id_site.str[:2] + df['org'] = df.id_site.str[2:-4] + df['num'] = df.id_site.str[-4:] + df.rename(columns={'date':'date_deb', 'id_site':'id'}, inplace=True) + + columns = [col['name'] for col in pycen.sites.columns_sitetab] + drop_col + c = df.columns[df.columns.isin(columns)] + df = df[c] + print(df) + + try: + df[df.columns.drop(drop_col)].to_sql( + name = 'sites', + con = pycen.con, + schema = pycen.sites.schema, + if_exists = 'append', + index = False, + method = 'multi' + ) + print('Insert news sites OK !') + return [df,True] + except Exception as e: + print(e) + print('PS : Insert news sites impossible !') + return [df,False] + + +def check_org_lot(lot): + """ + :lot: list. liste des lots + + return + ------- + DataFrame des id_lot de la liste lot. 
+ """ + tab_lot = pycen.sites._get_lots() + not_intab = [ l for l in lot if l not in [*tab_lot.libelle] ] + if not_intab: + df = pd.DataFrame({'libelle':not_intab}) + df.to_sql( + name='lots', + con=pycen.con, + schema='sites', + index=False, + if_exists='append', + method='multi', + ) + tab_lot = pycen.sites._get_lots() + tab_lot = tab_lot[tab_lot.libelle.isin(lot)] + return tab_lot[['id_lot','libelle']] + + + +def insert_geom(df): + drop_col = ['auteur','structure'] + dic = {'remarques':'rmq_interet_patri','legende':'rmq_usage_process'} + c = ['id_site','geom','date','remarques','legende','table_org','id_origine',*drop_col] + cc = df.columns[df.columns.isin(c)] + df = df[cc].copy() + df.set_geometry('geom', inplace=True) + + colindict = [ *df.columns[df.columns.isin(dic.keys())] ] + if colindict: + df.rename( + columns={'remarques':'rmq_interet_patri','legende':'rmq_usage_process'}, + inplace=True, + ) + if 'table_org' in df.columns: + tab = [*df.table_org.unique()] + lots = check_org_lot(tab) + df = pd.merge(df,lots,left_on='table_org',right_on='libelle',how='left') + del df['table_org'] + del df['libelle'] + + try: + df[df.columns.drop(drop_col)].to_postgis( + name = 'r_sites_geom', + con = pycen.con, + schema = pycen.sites.schema, + if_exists = 'append', + index = False, + # geom_col = 'geom' + ) + print('Insert news geom OK !') + except Exception as e: + print(e) + print('PS : Insert news geom impossible !') + return [df,False] + + return [df,True] + + +def get_idGeomSite(lst_site,ps_statut='actif'): + ''' + :lst_site: list or pd.Series . + :ps_statut: str. ['actif', 'history', 'all']. + Statut des sites à récupérer, + 'actif'; Date_fin IS NULL + 'history'; A une Date_fin + 'all'; Tous les Sites + ''' + id_geom = pycen.ps.get_sitesGeom( + id_site=[*lst_site], statut=ps_statut) + tp = id_geom.groupby('id_site').id.max() + id_geom = id_geom[id_geom.id.isin(tp)] + if not id_geom.empty: + id_geom = id_geom[['id','id_site']] + id_geom.columns = ['id_geom_site','id_site'] + return id_geom + + +def insert_attrsPS(df,ps_statut='actif'): + params = [] + drop_col = ['auteur','structure'] + + for p in ['%_embrous','niv_embrous','%_recouvmnt','recouvmnt']: + if p in df.columns: + params = [*params, p] + c = ['id_site','date','pratiques',*params,*drop_col] + + for p in ['statut','pratiques']: + if p in df.columns: + c = [*c, p] + cc = df.columns[df.columns.isin(c)] + df = df[cc].copy() + + if 'pratiques' in df.columns and not all(df.pratiques.isna()): + df = pd.concat([df,normalize_pratiques(df['pratiques'])],axis='columns') + del df['pratiques'] + + for col in params: + df[col] = df[col].astype(str).replace(['\.0'],[''],regex=True) + + if 'niv_embrous' in df.columns: + df.niv_embrous.replace(['nan'],['N.D'],inplace=True) + + if 'recouvmnt' in df.columns: + df.recouvmnt.replace(['nan','None','0'],['N.D','N.D','N.D'],inplace=True) + + if 'statut' in df.columns: + df.statut.replace(['ND'],['N.D'],inplace=True) + # for t in df.dtypes[df.dtypes == int].index: + # df.loc[~df[t].isna(),t] = df.loc[~df[t].isna(),t].astype(int).astype(str) + taux = False + if '%_embrous' in df.columns: + taux = True + df.niv_embrous = df.niv_embrous + ';' + df['%_embrous'] + del df['%_embrous'] + if '%_recouvmnt' in df.columns: + taux = True + df.recouvmnt = df.recouvmnt + ';' + df['%_recouvmnt'] + del df['%_recouvmnt'] + df.set_index(['id_site','date',*drop_col], inplace=True) + df = df.stack().reset_index(-1) + df.columns = ['type','param'] + df.loc[df.type.str.len() < 4,'type'] = 'pratique' + if taux: + 
df[['param','taux']] = df.param.str.split(';',expand=True) + df.taux.replace(['nan','None'],[None,None],inplace=True) + + df.type.replace({'niv_embrous':'embroussaillement','recouvmnt':'recouvrement'}, inplace=True) + df.param.replace({'ND':'N.D'}, inplace=True) + df.reset_index(inplace=True,drop=False) + + param = pycen.ps.get_param() + typ_p = pycen.ps._get_table(con=pycen.con,schema=pycen.ps.schema,table='type_param') + for t in df.type.unique(): + tmp = param[param.type == t] + df.loc[df.type==t,'param'] = df.loc[df.type==t,'param'].replace([*tmp.nom], [*tmp.id]) + df.loc[df.type==t,'param'] = df.loc[df.type==t,'param'].replace([*tmp.description], [*tmp.id]) + df.loc[df.type==t,'param'] = df.loc[df.type==t,'param'].replace([*tmp.description.str.lower()], [*tmp.id]) + df['type'].replace([*typ_p.nom], [*typ_p.id], inplace=True) + df = df.merge(get_idGeomSite(df.id_site,ps_statut=ps_statut), on=['id_site'], how='left') + # del df['id_site'] # id_site inséré dans la table depuis 19-07-2022 + del df['type'] + df.rename(columns={'param':'id_param'}, inplace=True) + + # df.loc[df.id_param=='None','id_param'] = None + df.replace({'None':None}, inplace=True) + df.dropna(subset=['id_param'], inplace=True) + try: + df[df.columns.drop(drop_col)].to_sql( + name = 'r_site_param', + con = pycen.con, + schema = pycen.ps.schema, + if_exists = 'append', + index = False, + method = 'multi' + ) + print('Insert news attrs OK !') + return [df,True] + except Exception as e: + print(e) + print('PS : Insert news attrs impossible !') + return [df,False] + + +def split_codehab(lst_codehab): + lst = lst_codehab.copy() + lst = lst.str.split('x|X|,|&|/|\(|et',expand=True) \ + .stack().str.strip() \ + .str.replace('\)','',regex=True)\ + .droplevel(-1).reset_index(drop=False) + return lst + + +def format_codehab2insert(lst_codehab): + lst = lst_codehab.copy() + lst.replace('x|X|,|et','&',regex=True, inplace=True) + lst.replace('&','&;',regex=True, inplace=True) + lst.replace('/','/;',regex=True, inplace=True) + lst = lst.str.split(';',expand=True).stack().str.strip().reset_index(-1) + lst.columns = ['ordre', 'code_hab'] + lst.ordre = lst.ordre + 1 + lst.loc[lst.code_hab.str.contains('&'),'sep'] = '&' + lst.loc[lst.code_hab.str.contains('/'),'sep'] = '/' + lst.code_hab = lst.code_hab.replace('&|/','',regex=True).str.strip() + lst.reset_index(drop=False, inplace=True) + + suspect = lst.code_hab.str.split('(',expand=True) + if suspect.shape[1] > 1: + suspect[0] = suspect[0].str.strip() + suspect[1] = suspect[1].str.replace('\)','',regex=True) + suspect.columns = ['code_hab','suspect'] + del lst['code_hab'] + # raise Exception('habitat suspecté en développement') + else: + suspect.columns = ['code_hab'] + del suspect['code_hab'] + lst = pd.merge(lst,suspect,right_index=True,left_index=True) + lst.set_index('id', inplace=True) + + return lst + + +def insert_cbPS(df): + drop_col = ['auteur','structure'] + df = df.copy() + df.date = df.date.astype(str) + df.code_hab = df.code_hab.replace(['\*'],[''],regex=True).str.strip() + df.code_hab = df.code_hab.replace(['None','nan','0'],[None,None,None]) + df.dropna(subset=['code_hab'],inplace=True) + ids = select_ID(df[df.columns.drop(['code_hab',*drop_col])],sch=pycen.ps.schema,tab='r_site_habitat') + same_col = df.columns[df.columns.isin(ids.columns)] + if ids[same_col].duplicated().any(): + raise ValueError('Doublon détecté dans la jointure [df VS ids] !') + df,ids = _altertype(df,ids) + df = df.merge(ids, on=[*same_col], how='left') + df = df[['id', 'code_hab']].copy() + 
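+    # Sketch of the reshaping performed by format_codehab2insert below
+    # (illustrative values): a composite entry such as '34.32 & 34.41'
+    # becomes one row per code,
+    #     ordre | code_hab | sep
+    #         1 | 34.32    | &
+    #         2 | 34.41    | NaN
+    # 'ordre' ranks the codes and 'sep' keeps the separator ('&' or '/')
+    # that followed the code in the original entry.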
df.set_index('id',inplace=True) + # df = df.code_hab.str.split('x|,|&',expand=True) \ + # .stack().str.strip() \ + # .droplevel(-1).reset_index(drop=False) + # df = split_codehab(df.code_hab) + # df.columns = ['id_sitehab', 'code_hab'] + df = format_codehab2insert(df.code_hab) + df.index.name = 'id_sitehab' + df.reset_index(drop=False,inplace=True) + + try: + df.to_sql( + name = 'r_hab_cb', + con = pycen.con, + schema = pycen.ps.schema, + if_exists = 'append', + index = False, + method = 'multi' + ) + print('Insert news codes habitats OK !') + except Exception as e: + print(e) + print('PS : Insert news codes habitats impossible !') + + +def insert_habPS(df,ps_statut='actif'): + import re + drop_col = ['auteur','structure'] + c = ['id_site','date',*drop_col] + cc = df.columns[(df.columns.isin(c)) | (df.columns.str.contains('code_hab|n_hab')) ] + df = df[cc].copy() + c_nhab = df.columns[df.columns.str.contains('n_hab')] + df[c_nhab] = df[c_nhab].astype(float) + cc = df.columns[(~df.columns.str.contains('hab')) ] + df.set_index([*cc], inplace=True) + # df.set_index(['id_site','auteur','date'], inplace=True) + # for t in df.dtypes[df.dtypes == int].index: + # df[t] = df[t].astype(str) + df = df.stack().reset_index(-1) + df.columns = ['param','value'] + df[['param','index','other']] = [re.split('(\d+)',s) for s in [*df.param]] + del df['other'] + df['index'] = df['index'].astype(int) + df['value'] = df['value'].replace(['-'],[None]) + df.dropna(subset=['value'], inplace=True) + df = df.set_index(['index','param'], append=True).unstack() + df = df.droplevel(0,axis=1) + df.code_hab.replace(['0','None','nan'],[None,None,None], inplace=True) + df.dropna(subset=['code_hab'],inplace=True) + # df['n_hab'] = df['n_hab'].astype(int) # Désactiver pour insertion de données sans n_hab + df.reset_index(inplace=True,drop=False) + df = df.merge(get_idGeomSite(df.id_site,ps_statut=ps_statut), on=['id_site'], how='left') + # del df['id_site'] # id_site inséré dans la table depuis 19-07-2022 + + # Reclassification des codes habitats / proportions d'habitats + if 'n_hab' in df.columns: + df.sort_values(['id_geom_site','n_hab','index'],ascending=[True,False,True], inplace=True) + else: + df.sort_values(['id_geom_site','index'],ascending=[True,True], inplace=True) + lst = [ list(range(1, df[df.id_geom_site==d].shape[0]+1 )) for d in df.id_geom_site.unique()] + flat_list = [item for sublist in lst for item in sublist] + df['index'] = flat_list + + try: + df[df.columns.drop([*drop_col,'code_hab'])].to_sql( + name = 'r_site_habitat', + con = pycen.con, + schema = pycen.ps.schema, + if_exists = 'append', + index = False, + method = 'multi' + ) + print('Insert news attrs habitats OK !') + return [df,True] + except Exception as e: + print(e) + print('PS : Insert news attrs impossible !') + return [df,False] + + +def insert_legendCartoPS(df,ps_statut='actif'): + c = ['id_site','auteur','date','leg_carto'] #,'legende'] + cc = df.columns[df.columns.isin(c)] + df = df[cc].copy() + param = pycen.ps.get_listLegendePS() + df['id_param_leg'] = df['leg_carto'].replace([*param.nom_court],[*param.id]) + df = df.merge(get_idGeomSite(df.id_site,ps_statut=ps_statut), on=['id_site'], how='left') + del df['id_site'] + del df['leg_carto'] + + try: + df[df.columns.drop('auteur')].to_sql( + name = 'r_site_legcarto', + con = pycen.con, + schema = pycen.ps.schema, + if_exists = 'append', + index = False, + method = 'multi' + ) + print('Insert news legendes OK !') + return [df,True] + except Exception as e: + print(e) + print('PS : Insert 
news legendes impossible !') + return [df,False] + + +def filter_saisierror(df): + import re + tmp = df.copy() + lst_iderror = [] + del tmp['geom'] + cc = tmp.columns[(~tmp.columns.str.contains('hab')) ] + tmp.set_index([*cc], inplace=True) + tmp = tmp.stack().reset_index(-1) + tmp.columns = ['param','value'] + tmp[['param','index','other']] = [re.split('(\d+)',s) for s in [*tmp.param]] + del tmp['other'] + tmp['index'] = tmp['index'].astype(int) + tmp['value'] = tmp['value'].replace(['-'],[None]) + tmp.dropna(subset=['value'], inplace=True) + tmp = tmp.set_index(['index','param'], append=True).unstack() + tmp = tmp.droplevel(0,axis=1) + tmp.reset_index(inplace=True,drop=False) + lst_error1 = [] + if '_hab' in tmp.columns: + tmp.hab_.replace( + ['Xerobromenion erecti', + 'Mesobromion', + 'Carpino betuli prunion spinosae','Carpino-betuli prunion spinosae',], + ['Xerobromion erecti', + 'Mesobromion erecti', + 'Carpino betuli-prunion spinosae','Carpino betuli-prunion spinosae',], + inplace=True) + + for hab in tmp.hab_.sort_values().unique(): + print(hab) + print(tmp[tmp.hab_ == hab].code_hab.unique()) + for cod in tmp[tmp.hab_ == hab].code_hab.unique(): + perc = tmp[(tmp.hab_ == hab)&(tmp.code_hab==cod)].code_hab.count() / tmp[tmp.hab_ == hab].code_hab.count() + if perc < 0.6: + if hab == 'Mesobromion erecti' and cod in ['34.322','34.323']: + continue + print('\t'+cod + ' : '+ perc.astype(str)) + lst_error1 = [*lst_error1,*tmp[(tmp.hab_ == hab)&(tmp.code_hab==cod)].sort_values('id_site').id_site.unique()] + df[df.id_site.isin(lst_error1)].to_postgis( + name='incoherence_habVScodehab', + con=con, + schema=schema, + if_exists='replace', + index=False, + # geom_col='geom', + index_label='id_site', + ) + if lst_error1: + print('%i'%len(lst_error1)) + + + cb = pycen.ref_hab().get_CB() + tmp.code_hab = tmp.code_hab.replace(['\*'],[''],regex=True).str.strip() + tmp.code_hab = tmp.code_hab.replace(['0'],[None]) + tmp.dropna(subset=['code_hab'], inplace=True) + tmpp = tmp[['id_site','index','code_hab']].copy() + tmpp.set_index(['id_site','index'], inplace=True) + # if any(tmpp.code_hab.str.contains('x|,|&')): + # tmpp = tmpp.code_hab.str.split('x|,|&',expand=True).stack().str.strip().droplevel(-1) + tmpp = split_codehab(tmpp.code_hab) + tmpp.columns = ['id_site','index','code_hab'] + tmpp = tmpp[~tmpp.code_hab.isin(cb.id)] + lst_error2 = [*tmpp.sort_values('id_site').id_site.unique()] + if 'n_hab' in tmp.columns: + lst_error2 = [*lst_error2,*tmp[tmp.n_hab.isna()].sort_values('id_site').id_site.unique()] + + if lst_error2: + print('ERROR code_hab : %i'%len(lst_error2)) + df[df.id_site.isin(lst_error2)].to_postgis( + name='PB_codehab_nonPresent_dans_corineBiotope', + con=con, + schema=schema, + if_exists='append', + index=False, + # geom_col='geom', + index_label='id_site', + ) + + lst_iderror = [*lst_error1,*lst_error2] + lst_iderrorindex = df[df.id_site.isin(lst_iderror)].index + df.drop(lst_iderrorindex,inplace=True) + return df.sort_values('id_site') + +def _insert_site_(df,ps_statut='actif'): + df_ins, ins = insert_geom(df) + if ins: + insert_authordata(df_ins,id_data='id_site',tab_data='r_sites_geom',colname_rsite='id_geom_site', tab_auteur='r_geomsites_auteur') + + df_ins, ins = insert_attrsPS(df,ps_statut) + if ins: + insert_authordata(df_ins,id_data='id_geom_site',tab_data='r_site_param',colname_rsite='id_siteparam', tab_auteur='r_siteparam_auteur') + + df_ins, ins = insert_habPS(df,ps_statut) + if ins: + if df_ins.shape[0] > 4000: + df_ins1 = df_ins.iloc[:round(df_ins.shape[0]/2)].copy() + 
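+        # second half of the split: halving keeps each multi-row INSERT
+        # issued by insert_cbPS to a manageable size (the 4000-row cutoff
+        # above appears to be an empirical limit)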
df_ins2 = df_ins.iloc[round(df_ins.shape[0]/2):].copy() + insert_cbPS(df_ins1) + insert_cbPS(df_ins2) + else: + insert_cbPS(df_ins) + insert_authordata( + df_ins[df_ins.columns.drop('code_hab')],id_data='id_geom_site',tab_data='r_site_habitat', + colname_rsite='id_sitehab', tab_auteur='r_sitehab_auteur') + + +def maj_site(df): + _insert_site_(df) + + +def new_site(df): + df_ins, ins = insert_site(df) + if ins: + insert_authordata(df_ins,id_data='id',tab_data='sites',colname_rsite='id_site', tab_auteur='r_sites_auteur') + + _insert_site_(df) + + +def replace_site(df, cor): + # Insertion des nouveaux codes site + df_ins, ins = insert_site(df) + if ins: + insert_authordata(df_ins,id_data='id',tab_data='sites',colname_rsite='id_site', tab_auteur='r_sites_auteur') + # remplissage de la table de correspondance old/new site + cor.to_sql( + name = 'r_site_maj', + con = pycen.con, + schema = 'sites', + if_exists = 'append', + index = False, + method = 'multi' + ) + # Insertion de la date de cloture des anciens sites + df_updt = pd.merge( + cor, df[['id_site','date']], + how='left', + left_on='id_site_new', + right_on='id_site') + df_updt = df_updt[['id_site_old','date']] + df_updt.columns = ['id','date_fin'] + pycen.update_to_sql( + df = df_updt, + con = pycen.con, + table_name='sites', + schema_name='sites', + key_name='id' + ) + # Suite de l'insertion des sites + _insert_site_(df) + +def check_overlaps(): + ''' + Vérification de la superposition des polygons + ''' + sql = ''' + SELECT v1.site_code site_code_old, v1.geom geom_old, v2.site_code site_code_new, v2.geom geom_new + FROM ps.v_pelouseseches v1, ps.v_pelouseseches v2 + WHERE ST_OVERLAPS(v1.geom,v2.geom) = TRUE + AND v1.date_geom < v2.date_geom + AND v1.site_code <> v2.site_code; + ''' + res = gpd.read_postgis(sql,con_ps,geom_col='geom_old') + return res + + +if __name__ == "__main__": + + isin_bdd = True + schema = "habitat" + lst_tab = ['"PS_4MONTAGNESNE_CEN38_2014"','"PS_CHAMBARAN_CEN38_2013"','"PS_CHARTREUSE_CEN38_2010"','"PS_DRAC_CEN38_2014"', + '"PS_BELLEDONNE_CEN38_2014"','"PS_BIEVRE_CEN38_2014"','"PS_BIEVRE_CEN38_2016"','"PS_SUD-GRENOBLOIS_CEN38_2009"', + '"PS_VERCORS_CEN38_2011"' + ] + from_table = '"cr_ECRIN_habitats_CBNA_2014"' + from_table = '"cr_VERCORS_habitats_CBNA_1999-2007"' + from_file = 'fwdcouchesinventairepelousessches2022/PS2022_UG.gpkg' + path0 = '/home/colas/Documents/9_PROJETS/2_PS/TO IMPORT/' + + # org = from_file.split('/')[1] + tutu = pd.DataFrame() + if from_table is not None: + for table in lst_tab: + sql = "SELECT * FROM {sch}.{tab}".format(sch=schema,tab=table) + df = gpd.read_postgis( + sql = sql, + con = con + ) + if 'id' in df.columns and 'n_polygone' in df.columns: + df['id'] = df['n_polygone'] + del df['n_polygone'] + elif 'id' in df.columns and 'polygon' in df.columns: + df['id'] = df['polygon'] + del df['polygon'] + elif 'id' in df.columns and 'ident' in df.columns: + df['id'] = df['ident'] + del df['ident'] + df = normalize_colname(df) + if 'remarques' in df.columns and 'legende' not in df.columns: + df['legende'] = df.remarques + # df.rename(columns={'id':'id_site'}, inplace=True) + + if table == '"PS_VERCORS_CEN38_2011"': + df = df[df.date != '?'].copy() + + # df.legende = format_legende(df.legende) + df = format_date(df) + + if table == '"PS_CHAMBARAN_CEN38_2013"': + df.type_patur = normalize_paturage(df.type_patur) + df[['p1','p2']] = df.pratiques.str.split('/|.et.',expand=True) + df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'p1'] = \ + df.loc[(~df.type_patur.isna()) & 
(df.p1.str.lower()=='pâturage'), 'p1'] + ' ' + \
+                df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'type_patur']
+            # parentheses around the comparison are required: '&' binds
+            # tighter than '==' in Python
+            df.loc[(~df.type_patur.isna()) & (df.p2.str.lower()=='pâturage'), 'p2'] = \
+                df.loc[(~df.type_patur.isna()) & (df.p2.str.lower()=='pâturage'), 'p2'] + ' ' + \
+                df.loc[(~df.type_patur.isna()) & (df.p2.str.lower()=='pâturage'), 'type_patur']
+            df['pratiques'] = df.p1
+            df.loc[~df.p2.isna(),'pratiques'] = df.p1 + '/' + df.p2
+            df.drop(columns=['type_patur','p1','p2'], inplace=True)
+
+        df['table_org'] = table[1:-1]
+        tutu = pd.concat([tutu,df])
+
+    if from_file is not None:
+        table = from_file.split('/')[-1]#.split('.')[0]
+        df = gpd.read_file(path0+from_file)
+        df = normalize_colname(df)
+        df = format_date(df)
+        df['table_org'] = table
+        df['structure'] = 'APIE'
+        # dict form: replace(list, None) would pad-fill instead of setting NaN
+        df['type_pat'].replace({'Indéterminé': None, '/': None}, inplace=True)
+        df.loc[df.type_pat.notna(),'pratiques'] = \
+            df[df.type_pat.notna()].pratiques + ' ' + df[df.type_pat.notna()].type_pat.str.lower()
+        tutu = pd.concat([tutu,df])
+
+
+    c = ['ident','id_origine','id_site','geom','auteur','hab_1','code_hab1','n_hab1',
+         'hab_2','code_hab2','n_hab2','hab_3','code_hab3','n_hab3',
+         'hab_5','code_hab5','n_hab5','hab_6','code_hab6','n_hab6',
+         'hab_7','code_hab7','n_hab7','hab_8','code_hab8','n_hab8',
+         'hab_9','code_hab9','n_hab9','hab_10','code_hab10','n_hab10',
+         'hab_11','code_hab11','n_hab11','hab_12','code_hab12','n_hab12',
+         'hab_13','code_hab13','n_hab13','hab_14','code_hab14','n_hab14',
+         'hab_4','code_hab4','n_hab4','pratiques','niv_embrous',
+         'recouvmnt','remarques','phyto','statut','h_remarq','legende',
+         'leg_carto','date','annee','%_embrous','%_recouvmnt','surf_emb',
+         'source','structure',
+         # 'org','dept','num',
+         'table_org'
+         ]
+    if 'ident' in df.columns and 'id_origine' not in df.columns:
+        tutu.rename(columns={'ident':'id_origine'}, inplace=True)
+    cols = tutu.columns[tutu.columns.isin(c)]
+    df = tutu[cols].copy()
+    df = df.set_geometry('geom',crs=2154)
+    # df.code_hab3.replace({
+    #     '43.171':'41.171',
+    #     '31.4B':'31.4'
+    #     }, inplace=True)
+
+    df = pycen.tools.Polygons_to_MultiPolygon(df)
+    df_new, df_replace, df_maj = define_siteName(df,rcvmt=10)
+
+    df_new_tmp = df_new.copy()
+    df_new = df_new_tmp[df_new_tmp.id_site.notna()].copy()
+
+    if not df_new.empty:
+        df_new = df_new[~df_new.id_site.isna()].copy()
+        df_new.sort_index(inplace=True)
+        df_new['auteur'] = normalize_auteur(df_new.auteur)
+        df_new['id_origine'] = df_new['id_origine'].astype(str)
+        df_new = filter_saisierror(df_new)
+    if not df_maj.empty:
+        df_maj['auteur'] = normalize_auteur(df_maj.auteur)
+        df_maj['id_origine'] = df_maj['id_origine'].astype(str)
+        df_maj = filter_saisierror(df_maj)
+    if not df_replace['df'].empty:
+        df_replace['df']['auteur'] = normalize_auteur(df_replace['df']['auteur'])
+        df_replace['df']['id_origine'] = df_replace['df']['id_origine'].astype(str)
+        df_replace['df'] = filter_saisierror(df_replace['df'])
+
+    # for d in df.dtypes[df.dtypes == object].index:
+    #     # df[d].replace([' '], [' '],regex=True,inplace=True)
+    #     df[d] = df[d].str.normalize('NFKC')
+
+    DF = df_new.copy()
+    Cnhab = DF.columns[DF.columns.str.startswith('n_hab')]
+    if not all(DF[[*Cnhab]].astype(float).sum(axis=1) == 100):
+        print( DF[ ~(DF[[*Cnhab]].astype(float).sum(axis=1) == 100) ] )
+        # DF.loc[ (~(DF[[*Cnhab]].sum(axis=1) == 100)),'n_hab1' ] = 20
+
+
+    if not df_new.empty:
+        new_site(df_new)
+    if not df_maj.empty:
+        maj_site(df_maj)
+    if not df_replace['df'].empty:
+        replace_site(
+            df = df_replace['df'],
+            cor = 
df_replace['cor'] + ) + + + exit() + ################ + ## END SCRIPT ## + ################ + + print(df) + while True: + res = input('Voulez-vous insérer le tableau de donnée ? (Y/N)\n') + if not res[0].upper() in ['Y','N']: + continue + else: break + if res[0].upper() == 'Y': + df_ins, ins = insert_site(df) + if ins: + insert_authordata(df_ins,id_data='id',tab_data='sites',colname_rsite='id_site', tab_auteur='r_sites_auteur') + + df_ins, ins = insert_geom(df) + if ins: + insert_authordata(df_ins,id_data='id_site',tab_data='r_sites_geom',colname_rsite='id_geom_site', tab_auteur='r_geomsites_auteur') + + df_ins, ins = insert_attrsPS(df) + if ins: + insert_authordata(df_ins,id_data='id_geom_site',tab_data='r_site_param',colname_rsite='id_siteparam', tab_auteur='r_siteparam_auteur') + + df_ins, ins = insert_habPS(df) + if ins: + if df_ins.shape[0] > 4000: + df_ins1 = df_ins.iloc[:round(df_ins.shape[0]/2)].copy() + df_ins2 = df_ins.iloc[round(df_ins.shape[0]/2):].copy() + insert_cbPS(df_ins1) + insert_cbPS(df_ins2) + else: + insert_cbPS(df_ins) + insert_authordata( + df_ins[df_ins.columns.drop('code_hab')],id_data='id_geom_site',tab_data='r_site_habitat', + colname_rsite='id_sitehab', tab_auteur='r_sitehab_auteur') + + # df_ins, ins = insert_legendCartoPS(df)%_embrous + # if ins: + # insert_authordata(df_ins,tab_data='r_site_legcarto',colname_rsite='id_sitecarto', tab_auteur='r_sitecarto_auteur') + + +sql = """ +DELETE FROM sites.sites +WHERE id in (SELECT id_site FROM sites.r_sites_geom where id_lot = 20); +""" +with pycen.con.begin() as cnx: + cnx.execute(sql) + diff --git a/3_AZALEE/insert_zh.py b/3_AZALEE/insert_zh.py new file mode 100644 index 0000000..68c06d7 --- /dev/null +++ b/3_AZALEE/insert_zh.py @@ -0,0 +1,1031 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +#Nom : : insert_zh.py +#Description : Insertion de nouveaux sites et mise à jour des anciens. +#Copyright : 2021, CEN38 +#Auteur : Colas Geier +#Version : 1.0 + +from binascii import Error, Incomplete +import pandas as pd +import geopandas as gpd +from pyproj import crs +import pycen +con = pycen.con + +FILE_PATH = '/home/colas/Documents/9_PROJETS/1_ZH/MAJ/reinventairezhisre/' +GEOM_PATH = '20231011_Zone_Humide_Les_Chanines_Typhaie_Saint_Maurice_l_Exil.shp' +DATA_PATH = 'Tableau_saisie_ZH_Les_Chanines.xlsx' + + +def crsp_colSite(df): + ''' + Dictionnaire pour la table sites.sites + de correspondance des colonnes : + id, nom, remarques, date_deb. + ''' + df.columns = df.columns.str.lower() + return df.rename(columns={ + **dict.fromkeys(['site_cod','site_code','CODE_SITE'], 'id'), + 'site_name': 'nom', + 'zh_remarks': 'remarques', + 'editdate': 'date_deb', + }) + +def crsp_colGeomSite(df): + ''' + Dictionnaire pour la table sites.r_sites_geom + de correspondance des colonnes : + id_site, date, link_pdf, geom, rmq_fct_majeur, + rmq_interet_patri, rmq_bilan_menace, rmq_orient_act, + rmq_usage_process. + ''' + df.columns = df.columns.str.lower() + return df.rename(columns={ + 'site_code': 'id_site', + 'site_cod': 'id_site', + 'date_modif': 'date', + 'www': 'link_pdf', + 'geometry': 'geom', + 'fct_valeurs_majeurs': 'rmq_fct_majeur', + 'int_patr_majeur': 'rmq_interet_patri', + 'bilan_menaces_facteurs': 'rmq_bilan_menace', + 'orient_actions': 'rmq_orient_act', + 'usages_process': 'rmq_usage_process', + }) + +def crsp_colSiteHab(df): + ''' + Dictionnaire pour la table zones_humides.r_site_habitat + de correspondance des colonnes : + id_site, date, id_cb. 
+ ''' + df.columns = df.columns.str.lower() + return df.rename(columns={ + 'site_code': 'id_site', + 'site_cod': 'id_site', + 'date_modif': 'date', + 'cb_cod': 'id_cb', + 'code_cb': 'id_cb', + }) + +def crsp_colSiteUsgPrss(df): + ''' + Dictionnaire pour la table zones_humides.r_site_usageprocess + de correspondance des colonnes : + id_site, date, remarques. + ''' + df.columns = df.columns.str.lower() + return df.rename(columns={ + 'site_code': 'id_site', + 'site_cod': 'id_site', + 'date_modif': 'date', + 'remarks': 'remarques', + }) + +def crsp_colSiteRegHydro(df): + ''' + Dictionnaire pour la table zones_humides.r_site_reghydro + de correspondance des colonnes : + id_site, date, in, out, perm, rmq_toponymie. + ''' + df.columns = df.columns.str.lower() + return df.rename(columns={ + 'site_code': 'id_site', + 'site_cod': 'id_site', + 'site_cod.1': 'id_site', + 'site_cod.2': 'id_site', + 'date_modif': 'date', + 'entree_eau': 'in', + 'sortie_eau': 'out', + 'perm_in': 'perm', + 'perm_out': 'perm', + 'topoinfl': 'rmq_toponymie', + 'topooutfl': 'rmq_toponymie', + }) + +def crsp_colSiteFct(df): + ''' + Dictionnaire pour la table zones_humides.r_site_fctecosociopatri + de correspondance des colonnes : + id_site, date, remarques. + ''' + df.columns = df.columns.str.lower() + return df.rename(columns={ + 'site_code': 'id_site', + 'site_cod': 'id_site', + 'date_modif': 'date', + 'memo': 'description', + }) + +def crsp_idUsgPrss(df,table_param,id_col,nom_col,description=False): + ''' + Réalise la correspondance entre les usages + ou les processus naturels et leur identifiants respectifs. + ''' + df = df.copy() + df[nom_col] = df[nom_col].str[0].str.upper() + df[nom_col].str[1:] + # df[nom_col] = df[nom_col].str.capitalize() + ids = pycen.zh()._get_param(param_table=table_param) + if description: + df[nom_col] = df[nom_col].replace([*ids.description], [*ids.nom]) + df[id_col] = df[nom_col].replace([*ids.nom], [*ids.id]) + # df[id_col] = df[nom_col].replace([*ids.nom.str.capitalize()], [*ids.id]) + result = any(elem in [*df[id_col].unique()] for elem in [*ids.id]) + if not result: + df[nom_col] = df[nom_col].str[0].str.upper() + df[nom_col].str[1:].str.lower() + df[id_col] = df[nom_col].replace([*ids.nom], [*ids.id]) + + return df + + +def get_id_auteur(lst_autor): + ''' + Récupération des identifiants auteurs en + fonction de la liste des auteurs fournis. + ''' + sch = 'personnes' + tab = 'personne' + sql = """ + WITH tmp as + (SELECT id, TRIM(CONCAT(UPPER(nom),' ',prenom)) as auteur FROM {sch}.{tab} + UNION ALL + SELECT id, TRIM(CONCAT(nom,' ',prenom)) as auteur FROM {sch}.{tab}), + tmp2 as + (SELECT + p.id, TRIM(UPPER(p.nom)||' '||p.prenom||' '||CONCAT('(',o."abbrev",')')) as auteur + FROM {sch}.{tab} p join {sch}.organisme o on p.id_organisme=o.id), + tmp3 as + (SELECT + p.id, TRIM(UPPER(p.nom)||' '||p.prenom||' '||CONCAT('(',o.nom,')')) as auteur + FROM {sch}.{tab} p join {sch}.organisme o on p.id_organisme=o.id), + tmp4 as ( + SELECT * FROM tmp + UNION ALL + SELECT * FROM tmp2 + UNION ALL + SELECT * FROM tmp3 + ) + SELECT DISTINCT * FROM tmp4 + WHERE auteur {symb} %(lst_autor)s + ORDER BY 1;""".format( + sch=sch, + tab=tab, + symb='IN' if len(lst_autor) > 1 else '=') + df = pd.read_sql( + sql = sql, + con = con, + params = {'lst_autor': tuple(lst_autor) }) + return df + + +def forme_typsite(df): + ''' + Conversion des types de sites en identifiants + pour insertion en BDD. 
+ ''' + df = df.copy() + typsite = pycen.sites._get_typ_site() + if 'type_site' in df.columns : + print(' A CODER !!!!') + df['id_type_site'] = df.type_milieux.replace([*typsite.nom], [*typsite.id]) + df.drop(columns=['type_site'], inplace=True) + else: + df['id_type_site'] = typsite.loc[typsite.nom == 'N.D.', 'id'].values[0] + return df + + +def forme_typsdage(df): + ''' + Conversion des types de sites en identifiants + pour insertion en BDD. + ''' + df = df.copy() + typsdage = pycen.sites._get_typo_sdage() + if 'typo_sdage' in df.columns : + df['id_typo_sdage'] = df.typo_sdage.replace([*typsdage.nom], [*typsdage.id]) + df.drop(columns=['typo_sdage'], inplace=True) + else: + df['id_type_milieu'] = typsdage.loc[typsdage.nom == 'Inconnu', 'id'].values[0] + return df + + +def forme_typmilieu(df): + ''' + Conversion des types de milieux en identifiants + pour insertion en BDD. + ''' + df = df.copy() + typmilieux = pycen.sites._get_typ_milieux() + if 'type_milieux' in df.columns : + df['id_type_milieu'] = df.type_milieux.replace([*typmilieux.nom], [*typmilieux.id]) + df.drop(columns=['type_milieux'], inplace=True) + else: + df['id_type_milieux'] = typmilieux.loc[typmilieux.nom == 'N.D.', 'id'].values[0] + return df + + +def drop_maxDateGeomData(shP): + ''' + Récupération des attributs antérieurs + à la géométries la plus actuelle. + Fonction créée dans le cas où la géométrie + la plus actuelle ne possède pas encore d'attributs. + ''' + df = shP.copy() + keys = [*df.keys()] + for k in keys: + if k != 'infos' and isinstance(df[k], pd.DataFrame): + # date_col = [col for col in df[k].columns if 'date' in col] + if 'date_geom' in df[k].columns: + date_col = 'date_geom' + elif 'date' in df[k].columns: + date_col = 'date' + df[k] = df[k].sort_values(date_col) + m = df[k][date_col].max() + index = df[k][df[k][date_col]==m].index + df[k] = df[k].drop(labels = index) + elif k != 'infos' and isinstance(df[k], dict): + kkeys = [*df[k].keys()] + if 'title' in kkeys: + kkeys.remove('title') + for kk in kkeys: + date_col = 'date_geom' + if len(df[k][kk][date_col].unique()) > 1: + df[k][kk] = df[k][kk].sort_values(date_col) + m = df[k][kk][date_col].max() + index = df[k][kk][df[k][kk][date_col]==m].index + df[k][kk] = df[k][kk].drop(labels = index) + + return df + + +def insert_autorSiteData(df): + ''' + Insertion des auteurs des sites. + ''' + df = df.copy() + df = df[['id', 'auteur']].copy() + if df.auteur.str.contains(',|&').any(): + df = (df + .set_index('id',drop=True) + .auteur.str.split(',|&') + .explode() + .str.strip() + .reset_index(drop=False)) + df_autor = get_id_auteur([*df.auteur.unique()]) + df.replace([*df_autor.auteur],[*df_autor.id], inplace=True) + df.columns = ['id_site','id_auteur'] + df.to_sql( + name='r_sites_auteur', + con=con, + schema='sites', + index=False, + if_exists='append', + method='multi' + ) + return 'Insert author sites data OK !' + + +def insert_autorGeomData(df): + ''' + Insertion des auteurs des geometries. 
+    '''
+    df = df.copy()
+    df = df[['id', 'auteur']].copy()
+    if df.auteur.str.contains(',|&').any():
+        df = (df
+              .set_index('id',drop=True)
+              .auteur.str.split(',|&')
+              .explode()
+              .str.strip()
+              .reset_index(drop=False))
+    df_autor = get_id_auteur([*df.auteur.unique()])
+    df.replace([*df_autor.auteur],[*df_autor.id], inplace=True)
+    df.columns = ['id_geom_site','id_auteur']
+    try :
+        df.to_sql(
+            name='r_geomsites_auteur',
+            con=con,
+            schema='sites',
+            index=False,
+            if_exists='append',
+            method='multi'
+        )
+    except Exception as e:
+        print('ERROR : News author sites insert impossible !')
+        print(e)
+    return 'Insert author sites data OK !'
+
+
+def name_site(df):
+    '''
+    Build the site code (dept + org + num) of each new site.
+    '''
+    df['id'] = df.index
+    srd = (pycen.ref.territoire.sdr_ressources_en_eau()
+           .drop(columns=['superficie','perimetre']))
+    dep = pycen.ref.territoire.dept_isere()
+    if 'dept' not in df.columns:
+        isindep = df.intersects(dep.unary_union)
+        df.loc[isindep,'dept'] = '38'
+    if df.dept.isna().any():
+        notindept = df[df.dept.isna()].index.tolist()
+        # raising a bare string is invalid in Python 3
+        raise ValueError('ERROR : some wetlands fall outside the department :\n %s'%str(notindept))
+
+    df['surf'] = df.area
+    tmp = gpd.overlay(srd,df,how='intersection',make_valid=True,keep_geom_type=False)
+    tmp['perc_rcvmt'] = 100*tmp.area/tmp['surf']
+    tmp.sort_values(['id','perc_rcvmt'],inplace=True)
+    # keep, for each site, the SDR zone with the largest overlap
+    # (duplicated(keep='first') silently dropped sites intersecting a single zone)
+    tmp = tmp.drop_duplicates('id', keep='last')
+    df = (df.merge(tmp[['id','code_tr']],how='left',on='id')
+          .rename(columns={'code_tr':'org'}))
+
+    for s in srd.iterrows():
+        org = s[1].code_tr
+        num = con.execute("SELECT max(num::int) FROM sites.sites WHERE org = '%s'"%org).one()[0]
+        num = num if num is not None else 0  # assumption: numbering starts at 1 for a new org
+        test = df.org == org
+        dim = df[test].shape[0]
+        # number from max+1 upwards, as str so the zero-padding below applies
+        df.loc[test, 'num'] = [str(n) for n in range(num+1, num+1+dim)]
+
+    df['num'] = df['num'].str.zfill(4)
+
+    df['id'] = df.dept + df.org + df.num
+    return df
+
+
+def insertNewSite(df):
+    '''
+    Insert the new sites.
+
+    columns :
+    ---------
+    'site_cod', 'dept', 'org', 'num', 'site_name', 'editdate',
+    'auteur', 'organisat', 'maj', 'type_milieux'
+    '''
+    df = df.copy()
+
+    columns = ['id', 'nom', 'date_deb', 'old_site', 'dept', 'org',
+               'num', 'remarques', 'auteur', 'type_site', 'typo_sdage', 'type_milieux']
+    df = crsp_colSite(df)
+    if 'id' not in df.columns:
+        df = name_site(df)
+    c = df.columns[df.columns.isin(columns)]
+    df['date_deb'] = pd.to_datetime(df['date_deb'])
+    df = df[c]
+    df = forme_typsite(df)
+    df = forme_typmilieu(df)
+    df = forme_typsdage(df)
+    c = df.columns
+    try:
+        df[c.drop('auteur')].to_sql(
+            name='sites',
+            con=con,
+            schema='sites',
+            index=False,
+            if_exists='append',
+            method='multi'
+        )
+        try :
+            insert_autorSiteData(df)
+        except Exception as e:
+            print('ERROR : News author sites insert impossible !')
+            print(e)
+    except Exception as e :
+        print('ERROR : News sites insert impossible !')
+        print(e)
+
+
+def insertNewGeom(gdf, rmq_geom=pd.DataFrame()):
+    '''
+    Insertion des nouvelles géométries des sites
+    et les éventuelles remarques associées.
+
+    paramètres :
+    ---------
+    gdf : GeoDataFrame. Tableau des géométries
+        identifiées par leur code site respectifs.
+    rmq_geom : DataFrame. 
Tableau des remarques + globales du sites pour ces géométries: + 'fct_valeurs_majeurs', 'int_patr_majeur', + 'bilan_menaces_facteurs', 'orient_actions' + ''' + df = gdf.copy() + columns = [ x['name'] for x in pycen.sites.columns_geomtab ] + ['auteur'] + df = crsp_colGeomSite(df) + + if df.crs.srs.split(':')[-1] != '2154': + df.to_crs(2154,inplace=True) + + if not rmq_geom.empty : + rmq_geom.columns = rmq_geom.columns.str.lower() + rmq_geom = crsp_colGeomSite(rmq_geom) + if 'date' in df.columns and 'date' in rmq_geom.columns: + del rmq_geom['date'] + if 'auteur' in df.columns and 'auteur' in rmq_geom.columns: + del rmq_geom['auteur'] + df = df.merge(rmq_geom, on='id_site', how='left') + + c = df.columns[df.columns.isin(columns)] + df = df[c] + df = pycen.tools.Polygons_to_MultiPolygon(df) + try: + df[c.drop('auteur')].to_postgis( + name='r_sites_geom', + con=con, + schema='sites', + #geom_col='geom', + index=False, + if_exists='append', + ) + except Exception as e : + print('ERROR : News sites insert impossible !') + print(e) + + sql = 'SELECT id, id_site, date FROM sites.r_sites_geom WHERE id_site IN %(id_site)s AND date IN %(date)s ;' + tmp = pd.read_sql(sql=sql, con=con, params= { + 'id_site': tuple(df.id_site), + 'date': tuple(df.date) + }) + tmp['date'] = tmp['date'].astype(str) + df['date'] = df['date'].astype(str) + df = df.merge(tmp, on=['id_site', 'date'], how='left') + try : + insert_autorGeomData(df) + except Exception as e: + print('ERROR : News author sites insert impossible !') + print(e) + + +def get_idGeomSite(lst_site): + ''' + Récupération des identifiants des géometries + des sites les plus récentes. + ''' + id_geom = pycen.zh().get_sitesGeom( + id_site=[*lst_site], last_update=True) + id_geom = id_geom[['id','id_site']] + id_geom.columns = ['id_geom_site','id_site'] + return id_geom + + +def select_ID(df,sch,tab): + ''' + Récupération des identifiants des nouvelles + lignes insérées. + + paramètres : + --------- + df : pd.DataFrame, Tableau des données + attributaires insérées. + sch : str. Nom du schéma où sont stockés les attributs. + tab : str. Nom de la table. + ''' + sql = 'SELECT * FROM %s.%s'%(sch,tab) + df = df.copy().dropna(axis=1) + if 'date' in df.columns: + df['date'] = df['date'].astype(str) + if 'id_cb' in df.columns: + df['id_cb'] = df['id_cb'].astype(str) + column = '(%s)'%','.join(df.columns) + df_tuple = tuple(df.itertuples(index=False, name=None)) + sql = sql + ' WHERE %s IN %s'%(column,df_tuple) + if 'nan' in sql: sql = sql.replace('nan','NULL') + if '),)' in sql: sql = sql.replace('),)','))') + if '"' in sql: sql = sql.replace('"',"'") + + # print(sql) + return pd.read_sql( + sql=sql, + con=con,) + + +def insertAttrs(sh2, sch, tab): + ''' + Insertion des nouveaux attributs des sites. + Ne Concerne pas les auteurs des attributs + (cf. insertAutorAttrs). + + paramètres : + --------- + df : pd.DataFrame, Tableau des données + attributaires insérées. + sch : str. Nom du schéma où sont stockés les attributs. + tab : str. Nom de la table. 
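+
+    Example (hypothetical values; the actual schema name comes from
+    pycen.zh().schema):
+        select_ID(df[['id_site', 'date', 'id_cb']],
+                  sch='zones_humides', tab='r_site_habitat')
+        # -> the zones_humides.r_site_habitat rows matching the tuples of df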
+ ''' + df = sh2.copy() + df = df.merge(get_idGeomSite(df.id_site), on=['id_site'], how='left') + df.dropna(subset=['id_geom_site'],inplace=True) + df['id_geom_site'] = df['id_geom_site'].astype(int) + tab_cols = pycen.con.dialect.get_columns(con,schema=sch,table_name=tab) + columns = [ x['name'] for x in tab_cols ] + ['auteur'] + c = df.columns[df.columns.isin(columns)] + df['date'] = pd.to_datetime(df['date']) + df = df[c] + if tab == 'r_site_sub': + df.dropna(subset=['id_freqsub'],inplace=True) + try: + df[c.drop('auteur')].to_sql( + name=tab, + con=con, + schema=sch, + index=False, + if_exists='append', + method='multi' + ) + print('Insert attrs sites data OK !') + return [df,True] + except Exception as e : + print('ERROR : News sites insert impossible !') + print(e) + return [df,False] + + +def insertAutorAttrs(df,colname_rsite, sch, tab): + ''' + Insertion des auteurs des attributs. + + paramètres : + --------- + df : pd.DataFrame, Tableau des données + attributaires insérées. + colname_rsite : str. Nom du champs + relationnelle de la table auteur, + se rapportant à la table attributaire. + sch : str. Nom du schéma où se situe la table auteur. + tab : str. Nom de la table auteur. + ''' + df = df.copy() + df = df[['id', 'auteur']].copy() + df.drop_duplicates(inplace=True) + if df.auteur.str.contains(',|&').any(): + df = (df + .set_index('id',drop=True) + .auteur.str.split(',|&') + .explode() + .str.strip() + .reset_index(drop=False)) + # tmp = df.auteur.str.split(' & ', expand =True).stack().droplevel(-1) + # df = pd.concat([df,tmp],axis=1) + # del df['auteur'] + # df.columns = ['id', 'auteur'] + df_autor = get_id_auteur([*df.auteur.unique()]) + df.replace([*df_autor.auteur],[*df_autor.id], inplace=True) + df.columns = [colname_rsite, 'id_auteur'] + try : + df.to_sql( + name=tab, + con=con, + schema=sch, + index=False, + if_exists='append', + method='multi' + ) + print('Insert author sites data OK !') + return 'Insert author sites data OK !' + except Exception as e: + print('ERROR : News author sites insert impossible !') + print(e) + + +def insertAttrsCB(sh3): + ''' + Insertion des nouvelles version des attributs + habitats des sites zones humides. + Affectation des attributs à la géométrie la plus récente. + + columns : + --------- + 'site_cod', 'cb_cod', 'desc_cbiotope', 'auteur', 'date' + ''' + tab = 'r_site_habitat' + sch = pycen.zh().schema + df = sh3.copy() + df = crsp_colSiteHab(df) + + if df.id_cb.dtype == float: + df.id_cb = df.id_cb.astype(str) + df.id_cb = df.id_cb.replace('.0','',regex=True) + df,ins = insertAttrs(df,sch, tab) + + if ins: + ids = select_ID(df[df.columns.drop('auteur')],sch,tab) + same_col = df.columns[df.columns.isin(ids.columns)] + if 'date' in same_col: + df['date'] = df['date'].astype(str) + ids['date'] = ids['date'].astype(str) + df = df.merge(ids, on=[*same_col], how='left') + insertAutorAttrs(df,colname_rsite='id_sitehab', sch=sch, tab='r_rsitehab_auteur') + + +def insertAttrsDelim(sh3): + ''' + Insertion des nouvelles version des attributs + de délimitation et de definition de l'espace de + fonctionnalité des sites zones humides. + Affectation des attributs à la géométrie la plus récente. 
+ + columns : + --------- + 'site_cod', 'crit_def_esp', 'crit_delim', 'auteur', 'date' + ''' + tab = 'r_site_critdelim' + sch = pycen.zh().schema + df = sh3.copy() + inds = df.columns[~df.columns.isin(['crit_def_esp','crit_delim'])] + df.set_index([*inds],inplace=True) + df = df.stack().reset_index(-1) + df.columns = ['type', 'nom'] + df.reset_index(inplace=True) + df.drop_duplicates(inplace=True) + + param = pycen.zh()._get_param(param_table='param_delim_fct', type_table='type_param_delim_fct') + for typ in df.type.unique(): + param_tmp = param.loc[param.type==typ] + df.loc[df.type == typ,'id_crit_delim'] = df.loc[df.type == typ,'nom'].replace([*param_tmp.nom],[*param_tmp.id]) + df['id_crit_delim'] = df['id_crit_delim'].astype(int) + df = crsp_colSiteHab(df) + df,ins = insertAttrs(df,sch, tab) + + if ins: + ids = select_ID(df[df.columns.drop('auteur')],sch,tab) + same_col = df.columns[df.columns.isin(ids.columns)] + if 'date' in same_col: + df['date'] = df['date'].astype(str) + ids['date'] = ids['date'].astype(str) + df = df.merge(ids, on=[*same_col], how='left') + insertAutorAttrs(df,colname_rsite='id_sitedelim', sch=sch, tab='r_rsitedelim_auteur') + + +def insertAttrsUsgPrss(sh4): + ''' + Insertion des nouvelles version des attributs + usages et processus naturels des sites zones humides. + Affectation des attributs à la géométrie la plus récente. + + columns : + --------- + 'site_cod', 'remarks', 'activ_hum_autre', 'temp', 'affich_act', + 'localisation', 'activite_hum', 'typo_sdage', 'impact', + 'auteur','date' + ''' + tab = 'r_site_usageprocess' + sch = pycen.zh().schema + df = sh4.copy() + df = crsp_idUsgPrss(df,table_param='param_activ_hum',id_col='id_activ_hum', nom_col='activite_hum') + df = crsp_idUsgPrss(df,table_param='param_impact',id_col='id_impact', nom_col='impact') + if len(df.localisation.max()) > 10: descript = True + else : descript = False + df = crsp_idUsgPrss( + df,table_param='param_position',id_col='id_position', + nom_col='localisation',description=descript) + df = crsp_colSiteUsgPrss(df) + df,ins = insertAttrs(df,sch, tab) + + if ins: + ids = select_ID(df[df.columns.drop('auteur')],sch,tab) + same_col = df.columns[df.columns.isin(ids.columns)] + if 'date' in same_col: + df['date'] = df['date'].astype(str) + ids['date'] = ids['date'].astype(str) + for col in same_col: + if df[col].dtype != ids[col].dtype: + print(col) + if df[col].dtype == float: + ids[col] = ids[col].astype(float) + elif df[col].dtype == object: + ids[col] = ids[col].astype(object) + df = df.merge(ids, on=[*same_col], how='left') + insertAutorAttrs(df,colname_rsite='id_siteusage', sch=sch, tab='r_rsiteusage_auteur') + + +def insertAttrsRegHydro(sh5): + ''' + Insertion des nouvelles version des attributs + régimes hydriques des sites zones humides. + Affectation des attributs à la géométrie la plus récente. 
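+
+    Note: the sheet is split by position (iloc) into water-input,
+    water-output and submersion/connection blocks, so the column
+    order listed below must match the Excel template.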
+ + columns : + --------- + 'site_cod', 'topoinfl', 'entree_eau', 'perm_in', 'unnamed: 4', + 'unnamed: 5', 'unnamed: 6', 'site_cod.1', 'topooutfl', 'sortie_eau', + 'perm_out', 'unnamed: 11', 'unnamed: 12', 'unnamed: 13', 'unnamed: 14', + 'site_cod.2', 'sub_orig', 'sub_freq', 'sub_etend', 'connexion', + 'auteur', 'date' + ''' + dic = { + 'Canaux / fossés':'Canaux/fossés', + 'Nappe':'Nappes', + } + sch = pycen.zh().schema + tab_regHyd = 'r_site_reghydro' + tab_con = 'r_site_type_connect' + tab_sub = 'r_site_sub' + p_inout_perm = pycen.zh()._get_param(param_table='param_permanence') + p_inout_rHyd = pycen.zh()._get_param(param_table='param_reg_hydro') + p_con_sub = pycen.zh()._get_param(param_table='param_sub', type_table='type_param_sub',type_court=False) + p_con_cnx = pycen.zh()._get_param(param_table='param_type_connect') + + df = sh5.copy() + df.set_index(['auteur', 'date'],inplace=True) + dfin = df.iloc[:,:4].copy() + dfout = df.iloc[:,7:11].copy() + dfcon = df.iloc[:,15:].copy() + dfin = crsp_colSiteRegHydro(dfin).set_index('id_site',append=True) + dfout = crsp_colSiteRegHydro(dfout).set_index('id_site',append=True) + dfcon = crsp_colSiteRegHydro(dfcon).set_index('id_site',append=True) + dfin = dfin.dropna(how='all').reset_index(drop=False) + dfout = dfout.dropna(how='all').reset_index(drop=False) + dfcon = dfcon.dropna(how='all').reset_index(drop=False) + + col_inout = ['in','out'] + dfinout = pd.concat([dfin,dfout]) + dfinout.set_index([*dfinout.columns[~dfinout.columns.isin(col_inout)]], inplace=True) + dfinout = dfinout.stack().reset_index(-1, drop=False) + dfinout.columns = ['in_out','reg_hydro'] + dfinout.reset_index(inplace=True, drop=False) + dfinout['reg_hydro'] = dfinout['reg_hydro'].str[0].str.upper() + dfinout['reg_hydro'].str[1:] + dfinout['reg_hydro'].replace(dic, inplace=True) + dfinout.in_out.replace(['in','out'],[True,False],inplace=True) + dfinout['id_reg_hydro'] = dfinout['reg_hydro'].str.lower() \ + .replace([*p_inout_rHyd.nom.str.lower()],[*p_inout_rHyd.id]) + dfinout['id_permanance'] = dfinout['perm'].str.lower() \ + .replace([*p_inout_perm.nom.str.lower()],[*p_inout_perm.id.astype(str)]) + + if dfcon.columns.str.contains('sub').any(): + dfcon['sub_freq'].fillna('Inconnu',inplace=True) + dfcon['sub_etend'].fillna('Inconnu',inplace=True) + dfcon['id_freqsub'] = dfcon['sub_freq'].str.lower() \ + .replace([*p_con_sub.nom.str.lower()],[*p_con_sub.id.astype(str)]) + dfcon['id_etendsub'] = dfcon['sub_etend'].str.lower() \ + .replace([*p_con_sub.nom.str.lower()],[*p_con_sub.id.astype(str)]) + dfcon['id_origsub'] = dfcon['sub_orig'] + else : + dfcon['id_freqsub'] = p_con_sub[ + (p_con_sub.type == 'Submersion fréquente') & + (p_con_sub.nom == 'Inconnu')].id.values[0] + dfcon['id_param_connect'] = dfcon['connexion'].str.lower() \ + .replace([*p_con_cnx.nom.str.lower()],[*p_con_cnx.id.astype(str)]) + + dfinout,ins = insertAttrs(dfinout,sch, tab_regHyd) + if ins: + ids = select_ID(dfinout[dfinout.columns.drop('auteur')],sch,tab_regHyd) + same_col = dfinout.columns[dfinout.columns.isin(ids.columns)] + if 'date' in same_col: + dfinout['date'] = dfinout['date'].astype(str) + ids['date'] = ids['date'].astype(str) + for c in same_col: + if dfinout[c].dtype != ids[c].dtype: + dfinout[c] = dfinout[c].astype(ids[c].dtype) + dfinout = dfinout.merge(ids, on=[*same_col], how='left') + insertAutorAttrs(dfinout,colname_rsite='id_sitehydro', sch=sch, tab='r_rsitehydro_auteur') + else: return + + dfconn = dfcon.copy() + dfcon,ins = insertAttrs(dfcon,sch, tab_sub) + if ins: + ids = 
select_ID(dfcon[dfcon.columns.drop('auteur')],sch,tab_sub) + ids.loc[~ids.id_etendsub.isna(),'id_etendsub'] = ids.loc[~ids.id_etendsub.isna(),'id_etendsub']\ + .astype(int).astype(str) + if ids.id_freqsub.dtype==int: + ids.id_freqsub = ids.id_freqsub.astype(str) + same_col = dfcon.columns[dfcon.columns.isin(ids.columns)] + if 'date' in same_col: + dfcon['date'] = dfcon['date'].astype(str) + ids['date'] = ids['date'].astype(str) + for col in same_col: + if dfcon[col].dtype != ids[col].dtype: + print(col) + if dfcon[col].dtype == float: + ids[col] = ids[col].astype(float) + elif dfcon[col].dtype == int: + ids[col] = ids[col].astype(int) + elif dfcon[col].dtype == object: + ids[col] = ids[col].astype(object) + dfcon = dfcon.merge(ids, on=[*same_col], how='left') + insertAutorAttrs(dfcon,colname_rsite='id_sitesub', sch=sch, tab='r_rsitesub_auteur') + else: return + + dfconn,ins = insertAttrs(dfconn,sch, tab_con) + if ins: + ids = select_ID(dfconn[dfconn.columns.drop('auteur')],sch,tab_con) + if ids.id_param_connect.dtype==int: + ids.id_param_connect = ids.id_param_connect.astype(str) + same_col = dfconn.columns[dfconn.columns.isin(ids.columns)] + if 'date' in same_col: + dfconn['date'] = dfconn['date'].astype(str) + ids['date'] = ids['date'].astype(str) + dfconn = dfconn.merge(ids, on=[*same_col], how='left') + insertAutorAttrs(dfconn,colname_rsite='id_siteconnect', sch=sch, tab='r_rsiteconnect_auteur') + else: return + + +def insertAttrsFct(sh6,nom_typ_court=False): + ''' + Insertion des nouvelles version des attributs + fonctions biologiques, écologiques, sociologiques, + patrimoniale des sites zones humides. + Affectation des attributs à la géométrie la plus récente. + + columns : + --------- + 'site_cod', 'fonction', 'typ_fonc', 'memo', 'auteur', 'date' + ''' + tab = 'r_site_fctecosociopatri' + sch = pycen.zh().schema + df = sh6.copy() + df = crsp_colSiteFct(df) + param = pycen.zh()._get_param( + param_table='param_fct_eco_socio_patri', + type_table='type_param_fct',type_court=nom_typ_court) + + for typ in df.typ_fonc.unique(): + param_tmp = param.loc[param.type==typ] + df.loc[df.typ_fonc == typ,'id_fct'] = df.loc[df.typ_fonc == typ,'fonction'] \ + .str.lower() \ + .replace([*param_tmp.nom.str.lower()],[*param_tmp.id]) + df['id_fct'] = df['id_fct'].astype(int) + df,ins = insertAttrs(df,sch, tab) + + if ins: + ids = select_ID(df[df.columns.drop('auteur')],sch,tab) + if 'description' in df.columns: + df.description = df.description.astype(str) + ids.description = ids.description.astype(str) + if df.description.str.contains("'").any(): + df.description = df.description.replace("'","''",regex=True) + if df.shape[0] != ids.shape[0]: + ids = select_ID(df[df.columns.drop(['auteur','description'])],sch,tab) + if 'description' in df.columns: + if df.description.str.contains("''").any(): + df.description = df.description.replace("''","'",regex=True) + same_col = (df.columns[df.columns.isin(ids.columns)]) + if 'date' in same_col: + df['date'] = df['date'].astype(str) + ids['date'] = ids['date'].astype(str) + df = df.merge(ids, on=[*same_col], how='left') + insertAutorAttrs(df,colname_rsite='id_sitefct', sch=sch, tab='r_rsitefct_auteur') + + +if __name__ == "__main__": + + gdf = gpd.read_file(FILE_PATH+GEOM_PATH, crs='EPSG:2154') + # lst = ['38BB0089','38BB0090','38BB0091','38BB0092'] + # gdf = gdf[gdf.site_code.isin(lst)] + DF = pd.read_excel(FILE_PATH+DATA_PATH, sheet_name=None, header=1) + lst_sheet = [*DF.keys()] + for k in lst_sheet: + if isinstance(DF[k], pd.DataFrame): + DF[k].columns = 
DF[k].columns.str.lower() + elif isinstance(DF[k], dict): + lst_tmp = [*DF[k].keys()] + for kk in lst_tmp: + if isinstance(DF[k][kk], pd.DataFrame): + DF[k][kk].columns = DF[k][kk].columns.str.lower() + + sh1 = DF[lst_sheet[0]] + sh1 = sh1[sh1.maj != 'SUPP'].copy() + sh1.editdate = pd.to_datetime(sh1.editdate) + + sh1.auteur.replace( + ['Nicolas BIRON','Alix GUEDOU'], + ['BIRON Nicolas','GUEDOU Alix'], + inplace=True) + + lst_site = list(sh1.site_cod.unique()) + gdf = gdf[gdf.site_code.isin(lst_site)].reset_index(drop=True) + gdf.rename_geometry('geom', inplace=True) + lst_site = list(gdf.site_code.unique()) + sh1 = sh1[sh1.site_cod.isin(lst_site)] + sh1.name = lst_sheet[0] + gdf['auteur'] = sh1.auteur.values[0] + + + data_date = sh1[['site_cod','upddate']].copy() + data_date.columns = ['site_cod', 'date'] + data_date.loc[data_date.date.isnull(),'date'] = sh1.loc[ + sh1.site_cod.isin( + data_date.loc[data_date.date.isnull(),'site_cod']), + 'editdate'] + data_date['date'] = pd.to_datetime(data_date['date']) + if 'date' not in gdf.columns: + gdf = gdf.merge(data_date, left_on=['site_code'], right_on=['site_cod'], how='left') + del gdf['site_cod'] + gdf['date_modif'] = gdf['date'] + del gdf['date'] + + sh2 = DF[lst_sheet[1]] + sh2['auteur'] = sh1.auteur.values[0] + sh2 = sh2.merge(data_date, on=['site_cod'], how='left') + sh2 = sh2[sh2.site_cod.isin(lst_site)] + sh2.name = lst_sheet[1] + sh3 = DF[lst_sheet[2]] + sh3['auteur'] = sh1.auteur.values[0] + sh3 = sh3[sh3.site_cod.isin(lst_site)] + sh3.name = lst_sheet[2] + sh3 = sh3.merge(data_date, on=['site_cod'], how='left') + sh4 = DF[lst_sheet[3]] + sh4['auteur'] = sh1.auteur.values[0] + sh4 = sh4.merge(data_date, on=['site_cod'], how='left') + sh4 = sh4[sh4.site_cod.isin(lst_site)] + sh4.name = lst_sheet[3] + sh5 = DF[lst_sheet[4]] + sh5['auteur'] = sh1.auteur.values[0] + if sh5.site_cod.isna().any(): + sh5.loc[sh5.site_cod.isna(),'site_cod'] = sh5.loc[sh5.site_cod.isna(),'site_cod.1'] + sh5 = sh5.merge(data_date, on=['site_cod'], how='left') + sh5 = sh5[sh5.site_cod.isin(lst_site)] + sh5.name = lst_sheet[4] + sh6 = DF[lst_sheet[5]] + sh6['auteur'] = sh1.auteur.values[0] + sh6 = sh6.merge(data_date, on=['site_cod'], how='left') + sh6 = sh6[sh6.site_cod.isin(lst_site)] + sh6.name = lst_sheet[5] + sh7 = DF[lst_sheet[6]] + sh7['auteur'] = sh1.auteur.values[0] + sh7 = sh7.merge(data_date, on=['site_cod'], how='left') + sh7 = sh7[sh7.site_cod.isin(lst_site)] + sh7.name = lst_sheet[6] + sh8 = DF[lst_sheet[7]] + sh8['auteur'] = sh1.auteur.values[0] + sh8 = sh8.merge(data_date, on=['site_cod'], how='left') + sh8 = sh8[sh8.site_cod.isin(lst_site)] + sh8.name = lst_sheet[7] + + + + add = sh1[sh1.maj=='ADD'].copy() + add = add.dropna(axis=1, how='any') + add['type_milieux'] = 'Tourbières et marais' + add = pd.merge( + add, + sh4[['site_cod','typo_sdage']].dropna(how='any').drop_duplicates(), + how='left',on='site_cod') + # add = forme_typsdage(add) + # maj = gdf[~gdf.site_code.isin(add.site_code)].copy() + + # tmp = pycen.get_sitesGeom(id_site=['38BB0052','38BB0050']) + # gdf.loc[gdf.site_code=='38BB0050','geom'] = gdf[gdf.site_code=='38BB0050'].geom + + + insertNewSite(add) + insertNewGeom(gdf, rmq_geom=sh7) + + + insertAttrsDelim(sh3) + insertAttrsCB(sh2) + insertAttrsUsgPrss(sh4) + insertAttrsRegHydro(sh5) + insertAttrsFct(sh6) + + + + maj = sh1[sh1.maj=='MAJ GEOM'].copy() + lst_maj = [*maj.site_cod] + # site = lst_maj[3] + for site in lst_maj: + print(site) + shP = pycen.zh().get_bilan(code_site=site,last_update=False) + shP = 
drop_maxDateGeomData(shP) + sh2b = shP['corine_biotope'].copy() + sh2b[['date','auteur']] = [sh1.loc[sh1.site_cod==site,'upddate'].values[0],sh1.auteur.values[0]] + del sh2b['id'] + sh3b = shP['delimitation'].copy() + sh3b[['date','auteur']] = [sh1.loc[sh1.site_cod==site,'upddate'].values[0],sh1.auteur.values[0]] + sh3b = sh3b.set_index([ *sh3b.columns[~sh3b.columns.isin(['nom_crit'])] ])\ + .unstack(level=-4).droplevel(0,axis=1).reset_index(drop=False) + del sh3b['id'] + sh4b = shP['description'].copy() + sh4b[['date','auteur']] = [sh1.loc[sh1.site_cod==site,'upddate'].values[0],sh1.auteur.values[0]] + sh4b.rename(columns={'position':'localisation'},inplace=True) + del sh4b['id'] + sh5b = shP['fonctionnement'].copy() + sh5ba = sh5b['entree_eau'].rename(columns={'id_site':'site_cod'}).drop(columns=['id']).reset_index(drop=True) + sh5bb = sh5b['sortie_eau'].rename(columns={'id_site':'site_cod'}).drop(columns=['id']).reset_index(drop=True) + sh5bc = sh5b['sub_connex'].rename(columns={'id_site':'site_cod'}).drop(columns=['id']).reset_index(drop=True) + sh5ba = sh5ba[['site_cod','rmq_toponymie','regime_hydri','permanence']].rename(columns={'regime_hydri':'entree_eau','permanence':'perm_in'}) + sh5bb = sh5bb[['site_cod','rmq_toponymie','regime_hydri','permanence']].rename(columns={'regime_hydri':'sortie_eau','permanence':'perm_out'}) + sh5bc = sh5bc[['site_cod','connexion']] + sh5ba[['unnamed: 4','unnamed: 5','unnamed: 6']] = None + sh5bb[['unnamed: 11','unnamed: 12','unnamed: 13','unnamed: 14']] = None + sh5B = pd.concat([sh5ba,sh5bb,sh5bc],axis=1) + sh5B['date'] = sh1.loc[sh1.site_cod==site,'upddate'].values[0] + sh5B['auteur'] = sh1.auteur.values[0] + sh6b = shP['fonction'].copy() + sh6b = sh6b.rename(columns={'type':'typ_fonc','nom_fct':'fonction'}) + sh6b[['date','auteur']] = [sh1.loc[sh1.site_cod==site,'upddate'].values[0],sh1.auteur.values[0]] + del sh6b['id'] + sh7b = shP['evaluation'].copy() + sh7b[['date','auteur']] = [sh1.loc[sh1.site_cod==site,'upddate'].values[0],sh1.auteur.values[0]] + + + insertAttrsCB(sh2b) + insertAttrsDelim(sh3b) + insertAttrsUsgPrss(sh4b) + insertAttrsRegHydro(sh5B) + insertAttrsFct(sh6b,nom_typ_court=True) + +# st_clusterdbscan \ No newline at end of file diff --git a/3_AZALEE/recup_CBNA_habPS.py b/3_AZALEE/recup_CBNA_habPS.py new file mode 100644 index 0000000..9c721df --- /dev/null +++ b/3_AZALEE/recup_CBNA_habPS.py @@ -0,0 +1,355 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +from statistics import geometric_mean +import geopandas as gpd +import pandas as pd +from sqlalchemy.engine import URL +from sqlalchemy import create_engine +# from shapely.geometry import Polygon +import pycen +from os import listdir, chdir +from pathlib import Path +from zipfile import ZipFile +from rasterstats import zonal_stats + + +# Path MNT +path0 = '/home/colas/Documents/9_PROJETS/2_PS/' +path0_mnt = '/home/colas/Documents/9_PROJETS/3_PGZH/' +path = path0_mnt + 'SIG/' +p_mltifct = 'multi_fonctions/IGN - BD Alti 25M/' +path2_mnt = path0+'MNT/' +file_mnt25 = 'MNT_25m.tif' +file_mnt5 = 'MNT_5m.tif' + +# Parametres bdd IN +user = 'cen_admin' +pwd = '#CEN38@venir' +adr = '192.168.0.189' +base = 'bd-cen-38' +url = URL.create('postgresql+psycopg2', + username=user, + password=pwd, + host=adr, + database=base, +) +con = create_engine(url) +# con = create_engine('postgresql+psycopg2://{0}:{1}@{2}/{3}'.format(user,pwd,adr,base), echo=False) + +def keepgeom_indept(df): + dnat = pycen.ref.get_districtNat().geom.unary_union + df['in_isere'] = df.intersects(dnat) + df = 
df[df.in_isere].copy() + df.dropna(axis=1, how='all', inplace=True) + del df['in_isere'] + return df +# df = gpd.read_postgis( +# sql = 'SELECT * FROM pelouse_seche."cr_PS_CBNA_habitats_aggreg_06_2020"', +# con = con +# ) + +lst_tab = ['"cr_VERCORS_habitats_CBNA_1999-2007"','"cr_TRIEVES+VERCORS_habitats_CBNA_2014"','"cr_ECRIN_habitats_CBNA_2014"','"cr_CHARTREUSE_habitats_CBNA_2000-2012"',] +table = lst_tab[3] +df = gpd.read_postgis( + sql = 'SELECT * FROM habitat.%s'%table, + con = con +) + +df.set_index('idfinal', inplace=True) +# df.set_index('data_id', inplace=True) +colnhab = df.columns[df.columns.str.startswith('n_hab')] +nhab = df[colnhab].copy() +nhab['SUMM'] = nhab.sum(axis=1) +nhab['SUMM'].unique() + +lst_hab = ['34.1','34.3','34.4','34.7','35.2','62.3','64.1'] +df2 = pd.DataFrame() +for h in lst_hab: + tmp = df[ + (df.code_hab1.str.contains(h,na=False))| + (df.code_hab2.str.contains(h,na=False)&(df.n_hab2 >= 20))| + (df.code_hab3.str.contains(h,na=False)&(df.n_hab3 >= 20))| + (df.code_hab4.str.contains(h,na=False)&(df.n_hab4 >= 20)) + ] + df2 = pd.concat([df2,tmp]) + +df3 = df2.copy() + +# Before 1385 / After 1422 +lst_hab2 = ['41','42','43','44','81','82','83'] + +for h2 in lst_hab2: + tmp2 = df2[ + (df2.code_hab1.str.contains('&'+h2,na=False)&(df2.n_hab1 >= 60)) + ] + df2.drop(labels=tmp2.index,inplace=True) +for h2 in lst_hab2: + tmp2 = df2[ + (df2.code_hab1.str.contains(h2+'.',na=False)&(df2.n_hab1 >= 60)) + ] + df2.drop(labels=tmp2.index,inplace=True) + + +for h2 in lst_hab2: + tmp3 = df3[ + (df3.code_hab1.str.contains(h2,na=False)&(df3.n_hab1 >= 60)) + ] + df3.drop(labels=tmp3.index,inplace=True) + + +df2 = keepgeom_indept(df2) +df3 = keepgeom_indept(df3) + + +df = df2.copy() +df.reset_index(inplace=True,drop=False) +# ps = pycen.ps.get_sitesGeom() +sql = 'SELECT * FROM ps.v_pelouseseches_all' +PS = gpd.read_postgis(sql,pycen.con) +ps = PS[PS.source==table.replace('"','')].copy() +# 1385 + + + +not_in_azalee = df2[~df2.index.isin(df3.index)].copy() +in_azalee = df2[df2.index.isin(df3.index)].copy() + +not_in_azalee.to_file(path0+'CBNA/MISSING_DATA_INDB/chartreuse.gpkg',layer='not_in_azalee') +in_azalee.to_file(path0+'CBNA/MISSING_DATA_INDB/chartreuse.gpkg',layer='in_azalee') + +ps.columns + +# Drop poly who intersect poly non-CBNA +# ps_autre = ps[~ps.auteur_geom.str.contains('CBNA')] +# intersect = gpd.sjoin(df,ps_autre,op='intersects').idfinal.tolist() +# drop_index = df[df.idfinal.isin(intersect)].index +# df.drop(drop_index,inplace=True) + +# Drop poly who intersect poly CBNA +# ps_cbna = ps[ps.auteur_geom.str.contains('CBNA')] +# intersect = gpd.sjoin( +# df, +# gpd.GeoDataFrame( geometry=ps_cbna.centroid), +# op='intersects').idfinal.tolist() +# drop_index = df[df.idfinal.isin(intersect)].index +# df.drop(drop_index,inplace=True) +from os.path import join +PATH = '/home/colas/Documents/9_PROJETS/2_PS/CBNA/MISSING_DATA_INDB' +FILE = 'CHARTREUSE.gpkg' +df = gpd.read_file(join(PATH,FILE)) + + +if 'date' not in df.columns and 'annee' in df.columns: + df.loc[~df.annee.isna(),'date'] = df.loc[~df.annee.isna(),'annee'].astype(str) + '-01-01' + df['date'] = pd.to_datetime(df['date']) + +colnhab2 = df2.columns[df2.columns.str.startswith('n_hab')] +nhab2 = df2[colnhab2].copy() +nhab2['SUMM'] = nhab2.sum(axis=1) +nhab2['SUMM'].unique() + +if table == '"cr_CHARTREUSE_habitats_CBNA_2000-2012"': + dic = { + 'JCV':'VILLARET Jean-charles', + 'GP' :'PACHE Gilles', + 'AM' :'MIKOLAJCZAK Alexis', + 'TL' :'LEGLAND Thomas', + 'HM' :'MERLE Hugues', + 'CC' :'Crassous Claire', + # 'AL' + # 
'LA' + # 'JC' + } + df.obs = df.obs.replace(dic) + df.loc[~df.obs.isin(['ONF38',*dic.values()]),'obs'] = 'CBNA' + df.loc[df.structur=='ONF 38','structur'] = 'ONF38' +if table == '"cr_ECRIN_habitats_CBNA_2014"': + df['auteur'] = 'PnE' + df['structure'] = 'PnE' + df['date'] = '2014-01-01' + df['date'] = pd.to_datetime(df['date']) +if table == '"cr_TRIEVES+VERCORS_habitats_CBNA_2014"': + df['auteur'] = 'CBNA' + df['structure'] = 'CBNA' + df['date'] = '2014-01-01' + df['date'] = pd.to_datetime(df['date']) +if table == '"cr_VERCORS_habitats_CBNA_1999-2007"': + df['structure'] = 'CBNA' +if table in [ + '"cr_VERCORS_habitats_CBNA_1999-2007"','"cr_TRIEVES+VERCORS_habitats_CBNA_2014"' +]: + df.loc[df.statut=="Habitat d'intérêt communautaire",'statut'] = \ + "Communautaire" + df.loc[df.statut=="Habitat d'intérêt communautaire retenu prioritaire",'statut'] = \ + "Prioritaire" + df.loc[df.statut=="Habitat d'intérêt communautaire, retenu prioritaire",'statut'] = \ + "Prioritaire" + df.loc[ + df.statut=="""Habitat d'intérêt communautaire, retenu prioritaire pour les sites riches en orchidées""", + 'statut'] = "Prioritaire" + df.loc[ + df.statut=="""Habitat d'intérêt communautaire retenu prioritaire pour les sites riches en orchidées""", + 'statut'] = "Prioritaire" + df.loc[ + df.statut=="""Habitat communautaire, retenu prioritaire pour les sites riches en orchidées""", + 'statut'] = "Prioritaire" + df.loc[df.statut=="Habitat d'intérêt communautaire retenu prioritaire",'statut'] = \ + "Prioritaire" +if table in [ + '"cr_CHARTREUSE_habitats_CBNA_2000-2012"', '"cr_ECRIN_habitats_CBNA_2014"']: + dic = {'PR':'Prioritaire','IC':'Communautaire','NC':'Non communautaire'} + df.statut.replace(dic,inplace=True) + +df.reset_index(inplace=True, drop=True) + +df.to_postgis( + 'ps'+table[3:-1],pycen.con_bdcen,'pelouse_seche',geom_col='geom',if_exists='replace' +) + +# [CROSS_MNT] +ddf = df[['idfinal','geom']].copy() +ddf.set_geometry('geom',inplace=True) + +# home = str(Path.home()) +# chdir(path+p_mltifct) +# Dir = listdir() +# Dir = [x for x in Dir if '.zip' in x] +# # stats = pd.DataFrame() +# for i, d in enumerate(Dir): +# zip = ZipFile(d).namelist() +# z = [z for z in zip if 'MNT' in z][0] +# print(z) +# tmp = zonal_stats(ddf.geom,'/vsizip/{zip}/{mnt}'.format(zip=d,mnt=z),stats="max") +# tmp = pd.DataFrame(tmp) +# tmp.columns = ['max%s'%i] +# if i == 0 : +# stats = tmp +# else: +# stats = pd.merge(stats,tmp,how='left',left_index=True,right_index=True) + +# stats['bilan'] = stats.max(axis=1) + +# tmp = pd.merge(df,stats['bilan'],left_index=True,right_index=True) +# tmp.bilan = tmp.bilan.round() + +# zonal_tmp = zonal_stats(ddf.geom,path2_mnt+file_mnt25,stats="mean max",nodata=0,all_touched=True) + +# import rasterio +# with rasterio.open(path2_mnt+file_mnt5) as src: +# affine = src.transform +# array = src.read(1) +# zonal_tmp3 = zonal_stats(ddf, array, affine=affine,stats="mean max",nodata=0,all_touched=True) + + +# zonal_tmp = pd.DataFrame(zonal_tmp) +# zonal_tmp3 = pd.DataFrame(zonal_tmp3) +# zonal_tmp.columns = ['max_alti','mean_alti'] +# zonal_tmp3.columns = ['max_alti','mean_alti'] +# tmp = pd.merge(df,zonal_tmp2,left_index=True,right_index=True) +from pycen import con as con_aza +sql = 'SELECT * FROM ps.v_pelouseseches_noalti' +ddf = gpd.read_postgis(sql,con_aza) +zonal_tmp2 = zonal_stats(ddf,path2_mnt+file_mnt5,stats="max",nodata=0,all_touched=True) +zonal_tmp2 = pd.DataFrame(zonal_tmp2) +zonal_tmp2.columns = ['max_alti'] +tmp = pd.concat([ddf,zonal_tmp2], axis=1) +tmp = tmp.set_geometry('geom', crs=2154) 
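+# Altitude screening: only dry grasslands (PS) whose highest point is at or
+# below 1200 m are kept for import. rasterstats.zonal_stats returns one dict
+# per input geometry, so with stats="max" each row gets the highest MNT cell
+# touched by its polygon. Minimal sketch of the pattern (hypothetical
+# `polygons` GeoDataFrame, same MNT raster as above):
+# stats = pd.DataFrame(zonal_stats(polygons, path2_mnt+file_mnt5, stats='max', nodata=0, all_touched=True))
+# polygons['max_alti'] = stats['max']
+# keep = polygons[polygons.max_alti <= 1200]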
+tmp['infeq_1200'] = tmp.max_alti <= 1200
+tmp2 = tmp[['site_code','infeq_1200']].copy()
+tmp2.rename(columns={'site_code':'id_site'},inplace=True)
+tmp2.to_sql('r_infeq_1200m',con_aza,'ps',if_exists='append',index=False)
+
+PSinfeq1200 = tmp[tmp.max_alti <= 1200].copy()
+PSsup1200 = tmp[tmp.max_alti > 1200].copy()
+
+# PSinfeq1200 = tmp[tmp.bilan <= 1200].copy()
+# PSsup1200 = tmp[tmp.bilan > 1200].copy()
+# PSinfeq1200.rename(columns={'bilan':'max_alti'}, inplace=True)
+# PSsup1200.rename(columns={'bilan':'max_alti'}, inplace=True)
+PSinfeq1200.dropna(axis=1, how='all', inplace=True)
+if table in [
+        '"cr_VERCORS_habitats_CBNA_1999-2007"','"cr_TRIEVES+VERCORS_habitats_CBNA_2014"']:
+    PSinfeq1200.loc[PSinfeq1200.statut=="Habitat d'intérêt communautaire",'statut'] = \
+        "Communautaire"
+    PSinfeq1200.loc[PSinfeq1200.statut=="Habitat d'intérêt communautaire retenu prioritaire",'statut'] = \
+        "Prioritaire"
+    PSinfeq1200.loc[PSinfeq1200.statut=="Habitat d'intérêt communautaire, retenu prioritaire",'statut'] = \
+        "Prioritaire"
+    PSinfeq1200.loc[
+        PSinfeq1200.statut=="Habitat d'intérêt communautaire, retenu prioritaire pour les sites riches en orchidées",
+        'statut'] = "Prioritaire"
+    PSinfeq1200.loc[
+        PSinfeq1200.statut=="Habitat d'intérêt communautaire retenu prioritaire pour les sites riches en orchidées",
+        'statut'] = "Prioritaire"
+    PSinfeq1200.loc[
+        PSinfeq1200.statut=="Habitat communautaire, retenu prioritaire pour les sites riches en orchidées",
+        'statut'] = "Prioritaire"
+if table in [
+        '"cr_CHARTREUSE_habitats_CBNA_2000-2012"', '"cr_ECRIN_habitats_CBNA_2014"']:
+    dic = {'PR':'Prioritaire','IC':'Communautaire','NC':'Non communautaire'}
+    PSinfeq1200.statut.replace(dic,inplace=True)
+
+DF_INI = PSinfeq1200.copy()
+df = DF_INI.copy()
+# df['structure'] = 'CBNA'
+
+# sql = """
+# DELETE FROM sites.sites
+# WHERE sites.id in (SELECT id_site from sites.r_sites_geom where id_lot = 1);
+# """
+# with pycen.con.begin() as cnx:
+#     cnx.execute(sql)
+path0 = '/home/colas/Documents/9_PROJETS/2_PS/TO IMPORT/'
+cols_date = PSsup1200.columns[PSsup1200.columns.str.contains('date')]
+PSsup1200[cols_date] = PSsup1200[cols_date].astype(str)
+PSsup1200.to_file(path0 + 'PS_ECRIN_CBNA_2014_SUP1200.shp')
+
+cols_date = DF_INI.columns[DF_INI.columns.str.contains('date')]
+DF_INI[cols_date] = DF_INI[cols_date].astype(str)
+DF_INI.to_file(path0 + 'PS_ECRIN_CBNA_2014_INF1200.shp')
+
+from os import system      # system() is called just below but was never imported
+import rasterio as rio     # rio.open() is called just below but was never imported
+
+# NB: inner double quotes are escaped so the -csql argument survives the shell
+op = '''
+gdalwarp -overwrite -s_srs EPSG:2154 -t_srs EPSG:2154 -co FORMAT=GPKG -of GTiff -tr 25.0 -25.0 -tap -cutline \
+    "PG:dbname='bd-cen-38' host=192.168.0.189 port=5432 sslmode=disable user='cgeier' password='adm1n*bdCen'" \
+    -csql "SELECT * FROM habitat.\"cr_TRIEVES_VERCORS_habitats_CBNA_2014\" WHERE idfinal in (24060)" \
+    "{mnt}" {out}
+'''.format(
+    mnt=path2_mnt+'out.tif',
+    out=path2_mnt+'24060.tif')
+system(op)
+# was rio.open(Path_tmp+mnt_out): both names undefined; open the clipped MNT written above
+mnt = rio.open(path2_mnt+'24060.tif')
+
+
+sql = 'SELECT * FROM 
pelouse_seche."PB_codehabCBNA_nonPresent_dans_corineBiotope"' +df2 = gpd.read_postgis(sql,con) + +df3 = pd.concat([df,df2]) +df3.set_geometry('geom',inplace=True,crs=2154) + +df3.to_postgis( + name='PB_codehabCBNA_nonPresent_dans_corineBiotope', + con=con, + schema='pelouse_seche', + if_exists='replace', + geom_col='geom' +) \ No newline at end of file diff --git a/3_AZALEE/recup_ps.py b/3_AZALEE/recup_ps.py new file mode 100644 index 0000000..e766912 --- /dev/null +++ b/3_AZALEE/recup_ps.py @@ -0,0 +1,1480 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +#Nom : : recup_ps.py +#Description : +#Copyright : 2021, CEN38 +#Auteur : Colas Geier +#Version : 1.0 + +# import re +from os import register_at_fork +import types +from numpy.lib.shape_base import expand_dims +import pandas as pd +import geopandas as gpd +from pandas.io.pytables import incompatibility_doc +from shapely import wkb +import numpy as np +from sqlalchemy.sql.expression import column +# from pycen import bdd +import pycen +from sqlalchemy import create_engine +from sqlalchemy.engine import URL +from geoalchemy2 import Geometry + + + +isin_bdd = True +lst_tab = ['"PS_4MONTAGNESNE_CEN38_2014"','"PS_CHAMBARAN_CEN38_2013"','"PS_CHARTREUSE_CEN38_2010"','"PS_DRAC_CEN38_2014"', +'"PS_BELLEDONNE_CEN38_2014"','"PS_BIEVRE_CEN38_2014"','"PS_BIEVRE_CEN38_2016"','"PS_SUD-GRENOBLOIS_CEN38_2009"', +'"PS_VERCORS_CEN38_2011"' +] +# Parametres bdd IN +user = 'cen_admin' +pwd = '#CEN38@venir' +adr = '192.168.0.189' +base = 'bd-cen-38' +schema = 'pelouse_seche' +schema = 'habitat' +url = URL.create('postgresql+psycopg2', + username=user, + password=pwd, + host=adr, + database=base, +) +con = create_engine(url) + +table = '"cr_VERCORS_habitats_CBNA_1999-2007"' +# table = '"PS_CHAMBARAN_CEN38_2013"' +# bd = bdd.CEN( +# user = user, +# pwd = pwd, +# adr = adr, +# base = base +# # schema = schema +# ) + +# Parametres bdd OUT +user_ps = 'colas_g' +pwd_ps = 'adm1n*38*' +adr_ps = '192.168.60.10' +base_ps = 'bd_cen' +url_ps = URL.create('postgresql+psycopg2', + username=user_ps, + password=pwd_ps, + host=adr_ps, + database=base_ps, +) +con_ps = create_engine(url_ps) +con_ps = pycen.con + + + +def get_pers(): + sql = ('SELECT t1.id id, t1.nom nom, t1.prenom prenom, t2.nom organisme FROM {sch}.{tab1} t1' + ' JOIN {sch}.{tab2} t2 ON t1.id_organisme = t2.id ORDER BY id').format(sch='personnes', tab1='personne', tab2='organisme') + df_pers = pd.read_sql( + sql = sql, + con = con_ps) + df_pers.replace([None],'',inplace=True) + df_pers['auteur'] = df_pers.prenom + ' ' + df_pers.nom + df_pers['auteur'] = df_pers['auteur'].str.strip() + df_pers['organisme'] = df_pers['organisme'].str.strip() + return pycen.pers.get_auteur() + + +# def get_idgeomsite(lst_site): +# ''' +# :lst_site: tuple. 
+# ''' +# sql = ('SELECT id, id_site, date FROM {sch}.{tab} t0 ' +# 'JOIN {sch}.type_milieu t1 WITH t0.id_type_milieu = t1.id ' +# 'WHERE t1.nom = "Pelouses sèches" AND t0.id_site IN {lst} ORDER BY id').format(sch='sites', tab='r_sites_geom', lst=lst_site) +# df = pd.read_sql( +# sql = sql, +# con = con_ps) +# return df + +def format_legende(leg): + dic = { + + } + df = pd.DataFrame({'leg':leg}) + df.replace( + ['/',' x ','X',' x','fasciès','faciés', ' ','erigé','érigéé','Troêne','Troëne','brachypode','dominée','dominé','érigé_et_brachypode','cyperacées'], + ['|','|','|','|','faciès','faciès',' ','érigé','érigé','Troène','Troène','Brachypode','dominé','dominée','érigé et brachypode','Cyperacées'], + regex=True, inplace=True) + return df + + + + +def format_date(df): + df = df.copy() + if table in ['"PS_4MONTAGNESNE_CEN38_2014"','"PS_BIEVRE_CEN38_2014"', + '"PS_CHAMBARAN_CEN38_2013"']: + df.date = pd.to_datetime(df.date,format='%d/%m/%Y') + if table == '"PS_BELLEDONNE_CEN38_2014"': + df.date = pd.to_datetime(df.date,format='%d%b%Y') + if table == '"PS_BIEVRE_CEN38_2016"': + df.date = df.date.astype(str).replace('\.','',regex=True) + df.date = df.date.str.rsplit(' ',1).str[0]\ + .replace(['avr','mai','juin','juil'],['apr','may','jun','jul'],regex=True) + df.date = pd.to_datetime(df.date,format='%d %b %Y') + if table == '"PS_SUD-GRENOBLOIS_CEN38_2009"': + # df.loc[df.date.str.len() < 6,'date'] = df.loc[df.date.str.len() < 6,'date'].astype(int) + df.loc[df.date.str.len() < 6,'date'] = '01/06/' + df.loc[df.date.str.len() < 6,'date'].astype(int).max().astype(str) + df.loc[df.date.str.len() > 6,'date'] = pd.to_datetime(df.loc[df.date.str.len() > 6,'date']) + + + if 'annee' in df.columns and 'date' not in df.columns: + # Si pas de date mais année précisée + df['annee'] = df['annee'].astype(int) + df['date'] = df['annee'].astype(str) + '-06-01' + df.date = pd.to_datetime(df.date,format='%Y-%m-%d') + elif any(df.date.astype(str).str.len() <= 4): + # Si dates non-homogènes, cohexistance date&année + d = df.loc[df.date.astype(str).str.len() <= 4,'date'].unique() + if None in d: + df.loc[df.date.astype(str).str.len() <= 4,'date'] = df.loc[df.date.astype(str).str.len() <= 4,'annee'].astype(str) + '-06-01' + else: + df.loc[df.date.astype(str).str.len() <= 4,'date'] = df.loc[df.date.astype(str).str.len() <= 4,'date'].astype(str) + '-06-01' + df.date = pd.to_datetime(df.date,format='%Y-%m-%d') + else: + df.date = pd.to_datetime(df.date) + return df + + +def normalize_auteur(lst_auteur): + ''' + lst_auteur : pd.Series + ''' + dic = { + 'Lise Duconte':'DUCONTE Lise', + 'COSQUER Mélanie (AVENIR)':'COSQUER Mélanie', + 'CHABERT Chloé (AVENIR)':'CHABERT Chloé', + 'CHABERT Chloé (AVNEIR)':'CHABERT Chloé', + 'PACHE Gilles (CBNA)':'PACHE Gilles', + 'Gilles PACHE':'PACHE Gilles', + 'JAMEAU Laura (AVENIR)':'JAMEAU Laura', + 'MARCIAU Roger (AVENIR)':'MARCIAU Roger', + 'Roger MARCIAU':'MARCIAU Roger', + 'Aude Massa':'MASSA Aude', + 'Aude MASSA':'MASSA Aude', + 'Bilkisse ABOUDOU AVENIR':'ABOUDOU Bilkisse', + 'Romain BARTHELD':'BARTHELD Romain', + 'VILLARET J.C.':'VILLARET Jean-charles', + 'Jean-Charles VILLARET':'VILLARET Jean-charles', + 'Fabien ANTHELME':'ANTHELME Fabien', + 'Jean-Christophe GATTUS':'GATTUS Jean-christophe', + 'Alexis MIKOLAJCZAK':'MIKOLAJCZAK Alexis', + 'Jonathan MALINEAU':'MALINEAU Jonathan', + 'Aurélien DAUTREY':'DAUTREY Aurélien', + 'Etienne MARY':'MARY Etienne', + 'Alix GUEDOU': 'GUEDOU Alix', + } + lst_aut = lst_auteur.copy() + lst_aut.replace(' & ',',',regex=True,inplace=True) + + if 
any(lst_aut.str.contains(',')): + lst_aut = lst_aut.str.split(',',expand=True).stack().str.strip().droplevel(-1) + lst_aut.replace(dic,inplace=True) + + df_aut = lst_aut.str.rsplit(' ',n=1,expand=True) + df_aut[1] = df_aut[1].str[0].str.upper() + df_aut[1].str[1:].str.lower() + df_aut[0] = df_aut[0].str.upper() + df_aut[2] = df_aut[0] + df_aut.loc[~df_aut[1].isna(),2] = df_aut.loc[~df_aut[1].isna(),0] + ' ' + df_aut.loc[~df_aut[1].isna(),1] + lst_aut = df_aut[2].copy() + aut = lst_aut.unique() + res = [x for x in aut if x not in [*get_pers().nom_prenom]] + if res: + print('Les auteurs suivant ne figurent pas dans la BDD contact : %s'%res) + new = new_auteur(res) + lst_aut.replace([*new['aut'].keys()],[*new['aut'].values()], inplace=True) + lst_aut = lst_aut.groupby(axis=0,level=0).apply(lambda x: "%s" % ','.join(x)) + return lst_aut + + +def new_auteur(new): + ''' + new : list + ''' + df = pd.DataFrame({'auteur': new}) + dic = {'aut':{}, 'org':[]} + + # Pour chaque auteur + for pers in df.auteur: + print(pers) + + while True: + newaut = input("Insérer un nouvel auteur ? (Y/N)\n") + if not newaut[0].upper() in ['Y','N']: + continue + else: break + # Si : pas d'insertion d'un nouvel auteur + if newaut[0].upper() == 'N': + while True: + rep = input("Remplacer l'auteur par un auteur existant ? (Y/N)\n") + if not rep[0].upper() in ['Y','N']: + continue + else: break + if rep[0].upper() == 'Y': + while True: + org = pycen.pers.get_organisme().sort_index().to_dict()['nom'] + reporg = int(input("Sélectionner le numéro de l'organisme :\n %s \n"%org)) + if not reporg in [*org.keys()]: + continue + else: break + while True: + aut = get_pers().sort_values('id') + aut = aut[aut.id_organisme == reporg] + aut = aut.nom_prenom.to_dict() + repaut = int(input("Sélectionner le numéro de l'auteur :\n %s \n"%aut)) + if not repaut in [*aut.keys()]: + continue + else: break + dic['aut'] = {**dic['aut'],pers:aut[repaut]} + dic['org'] = [*dic['org'],org[reporg]] + # Autrement : insertion d'un nouvel auteur + else: + while True: + print("Auteur : %s"%pers) + rep = input("L'auteur doit être sous le format NOM Prénom.\nRenommer l'auteur ? 
(Y/N)\n") + if not rep[0].upper() in ['Y','N']: + continue + elif rep[0].upper() == 'Y': + repaut = input("Saisisser un nouveau nom :\n") + dic['aut'] = {**dic['aut'],pers:repaut} + pers = repaut + break + else: + dic['aut'] = {**dic['aut'],pers:pers} + break + + while True: + org = pycen.pers.get_organisme().sort_index().to_dict()['nom'] + org = {-1: 'Autre', **org} + reporg = int(input("Sélectionner le numéro de l'organisme :\n %s \n"%org)) + if not reporg in [*org.keys()]: + continue + elif reporg == -1: + reporg = input("Saisisser un nouvel organisme :\n") + dic['org'] = [*dic['org'],reporg] + insert_newOrganisme(reporg) + org = pycen.pers.get_organisme() + idorg = org[org.nom==reporg].index[0] + insert_newAuteur(pers,idorg) + break + else: + dic['org'] = [*dic['org'],org[reporg]] + insert_newAuteur(pers,reporg) + break + + return dic + + +def insert_newAuteur(aut,idorg): + aut = pd.Series(aut) + idorg = pd.Series(idorg) + df = pd.DataFrame({'aut':aut,'id_organisme':idorg}) + # print(df) + tmp = df.aut.str.rsplit(' ',1,expand=True) + if tmp.shape[1] == 1: + df[['nom']] = tmp + elif tmp.shape[1] == 2: + df[['nom', 'prenom']] = tmp + else: + raise ValueError('NOM Prénom est de longueur inconnu :\ntmp') + del df['aut'] + try: + df.to_sql(name='personne',con=pycen.con,schema=pycen.pers.schema, + if_exists='append', index=False) + print("Nouvel auteur inséré : '%s'"%aut) + except: + print("ERROR : impossible d'insérer le nouvel auteur :\n '%s'"%aut) + + + +def insert_newOrganisme(nom): + nom = pd.Series(nom) + df = pd.DataFrame({'nom':nom}) + try: + df.to_sql(name='organisme',con=pycen.con,schema=pycen.pers.schema, + if_exists='append', index=False) + print("Nouvel organisme inséré : '%s'"%nom) + except: + print("ERROR : impossible d'insérer le nouvel organisme :\n '%s'"%nom) + + + +def normalize_colname(df): + df.columns = df.columns.str.lower() + df.columns = df.columns.str.replace('__','_',regex=True) + return df.rename(columns={ + 'n_polygone':'ident', + 'id_site':'ident', + 'ident_':'ident', + 'id':'ident', + 'idfinal':'id_origine', + 'date_':'date', + 'obs':'auteur', + 's_p_brous' :'%_embrous', + 's_p_brouss':'%_embrous', + 'niv__embro':'niv_embrous', + 'niv_embro' :'niv_embrous', + 'niv_emb' :'niv_embrous', + 'recouvreme':'recouvmnt', + 'recouvr':'recouvmnt', + 'recouv' :'recouvmnt', + 'recouvr_' :'recouvmnt', + 'remarque' :'remarques', + 'remarq_' :'remarques', + 'legendes' :'legende', + 'legend' :'legende', + 'sources' :'source', + 'surf' :'surface', + 'geometry' :'geom', + }) + + +def get_id_auteur(lst_autor): + sch = 'personnes' + tab = 'personne' + sql = """ + WITH tmp as + (SELECT id, TRIM(CONCAT(nom,' ',prenom)) as auteur FROM {sch}.{tab}) + SELECT * FROM tmp WHERE auteur IN %(lst_autor)s""".format(sch=sch,tab=tab) + df = pd.read_sql( + sql = sql, + con = pycen.con, + params = {'lst_autor': tuple(lst_autor)}) + return df + + +def existing_ps(df): + gdf = pycen.ps.get_sitesGeom() + gdf['area'] = gdf.area + ovl = gpd.overlay(df,gdf) + ovl = gpd.overlay(df,gdf,how='intersection') #'intersection', 'union', 'identity', 'symmetric_difference' or 'difference' + ovl['ovl_area'] = ovl.area + ovl['ovl%'] = ovl['ovl_area'] * 100 / ovl['area'] + + return df + +def define_siteOrg(df): + gdf = pycen.get_districtNat()[['abrev','geom']] + tmp = df[['geom']].copy().set_geometry('geom') + tmp = gpd.overlay(tmp,gdf, how='intersection') + tmp['area'] = tmp.area + tmp.reset_index(0, inplace=True) + tmp.set_index('area',inplace=True) + tmp.sort_index(inplace=True) + 
tmp.drop_duplicates(subset=['index'],keep='last',inplace=True) + tmp.reset_index(drop=True,inplace=True) + del tmp['geometry'] + df = df.merge(tmp,how='left',left_index=True,right_on='index') + del df['index'] + df.rename(columns={'abrev':'org'}, inplace=True) + return df + # tmp[tmp.id_site==334] + +# 'touches', None, 'contains', 'overlaps', 'contains_properly', 'within', 'intersects', 'crosses', 'covers', 'covered_by' + +def define_siteName(df): + df = define_siteOrg(df.copy()) + df['dept'] = '38' + # df['num'] = None + if 'org' in df.columns: + for org in df.org.unique(): + tmp = pycen.sites.get_sitesInfos(milieu=pycen.ps.milieu) + # tmp = pycen.sites.get_sitesInfos(milieu=pycen.zh().typ_milieux.nom[0]) + tmp = tmp[tmp.org == org] + if not tmp.empty: + num = tmp.num.astype(int).max() + seq = range(num+1, num + df.loc[df.org==org].shape[0]+1) + df.loc[df.org==org,['num']] = list(seq) + else: + seq = range(1, df.loc[df.org==org].shape[0]+1) + df.loc[df.org==org,['num']] = list(seq) + else: + tmp = pycen.sites.get_sitesInfos(milieu=pycen.ps.milieu) + + df.loc[~df.num.isna(),'num'] = df[~df.num.isna()].num.astype(int).astype(str) + df.loc[df.num.str.len() == 1,'num'] = '000'+ df.loc[df.num.str.len() == 1,'num'] + df.loc[df.num.str.len() == 2,'num'] = '00' + df.loc[df.num.str.len() == 2,'num'] + df.loc[df.num.str.len() == 3,'num'] = '0' + df.loc[df.num.str.len() == 3,'num'] + df['id_site'] = df['dept'] + df['org'] + df['num'] + return df + + + +def normalize_formatIdsite(id_site): + import re + df = pd.DataFrame({'id_site':id_site}) + df['dept'] = '38' + df[['org','num','other']] = [re.split('(\d+)',s) for s in [*df.id_site]] + del df['other'] + df.loc[df.num.str.len() == 1,'num'] = '000'+ df.loc[df.num.str.len() == 1,'num'] + df.loc[df.num.str.len() == 2,'num'] = '00' + df.loc[df.num.str.len() == 2,'num'] + df.loc[df.num.str.len() == 3,'num'] = '0' + df.loc[df.num.str.len() == 3,'num'] + df['id_site'] = df['dept'] + df['org'] + df['num'] + return df.id_site + + +def normalize_paturage(pat): + return pat.str.lower().replace( + ['paturage','ânes','vaches','moutons','cheval','chevaux','poneys','sangliers','chèvres'], + ['pâturage','asin', 'bovin','ovin','équin','équin','équin',None,'caprin'],regex=True) + + +def normalize_pratiques(prat): + dic = { + 'pâturage equin':'pâturage équin', + 'pâturage bovin':'pâturage bovin', + 'paturâge cervidé':'pâturage cervidé', + 'broyer': 'broyage', + 'truffier': 'broyage', + 'plantation': 'broyage', + 'culture' : 'labourage', + 'abandon recent': 'abandon', + 'abandon verger': 'abandon', + 'remblais et feu' : 'incertain', + 'remblais, feu' : 'incertain', + 'caprin': 'pâturage caprin', + 'ovin': 'pâturage ovin', + 'équin': 'pâturage équin', + 'equin': 'pâturage équin', + 'bovin': 'pâturage bovin', + 'paturage': 'pâturage', + 'paturâge': 'pâturage', + 'f' : 'fauche', + 'p' : 'pâturage', + '0' : 'N.D', + 'i' : 'incertain', + 'a' : 'abandon', + 'pv': 'pâturage', + 'b' : 'broyage', + 'pc': 'pâturage caprin', + 'po': 'pâturage ovin', + 'pe': 'pâturage équin', + 'pb': 'pâturage bovin', + '-' : 'N.D', + '0' : 'N.D', + 'ND' : 'N.D', + 'n.d' : 'N.D', + 'chemin carrossable non goudronné' : 'N.D', + 'plantation de châtaigniers' : 'broyage', + } + p = prat.replace(['.et','/'],[',',', '],regex=True).copy() + p = p.str.lower() + p = normalize_paturage(p) + p = p.replace(dic) + p.fillna('N.D', inplace=True) + p[p.str.contains('\?| ou ',na=False)] = 'incertain' + pp = p.str.split(',',expand=True) + pp.columns = 'p' + pp.columns.astype(str) + for c in pp.columns: + pp[c] = 
pp[c].str.strip() + pp[c] = pp[c].replace(dic) + return pp + + +def select_ID(df,sch,tab): + df = df.copy() + sql = 'SELECT * FROM %s.%s'%(sch,tab) + # df = df.copy().dropna(axis=1,how='all') + df.replace("'","''",regex=True,inplace=True) + if 'geom' in df.columns: + del df['geom'] + if 'date' in df.columns: + df['date'] = df['date'].astype(str) + elif 'date_deb' in df.columns: + df['date_deb'] = df['date_deb'].astype(str) + if 'id_cb' in df.columns: + df['id_cb'] = df['id_cb'].astype(str) + column = '(%s)'%','.join(df.columns) + df_tuple = tuple(df.itertuples(index=False, name=None)) + sql = sql + ' WHERE %s IN %s'%(column,df_tuple) + if 'nan' in sql: sql = sql.replace('nan','NULL') + if ',)' in sql: sql = sql.replace(',)',')') + if '"' in sql: sql = sql.replace('"',"'") + + # print(sql) + return pd.read_sql( + sql=sql, + con=pycen.con,) + + +def insertAutorAttrs(df,colname_rsite, sch, tab): + df = df.copy() + df = df[['id', 'auteur']].copy() + if any(df.auteur.str.contains(',')): + tmp = df.auteur.str.split(',',expand=True).stack().droplevel(-1) + tmp = pd.DataFrame({'auteur':tmp}) + df = pd.merge(df[['id']],tmp,how='left',left_index=True,right_index=True) + df_autor = get_id_auteur([*df.auteur.unique()]) + df.replace([*df_autor.auteur],[*df_autor.id], inplace=True) + df.columns = [colname_rsite, 'id_auteur'] + try : + df.to_sql( + name=tab, + con=pycen.con, + schema=sch, + index=False, + if_exists='append', + method='multi' + ) + print('Insert author sites data OK !') + return 'Insert author sites data OK !' + except Exception as e: + print('ERROR : News author insert impossible !') + print(e) + + +def _altertype(df1,df2): + same_col = df1.columns[df1.columns.isin(df2.columns)] + if 'date' in df2.columns: + df2['date'] = df2['date'].astype(str) + if 'geom' in same_col: + same_col = same_col.drop('geom') + for col in same_col: + if df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == int: + df1[col] = df1[col].astype(int) + elif df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == float: + df1[col] = df1[col].astype(float) + elif df1.dtypes[col] != df2.dtypes[col] and df2.dtypes[col] == str: + df1[col] = df1[col].astype(str) + return [df1,df2] + + +def insert_authordata(df_ins,tab_data,id_data,colname_rsite, tab_auteur): + df_ins = df_ins.copy() + if 'date' in df_ins.columns: + df_ins['date'] = df_ins['date'].astype(str) + elif 'date_deb' in df_ins.columns: + df_ins['date_deb'] = df_ins['date_deb'].astype(str) + + tab_sit = pycen.con.dialect.get_table_names(pycen.con,pycen.sites.schema) + tab_ps = pycen.con.dialect.get_table_names(pycen.con,pycen.ps.schema) + if tab_data in tab_sit: + sch = pycen.sites.schema + elif tab_data in tab_ps: + sch = pycen.ps.schema + + # ids = select_ID(df_ins[df_ins.columns.drop('auteur')],sch,tab_data) + ids = select_ID(df_ins[[id_data]],sch,tab_data) + if 'date' in ids.columns: + ids['date'] = ids['date'].astype(str) + same_col = df_ins.columns[df_ins.columns.isin(ids.columns)] + if 'geom' in same_col: + same_col = same_col.drop('geom') + df_ins,ids = _altertype(df_ins,ids) + + df_ins = df_ins[[*same_col,'auteur']].merge(ids, on=[*same_col], how='left') + insertAutorAttrs(df=df_ins,colname_rsite=colname_rsite, sch=sch, tab=tab_auteur) + + + +def insert_site(df): + df = df.copy() + if 'nom' not in df.columns: + df['nom'] = 'Inconnu' + if 'type_site' not in df.columns: + df['type_site'] = 'N.D.' 
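+    # Defaults for source layers missing these columns: the site type is set
+    # to 'N.D.' and, just below, the milieu to 'Pelouses sèches'. Both labels
+    # must already exist in the type_site / type_milieu reference tables,
+    # since they are mapped to their ids further down with .values[0]
+    # (an unknown label would raise an IndexError there).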
+ if 'type_milieu' not in df.columns: + df['type_milieu'] = 'Pelouses sèches' + # if 'remarques' not in df.columns: + # df['remarques'] = None + + df = df[['id_site','nom','date','auteur','type_site','type_milieu', + # 'remarques' + ]] + typs = pycen.sites._get_typ_site() + typm = pycen.sites._get_typ_milieux() + + for t in df.type_milieu.unique(): + df.loc[df.type_milieu == t, 'id_type_milieu'] = typm.loc[typm.nom == t, 'id'].values[0] + for s in df.type_site.unique(): + df.loc[df.type_site == s, 'id_type_site'] = typs.loc[typs.nom == s, 'id'].values[0] + + df['id_type_milieu'] = df['id_type_milieu'].astype(int) + # df['id_type_site'] = df['id_type_site'].astype(str) + + df['dept'] = df.id_site.str[:2] + df['org'] = df.id_site.str[2:-4] + df['num'] = df.id_site.str[-4:] + df.rename(columns={'date':'date_deb', 'id_site':'id'}, inplace=True) + + columns = [col['name'] for col in pycen.sites.columns_sitetab] + ['auteur'] + c = df.columns[df.columns.isin(columns)] + df = df[c] + print(df) + + try: + df[df.columns.drop('auteur')].to_sql( + name = 'sites', + con = pycen.con, + schema = pycen.sites.schema, + if_exists = 'append', + index = False, + method = 'multi' + ) + print('Insert news sites OK !') + return [df,True] + except Exception as e: + print(e) + print('PS : Insert news sites impossible !') + return [df,False] + + +def check_org_lot(lot): + """ + :lot: list. liste des lots + + return + ------- + DataFrame des id_lot de la liste lot. + """ + tab_lot = pycen.sites._get_lots() + not_intab = [ l for l in lot if l not in [*tab_lot.libelle] ] + if not_intab: + df = pd.DataFrame({'libelle':not_intab}) + df.to_sql( + name='lots', + con=con_ps, + schema='sites', + index=False, + if_exists='append', + method='multi', + ) + tab_lot = pycen.sites._get_lots() + tab_lot = tab_lot[tab_lot.libelle.isin(lot)] + return tab_lot[['id_lot','libelle']] + + + +def insert_geom(df): + dic = {'remarques':'rmq_interet_patri','legende':'rmq_usage_process'} + c = ['id_site','geom','auteur','date','remarques','legende','table_org','id_origine'] + cc = df.columns[df.columns.isin(c)] + df = df[cc].copy() + df.set_geometry('geom', inplace=True) + colindict = [ *df.columns[df.columns.isin(dic.keys())] ] + if colindict: + df.rename( + columns={'remarques':'rmq_interet_patri','legende':'rmq_usage_process'}, + inplace=True, + ) + if 'table_org' in df.columns: + tab = [*df.table_org.unique()] + lots = check_org_lot(tab) + df = pd.merge(df,lots,left_on='table_org',right_on='libelle',how='left') + del df['table_org'] + del df['libelle'] + + try: + df[df.columns.drop('auteur')].to_postgis( + name = 'r_sites_geom', + con = pycen.con, + schema = pycen.sites.schema, + if_exists = 'append', + index = False, + geom_col = 'geom' + ) + print('Insert news geom OK !') + return [df,True] + except Exception as e: + print(e) + print('PS : Insert news geom impossible !') + return [df,False] + + +def get_idGeomSite(lst_site): + ''' + :lst_site: list or pd.Series . 
+ ''' + id_geom = pycen.ps.get_sitesGeom( + id_site=[*lst_site]) + if not id_geom.empty: + id_geom = id_geom[['id','id_site']] + id_geom.columns = ['id_geom_site','id_site'] + return id_geom + + +def insert_attrsPS(df): + c = ['id_site','auteur','date','%_embrous','niv_embrous','recouvmnt','pratiques','statut'] + cc = df.columns[df.columns.isin(c)] + df = df[cc].copy() + df = pd.concat([df,normalize_pratiques(df['pratiques'])],axis='columns') + del df['pratiques'] + for col in ['%_embrous','niv_embrous','recouvmnt']: + df[col] = df[col].astype(str).replace(['\.0'],[''],regex=True) + df.niv_embrous.replace(['nan'],['N.D'],inplace=True) + df.recouvmnt.replace(['nan','None','0'],['N.D','N.D','N.D'],inplace=True) + df.statut.replace(['ND'],['N.D'],inplace=True) + # for t in df.dtypes[df.dtypes == int].index: + # df.loc[~df[t].isna(),t] = df.loc[~df[t].isna(),t].astype(int).astype(str) + if '%_embrous' in df.columns: + taux = True + df.niv_embrous = df.niv_embrous + ';' + df['%_embrous'] + del df['%_embrous'] + else : taux = False + df.set_index(['id_site','auteur','date'], inplace=True) + df = df.stack().reset_index(-1) + df.columns = ['type','param'] + df.loc[df.type.str.len() < 4,'type'] = 'pratique' + if taux: + df[['param','taux']] = df.param.str.split(';',expand=True) + df.taux.replace(['nan'],[None],inplace=True) + + df.type.replace({'niv_embrous':'embroussaillement','recouvmnt':'recouvrement'}, inplace=True) + df.param.replace({'ND':'N.D'}, inplace=True) + df.reset_index(inplace=True,drop=False) + + param = pycen.ps.get_param() + typ_p = pycen.ps._get_table(con=pycen.con,schema=pycen.ps.schema,table='type_param') + for t in df.type.unique(): + tmp = param[param.type == t] + df.loc[df.type==t,'param'] = df.loc[df.type==t,'param'].replace([*tmp.nom], [*tmp.id]) + df['type'].replace([*typ_p.nom], [*typ_p.id], inplace=True) + df = df.merge(get_idGeomSite(df.id_site), on=['id_site'], how='left') + del df['id_site'] + del df['type'] + df.rename(columns={'param':'id_param'}, inplace=True) + + try: + df[df.columns.drop('auteur')].to_sql( + name = 'r_site_param', + con = pycen.con, + schema = pycen.ps.schema, + if_exists = 'append', + index = False, + method = 'multi' + ) + print('Insert news attrs OK !') + return [df,True] + except Exception as e: + print(e) + print('PS : Insert news attrs impossible !') + return [df,False] + +def split_codehab(lst_codehab): + lst = lst_codehab.copy() + lst = lst.str.split('x|,|&|/',expand=True) \ + .stack().str.strip() \ + .droplevel(-1).reset_index(drop=False) + return lst + +def format_codehab2insert(lst_codehab): + lst = lst_codehab.copy() + lst.replace('x|,','&',regex=True, inplace=True) + lst.replace('&','&;',regex=True, inplace=True) + lst.replace('/','/;',regex=True, inplace=True) + lst = lst.str.split(';',expand=True).stack().str.strip().reset_index(-1) + lst.columns = ['ordre', 'code_hab'] + lst.ordre = lst.ordre + 1 + lst.loc[lst.code_hab.str.contains('&'),'sep'] = '&' + lst.loc[lst.code_hab.str.contains('/'),'sep'] = '/' + lst.code_hab = lst.code_hab.replace('&|/','',regex=True).str.strip() + + suspect = lst.code_hab.str.split('(',expand=True) + if suspect.shape[1] > 1: + raise Exception('habitat suspecté en développement') + else: + suspect.columns = ['code_hab'] + del suspect['code_hab'] + lst = pd.merge(lst,suspect,right_index=True,left_index=True) + + return lst + +def insert_cbPS(df): + df = df.copy() + df.date = df.date.astype(str) + df.code_hab = df.code_hab.replace(['\*'],[''],regex=True).str.strip() + ids = 
select_ID(df[df.columns.drop(['code_hab','auteur'])],sch=pycen.ps.schema,tab='r_site_habitat') + same_col = df.columns[df.columns.isin(ids.columns)] + df,ids = _altertype(df,ids) + df = df.merge(ids, on=[*same_col], how='left') + df = df[['id', 'code_hab']].copy() + df.set_index('id',inplace=True) + # df = df.code_hab.str.split('x|,|&',expand=True) \ + # .stack().str.strip() \ + # .droplevel(-1).reset_index(drop=False) + # df = split_codehab(df.code_hab) + # df.columns = ['id_sitehab', 'code_hab'] + df = format_codehab2insert(df.code_hab) + + + try: + df.to_sql( + name = 'r_hab_cb', + con = pycen.con, + schema = pycen.ps.schema, + if_exists = 'append', + index = False, + method = 'multi' + ) + print('Insert news codes habitats OK !') + except Exception as e: + print(e) + print('PS : Insert news codes habitats impossible !') + + +def insert_habPS(df): + import re + c = ['id_site','auteur','date'] + cc = df.columns[(df.columns.isin(c)) | (df.columns.str.contains('code_hab|n_hab')) ] + df = df[cc].copy() + cc = df.columns[(~df.columns.str.contains('hab')) ] + df.set_index([*cc], inplace=True) + # df.set_index(['id_site','auteur','date'], inplace=True) + # for t in df.dtypes[df.dtypes == int].index: + # df[t] = df[t].astype(str) + df = df.stack().reset_index(-1) + df.columns = ['param','value'] + df[['param','index','other']] = [re.split('(\d+)',s) for s in [*df.param]] + del df['other'] + df['index'] = df['index'].astype(int) + df['value'] = df['value'].replace(['-'],[None]) + df.dropna(subset=['value'], inplace=True) + df = df.set_index(['index','param'], append=True).unstack() + df = df.droplevel(0,axis=1) + df.code_hab.replace(['0'],[None], inplace=True) + df.dropna(subset=['code_hab'],inplace=True) + df['n_hab'] = df['n_hab'].astype(int) + df.reset_index(inplace=True,drop=False) + df = df.merge(get_idGeomSite(df.id_site), on=['id_site'], how='left') + del df['id_site'] + + try: + df[df.columns.drop(['auteur','code_hab'])].to_sql( + name = 'r_site_habitat', + con = pycen.con, + schema = pycen.ps.schema, + if_exists = 'append', + index = False, + method = 'multi' + ) + print('Insert news attrs OK !') + return [df,True] + except Exception as e: + print(e) + print('PS : Insert news attrs impossible !') + return [df,False] + + +def insert_legendCartoPS(df): + c = ['id_site','auteur','date','leg_carto'] #,'legende'] + cc = df.columns[df.columns.isin(c)] + df = df[cc].copy() + param = pycen.ps.get_listLegendePS() + df['id_param_leg'] = df['leg_carto'].replace([*param.nom_court],[*param.id]) + df = df.merge(get_idGeomSite(df.id_site), on=['id_site'], how='left') + del df['id_site'] + del df['leg_carto'] + + try: + df[df.columns.drop('auteur')].to_sql( + name = 'r_site_legcarto', + con = pycen.con, + schema = pycen.ps.schema, + if_exists = 'append', + index = False, + method = 'multi' + ) + print('Insert news legendes OK !') + return [df,True] + except Exception as e: + print(e) + print('PS : Insert news legendes impossible !') + return [df,False] + + +def filter_saisierror(df): + import re + tmp = df.copy() + lst_iderror = [] + del tmp['geom'] + cc = tmp.columns[(~tmp.columns.str.contains('hab')) ] + tmp.set_index([*cc], inplace=True) + tmp = tmp.stack().reset_index(-1) + tmp.columns = ['param','value'] + tmp[['param','index','other']] = [re.split('(\d+)',s) for s in [*tmp.param]] + del tmp['other'] + tmp['index'] = tmp['index'].astype(int) + tmp['value'] = tmp['value'].replace(['-'],[None]) + tmp.dropna(subset=['value'], inplace=True) + tmp = tmp.set_index(['index','param'], 
append=True).unstack() + tmp = tmp.droplevel(0,axis=1) + tmp.reset_index(inplace=True,drop=False) + lst_error1 = [] + if '_hab' in tmp.columns: + tmp.hab_.replace( + ['Xerobromenion erecti', + 'Mesobromion', + 'Carpino betuli prunion spinosae','Carpino-betuli prunion spinosae',], + ['Xerobromion erecti', + 'Mesobromion erecti', + 'Carpino betuli-prunion spinosae','Carpino betuli-prunion spinosae',], + inplace=True) + + for hab in tmp.hab_.sort_values().unique(): + print(hab) + print(tmp[tmp.hab_ == hab].code_hab.unique()) + for cod in tmp[tmp.hab_ == hab].code_hab.unique(): + perc = tmp[(tmp.hab_ == hab)&(tmp.code_hab==cod)].code_hab.count() / tmp[tmp.hab_ == hab].code_hab.count() + if perc < 0.6: + if hab == 'Mesobromion erecti' and cod in ['34.322','34.323']: + continue + print('\t'+cod + ' : '+ perc.astype(str)) + lst_error1 = [*lst_error1,*tmp[(tmp.hab_ == hab)&(tmp.code_hab==cod)].sort_values('id_site').id_site.unique()] + df[df.id_site.isin(lst_error1)].to_postgis( + name='incoherence_habVScodehab', + con=con, + schema=schema, + if_exists='replace', + index=False, + geom_col='geom', + index_label='id_site', + ) + + cb = pycen.ref_hab().get_CB() + tmp.code_hab = tmp.code_hab.replace(['\*'],[''],regex=True).str.strip() + tmp.code_hab = tmp.code_hab.replace(['0'],[None]) + tmp.dropna(subset=['code_hab'], inplace=True) + tmpp = tmp[['id_site','code_hab']].copy() + tmpp.set_index('id_site', inplace=True) + # if any(tmpp.code_hab.str.contains('x|,|&')): + # tmpp = tmpp.code_hab.str.split('x|,|&',expand=True).stack().str.strip().droplevel(-1) + tmpp = split_codehab(tmpp.code_hab) + tmpp.columns = ['id_site', 'code_hab'] + tmpp = tmpp[~tmpp.code_hab.isin(cb.id)] + lst_error2 = [*tmpp.sort_values('id_site').id_site.unique()] + lst_error2 = [*lst_error2,*tmp[tmp.n_hab.isna()].sort_values('id_site').id_site.unique()] + df[df.id_site.isin(lst_error2)].to_postgis( + name='PB_codehabCBNA_nonPresent_dans_corineBiotope', + con=con, + schema=schema, + if_exists='append', + index=False, + geom_col='geom', + index_label='id_site', + ) + + lst_iderror = [*lst_error1,*lst_error2] + lst_iderrorindex = df[df.id_site.isin(lst_iderror)].index + df.drop(lst_iderrorindex,inplace=True) + return df.sort_values('id_site') + + + +if __name__ == "__main__": + tutu = pd.DataFrame() + for table in lst_tab: + sql = "SELECT * FROM {sch}.{tab}".format(sch=schema,tab=table) + df = gpd.read_postgis( + sql = sql, + con = con + ) + if 'id' in df.columns and 'n_polygone' in df.columns: + df['id'] = df['n_polygone'] + del df['n_polygone'] + elif 'id' in df.columns and 'polygon' in df.columns: + df['id'] = df['polygon'] + del df['polygon'] + elif 'id' in df.columns and 'ident' in df.columns: + df['id'] = df['ident'] + del df['ident'] + df = normalize_colname(df) + if 'remarques' in df.columns and 'legende' not in df.columns: + df['legende'] = df.remarques + # df.rename(columns={'id':'id_site'}, inplace=True) + if table == '"PS_VERCORS_CEN38_2011"': + df = df[df.date != '?'].copy() + df = format_date(df) + # df.legende = format_legende(df.legende) + + if table == '"PS_CHAMBARAN_CEN38_2013"': + df.type_patur = normalize_paturage(df.type_patur) + df[['p1','p2']] = df.pratiques.str.split('/|.et.',expand=True) + df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'p1'] = \ + df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'p1'] + ' ' + \ + df.loc[(~df.type_patur.isna()) & (df.p1.str.lower()=='pâturage'), 'type_patur'] + df.loc[(~df.type_patur.isna()) & df.p2.str.lower()=='pâturage', 'p2'] = \ + 
df.loc[(~df.type_patur.isna()) & df.p2.str.lower()=='pâturage', 'p2'] + ' ' + \ + df.loc[(~df.type_patur.isna()) & df.p2.str.lower()=='pâturage', 'type_patur'] + df['pratiques'] = df.p1 + df.loc[~df.p2.isna(),'pratiques'] = df.p1 + '/' + df.p2 + df.drop(columns=['type_patur','p1','p2'], inplace=True) + + df['table_org'] = table[1:-1] + # df['table_org'] = table + tutu = pd.concat([tutu,df]) + + + c = ['ident','id_origine','geom','auteur','hab_1','code_hab1','n_hab1', + 'hab_2','code_hab2','n_hab2','hab_3','code_hab3','n_hab3', + 'hab_5','code_hab5','n_hab5','hab_6','code_hab6','n_hab6', + 'hab_7','code_hab7','n_hab7','hab_8','code_hab8','n_hab8', + 'hab_9','code_hab9','n_hab9','hab_10','code_hab10','n_hab10', + 'hab_11','code_hab11','n_hab11','hab_12','code_hab12','n_hab12', + 'hab_13','code_hab13','n_hab13','hab_14','code_hab14','n_hab14', + 'hab_4','code_hab4','n_hab4','pratiques','niv_embrous', + 'recouvmnt','remarques','phyto','statut','h_remarq','legende', + 'leg_carto','date','annee','source','%_embrous','surf_emb', + # 'org','dept','num', + 'table_org' + ] + cols = tutu.columns[tutu.columns.isin(c)] + df = tutu[cols].copy() + df = define_siteName(df) + df = df[~df.id_site.isna()].copy() + df.sort_index(inplace=True) + df['auteur'] = normalize_auteur(df.auteur) # ERROR : Longueur de sortie != + df = filter_saisierror(df) + # for d in df.dtypes[df.dtypes == object].index: + # # df[d].replace([' '], [' '],regex=True,inplace=True) + # df[d] = df[d].str.normalize('NFKC') + DF = df.copy() + # df = define_siteName(df) + # tmp = df.copy() + print(df) + while True: + res = input('Voulez-vous insérer le tableau de donnée ? (Y/N)\n') + if not res[0].upper() in ['Y','N']: + continue + else: break + if res[0].upper() == 'Y': + df_ins, ins = insert_site(df) + if ins: + insert_authordata(df_ins,id_data='id',tab_data='sites',colname_rsite='id_site', tab_auteur='r_sites_auteur') + df_ins, ins = insert_geom(df) + if ins: + insert_authordata(df_ins,id_data='id_site',tab_data='r_sites_geom',colname_rsite='id_geom_site', tab_auteur='r_geomsites_auteur') + + df_ins, ins = insert_attrsPS(df) + if ins: + insert_authordata(df_ins,id_data='id_geom_site',tab_data='r_site_param',colname_rsite='id_siteparam', tab_auteur='r_siteparam_auteur') + + df_ins, ins = insert_habPS(df) + if ins: + if df_ins.shape[0] > 4000: + df_ins1 = df_ins.iloc[:round(df_ins.shape[0]/2)].copy() + df_ins2 = df_ins.iloc[round(df_ins.shape[0]/2):].copy() + insert_cbPS(df_ins1) + insert_cbPS(df_ins2) + else: + insert_cbPS(df_ins) + insert_authordata( + df_ins[df_ins.columns.drop('code_hab')],id_data='id_geom_site',tab_data='r_site_habitat', + colname_rsite='id_sitehab', tab_auteur='r_sitehab_auteur') + + # df_ins, ins = insert_legendCartoPS(df) + # if ins: + # insert_authordata(df_ins,tab_data='r_site_legcarto',colname_rsite='id_sitecarto', tab_auteur='r_sitecarto_auteur') + + + + + + + + + + + + + +table = '' +df = df2.copy() +col_hab = df.columns[df.columns.str.contains('hab')] +c = ['ident','id_origine','geom','auteur',*col_hab, +# 'hab_1','code_hab1','n_hab1', +# 'hab_2','code_hab2','n_hab2','hab_3','code_hab3','n_hab3', +# 'hab_4','code_hab4','n_hab4', +'pratiques','niv_embrous', +'recouvmnt','remarques','phyto','statut','h_remarq','legende', +'leg_carto','date','annee','source','%_embrous','surf_emb', +'table_org' +] +df.reset_index(drop=False,inplace=True) +df = normalize_colname(df) +df = format_date(df) +# df.legende = format_legende(df.legende) +cols = df.columns[df.columns.isin(c)] +df = df[cols].copy() +df = 
define_siteName(df) +df['auteur'] = normalize_auteur(df.auteur) +df.set_index('id_site', inplace=True) +df.dropna(axis=1, how='all', inplace=True) +col_hab = df.columns[df.columns.str.startswith('code_hab')] +rm = pd.DataFrame() +for col in col_hab: + tmp = df[df[col].str.contains('/|\(',na=False)] + if not tmp.empty: + rm = pd.concat([rm,tmp]) + +rm.to_postgis( + 'CBNA_codehab_error', + con, + schema='pelouse_seche', + if_exists='replace', index=True, geom_col='geom' +) + +df.drop(tmp.index,inplace=True) + +# filter_saisierror(df) + + + + + + + + +# if 'id_site' not in df.columns: +# df.rename(columns={ +# 'id':'id_site',}, +# inplace=True) +# df.sort_values('id_site', inplace=True) + +# df_site = df[['id_site', 'auteur', 'commune','lieu_dit', 'date']].copy() +# if 'sources' in df.columns: +# df_site = df[['id_site', 'auteur', 'sources', 'commune','lieu_dit', 'date']].copy() + +# df_autre = df[['id_site', 'h_remarq', 'remarques']].copy() + +# # sql = ('SELECT t1.nom nom, t1.prenom prenom, t2.nom organisme FROM {sch}.{tab1} t1' +# # ' JOIN {sch}.{tab2} t2 ON t1.id_organisme = t2.id').format(sch='personnes', tab1='personne', tab2='organisme') +# # df_pers = pd.read_sql( +# # sql = sql, +# # con = con_ps +# # ) +# df_pers = get_pers() + +# tmp = df_site[['auteur']].copy() +# if 'sources' in df_site.columns: +# tmp = df_site[['auteur', 'sources']].copy() + +# tmp[['prenom', 'nom']] = df_site['auteur'].str.split(' ', expand=True) +# tmp.drop_duplicates(inplace=True) +# pers_add = tmp.loc[~tmp.nom.isin(df_pers.nom),['prenom', 'nom']] +# if 'sources' in df_site.columns: +# pers_add = tmp.loc[~tmp.nom.isin(df_pers.nom),['prenom', 'nom', 'sources']] + +# if not pers_add.empty: +# pers_add.replace( +# ['CEN Isere AVENIR'], +# [1], +# inplace=True +# ) +# pers_add.rename(columns={'sources':'id_organisme'}, inplace=True) +# pers_add['id'] = max(df_pers.index)+1 +# pers_add.set_index('id', inplace=True) +# pers_add.to_sql( +# name='personne', +# con=con_ps, +# schema='personnes', +# if_exists='append', +# index=True, +# index_label='id' +# ) +# df_pers = get_pers() + +# # SITE +# df_site['nom'] = 'Inconnu' +# df_site['date_deb'] = pd.to_datetime(df_site.date) +# df_site['id_type_milieu'] = 2 +# df_site['id_type_site'] = 0 +# df_site['dept'] = 38 +# df_site['org'] = df_site.id_site.str[:2] +# df_site['num'] = df_site.id_site.str[2:] +# df_site['id_auteur'] = df_site['auteur'].copy() +# df_site['id'] = df_site['id_site'].copy() +# df_site.id_auteur.replace( +# df_pers.auteur.tolist(), +# df_pers.index.tolist(), +# inplace=True) +# df_site.drop( +# columns=['id_site','auteur','sources','date','commune','lieu_dit'], +# inplace=True) +# if not isin_bdd: +# df_site.to_sql( +# name='sites', +# con=con_ps, +# schema='sites', +# if_exists='append', +# index=False, +# # index_label='id' +# ) + +# # df_geom +# df_geom = df[['id_site', 'auteur', 'geom', 'date']].copy() +# df_geom['date'] = pd.to_datetime(df_geom.date) +# df_geom['id_auteur'] = df_geom['auteur'].copy() +# df_geom.id_auteur.replace( +# df_pers.auteur.tolist(), +# df_pers.index.tolist(), +# inplace=True) +# df_geom.drop( +# columns=['auteur'], +# inplace=True) +# df_geom.set_geometry('geom', crs='EPSG:2154', inplace=True) +# df_geom.reset_index(drop=True, inplace=True) + +# if not isin_bdd: +# df_geom.to_postgis( +# name='r_sites_geom', +# con=con_ps, +# schema='sites', +# if_exists='append', +# index=False, +# geom_col='geom' +# # index_label='id' +# ) + + +# # HABITAT +# df_hab = df[['id_site', 
'code_hab1','n_hab1','code_hab2','n_hab2','code_hab3','n_hab3','code_hab4','n_hab4']].copy() +# df_hab1 = df[['id_site', 'code_hab1','n_hab1']].copy() +# df_hab2 = df[['id_site', 'code_hab2','n_hab2']].copy() +# df_hab3 = df[['id_site', 'code_hab3','n_hab3']].copy() +# df_hab4 = df[['id_site', 'code_hab4','n_hab4']].copy() +# df_hab1.columns = ['id_site', 'code_hab', 'n_hab'] +# df_hab2.columns = ['id_site', 'code_hab', 'n_hab'] +# df_hab3.columns = ['id_site', 'code_hab', 'n_hab'] +# df_hab4.columns = ['id_site', 'code_hab', 'n_hab'] +# df_hab1['index'] = 1 +# df_hab2['index'] = 2 +# df_hab3['index'] = 3 +# df_hab4['index'] = 4 +# df_habF = pd.concat([df_hab1,df_hab2,df_hab3,df_hab4]) +# df_habF.reset_index(drop=True, inplace=True) + +# idgeom = get_idgeomsite(tuple(df_hab.id_site)) \ +# .rename(columns={'id':'id_geom_site'}) +# idgeom.drop_duplicates(['id','id_site'],keep='last',inplace=True) +# df_habF['id_geom_site'] = df_habF['id_site'].copy() +# df_habF.id_geom_site.replace( +# idgeom.id_site.tolist(), +# idgeom.id.tolist(), +# inplace=True) +# df_habF.replace(['-'],[None], inplace=True) +# df_habF.dropna(subset=['code_hab'],inplace=True) +# df_cb = pd.read_sql_table( +# table_name='corine_biotope', +# schema='ref_habitats', +# con = con_ps) +# df_rSitHab = df_habF \ +# .drop(columns=['id_site', 'code_hab']) + +# if not isin_bdd: +# df_rSitHab.to_sql( +# name='r_site_habitat', +# con=con_ps, +# schema='ps', +# if_exists='append', +# index=False, +# method='multi', +# # index_label='id' +# ) +# sql = ('SELECT * FROM {sch}.{tab} ' +# 'WHERE id_geom_site IN {lst} ORDER BY id').format(sch='ps', tab='r_site_habitat', lst=tuple(df_rSitHab.id_geom_site)) +# index_return = pd.read_sql( +# sql = sql, +# con = con_ps) \ +# .rename(columns={'id':'id_site_hab'}) +# df_habF = pd.merge(df_habF, index_return, on=['id_geom_site','n_hab','index'], how='left') +# df_rHabCb = df_habF[['id_site_hab', 'code_hab']] + +# if not isin_bdd: +# df_rHabCb.to_sql( +# name='r_hab_cb', +# con=con_ps, +# schema='ps', +# if_exists='append', +# index=False, +# method='multi', +# # index_label='id' +# ) + + + +# # parametre +# columns_select = ['id_site','statut','pratiques','niv_embro','recouvmnt'] +# df_Sparam = df[columns_select].copy() +# if 's_p_brouss' in df.columns: +# df_Sparam = df[columns_select + ['s_p_brouss','surf_emb']].copy() +# df_Sparam = pd.merge(df_Sparam, idgeom[['id_geom_site','id_site']],how='left', on='id_site') +# df_prm = get_param() +# df_stt = df_Sparam[['id_site','id_geom_site','statut']].copy() +# df_prt = df_Sparam[['id_site','id_geom_site','pratiques']].copy() +# df_rcv = df_Sparam[['id_site','id_geom_site','recouvmnt']].copy() +# df_brs = df_Sparam[['id_site','id_geom_site','niv_embro']].copy() +# if 's_p_brouss' in df_Sparam.columns: +# df_brs = df_Sparam[['id_site','id_geom_site','niv_embro','s_p_brouss']].copy() +# if not df_brs.empty: +# param = df_prm[df_prm.type_param=='embroussaillement'].copy() +# # if not (df_brs.s_p_brouss.unique() < 3).all(): +# # # Conversion des pourcentages en identifiant de fourchette +# # param[['min', 'max']] = param.desc.str.split(' à ',expand=True) +# # param.loc[param['min'].str.contains('<'),'max'] = param.loc[param['min'].str.contains('<'),'min'] +# # param.loc[param['min'].str.contains('<'),'min'] = '0' +# # param.loc[param['min'].str.contains('>'),'max'] = '100' +# # param[['min', 'max']] = param[['min', 'max']].replace(r'[\%\<\>]','',regex=True).astype(int) +# # df_brs['tx_brouss'] = df_brs['s_p_brouss'].copy() +# # for r,row in 
param.iterrows(): +# # df_brs.loc[(df_brs.tx_brouss > row['min']) & (df_brs.tx_brouss <= row['max']), 's_p_brouss'] = row.param +# df_brs.niv_embro = df_brs.niv_embro.replace(list(param.param.astype(int)), param.id.tolist()) +# df_brs.rename(columns={'niv_embro':'id_param','s_p_brouss':'taux'},inplace=True) +# if not df_rcv.empty: +# param = df_prm[df_prm.type_param=='recouvrement'] +# df_rcv.recouvmnt = df_rcv.recouvmnt.replace(list(param.param.astype(int)), param.id.tolist()) +# df_rcv.rename(columns={'recouvmnt':'id_param'},inplace=True) +# if not df_prt.empty: +# param = df_prm[df_prm.type_param=='pratique'] +# df_prt.pratiques = df_prt.pratiques.str.lower() +# df_prt.pratiques.replace(['paturage'],['pâturage'], regex=True, inplace=True) +# # dissociation des multi-pratiques +# d1 = df_prt.loc[df_prt.pratiques.str.contains('et'),].copy() +# d1[['p1', 'p2']] = d1['pratiques'].str.split(' et ', expand=True) +# d2 = df_prt.loc[df_prt.pratiques.str.contains('ou'),].copy() +# d2[['p1', 'p2']] = d2['pratiques'].str.split(' ou ', expand=True) +# d = pd.concat([d1,d2]) +# # Uniformisation des champs p1 et p2 avant ajout de "?" +# d.loc[(d.pratiques.str.contains('\?')),['p1','p2']] = d.loc[(d.pratiques.str.contains('\?')),['p1','p2']].replace(' \?', '', regex=True) +# # Ajout de "?" +# d.loc[(d.pratiques.str.contains('\?|ou')),['p1','p2']] = d.loc[(d.pratiques.str.contains('\?|ou')),['p1','p2']] + ' ?' +# dd = pd.concat([ +# d[['id_site','id_geom_site','p1']].rename(columns={'p1':'pratiques'}), +# d[['id_site','id_geom_site','p2']].rename(columns={'p2':'pratiques'}) +# ]).sort_index() +# df_prt= pd.concat([ +# df_prt.drop(index=dd.index.unique(),axis=0), +# dd +# ]) +# df_prt.loc[ +# df_prt.pratiques.str.contains('caprin|bovin|ovin|equin') & +# (~df_prt.pratiques.str.contains('pâturage')), ['pratiques'] +# ] = 'pâturage ' + df_prt.loc[ +# df_prt.pratiques.str.contains('caprin|bovin|ovin|equin') & +# (~df_prt.pratiques.str.contains('pâturage')), ['pratiques'] ] +# df_prt.pratiques = df_prt.pratiques.replace(list(param.param), param.id.tolist()) +# df_prt.rename(columns={'pratiques':'id_param'},inplace=True) +# if not df_stt.empty: +# param = df_prm[df_prm.type_param=='statut'] +# df_stt.statut = df_stt.statut.replace(list(param.param), param.id.tolist()) +# df_stt.rename(columns={'statut':'id_param'},inplace=True) + +# df_SparamF = pd.concat([df_stt,df_prt,df_rcv,df_brs]) +# if not isin_bdd: +# df_SparamF.drop(columns=['id_site']).to_sql( +# name='r_site_param', +# con=con_ps, +# schema='ps', +# if_exists='append', +# index=False, +# method='multi', +# # index_label='id' +# ) + + + + +# # Legende Carto ! +# df_leg = pd.DataFrame() +# if 'legende' in df.columns and 'leg_carto' in df.columns: +# df_leg = df[['id_site', 'leg_carto', 'legende']].copy() +# df_leg.legende = df_leg.legende.replace(r'[\d+\.\|\(\)]','',regex=True).str.strip() +# df_leg.legende = df_leg.legende.replace(' ',' ',regex=True) +# df_leg[~df_leg.leg_carto.eq(df_leg.legende)] +# elif 'leg_carto' in df.columns and 'legende' not in df.columns : +# df_leg = df[['id_site', 'leg_carto']].copy() +# elif 'legende' in df.columns and 'legende' not in df.columns : +# df_leg = df[['id_site', 'legende']].copy() +# if 'legende' in df.columns: +# df_leg.legende = df_leg.legende.replace(r'[\d+\.\|\(\)]','',regex=True).str.strip() +# df_leg.legende = df_leg.legende.replace(' ',' ',regex=True) + +# df_leg = pd.merge(df_leg, idgeom[['id_geom_site','id_site']],how='left', on='id_site') +# # ... +# # ... +# # ... 
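+# # The '...' placeholders above elide the leg_carto -> id_param mapping.
+# # A plausible completion, mirroring insert_legendCartoPS() defined earlier
+# # (hypothetical, untested):
+# param = pycen.ps.get_listLegendePS()
+# df_leg['id_param_leg'] = df_leg['leg_carto'].replace([*param.nom_court],[*param.id])
+# df_leg.drop(columns=['leg_carto','legende'], errors='ignore', inplace=True)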
+# if not isin_bdd:
+#     df_leg.drop(columns=['id_site']).to_sql(
+#         name='r_site_param',
+#         con=con_ps,
+#         schema='ps',
+#         if_exists='append',
+#         index=False,
+#         method='multi',
+#         # index_label='id'
+#     )
+
+
+
+
+# lst_tab = ['"PS_4MONTAGNESNE_CEN38_2014"','"PS_CHAMBARAN_CEN38_2013"','"PS_CHARTREUSE_CEN38_2010"','"PS_DRAC_CEN38_2014"',
+#     '"PS_BELLEDONNE_CEN38_2014"','"PS_BIEVRE_CEN38_2014"','"PS_BIEVRE_CEN38_2016"','"PS_SUD-GRENOBLOIS_CEN38_2009"',
+#     '"PS_VERCORS_CEN38_2011"','"cr_PS_CBNA_habitats_aggreg_06_2020"','"c_ps_inventaire_bievre_valloire"'
+# ]
+# df = pd.DataFrame()
+# for tab in lst_tab:
+#     sql = "SELECT * FROM {sch}.{tab}".format(sch=schema,tab=tab)
+#     tmp = gpd.read_postgis(
+#         sql = sql,
+#         con = con
+#     )
+#     df = pd.concat([df,tmp[['pratiques']] ])
+
+# df.pratiques = df.pratiques.replace(['paturage', 'Paturage', 'Paturâge'],['pâturage', 'Pâturage', 'Pâturage'], regex=True)
+# df.pratiques = df.pratiques.str[0].str.upper() + df.pratiques.str[1:]
+# df.drop_duplicates(inplace=True)
+# df.dropna(inplace=True)
+# df.to_excel('~/Documents/tmp/pratiques_pelouses_seches.xlsx', index=False)
+
+
+
+# ######################################
+# ###### UPDATE EMBROUSSAILLEMENT ######
+# ######################################
+# from pycen import update_to_sql
+# brs = df_brs.copy()
+# sql = ('SELECT id,id_geom_site FROM {sch}.{tab} '
+#     'WHERE id_geom_site IN {gsit} '
+#     'AND id_param IN {param};').format(
+#         sch='ps', tab='r_site_param',
+#         gsit=tuple(df_brs.id_geom_site.unique()),
+#         param=tuple(df_brs.id_param.unique()) )
+# data = pd.read_sql(sql=sql, con=con_ps)
+
+# brs = pd.merge(brs,data, on='id_geom_site').drop(columns=['id_site'])
+
+# update_to_sql(brs,con_ps,table_name='r_site_param',schema_name='ps',key_name=['id','id_geom_site'])
+
+
+
+# # sql = 'SELECT *, ST_ClusterDBSCAN(geom::geometry, eps := 1000, minpoints := 1) over () AS cluster_id FROM pelouse_seche.c_ps_inventaire_agreg'
+# # df = gpd.read_postgis(sql,con)
+# # df.sort_values('cluster_id', inplace=True)
+# # # df2 = gpd.GeoDataFrame({'geom':[]},geometry='geom', crs='EPSG:2154')
+# # df2 = gpd.GeoSeries()
+# # for i in df.cluster_id.unique().tolist():
+# #     print(i)
+# #     tmp = gpd.GeoSeries(df.loc[df.cluster_id == i, 'geom'].unary_union)
+# #     df2 = df2.append(tmp)
+
+# # df2 = gpd.GeoDataFrame({'geom':df2},geometry='geom', crs='EPSG:2154')
+# # df3 = df2.copy()
+# # df3 = df3.buffer(500)
+# # df3.to_file('/home/colas/Documents/tmp/cluster_500.shp')
+# # df2.to_file('/home/colas/Documents/tmp/cluster_withoutbuffer.shp')
diff --git a/3_AZALEE/refHydro_to_db.py b/3_AZALEE/refHydro_to_db.py
new file mode 100644
index 0000000..8d4abce
--- /dev/null
+++ b/3_AZALEE/refHydro_to_db.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+from pycen.tools import Polygons_to_MultiPolygon
+# from pycen import con  # unused: 'con' is rebound to the local engine below
+import geopandas as gpd
+from sqlalchemy import create_engine, text
+from geoalchemy2 import Geometry
+
+user_cad = 'cgeier'          # database user
+pwd_cad = 'adm1n*bdCen'      # database password
+adr_cad = '91.134.194.221'   # database IP address
+port_cad = '5432'            # database port
+base_cad = 'rhomeo'          # database name
+schema_cad = 'bdtopo3'
+con = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'.format(user_cad,pwd_cad,adr_cad,port_cad,base_cad), echo=False)
+
+
+
+file = '/media/colas/Disk2/5_BDD/RPG/1_DONNEES_LIVRAISON_2021/RPG_2-0_SHP_LAMB93_R84_2021-01-01/ILOTS_ANONYMES.shp'
+# NB: this mapping appears to be kept over from a previous import; the rename
+# that would apply it is commented out further down.
+dic = {
+    'CD_SSBV': 'cdssbv',
+    'LIB_SSBV': 'nom',
+    'CD_COMGEO': 'cd_comgeo',
+    'CD_CTB': 'cd_ctb',
+    'CD_': 'cd_',
+    'CD_SUBUNIT': 'cd_subunit',
+    'geometry': 'geom',
+}
+
+name_table = 'ilots_anonymes_2021'
+name_schema = 'rpgnew'
+# Output DB parameters
+# user_zh = 'postgres'
+# pwd_zh = 'tutu'
+# adr_zh = '192.168.60.10'
+# base_zh = 'bd_cen'
+# con_zh = create_engine('postgresql+psycopg2://{0}:{1}@{2}/{3}'.format(user_zh,pwd_zh,adr_zh,base_zh), echo=False)
+
+
+select_cols = list(dic.values())
+
+df = gpd.read_file(file)
+# df = df.set_geometry('geom')
+df.rename_geometry('geom', inplace=True)
+df.columns = df.columns.str.lower()
+
+if not df.crs:
+    df.set_crs(epsg=2154, inplace=True)
+
+if df.crs.srs.lower() != 'epsg:2154':
+    df.to_crs(epsg=2154, inplace=True)
+
+# df.rename(columns=dic, inplace=True)
+# df = df[select_cols]
+
+
+if 'id' not in df.columns:
+    df.index.name = 'id'
+    df.reset_index(drop=False, inplace=True)
+
+
+if 'Polygon' in df.geom_type.unique() and 'MultiPolygon' in df.geom_type.unique():
+    df = Polygons_to_MultiPolygon(df)
+    # from shapely.geometry.multipolygon import MultiPolygon
+    # tmp = df.loc[df.geom_type == 'Polygon'].copy()
+    # geom = [MultiPolygon([x]) for x in tmp.loc[tmp.geom_type == 'Polygon','geom']]
+    # tmp = tmp.set_geometry(geom)
+    # df = gpd.pd.concat([df.drop(tmp.index), tmp]).sort_values('id').reset_index(drop=True)
+
+if 'LineString' in df.geom_type.unique() and 'MultiLineString' in df.geom_type.unique():
+    from shapely.geometry.multilinestring import MultiLineString
+    tmp = df.loc[df.geom_type == 'LineString'].copy()
+    geom = [MultiLineString([x]) for x in tmp.loc[tmp.geom_type == 'LineString','geom']]
+    tmp = tmp.set_geometry(geom)
+    df = gpd.pd.concat([df.drop(tmp.index), tmp]).sort_values('id').reset_index(drop=True)
+
+df.to_postgis(
+    name = name_table,
+    con = con,
+    schema = name_schema,
+    if_exists = 'replace',
+    chunksize = 100000,
+    dtype={'geom':Geometry(srid=2154)}
+    )
+
+sql = """ALTER TABLE {sch}.{tab} OWNER TO cen_admin;
+GRANT ALL ON TABLE {sch}.{tab} TO cen_admin;
+GRANT ALL ON TABLE {sch}.{tab} TO grp_admin;
+--GRANT SELECT ON TABLE {sch}.{tab} TO grp_consult;
+""".format(sch=name_schema,tab=name_table)
+with con.begin() as cnx:
+    cnx.execute(text(sql))
+
+sql = """
+REVOKE SELECT ON TABLE {sch}.{tab} FROM grp_consult;
+""".format(sch=name_schema,tab=name_table)
+with con.begin() as cnx:
+    cnx.execute(text(sql))
+
+if 'rpg' in name_table:
+
+    v_tab = 'v_'+name_table.replace('reg','')+'isere'
+    lst_col = ','.join(tuple('s.'+df.columns))
+
+    sql = """CREATE OR REPLACE VIEW {sch}.{v_tab}
+    AS SELECT {cols}
+    FROM {sch}.{tab} s,
+        ref_territoire.dept_isere reg
+    WHERE st_intersects(s.geom, reg.geom);
+
+    ALTER TABLE {sch}.{v_tab} OWNER TO cen_admin;
+    GRANT ALL ON TABLE {sch}.{v_tab} TO cen_admin;
+    GRANT ALL ON TABLE {sch}.{v_tab} TO grp_admin;
+    GRANT SELECT ON TABLE {sch}.{v_tab} TO grp_consult;""".format(sch=name_schema,tab=name_table,v_tab=v_tab,cols=lst_col)
+    with con.begin() as cnx:
+        cnx.execute(text(sql))
diff --git a/3_AZALEE/tmp/correct&maj_idsite_ps.py b/3_AZALEE/tmp/correct&maj_idsite_ps.py
new file mode 100644
index 0000000..be20329
--- /dev/null
+++ b/3_AZALEE/tmp/correct&maj_idsite_ps.py
@@ -0,0 +1,116 @@
+import pycen
+import geopandas as gpd
+
+dstct = pycen.ref.get_districtNat()
+ps = pycen.ps.get_sitesGeom(statut='all')
+
+ps['org_bd'] = ps.id_site.str[2:6]
+ps['num'] = ps.id_site.str[6:]
+
+
+df = gpd.sjoin(ps,dstct[['abrev','geom']]).drop(columns=['index_right'])
+df.sort_values('id_site',inplace=True)
+
+tmp = pycen.tools.calc_recouvrmt(df1=df[df.org != df.abrev], df2=dstct)
+
+
+# Manual expansion of calc_recouvrmt above, kept for reference;
+# df1/df2 stand for its arguments (df[df.org != df.abrev] and dstct).
+from geopandas import sjoin
+tmp = sjoin(
+    df1,
+    df2[['geom']],
+    op = 'intersects',
+    how = 'left')
+tmp.dropna(subset=['index_right'],inplace=True)
+tmp.index_right = tmp.index_right.astype(int)
+tmp.reset_index(inplace=True)
+tmp = tmp.join(
+    df2[['geom']].rename(columns={'geom': 'right_geom'}),
+    on=['index_right'], how='left')
+tmp2 = tmp[['index_right','right_geom']].copy() \
+    .rename(columns={'right_geom': 'geom'}) \
+    .set_geometry('geom')
+tmp1 = tmp[['id_site','geom']].copy() \
+    .set_geometry('geom')
+
+if not tmp1.geom.values.is_valid.all():
+    tmp1.loc[~tmp1.geom.values.is_valid,'geom'] = tmp1.loc[~tmp1.geom.values.is_valid,'geom'].buffer(0)
+if not tmp2.geom.values.is_valid.all():
+    tmp2.loc[~tmp2.geom.values.is_valid,'geom'] = tmp2.loc[~tmp2.geom.values.is_valid,'geom'].buffer(0)
+
+tmp['perc_rcvmt'] = (tmp1.intersection(tmp2).area/tmp1.area)*100
+tmp = tmp.groupby(['id_site']).sum().reset_index()
+df1 = df1.merge(tmp[['id_site','perc_rcvmt']], on=['id_site'], how='left')
+df1.perc_rcvmt.fillna(0, inplace=True)
+df1.perc_rcvmt = df1.perc_rcvmt.round(2)
+
+tmp1['area_rcvmt'] = [gpd.overlay(tmp1.iloc[[i]],tmp2.iloc[[i]]).area.round()[0] for i in range(0,tmp1.shape[0]) ]
+
+
+
+# Update the district of origin
+df = ps[ps.org_bd!=ps.org].sort_values('id_site').copy()
+pycen.update_to_sql(
+    df[['id_site','org']].rename(columns={'id_site':'id'}).drop_duplicates(),
+    pycen.con,'sites','sites','id'
+)
+
+
+# Reassign site numbers
+ps_sit = pycen.ps.get_sitesInfos(statut='all')
+ps_sit.loc[ps_sit.id.isin(df.id_site),'num'] = None
+# assign numbers that are not yet in use
+for o in ps_sit.org.unique():
+    tmp_sit = ps_sit.loc[ps_sit.org==o,'num'].dropna().astype(int)
+    n_min = tmp_sit.min()  # renamed from 'min'/'max' to avoid shadowing builtins
+    n_max = tmp_sit.max()
+    lst = [str(i) for i in range(n_min,n_max) if i not in [*tmp_sit]]
+    tt = (ps_sit.org==o)&(ps_sit.num.isna())
+    tmp_sitna = ps_sit[tt]
+    if tmp_sitna.shape[0] <= len(lst):
+        ps_sit.loc[tt,'num'] = lst[:tmp_sitna.shape[0]]
+    else:
+        ps_sit.loc[(ps_sit.index.isin(tt[tt].index[:len(lst)])),'num'] = lst
+
+        tt2 = (ps_sit.org==o)&(ps_sit.num.isna())
+        min2 = n_max + 1
+        max2 = n_max + 1 + ps_sit[tt2].shape[0]
+        lst2 = [str(i) for i in range(min2,max2)]
+        ps_sit.loc[tt2,'num'] = lst2
+
+df2 = ps_sit.loc[ps_sit.id.isin(df.id_site)]
+# left-pad the numbers to four digits
+df2.loc[df2.num.str.len() == 1,'num'] = '000'+ df2.loc[df2.num.str.len() == 1,'num']
+df2.loc[df2.num.str.len() == 2,'num'] = '00' + df2.loc[df2.num.str.len() == 2,'num']
+df2.loc[df2.num.str.len() == 3,'num'] = '0' + df2.loc[df2.num.str.len() == 3,'num']
+
+pycen.update_to_sql(
+    df2[['id','num']].drop_duplicates(),
+    pycen.con,'sites','sites','id'
+)
+
+
+# Rebuild the site identifiers
+ps_sitnew = pycen.ps.get_sitesInfos(id_site=[*df.id_site],statut='all')
+ps_sitnew = ps_sitnew[[
+    'id','dept','org','num',
+]]
+ps_sitnew['idnew'] = ps_sitnew.dept+ps_sitnew.org+ps_sitnew.num
+# append to the history table
+ps_sitnew[['id','idnew']]\
+    .rename(columns={
+        'id':'id_site_old',
+        'idnew':'id_site_new'
+    }).to_sql(
+        'hist_id_site',
+        pycen.con,'sites',if_exists='append',index=False
+)
+# update the identifiers
+pycen.update_to_sql(
+    ps_sitnew[['index','dept','org','num']].rename(columns={'index':'id'}).drop_duplicates(),
+    pycen.con,'sites','sites',['dept','org','num']
+)
+pycen.update_to_sql(
+    ps_sitnew[['idnew','dept','org','num']].rename(columns={'idnew':'id'}).drop_duplicates(),
+    pycen.con,'sites','sites',['dept','org','num']
+)
+
+
diff --git a/3_AZALEE/tmp/maj_tmpv.date_geom b/3_AZALEE/tmp/maj_tmpv.date_geom
new file mode 100644
index 0000000..e69de29
diff --git a/3_AZALEE/tmp/manip_invPS_CDIsère2016.py b/3_AZALEE/tmp/manip_invPS_CDIsère2016.py
new file mode 100644
index 0000000..3a0b740
--- /dev/null
+++ b/3_AZALEE/tmp/manip_invPS_CDIsère2016.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+import geopandas as gpd
+from pycen import con
+
+path0 = '/home/colas/Documents/9_PROJETS/2_PS/TO IMPORT/'
+file = 'CDisère_PS2016_4Montagnes.shp'
+dic = {
+    'identifian':'ident',
+    'habitat1' : 'code_hab1',
+    'partpoly1': 'n_hab1',
+    'habitat2' : 'code_hab2',
+    'partpoly2': 'n_hab2',
+    'habitat3' : 'code_hab3',
+    'partpoly3': 'n_hab3',
+    'habitat4' : 'code_hab4',
+    'partpoly4': 'n_hab4',
+    'légende':'legende',
+    'embrou_cod':'niv_embrous',
+    'embrous':'%_embrous',
+    'sol nu_cod':'recouvmnt',
+    'sol nu':'%_recouvmnt',
+    'orchidées':'rmq_phyto',
+    'alliance':'rmq_phytosocio',
+    'maec':'rmq_contract_agri',
+}
+dic_auth = {
+    'Anne-Lyse Murro':'MURRO Anne-Lyse',
+    'Lucas Berardi' : 'BERARDI Lucas'
+}
+dic_prat = {
+    'Mixte' : 'Fauche, Pâturage',
+    'Adandon':'Abandon',
+    'P�turage':'Pâturage'  # the garbled key intentionally matches a mis-encoded value in the source data
+}
+dic_stat = {
+    'Aucun' : 'Non communautaire',
+    'Prioriataire' : 'Prioritaire'
+}
+dic_date = {
+    'mai 2016' : '2016-05-01',
+    'juin 2016' : '2016-06-01',
+    'juillet 2016' : '2016-07-01',
+    'aout 2016' : '2016-08-01',
+    'août 2016' : '2016-08-01',
+}
+keep_cols = ['auteur','date',*dic.values(),'rosette','pratique','statut','remarques','geom']
+
+def emb_correct(df):
+    l1 = ['GIR3','AU7']
+    l2 = ['BOI1','VIL10','VIL8a']
+    l3 = ['MEA4a','ME']
+    l4 = ['MO3','MA3','VIL14','BD1','VF','CHA2','CHA5','ROM3']
+    l5 = ['BEL1','JA5','AU4']
+    df.loc[df.index.isin(l1),'niv_embrous'] = 1
+    df.loc[df.index.isin(l2),'%_embrous'] = 4
+    df.loc[df.index.isin(l3),'%_embrous'] = 29
+    df.loc[df.index.isin(l4),'%_embrous'] = 30
+    df.loc[df.index.isin(l5),'%_embrous'] = 60
+    return df
+
+def sol_correct(df):
+    # df.loc[df.index=='GEY1','%_recouvmnt'] = 19
+    df.loc[df.index=='CHA5','%_recouvmnt'] = 21
+    # df.loc[df.index=='GEY2','%_recouvmnt'] = 19
+    # df.loc[df.index=='SN1','%_recouvmnt'] = 19
+    df.loc[df.index=='AU1','%_recouvmnt'] = 21
+    df.loc[(df.recouvmnt==1)&(df['%_recouvmnt']>0),'%_recouvmnt'] = 0
+    df.loc[(df.recouvmnt==2)&(df['%_recouvmnt']>10),'%_recouvmnt'] = 10
+    df.loc[(df.recouvmnt==3)&(df['%_recouvmnt']>20),'%_recouvmnt'] = 20
+    df.loc[(df.recouvmnt==4)&(df['%_recouvmnt']<21),'%_recouvmnt'] = 21
+    return df
+
+
+# fetch the data
+sql = 'SELECT site_code,geom FROM ps.v_pelouseseches'
+v_ps = gpd.read_postgis(sql, con)
+df0 = gpd.read_file(path0 + file)
+df0.columns = df0.columns.str.lower()
+df0.rename(columns=dic, inplace=True)
+df0.rename_geometry('geom', inplace=True)
+df = df0[keep_cols].copy()
+df.set_index('ident', inplace=True)
+df.pratique.replace(dic_prat,inplace=True)
+df.statut.replace(dic_stat,inplace=True)
+df.auteur.replace(dic_auth,inplace=True,regex=True)
+df.date = df.date.str.lower()
+df.date.replace(dic_date,inplace=True)
+df.date = gpd.pd.to_datetime(df.date)
+df = emb_correct(df)
+df = sol_correct(df)
+
+# fix a Corine Biotope code
+df.loc[df.code_hab2=='41D','code_hab2'] = '41.D'
+
+# concatenate the remark fields
+df.loc[~df.rmq_phyto.isna(),'remarques'] = df.loc[~df.rmq_phyto.isna(),'remarques'] +\
+    '; rmq_phyto : ' + df.loc[~df.rmq_phyto.isna(),'rmq_phyto']
+df.loc[~df.rosette.isna(),'remarques'] = df.loc[~df.rosette.isna(),'remarques'] +\
+    '; rosette : ' + df.loc[~df.rosette.isna(),'rosette']
+df.loc[~df.rmq_phytosocio.isna(),'remarques'] = df.loc[~df.rmq_phytosocio.isna(),'remarques'] +\
+    '; rmq_phytosocio : ' + df.loc[~df.rmq_phytosocio.isna(),'rmq_phytosocio']
+df.loc[~df.rmq_contract_agri.isna(),'remarques'] = df.loc[~df.rmq_contract_agri.isna(),'remarques'] +\
+    '; rmq_contract_agri : ' + df.loc[~df.rmq_contract_agri.isna(),'rmq_contract_agri']
+del df['rmq_phyto']
+del df['rosette']
+del df['rmq_phytosocio']
+del df['rmq_contract_agri']
+
+
+df.reset_index(drop=False,inplace=True)
+df.rename(columns={'ident':'id_origine'}, inplace=True)
+df['table_org'] = file
+df['structure'] = 'CD Isère'
+
+# END SCRIPT #
+
+
+## sort the geometries before insertion
+# identify new_site/old_site overlaps
+df_inters = gpd.sjoin(df,v_ps, how='left')
+del df_inters['index_right']
+lst_old_site = df_inters.site_code.unique()
+v_ps = v_ps[v_ps.site_code.isin(lst_old_site)].copy()
+# compute the overlap ratio
+df.reset_index(drop=False,inplace=True)
+v_ps.loc[:,'surf'] = v_ps.area
+# 'ident' was renamed to 'id_origine' above, so the new name is used here
+tmp = gpd.overlay(v_ps,df[['id_origine','geom']],how='intersection',make_valid=True)
+tmp['perc_rcvmt'] = 100*tmp.area/tmp['surf']
+tmp_sitemaj = tmp[(tmp.perc_rcvmt > 10)&(~tmp.id_origine.duplicated(keep=False))].id_origine
+tmp_sitenew = tmp[tmp.perc_rcvmt <= 10].id_origine.unique()
+
+
+site_newvsold = {
+    'old' : [*tmp.site_code.unique()],
+    'new' : [*tmp.id_origine.unique()],
+}
+
+df_new = df_inters[df_inters.site_code.isna()]
+df_inters.drop(df_new.index,inplace=True)
+df_maj = df_inters[~df_inters.site_code.duplicated(keep=False)]
+df_inters.drop(df_maj.index,inplace=True)
+df_rep = df_inters.copy()
+
+df_new = gpd.pd.concat([df_new,df_rep],axis=0)
diff --git a/3_AZALEE/tmp/manip_invPS_PS_AGGREGATION_NB_AG_2022.py b/3_AZALEE/tmp/manip_invPS_PS_AGGREGATION_NB_AG_2022.py
new file mode 100644
index 0000000..74aee14
--- /dev/null
+++ b/3_AZALEE/tmp/manip_invPS_PS_AGGREGATION_NB_AG_2022.py
@@ -0,0 +1,65 @@
+import geopandas as gpd
+import os
+import pycen
+import numpy as np
+
+path = '/home/colas/Documents/9_PROJETS/2_PS/TO IMPORT'
+file = 'PS_AGGREGATION_NB_AG.shp'
+
+
+def split_auth(col):
+    return col.str.split(r' \(',expand=True).replace(r'\)','',regex=True)
+
+def format_auth(col):
+    auth = pycen.pers.get_auteur()
+    for c in col.unique():
+        tmp = c.lower().split(' ')
+        cd1 = (np.array([tmp[0] == n for n in auth.nom.str.lower()]) |
+            np.array([tmp[0] == n for n in auth.prenom.str.lower()]))
+        cd2 = (np.array([tmp[1] == n for n in auth.nom.str.lower()]) |
+            np.array([tmp[1] == n for n in auth.prenom.str.lower()]))
+        r = auth[cd1&cd2].nom_prenom.values[0]
+        col.replace(c,r,inplace=True)
+    return col
+
+
+if __name__ == "__main__":
+
+    df = gpd.read_file(os.path.join(path,file))
+    df.columns = df.columns.str.lower()
+    df.dropna(axis=1,how='all', inplace=True)
+    for c in df.columns:
+        if df[c].dtype == object:
+            df[c] = df[c].str.strip()
+        if 'date' in c :
+            df[c] = gpd.pd.to_datetime(df[c])
+
+    n_hab = df.columns[df.columns.str.startswith('n_hab')]
+    df[df[n_hab].sum(axis=1) != 100]  # inspection only: rows whose habitat shares do not sum to 100
+
+    # Authors
+    df[['auteur_split','structure']] = split_auth(df.auteur)
+    df.auteur_split = format_auth(df.auteur_split)
+    del df['auteur']
+    df.rename(columns={'auteur_split':'auteur'},inplace=True)
+
+    # Practices
+    df.pratiques.replace([r' \/ '],r', ',regex=True,inplace=True)
+    df.pratiques.replace({
+        'paturage broyage':'pâturage, broyage',
+        'abandon paturage':'abandon, pâturage',
+        'paturage':'pâturage'
+    }, inplace=True)
+    df.type_patur.replace({'N.D':None
+    }, inplace=True)
+    cd3 = (df.pratiques=='pâturage')&(~df.type_patur.isna())
+    df.loc[cd3,'pratiques'] = df.loc[cd3,'pratiques'] +' '+ df.loc[cd3,'type_patur']
+    df.loc[cd3,'type_patur'] = None
+    cd4 = 
(~df.pratiques.isin(['pâturage','pâturage ?']))&(df.pratiques.str.contains('pâturage'))&(~df.type_patur.isna()) + df.loc[cd4,'pratiques'] = df.loc[cd4,'pratiques'].str.split(', ',expand=True)[0] + ' ' + df.loc[cd4,'type_patur'] + ', ' + df.loc[cd4,'pratiques'].str.split(', ',expand=True)[1] + df.loc[cd4,'type_patur'] = None + + df.pratiques.replace({ + 'pâturage caprin - ovin':'pâturage caprin, pâturage ovin' + }, inplace=True) + diff --git a/3_AZALEE/tmp/manip_invPS_platière.py b/3_AZALEE/tmp/manip_invPS_platière.py new file mode 100644 index 0000000..dea724c --- /dev/null +++ b/3_AZALEE/tmp/manip_invPS_platière.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- + + +import pandas as pd +import geopandas as gpd +import pycen + + + +path0 = '/home/colas/Documents/9_PROJETS/2_PS/TO IMPORT/' +path = 'cartohabitatsplatiere/' +file = 'Habitats I33 2008.gpkg' +df = gpd.read_file(path0+path+file) +df.rename_geometry('geom', inplace=True) + +if file == 'Habitats I33 2008.gpkg' : + df.columns = df.columns.str.lower() + df.rename(columns={'date_maj':'date'}, inplace=True) + df['remarques'] = \ + 'détails : ' + df['type_détaillé'] + ';' +\ + ' regroupement : ' + df['type_regroupé'] + ';'\ + ' code_n2000 : ' + df['code_n2000'] + ';'\ + ' age : ' + df['age'] + ';' + del df['id'] + del df['code_enjeu'] + del df['enjeu_numérique'] + del df['complexe_hab'] + del df['echelle'] + del df['entretien'] + del df['type_détaillé'] + del df['type_regroupé'] + del df['surface'] + del df['num_site'] + del df['code_n2000'] + del df['age'] + # df.set_index('num_site',inplace=True) + df['auteur'] = 'PONT Bernard' + df['structure'] = "AAIP" + + lst_hab = ['34.1','34.3','34.4','34.7','35.2','62.3','64.1'] + df = df[df.code_cb.str.contains('|'.join(lst_hab))] + + lst_hab2 = ['41','42','43','44','81','82','83'] + df = df[~df.code_cb.str.contains('|'.join(lst_hab2))] + + df.replace({'IC':'Communautaire'}, inplace=True) + + +df.index.name = 'ident' +df.reset_index(inplace=True, drop=False) +df.rename(columns={'code_cb':'code_hab1'}, inplace=True) +df['n_hab1'] = 100 + +ps = pycen.ps.get_sitesGeom() +# nv = gpd.read_file(path0+'PS_nature_vivante_2014.shp') + + +# nv.geometry = nv.buffer(50) +intersect = gpd.sjoin(df,ps,op='intersects')['ident'].tolist() +drop_index = df[df['ident'].isin(intersect)].index +df.drop(drop_index,inplace=True) +df.reset_index(inplace=True, drop=True) +# df.to_file(path0+path+'Habitats I33 2008 (manquants).gpkg', driver='GPKG') +# df = gpd.read_file(path0+path+'Habitats I33 2008 (manquants).gpkg') \ No newline at end of file diff --git a/3_AZALEE/tmp/test.py b/3_AZALEE/tmp/test.py new file mode 100644 index 0000000..45e4bea --- /dev/null +++ b/3_AZALEE/tmp/test.py @@ -0,0 +1,14 @@ +from pathlib import Path +from marshmallow import ( + Schema, + fields, + validates_schema, + ValidationError, + post_load, +) + +BACKEND_DIR = Path(__file__).absolute().parent.parent.parent +ROOT_DIR = BACKEND_DIR.parent +CUSTOM_STATIC_FOLDER = fields.String(load_default=ROOT_DIR / "1_Packages") +print(CUSTOM_STATIC_FOLDER) + diff --git a/3_AZALEE/tmp/zh_plu_metro.py b/3_AZALEE/tmp/zh_plu_metro.py new file mode 100644 index 0000000..25a6526 --- /dev/null +++ b/3_AZALEE/tmp/zh_plu_metro.py @@ -0,0 +1,1025 @@ +import geopandas as gpd +from pycen import zh +from os import path + +zh = zh() + +def ident_newsite(df,view=None,rcvmt=10): + ''' + Identification des nouvelles géometries ou des mises + à jours des géométries par recouvrement. 
+ MAJ (1:1) / Remplacement (1:n) / Nouveau (1:0) + ''' + maj = [] + # Récupération de la couche pelouses_seches en bdd. + if view is None: + view = zh.v_zoneshumides() + # Identification des superpositions new_site/old_site + df_inters = gpd.sjoin(df,view, how='left') + del df_inters['index_right'] + news1 = df_inters[df_inters.site_code.isna()].CODE_SITE + lst_old_site = df_inters.site_code.unique() + view = view[view.site_code.isin(lst_old_site)].copy() + view.loc[:,'surf'] = view.area + # Explosion des MULTIPOLYGONS + view2 = view.explode(index_parts=True) + view2['surf'] = view2.area + + + tmp = gpd.overlay(view,df[['CODE_SITE','geom']],how='intersection',make_valid=True,keep_geom_type=False) + if isinstance(tmp, gpd.GeoDataFrame) and tmp.geometry.name !='geom': + tmp.rename_geometry('geom',inplace=True) + tmp['perc_rcvmt'] = 100*tmp.area/tmp['surf'] + + # Recouvrement avec explosion + tmp2 = gpd.overlay(view2,df[['CODE_SITE','geom']].explode(index_parts=True),how='intersection',make_valid=True,keep_geom_type=False) + if isinstance(tmp2, gpd.GeoDataFrame) and tmp2.geometry.name !='geom': + tmp2.rename_geometry('geom',inplace=True) + tmp2['perc_rcvmt'] = 100*tmp2.area/tmp2['surf'] + # Identification des sites : MAJ (1:1) / Remplacement (1:n) / Nouveau (1:0) + # Limite recouvrement = 10% + # rcvmt_inf = tmp.perc_rcvmt < rcvmt + # rcvmt_sup = tmp.perc_rcvmt > rcvmt + # tmp.loc[rcvmt_inf&(~code_dupl)&(~site_dupl)] + + tmpp = tmp2[tmp2.perc_rcvmt > rcvmt].copy() + code_dupl = tmpp.CODE_SITE.duplicated(keep=False) + site_dupl = tmpp.site_code.duplicated(keep=False) + site_maj = tmpp[(~code_dupl) & (~site_dupl)].CODE_SITE.unique() + # site_cor = tmpp.loc[(code_dupl) | (site_dupl),['site_code','CODE_SITE']] + + + maj2 = df[df.CODE_SITE.isin(site_maj)].merge( + tmpp[['site_code','CODE_SITE','perc_rcvmt']], on=['CODE_SITE']) + maj2['id_site'] = maj2['site_code'] + + del maj2['site_code'] + if maj : df_maj = gpd.pd.concat([maj,maj2]) + else : df_maj = maj2 + # Isolement des correspondance new_site / old_site_toClose + orig_maj_all = df_maj.CODE_SITE + id_maj_all = df_maj.id_site + df_cor = tmpp.loc[ + (~tmpp.site_code.isin(id_maj_all))&(~tmpp.CODE_SITE.isin(orig_maj_all)), + ['site_code','CODE_SITE'] + ] + df_cor.rename(columns={'site_code':'id_site_old'}, inplace=True) + df_cor.sort_values('id_site_old',inplace=True) + df_cor.drop_duplicates(inplace=True) + COR = df_cor.copy() + df_cor = {} + df_cor['cor'] = COR + df_cor['df'] = df[df.CODE_SITE.isin(COR.CODE_SITE)].sort_values('CODE_SITE').copy() + # Isolement des nouveaux sites + df_new = df[(~df.CODE_SITE.isin(orig_maj_all))&(~df.CODE_SITE.isin(df_cor['cor'].CODE_SITE))].sort_values('CODE_SITE').copy() + + return df_new, df_cor, df_maj.sort_values('CODE_SITE') + + + +def format_delim(df): + dict_delim = { + "présence d'une végétation hygrophile": "présence ou absence d'une végétation hygrophile", + "présence de sols hydromorphes": "présence ou absence de sols hydromorphes", + } + dlm = (df[['CRITERES DELIMITATION']] + .droplevel(1,axis=1)['CRITERES DELIMITATION'] + .str.split('\n') + .explode() + .replace('. 
- ', '', regex=True) + .str.strip() + .str.lower() + .replace(dict_delim) + .to_frame('crit_delim') + .merge((df[['CODE_SITE']] + .droplevel(1,axis=1)), left_index=True, right_index=True)) + return dlm + +def format_usg(df): + USG = (df[['USAGES/PROCESSUS NATURELS']] + .droplevel(1,axis=1)['USAGES/PROCESSUS NATURELS'] + .str.split('\n') + .explode() + .str.strip()) + usg = (USG + .replace('-.*', '', regex=True) + .str.strip() + .astype(int) + .to_frame('crit_usg') + .merge((df[['CODE_SITE']] + .droplevel(1,axis=1)), left_index=True, right_index=True) + .merge( + (USG + .to_frame('activ_hum_autre') + .loc[USG.str.contains('Autre')] + .replace('.*-.*.: ', '', regex=True) + ), left_index=True, right_index=True, how='left') + .merge( + (USG + .replace({ + '.*-': '', + ' ': ' '}, regex=True) + .str.strip() + .to_frame('remarques') + .loc[USG.str.contains('Remb|Pât')] + ), left_index=True, right_index=True, how='left') + ) + return usg + +def format_reghydro(df): + + hydro = df['REGIME HYDRIQUE'] + hydro_in = hydro[["Entrée d'eau "]].rename(columns={"Entrée d'eau ":'reg_hydro'}) + hydro_out = hydro[["Sortie d'eau"]].rename(columns={"Sortie d'eau":'reg_hydro'}) + hydro_in['in_out'] = True + hydro_out['in_out'] = False + return (gpd.pd.concat([hydro_in,hydro_out]) + .sort_index() + .set_index('in_out',append=True)['reg_hydro'] + .str.split('\n') + .explode() + .str.strip() + .replace({ + '.*- ': '', + ' / ': '/', + 'Ev':'Év' + }, regex=True) + .replace({ + 'Nappe':'Nappes', + 'Source':'Sources', + 'Ruissellement':'Ruissellement diffus', + }) + .to_frame('reg_hydro') + .reset_index(-1) + .merge((df[['CODE_SITE']] + .droplevel(1,axis=1)), left_index=True, right_index=True) + ) + +def format_subm(df): + hydro = df['REGIME HYDRIQUE'] + return (hydro[['Fréquence submersion','Etendue submersion']] + .replace({ + 'Fréquence submersion':{ + 'Inconnue':'Inconnu', + 'Régulière':'Régulièrement submergé', + 'Toujours':'Toujours submergé', + 'Saisonnière':'Régulièrement submergé', + '.*- ': '', + }, + 'Etendue submersion':{ + 'Inconnue':'Inconnu', + 'Partielle':'Partiellement submergé', + 'Totale':'Totalement submergé', + 'Saisonnière':'Inconnu', + '.*- ': '', + } + },regex=True) + .rename(columns={ + 'Fréquence submersion':'sub_freq', + 'Etendue submersion':'sub_etend', + }) + .merge((df[['CODE_SITE']] + .droplevel(1,axis=1)), left_index=True, right_index=True)) + +def format_cnx(df): + return (df[['CONNEXION ZH ENVIRONNEMENT']] + .droplevel(1,axis=1) + .rename(columns={'CONNEXION ZH ENVIRONNEMENT':'id_param_connect'}) + .astype(int) + .merge((df[['CODE_SITE']] + .droplevel(1,axis=1)), left_index=True, right_index=True)) + + +def format_fctBio(df): + dict_fct = { + 'Etapes migratoires':'étapes migratoires, zones de stationnement, dortoirs', + 'Zone alimentation faune':"zone particulière d'alimentation pour la faune", + 'Zone reproduction faune':'zone particulière liée à la reproduction', + 'Connexions biologiques':'connexions biologiques', + } + autre_fct = (df[['AUTRE INTERET']] + .droplevel(1,axis=1) + .dropna() + .rename(columns={'AUTRE INTERET':'description'})) + autre_fct['fonction'] = "autre interet fonctionnel d'ordre ecologique" + fct_bio = (df['HABITATS POPULATIONS ANIMALES OU VEGETALES'] + .stack() + .rename_axis(['index','fonction']) + .str.strip() + .replace('x',None) + .to_frame('description') + .reset_index(-1) + .replace(dict_fct) + ) + autre_fct['typ_fonc'] = 'fct_bio' + fct_bio['typ_fonc'] = 'fct_bio' + + return (gpd.pd.concat([autre_fct,fct_bio]) + .merge((df[['CODE_SITE']] + 
.droplevel(1,axis=1)), left_index=True, right_index=True)
+        .sort_index())
+
+def format_fctHydro(df):
+    dict_fct = {
+        'Expansion des crues':'expansion naturelle des crues',
+        "Soutien d'étiage":"soutien naturel d'étiage",
+        'Ralentissement du ruissellement':'ralentissement du ruissellement',
+        'Epuration des eaux':"fonctions d'épuration",
+        "Protection contre l'érosion":"rôle naturel de protection contre l'érosion",
+    }
+    fct_hyd = (df[['REGULATION HYDRAULIQUE','PROTECTION MILIEU PHYSIQUE']]
+        .droplevel(0,axis=1)
+        .stack()
+        .rename_axis(['index','fonction'])
+        .str.strip()
+        .replace('x',None)
+        .to_frame('description')
+        .reset_index(-1)
+        .replace(dict_fct)
+        .merge((df[['CODE_SITE']]
+            .droplevel(1,axis=1)), left_index=True, right_index=True)
+        )
+    fct_hyd['typ_fonc'] = 'fct_hydro'
+
+    return fct_hyd
+
+def format_valSocioEco(df):
+    dict_fct = {
+        'RESERVOIR AEP':"réservoir pour l'alimentation en eau potable",
+        'PRODUCTION BIOLOGIQUE':'production biologique',
+        'PROD. MATIERE PREMIERE':'production de matière première',
+        'VALORISATION PEDAGOGIQUE':'intérêt pour la valorisation pédagogique / éducation',
+        'INTERET PAYSAGER':'intérêt paysager',
+        'LOISIRS / VALEURS RECREATIVES':'intérêt pour les loisirs/valeurs récréatives',
+        'VALEURS SCIENTIFIQUES':'valeur scientifique',
+        'VALEURS CULTURELLES':'valeur culturelle',
+        'NUISANCES':'nuisances sur les conditions de vie des populations humaines résidentes',
+    }
+
+    valSocioEco = (df[['RESERVOIR AEP','PRODUCTION BIOLOGIQUE',
+        'PROD. MATIERE PREMIERE','VALORISATION PEDAGOGIQUE','INTERET PAYSAGER','LOISIRS / VALEURS RECREATIVES',
+        'VALEURS SCIENTIFIQUES','VALEURS CULTURELLES','NUISANCES']]
+        .droplevel(1,axis=1)
+        .stack()
+        .rename_axis(['index','fonction'])
+        .replace('x',None)
+        .to_frame('description')
+        .reset_index(-1)
+        .replace(dict_fct)
+        .merge((df[['CODE_SITE']]
+            .droplevel(1,axis=1)), left_index=True, right_index=True)
+        )
+    valSocioEco['typ_fonc'] = 'val_socioEco'
+    return valSocioEco
+
+def format_patrim(df):
+    dict_fct = {
+        'Inver-tébrés':'invertébrés',
+        'Insectes':'insectes',
+        'Poissons':'poissons',
+        'Amphi-biens':'amphibiens',
+        'Reptiles':'reptiles',
+        'Oiseaux':'oiseaux',
+        'Mammi-fères':'mammifères',
+        'Flore vasculaire':'floristiques',
+        'Algues':'algues',
+        'Champi-gnons':'champignons',
+        'Lichens':'lichens',
+        'Bryo-phytes':'bryophytes',
+        'Ptérido-phytes':'ptéridophytes',
+        'Phané-rophytes':'phanérogames',
+    }
+    # was 'data', a name only defined under __main__; use the df argument instead
+    patrim = (df[['FAUNISTIQUES','FLORISTIQUES']]
+        .droplevel(0,axis=1)
+        .stack()
+        .rename_axis(['index','fonction'])
+        .replace('x',None)
+        .to_frame('description')
+        .reset_index(-1)
+        .replace(dict_fct)
+        .merge((df[['CODE_SITE']]
+            .droplevel(1,axis=1)), left_index=True, right_index=True)
+        )
+    patrim['typ_fonc'] = 'int_patri'
+    return patrim
+
+def format_fct(df):
+    fct_bio = format_fctBio(df)
+    fct_hydro = format_fctHydro(df)
+    valSocioEco = format_valSocioEco(df)
+    patrim = format_patrim(df)
+    return (gpd.pd.concat([fct_bio,fct_hydro,valSocioEco,patrim])
+        .sort_values('CODE_SITE'))
+
+def format_info(df):
+    dict_col = {
+        'NOM_SITE':'nom',
+        'Commentaires / remarques fiche de terrain':'rmq_usage_process',
+    }
+    return (df[['CODE_SITE','NOM_SITE',
+        'Commentaires / remarques fiche de terrain']]
+        .droplevel(1,axis=1)
+        .sort_values('CODE_SITE')
+        .rename(columns=dict_col))
+
+def format_data(df):
+    info = format_info(df)
+    delim = format_delim(df)
+    usg = format_usg(df)
+    reghydro = format_reghydro(df)
+    subm = format_subm(df)
+    cnx = format_cnx(df)
+    fct = format_fct(df)
+    return 
info,delim,usg,reghydro,subm,cnx,fct + + +def insert_regHydro(df): + sch = pycen.zh().schema + tab_regHyd = 'r_site_reghydro' + reg_hydro = pycen.zh()._get_param(param_table='param_reg_hydro') + + df.replace({'reg_hydro':dict(zip(reg_hydro.nom,reg_hydro.id))},inplace=True) + df.rename(columns={ + 'reg_hydro':'id_reg_hydro', + 'site_cod':'id_site', + 'site_code':'id_site', + },inplace=True) + dfinout,ins = insertAttrs(df,'zones_humides','r_site_reghydro') + if ins: + ids = select_ID(dfinout[dfinout.columns.drop('auteur')],sch,tab_regHyd) + same_col = dfinout.columns[dfinout.columns.isin(ids.columns)] + if 'date' in same_col: + dfinout['date'] = dfinout['date'].astype(str) + ids['date'] = ids['date'].astype(str) + for c in same_col: + if dfinout[c].dtype != ids[c].dtype: + dfinout[c] = dfinout[c].astype(ids[c].dtype) + dfinout = dfinout.merge(ids, on=[*same_col], how='left') + insertAutorAttrs(dfinout,colname_rsite='id_sitehydro', sch=sch, tab='r_rsitehydro_auteur') + else: return + +def insert_subm(df): + sch = pycen.zh().schema + tab_sub = 'r_site_sub' + p_con_sub = pycen.zh()._get_param(param_table='param_sub', type_table='type_param_sub',type_court=False) + + + df['sub_freq'].fillna('Inconnu',inplace=True) + df['sub_etend'].fillna('Inconnu',inplace=True) + df['id_freqsub'] = df['sub_freq'].str.lower() \ + .replace([*p_con_sub.nom.str.lower()],[*p_con_sub.id.astype(str)]) + df['id_etendsub'] = df['sub_etend'].str.lower() \ + .replace([*p_con_sub.nom.str.lower()],[*p_con_sub.id.astype(str)]) + df.rename(columns={'site_cod':'id_site','site_code':'id_site'},inplace=True) + df.drop(columns=['sub_freq','sub_etend'],inplace=True) + + df,ins = insertAttrs(df,sch, tab_sub) + if ins: + ids = select_ID(df[df.columns.drop('auteur')],sch,tab_sub) + ids.loc[~ids.id_etendsub.isna(),'id_etendsub'] = ids.loc[~ids.id_etendsub.isna(),'id_etendsub']\ + .astype(int).astype(str) + if ids.id_freqsub.dtype==int: + ids.id_freqsub = ids.id_freqsub.astype(str) + same_col = df.columns[df.columns.isin(ids.columns)] + if 'date' in same_col: + df['date'] = df['date'].astype(str) + ids['date'] = ids['date'].astype(str) + for col in same_col: + if df[col].dtype != ids[col].dtype: + print(col) + if df[col].dtype == float: + ids[col] = ids[col].astype(float) + elif df[col].dtype == int: + ids[col] = ids[col].astype(int) + elif df[col].dtype == object: + ids[col] = ids[col].astype(object) + df = df.merge(ids, on=[*same_col], how='left') + insertAutorAttrs(df,colname_rsite='id_sitesub', sch=sch, tab='r_rsitesub_auteur') + else: return + +def insert_cnx(df): + tab_con = 'r_site_type_connect' + sch = pycen.zh().schema + p_con_cnx = pycen.zh()._get_param(param_table='param_type_connect') + + df.rename(columns={'site_cod':'id_site','site_code':'id_site'},inplace=True) + df.drop(columns=['connexion'],inplace=True,errors='ignore') + + df,ins = insertAttrs(df,sch, tab_con) + if ins: + ids = select_ID(df[df.columns.drop('auteur')],sch,tab_con) + if ids.id_param_connect.dtype==int: + ids.id_param_connect = ids.id_param_connect.astype(str) + if df.id_param_connect.dtype==int: + df.id_param_connect = df.id_param_connect.astype(str) + same_col = df.columns[df.columns.isin(ids.columns)] + if 'date' in same_col: + df['date'] = df['date'].astype(str) + ids['date'] = ids['date'].astype(str) + df = df.merge(ids, on=[*same_col], how='left') + insertAutorAttrs(df,colname_rsite='id_siteconnect', sch=sch, tab='r_rsiteconnect_auteur') + else: return + +def define_author(df,is_new=False): + is_mosaique = df['Source'].str.contains('Mosa.que') + 
is_acer = df['Source'].str.contains('Acer') + is_setis = df['Source'].str.contains('SETIS') + is_evin = df['Source'].str.contains('Evinerude') + is_cen = df['Source'] == 'Inventaire départemental' + if not is_new: + same_date = df.date == df.date_geom + df.loc[same_date&is_mosaique,'auteur'] = 'Mosaïque Environnement' + df.loc[same_date&is_acer,'auteur'] = 'Acer campestre' + df.loc[same_date&is_setis,'auteur'] = 'SETIS GROUPE Degaud' + df.loc[same_date&is_evin,'auteur'] = 'EVINERUDE' + df.loc[same_date&is_cen,'auteur'] = 'CEN Isère' + df.loc[df.auteur.isna(),'auteur'] = 'Mosaïque Environnement' + else: + df.loc[is_mosaique,'auteur'] = 'Mosaïque Environnement' + df.loc[is_acer,'auteur'] = 'Acer campestre' + df.loc[is_setis,'auteur'] = 'SETIS GROUPE Degaud' + df.loc[is_evin,'auteur'] = 'EVINERUDE' + df.loc[is_cen,'auteur'] = 'CEN Isère' + df.loc[df.auteur.isna(),'auteur'] = 'Mosaïque Environnement' + return df + +if __name__ == "__main__": + + PATH = '/media/colas/SRV/FICHIERS/OUTILS/CARTOGRAPHIE/ESPACE DE TRAVAIL/ETUDES/PLUI METRO/INV_ZH_PLUI_METRO/Mosaique/MAJ-INV-ZH MOSAIQUE 2018-SIG_BDD' + shp = 'ZH_GAM_CC45.shp' + data_file = 'BDD_ZH_GAM_database.xlsx' + dict_cols = { + 'CODE_SITE':'id_origine', + 'DATEMODIF':'date', + 'TYPO_SDAGE':'id_typo_sdage', + } + + # Lecture des données + data = gpd.pd.read_excel(path.join(PATH,data_file),sheet_name='Fiches_terrain',header=[1,2]) + info,delim,usg,reghydro,subm,cnx,fct = format_data(data) + + df = gpd.read_file(path.join(PATH,shp)) + df.DATEMODIF = gpd.pd.to_datetime(df.DATEMODIF) + LST_IGNORE = [ + '38GAM0012','38GAM0034','38GAM0045','38GAM0142', + '38GAM0003','38GAM0004','38GAM0006','38GAM0007', + '38GAM0008','38GAM0009','38GAM0011','38GAM0015', + '38GAM0016','38GAM0017','38GAM0019','38GAM0021', + '38GAM0026','38GAM0138','38GAM0032','38GAM0035', + '38GAM0040','38GAM0041','38GAM0042','38GAM0044', + '38GAM0046','38GAM0047','38GAM0049','38GAM0051', + '38GAM0052','38GAM0053','38GAM0054','38GAM0055', + '38GAM0056','38GAM0069','38GAM0070','38GAM0073', + '38GAM0076','38GAM0141','38GAM0001','38GAM0005', + '38GAM0018','38GAM0023','38GAM0029','38GAM0033', + '38GAM0039','38GAM0050','38GAM0060','38GAM0134' + ] + LST_HISTO = [ + '38GAM0024','38GAM0031','38GAM0058','38GAM0139', + '38GAM0010','38GAM0014','38GAM0072','38GAM0075', + '38GAM0133'] + LST_NEW_MAJ = [ + '38GAM0022','38GAM0061', + '38GAM0118','38GAM0127','38GAM0129'] + GAM0115 = ['38GAM0115'] + GAM0057 = ['38GAM0057'] + GAM0091 = ['38GAM0091'] + GAM0079 = ['38GAM0079'] + GAM0108 = ['38GAM0108'] + LST_COR_MAJ = [ + '38GAM0036','38GAM0068', + '38GAM0063','38GAM0123', + '38GAM0071'] + NOT_MAJ = [ + '38GAM0131','38GAM0130','38GAM0126','38GAM0111','38GAM0110','38GAM0089','38GAM0080','38GAM0059','38GAM0048','38GAM0043','38GAM0038','38GAM0037','38GAM0028','38GAM0025','38GAM0020','38GAM0013','38GAM0002', + '38GAM0087','38GAM0132','38GAM0135','38GAM0136','38GAM0098','38GAM0088','38GAM0090','38GAM0092','38GAM0093','38GAM0094','38GAM0095','38GAM0096','38GAM0097','38GAM0099','38GAM0100','38GAM0101','38GAM0102','38GAM0067','38GAM0103','38GAM0104','38GAM0062','38GAM0064','38GAM0065','38GAM0066','38GAM0105','38GAM0074','38GAM0077','38GAM0084','38GAM0109','38GAM0078','38GAM0081','38GAM0082','38GAM0083','38GAM0085','38GAM0086','38GAM0112','38GAM0113','38GAM0114','38GAM0116','38GAM0117','38GAM0119','38GAM0120','38GAM0121','38GAM0122','38GAM0124','38GAM0125','38GAM0137','38GAM0140'] + df = df[~df.CODE_SITE.isin([*LST_IGNORE,*NOT_MAJ,*GAM0115,*GAM0057,*GAM0091,*GAM0079,*GAM0108,*LST_HISTO,*LST_COR_MAJ,*LST_NEW_MAJ])] + 
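+    # Sanity-check sketch (not part of the original workflow): the curated
+    # lists above are assumed pairwise disjoint; the names 'groups'/'seen'
+    # are illustrative only.
+    # groups = [LST_IGNORE, NOT_MAJ, LST_HISTO, LST_NEW_MAJ, LST_COR_MAJ,
+    #           GAM0115, GAM0057, GAM0091, GAM0079, GAM0108]
+    # seen = set()
+    # for g in groups:
+    #     dup = seen.intersection(g)
+    #     assert not dup, 'codes present in more than one list: %s' % dup
+    #     seen.update(g)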
+ if isinstance(df, gpd.GeoDataFrame) and df.geometry.name !='geom': + df.rename_geometry('geom',inplace=True) + if isinstance(df, gpd.GeoDataFrame) and df.crs.srs.lower() != 'epsg:2154': + df.to_crs(2154,inplace=True) + + # Distinction des types de sites + v_zh = zh.v_zoneshumides() + df_new, df_cor, df_maj = ident_newsite(df,v_zh,rcvmt=10) + df_new.rename(columns=dict_cols,inplace=True) + df_maj.rename(columns=dict_cols,inplace=True) + df_cor['df'].rename(columns=dict_cols,inplace=True) + df_cor['cor'].rename(columns=dict_cols,inplace=True) + if not df_new.empty: + df_new = define_author(df_new,True) + + locdata_new = data.droplevel(1,axis=1).CODE_SITE.isin([*df_new.id_origine]) + locdata_maj = data.droplevel(1,axis=1).CODE_SITE.isin([*df_maj.id_origine]) + locdata_cor = data.droplevel(1,axis=1).CODE_SITE.isin([*df_cor['df'].id_origine]) + data_new = data[locdata_new] + data_maj = data[locdata_maj] + data_cor = data[locdata_cor] + + + # df_maj : distinction date récentes vs anciennes + t1 = df_maj.columns.str.contains('date',case=False) + t3 = v_zh.columns.str.contains('date',case=False) + c1 = df_maj.columns[t1] + c3 = v_zh.columns[t3] + maj_tmpv = (gpd.pd.merge(df_maj,v_zh[['site_code','date_geom']],how='left',left_on='id_site',right_on='site_code') + .drop(columns='id_site')) + maj_tmpv = define_author(maj_tmpv,False) + test_dt_new = maj_tmpv.date >= maj_tmpv.date_geom + maj_dt_new = maj_tmpv[test_dt_new].drop(columns=['CENTRE_X','CENTRE_Y']) + maj_dt_old = maj_tmpv[~test_dt_new].drop(columns=['CENTRE_X','CENTRE_Y']) + + # df_cor : distinction date récentes vs anciennes + t2 = df_cor['df'].columns.str.contains('date',case=False) + c2 = df_cor['df'].columns[t2] + cor_tmpv = ( + gpd.pd.merge( + gpd.pd.merge( + df_cor['cor'], + df_cor['df'],#[['id_origine','date','Source']], + on='id_origine',how='left'), + v_zh[['site_code','date_geom']],how='left',left_on='id_site_old',right_on='site_code') + .drop(columns='id_site_old')) + cor_tmpv = define_author(cor_tmpv,False) + # cor_tmpv = cor_tmpv[~cor_tmpv.site_code.isin(NOT_MAJ)] + test_dt_new2 = cor_tmpv.date >= cor_tmpv.date_geom + cor_dt_new = cor_tmpv[test_dt_new2].sort_values(by='id_origine').drop(columns=['CENTRE_X','CENTRE_Y']) + cor_dt_old = cor_tmpv[~test_dt_new2].sort_values(by='id_origine').drop(columns=['CENTRE_X','CENTRE_Y']) + + + ### Suite : utilisation du fichier `insert_zh.py` + import pycen + from ..insert_zh import ( + insertNewSite, + insertNewGeom, + insertAttrsDelim, + insertAttrsUsgPrss, + insertAttrs, + insertAttrsFct, + select_ID,insertAutorAttrs) + + + ############################ + ######### df_new ########### + ############################ + + dfNew = (df_new[['id_origine','NOM_SITE','id_typo_sdage','date','auteur','geom']] + .copy() + .rename(columns={ + 'NOM_SITE':'site_name','id_origine':'site_cod'}) + ) + dfNew['dept'] = dfNew.site_cod.str[:2] + dfNew['org'] = dfNew.site_cod.str[2:5] + dfNew['num'] = dfNew.site_cod.str[5:] + dfNew['type_milieux'] = 'Milieux humides' + insertNewSite((dfNew + .drop(columns='geom') + .rename(columns={ + 'date':'date_deb' + }))) + + dfNewGeom = (dfNew[['site_cod','date','auteur','geom']] + .copy() + .merge(info,left_on='site_cod',right_on='CODE_SITE',how='left') + .drop(columns=['CODE_SITE','nom'])) + + insertNewGeom(pycen.tools.Polygons_to_MultiPolygon + (dfNewGeom)) + + dfNewDelim = (dfNew[['site_cod','date','auteur']] + .merge(delim,left_on='site_cod',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE'])) + insertAttrsDelim(dfNewDelim) + + dfNewUPS = 
(dfNew[['site_cod','date','auteur']] + .merge(usg,left_on='site_cod',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE'])) + dfNewUPS['impact'] = 'Inconnu' + dfNewUPS['localisation'] = 'Inconnu' + hactivhum = gpd.pd.read_sql('SELECT * FROM zones_humides.param_activ_hum',con=pycen.con) + insertAttrsUsgPrss((dfNewUPS + .rename(columns={ + 'crit_usg':'activite_hum', + 'remarques':'remarks'}) + .astype({'activite_hum':int}) + .replace({'activite_hum':dict(zip(hactivhum.id,hactivhum.nom))}) + )) + + dfNewRHyd = (dfNew[['site_cod','date','auteur']] + .merge(reghydro,left_on='site_cod',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE'])) + insert_regHydro(dfNewRHyd) + + dfNewSubm = (dfNew[['site_cod','date','auteur']] + .merge(subm,left_on='site_cod',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE'])) + insert_subm(dfNewSubm) + + dfNewCnx = (dfNew[['site_cod','date','auteur']] + .merge(cnx,left_on='site_cod',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE'])) + insert_cnx(dfNewCnx) + + dfNewFct = (dfNew[['site_cod', 'date', 'auteur']] + .merge(fct,left_on='site_cod',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE']) + .rename(columns={ 'description':'memo'})) + insertAttrsFct(dfNewFct,True) + + ############################ + ############################ + ######### df_maj ########### + ############################ + ############################ + + ############################ + ######### NEW maj ########## + ### 38GAM0108 + GAM108 = cor_dt_new[cor_dt_new.id_origine=='38GAM0108'].copy() + GAM108geom = (GAM108[['site_code','id_origine','date','auteur','geom']] + .copy() + .merge(info,left_on='id_origine',right_on='CODE_SITE',how='left') + .set_geometry('geom',crs=2154) + .drop(columns=['CODE_SITE','nom'])) + insertNewGeom(GAM108geom) + + # Test + # maj_dt_new.merge(info,left_on='id_origine',right_on='CODE_SITE',how='inner').drop(columns=['CODE_SITE']) + + dfNewMajGeom = (maj_dt_new[['site_code','id_origine','date','auteur','geom']] + .copy() + .merge(info,left_on='id_origine',right_on='CODE_SITE',how='left') + .drop(columns=['CODE_SITE','nom'])) + + dfNewMajDelim = (maj_dt_new[['site_code','date','auteur','id_origine']] + .merge(delim,left_on='id_origine',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE','id_origine'])) + + hactivhum = gpd.pd.read_sql('SELECT * FROM zones_humides.param_activ_hum',con=pycen.con) + dfNewMajUPS = (maj_dt_new[['site_code','date','auteur','id_origine']] + .merge(usg,left_on='id_origine',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE','id_origine']) + .rename(columns={ + 'crit_usg':'activite_hum', + 'remarques':'remarks'}) + .astype({'activite_hum':int}) + .replace({'activite_hum':dict(zip(hactivhum.id,hactivhum.nom))}) + ) + dfNewMajUPS['impact'] = 'Inconnu' + dfNewMajUPS['localisation'] = 'Inconnu' + + dfNewMajRHyd = (maj_dt_new[['site_code','date','auteur','id_origine']] + .merge(reghydro,left_on='id_origine',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE','id_origine'])) + + dfNewMajSubm = (maj_dt_new[['site_code','date','auteur','id_origine']] + .merge(subm,left_on='id_origine',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE','id_origine'])) + + dfNewMajCnx = (maj_dt_new[['site_code','date','auteur','id_origine']] + .merge(cnx,left_on='id_origine',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE','id_origine'])) + + dfNewMajFct = (maj_dt_new[['site_code', 'date', 'auteur','id_origine']] + 
.merge(fct,left_on='id_origine',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE','id_origine']) + .rename(columns={ 'description':'memo'})) + + insertNewGeom(pycen.tools.Polygons_to_MultiPolygon(dfNewMajGeom)) + insertAttrsDelim(dfNewMajDelim) + insertAttrsUsgPrss(dfNewMajUPS) + insert_regHydro(dfNewMajRHyd) + insert_subm(dfNewMajSubm) + insert_cnx(dfNewMajCnx) + insertAttrsFct(dfNewMajFct,True) + + + ############################# + ######## Histo maj ########## + LST_HISTO = [ + '38GAM0024','38GAM0031','38GAM0058','38GAM0139', + '38GAM0010','38GAM0014','38GAM0072','38GAM0075', + '38GAM0133', + ] + histo_maj = gpd.pd.concat([ + maj_dt_old[maj_dt_old.id_origine.isin(LST_HISTO)], + cor_dt_old[cor_dt_old.id_origine.isin(LST_HISTO)] + ]) + histo_maj = define_author(histo_maj,True) + insertNewGeom((histo_maj[['site_code','date','auteur','id_origine','geom']] + .rename(columns={'site_code':'id_site'}) + )) + + histo_maj2 = gpd.read_file(path.join(PATH,'../../zh_gam_cc45_modifByMJ&GC.gpkg')) + histo_maj2 = define_author(histo_maj2,True) + + ### 38GAM0057 + histo_maj2 = (histo_maj2.loc[ + histo_maj2.CODE_SITE == '38GAM0057', + ['CODE_SITE','DATEMODIF','auteur','geometry']] + .rename(columns={ + 'CODE_SITE':'id_origine', + 'DATEMODIF':'date'}) + .rename_geometry('geom')) + histo_maj2['id_site'] = '38RD0010' + histo_maj2['date'] = gpd.pd.to_datetime(histo_maj2.date) + insertNewGeom(histo_maj2) + + + ############################# + ######## New maj ############ + ### 38GAM0115 + replace_geom = maj_dt_old[maj_dt_old.id_origine=='38GAM0115'].copy() + replace_geom = replace_geom[['site_code','id_origine','date','geom','auteur']] + pycen.update_to_sql( + (replace_geom + .merge(info,left_on='id_origine',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE','nom']) + .rename(columns={'site_code':'id_site'})), + pycen.con,'r_sites_geom','sites','id_site', + ) + rpl_delim = (replace_geom + .merge(delim,left_on='id_origine',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE','id_origine','geom'])) + hactivhum = gpd.pd.read_sql('SELECT * FROM zones_humides.param_activ_hum',con=pycen.con) + rpl_activHum = (replace_geom + .merge(usg,left_on='id_origine',right_on='CODE_SITE',how='inner') + .rename(columns={ + 'crit_usg':'activite_hum', + 'remarques':'remarks'}) + .astype({'activite_hum':int}) + .replace({'activite_hum':dict(zip(hactivhum.id,hactivhum.nom))}) + .drop(columns=['CODE_SITE','id_origine','geom'])) + rpl_activHum['impact'] = 'Inconnu' + rpl_activHum['localisation'] = 'Inconnu' + rpl_RHyd = (replace_geom + .merge(reghydro,left_on='id_origine',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE','id_origine','geom'])) + rpl_subm = (replace_geom + .merge(subm,left_on='id_origine',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE','id_origine','geom'])) + rpl_cnx = (replace_geom + .merge(cnx,left_on='id_origine',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE','id_origine','geom'])) + rpl_fct = (replace_geom + .merge(fct,left_on='id_origine',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE','id_origine','geom']) + .rename(columns={ 'description':'memo'})) + insertAttrsDelim(rpl_delim) + insertAttrsUsgPrss(rpl_activHum) + insert_regHydro(rpl_RHyd) + insert_subm(rpl_subm) + insert_cnx(rpl_cnx) + insertAttrsFct(rpl_fct,True) + + ### 38GAM0091 + replace_geom = cor_dt_old[cor_dt_old.id_origine=='38GAM0091'].copy() + replace_geom = replace_geom[['site_code','id_origine','date','geom','auteur']] + pycen.update_to_sql( + 
(replace_geom.drop(columns=['auteur']) + .merge(info,left_on='id_origine',right_on='CODE_SITE',how='inner') + .set_geometry('geom',crs=2154) + .drop(columns=['CODE_SITE','nom']) + .rename(columns={'site_code':'id_site'})), + pycen.con,'r_sites_geom','sites','id_site', + ) + + LST_NEW_MAJ = [ + '38GAM0022','38GAM0061', + '38GAM0118','38GAM0127','38GAM0129'] + + NEW_maj = (gpd.pd.concat([ + maj_dt_old[maj_dt_old.id_origine.isin(LST_NEW_MAJ)], + cor_dt_new[cor_dt_new.id_origine.isin(LST_NEW_MAJ)], + cor_dt_old[cor_dt_old.id_origine.isin(LST_NEW_MAJ)] + ]).drop(columns=['CENTRE_X','CENTRE_Y'])) + NEW_maj.loc[NEW_maj.date<'2017-01-01','date'] = '2017-01-01' + + rpl_geom = (NEW_maj + .merge(info,left_on='id_origine',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE','nom']) + .rename(columns={'site_code':'id_site'})) + rpl_delim = (NEW_maj + .merge(delim,left_on='id_origine',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE','id_origine','geom'])) + hactivhum = gpd.pd.read_sql('SELECT * FROM zones_humides.param_activ_hum',con=pycen.con) + rpl_activHum = (NEW_maj + .merge(usg,left_on='id_origine',right_on='CODE_SITE',how='inner') + .rename(columns={ + 'crit_usg':'activite_hum', + 'remarques':'remarks'}) + .astype({'activite_hum':int}) + .replace({'activite_hum':dict(zip(hactivhum.id,hactivhum.nom))}) + .drop(columns=['CODE_SITE','id_origine','geom'])) + rpl_activHum['impact'] = 'Inconnu' + rpl_activHum['localisation'] = 'Inconnu' + rpl_RHyd = (NEW_maj + .merge(reghydro,left_on='id_origine',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE','id_origine','geom'])) + rpl_subm = (NEW_maj + .merge(subm,left_on='id_origine',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE','id_origine','geom'])) + rpl_cnx = (NEW_maj + .merge(cnx,left_on='id_origine',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE','id_origine','geom'])) + rpl_fct = (NEW_maj + .merge(fct,left_on='id_origine',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE','id_origine','geom']) + .rename(columns={ 'description':'memo'})) + insertNewGeom(rpl_geom) + insertAttrsDelim(rpl_delim) + insertAttrsUsgPrss(rpl_activHum) + insert_regHydro(rpl_RHyd) + insert_subm(rpl_subm) + insert_cnx(rpl_cnx) + insertAttrsFct(rpl_fct,True) + + + ######### OLD df_maj ######## + maj_dt_old.merge(info,left_on='id_origine',right_on='CODE_SITE',how='inner').drop(columns=['CODE_SITE']) + maj_dt_old[maj_dt_old.date != maj_dt_old.date_geom] + # maj_dt_old = maj_dt_old[~maj_dt_old.id_origine.isin(NOT_MAJ)] + + ############################ + ####### df_cor_OLD ######### + ############################ + ### 38GAM0079 + LST_COR_OLD = ['38GAM0079'] + COR_majOLD = (gpd.pd.concat([ + maj_dt_new[maj_dt_new.id_origine.isin(LST_COR_OLD)], + maj_dt_old[maj_dt_old.id_origine.isin(LST_COR_OLD)], + cor_dt_new[cor_dt_new.id_origine.isin(LST_COR_OLD)], + cor_dt_old[cor_dt_old.id_origine.isin(LST_COR_OLD)] + ]).drop(columns=['CENTRE_X','CENTRE_Y'])) + COR_majOLD.auteur = 'CEN Isère' + COR_majOLD_new = (COR_majOLD[['id_origine','NOM_SITE','id_typo_sdage','date','auteur','geom']].copy() + .rename(columns={ + 'NOM_SITE':'site_name','id_origine':'site_cod'}) + .drop_duplicates()) + + COR_majOLD_new.auteur = 'CEN Isère' + COR_majOLD_new['date_fin'] = '2019-02-28' + COR_majOLD_new['dept'] = COR_majOLD_new.site_cod.str[:2] + COR_majOLD_new['org'] = COR_majOLD_new.site_cod.str[2:5] + COR_majOLD_new['num'] = COR_majOLD_new.site_cod.str[5:] + COR_majOLD_new['type_milieux'] = 'Milieux humides' + COR_majOLD_cor = 
(COR_majOLD[['id_origine','site_code']] + .rename(columns={ + 'site_code':'id_site_new','id_origine':'id_site_old'}) + .drop_duplicates()) + COR_majOLD_newgeom = (COR_majOLD_new[['site_cod','date','auteur','geom']] + .merge(info,left_on='site_cod',right_on='CODE_SITE',how='left') + .drop(columns=['CODE_SITE','nom'])) + + # COR_majOLD_new.auteur = 'Mosaïque Environnement' + # COR_majOLD_new.date = '2017-01-01' + COR_majOLD_newDelim = (COR_majOLD_new[['site_cod','date','auteur']] + .merge(delim,left_on='site_cod',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE'])) + hactivhum = gpd.pd.read_sql('SELECT * FROM zones_humides.param_activ_hum',con=pycen.con) + COR_majOLD_newUPS = (COR_majOLD_new[['site_cod','date','auteur']] + .merge(usg,left_on='site_cod',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE']) + .rename(columns={ + 'crit_usg':'activite_hum', + 'remarques':'remarks'}) + .astype({'activite_hum':int}) + .replace({'activite_hum':dict(zip(hactivhum.id,hactivhum.nom))}) + ) + COR_majOLD_newUPS['impact'] = 'Inconnu' + COR_majOLD_newUPS['localisation'] = 'Inconnu' + COR_majOLD_newRHyd = (COR_majOLD_new[['site_cod','date','auteur']] + .merge(reghydro,left_on='site_cod',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE'])) + COR_majOLD_newSubm = (COR_majOLD_new[['site_cod','date','auteur']] + .merge(subm,left_on='site_cod',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE'])) + COR_majOLD_newCnx = (COR_majOLD_new[['site_cod','date','auteur']] + .merge(cnx,left_on='site_cod',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE'])) + COR_majOLD_newFct = (COR_majOLD_new[['site_cod', 'date', 'auteur']] + .merge(fct,left_on='site_cod',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE']) + .rename(columns={ 'description':'memo'})) + + insertNewSite((COR_majOLD_new + .drop(columns='geom') + .rename(columns={ + 'date':'date_deb' + }))) + COR_majOLD_cor.to_sql( + 'r_site_maj',pycen.con,'sites',if_exists='append',index=False + ) + insertNewGeom(pycen.tools.Polygons_to_MultiPolygon(COR_majOLD_newgeom)) + insertAttrsDelim(COR_majOLD_newDelim) + insertAttrsUsgPrss(COR_majOLD_newUPS) + insert_regHydro(COR_majOLD_newRHyd) + insert_subm(COR_majOLD_newSubm) + insert_cnx(COR_majOLD_newCnx) + insertAttrsFct(COR_majOLD_newFct,True) + + + ############################ + ####### df_cor_NEW ######### + ############################ + RD21 = gpd.read_file(path.join(PATH,'../../../../../../VECTEURS/ETUDES/ZONES HUMIDES/INVENTAIRE_ZH/ZH_2010 (re-save).shp')) + RD21.to_crs(2154,inplace=True) + RD21.rename_geometry('geom',inplace=True) + RD21.DATEMODIF = gpd.pd.to_datetime(RD21.DATEMODIF) + rebase_geom = (RD21 + .loc[RD21.SITE_CODE.isin(['38RD0021','38RD0126','38RD0025']),['SITE_CODE','geom','DATEMODIF']] + .rename(columns={ + 'SITE_CODE':'id_site', + 'DATEMODIF':'date', + })) + rebase_geom.loc[rebase_geom.id_site=='38RD0025','id_site'] = '38RD0127' + pycen.update_to_sql(rebase_geom,pycen.con,'r_sites_geom','sites','id_site') + + + LST_COR_MAJ = [ + '38GAM0036','38GAM0068', + '38GAM0063','38GAM0123', + '38GAM0071'] + COR_maj = (gpd.pd.concat([ + maj_dt_new[maj_dt_new.id_origine.isin(LST_COR_MAJ)], + maj_dt_old[maj_dt_old.id_origine.isin(LST_COR_MAJ)], + cor_dt_new[cor_dt_new.id_origine.isin(LST_COR_MAJ)], + cor_dt_old[cor_dt_old.id_origine.isin(LST_COR_MAJ)] + ]).drop(columns=['CENTRE_X','CENTRE_Y'])) + COR_maj.loc[COR_maj.date<'2017-01-01','date'] = '2017-01-01' + + COR_maj_site = 
(COR_maj[['id_origine','NOM_SITE','id_typo_sdage','date','auteur','geom']] + .copy() + .drop_duplicates() + .rename(columns={ + 'NOM_SITE':'site_name','id_origine':'site_cod'}) + ) + + COR_maj_site['dept'] = COR_maj_site.site_cod.str[:2] + COR_maj_site['org'] = COR_maj_site.site_cod.str[2:5] + COR_maj_site['num'] = COR_maj_site.site_cod.str[5:] + COR_maj_site['type_milieux'] = 'Milieux humides' + COR_maj_cor = (COR_maj[['id_origine','site_code']] + .rename(columns={ + 'site_code':'id_site_old','id_origine':'id_site_new'}) + .drop_duplicates()) + + COR_maj_siteOLD = (COR_maj[['site_code','date']] + .copy() + .rename(columns={'date':'date_fin','site_code':'id'}) + .sort_values(['id','date_fin']) + .drop_duplicates('id',keep='last')) + + COR_maj_newgeom = (COR_maj_site[['site_cod','date','auteur','geom']] + .merge(info,left_on='site_cod',right_on='CODE_SITE',how='left') + .drop(columns=['CODE_SITE','nom'])) + + COR_maj_newDelim = (COR_maj_site[['site_cod','date','auteur']] + .merge(delim,left_on='site_cod',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE'])) + hactivhum = gpd.pd.read_sql('SELECT * FROM zones_humides.param_activ_hum',con=pycen.con) + COR_maj_newUPS = (COR_maj_site[['site_cod','date','auteur']] + .merge(usg,left_on='site_cod',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE']) + .rename(columns={ + 'crit_usg':'activite_hum', + 'remarques':'remarks'}) + .astype({'activite_hum':int}) + .replace({'activite_hum':dict(zip(hactivhum.id,hactivhum.nom))}) + ) + COR_maj_newUPS['impact'] = 'Inconnu' + COR_maj_newUPS['localisation'] = 'Inconnu' + COR_maj_newRHyd = (COR_maj_site[['site_cod','date','auteur']] + .merge(reghydro,left_on='site_cod',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE'])) + COR_maj_newSubm = (COR_maj_site[['site_cod','date','auteur']] + .merge(subm,left_on='site_cod',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE'])) + COR_maj_newCnx = (COR_maj_site[['site_cod','date','auteur']] + .merge(cnx,left_on='site_cod',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE'])) + COR_maj_newFct = (COR_maj_site[['site_cod', 'date', 'auteur']] + .merge(fct,left_on='site_cod',right_on='CODE_SITE',how='inner') + .drop(columns=['CODE_SITE']) + .rename(columns={ 'description':'memo'})) + + + # Insert New Site + insertNewSite((COR_maj_site + .drop(columns='geom') + .rename(columns={ + 'date':'date_deb' + }))) + COR_maj_cor.to_sql('r_site_maj',con=pycen.con,schema='sites',if_exists='append',index=False) + # Insert `date_fin` for OLD_Site + pycen.update_to_sql(COR_maj_siteOLD,pycen.con,'sites','sites','id') + # Insert NewGeom + insertNewGeom(pycen.tools.Polygons_to_MultiPolygon(COR_maj_newgeom)) + insertAttrsDelim(COR_maj_newDelim) + insertAttrsUsgPrss(COR_maj_newUPS) + insert_regHydro(COR_maj_newRHyd) + insert_subm(COR_maj_newSubm) + insert_cnx(COR_maj_newCnx) + insertAttrsFct(COR_maj_newFct,True) + diff --git a/3_AZALEE/update_geomcover.py b/3_AZALEE/update_geomcover.py new file mode 100644 index 0000000..24537b7 --- /dev/null +++ b/3_AZALEE/update_geomcover.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- + +from statistics import geometric_mean +import geopandas as gpd +from shapely import wkb +from pycen import con,con_bdcen,update_to_sql +from pycen.tools import Polygons_to_MultiPolygon +from sys import exit + +def recreate_history(n_site,o_site): + req = ''' + UPDATE sites.sites SET date_fin = ( + SELECT max(date_deb) FROM sites.sites WHERE id IN ('{site_new}') + ) WHERE "id" IN ('{site_old}'); + 
INSERT INTO sites.r_site_maj (id_site_new,id_site_old) VALUES ('{site_new}','{site_old}'); + '''.format(site_new=n_site,site_old=o_site) + with con.begin() as cnx: + cnx.execute(req) + + +def modif_geomcover(old_geom, new_geom,how='maj',rcv=0.999): + # print(i) + sql = """ + with w1 as ( + SELECT s1.site_code, s1.geom geom_in, ST_ExteriorRing((ST_Dump(ST_Buffer(s2.geom,0.001))).geom) geom_split, s2.geom geom_over + FROM (select * from ps.v_pelouseseches where site_code = '{geom_old}') s1, (select * from ps.v_pelouseseches where site_code = '{geom_new}') s2 + ), + w2 as ( + SELECT site_code, geom_in, ST_Collect(geom_split) geom_split, geom_over from w1 group by 1,2,4 + ), + w3 as ( + SELECT site_code, geom_in,round(st_area(geom_in)) area_in, (ST_Dump(ST_Split(geom_in,geom_split))).geom geom_split, geom_over from w2 + ) + SELECT site_code, geom_in,area_in,round(st_area(st_union(geom_split))) area_split, st_multi(st_union(geom_split)) geom_split, geom_over from w3 + WHERE ST_Within(geom_split,ST_Buffer(geom_over,0.02)) = False + group by 1,2,3,6; + """.format(geom_old=old_geom,geom_new=new_geom) + res_split = gpd.read_postgis(sql,con, geom_col='geom_split') + res_split.geom_in = [wkb.loads(x,hex=True) for x in res_split.geom_in] + res_split.set_geometry('geom_in',inplace=True, crs=2154) + if res_split.empty: + print([old_geom,new_geom,'EMPTY']) + else: + if res_split.area_split[0] < (res_split.area_in[0]*rcv) : + spl = res_split.area_split[0] + _in = res_split.area_in[0] + print([old_geom,new_geom,_in,spl,(_in/spl)]) + # continue + # raise ValueError(j) + else: + # print('OK') + gdf = gpd.read_postgis("SELECT * FROM sites.r_sites_geom WHERE id_site = '%s'"%old_geom, con) + # df_aut = gpd.pd.read_sql_query('') + gdf.date = gpd.pd.to_datetime(gdf.date) + gdf.geom = res_split.geom_split + + if how=='maj': + gdf = gdf.loc[gdf.groupby('id_site').date.idxmax(),['id','geom']] + update_to_sql(gdf, con, 'r_sites_geom', 'sites', 'id') + elif how=='insert': + gdf = gdf.loc[gdf.groupby('id_site').date.idxmax()] + gdf.drop(columns=['id','date_insert'],inplace=True,errors='ignore') + gdf.to_postgis( + name='r_sites_geom', con=con, schema='sites', if_exists='append', geom_col='geom' + ) + print('END insert') + else: + raise ValueError([old_geom, new_geom]) + + +sql = """SELECT + v1.site_code site_code_old, v1.geom geom_old, v2.site_code site_code_new, v2.geom geom_new, + v1."source" source_old,v2."source" source_new, v1.id_origine orig_old,v2.id_origine orig_new + FROM ps.v_pelouseseches v1, ps.v_pelouseseches v2 + WHERE ST_OVERLAPS(v1.geom,v2.geom) = TRUE + AND v1.date_geom < v2.date_geom + AND v1.site_code <> v2.site_code;""" +df = gpd.read_postgis(sql,con,geom_col='geom_new') +df.geom_old = [wkb.loads(x,hex=True) for x in df.geom_old] +df.set_geometry('geom_old',inplace=True, crs=2154) + +v_ps = gpd.read_postgis('SELECT * FROM ps.v_pelouseseches', con) + + +sql = """SELECT + v1.site_code site_code_old, v1.geom geom_old, v2.site_code site_code_new, v2.geom geom_new, + v1."source" source_old,v2."source" source_new, v1.id_origine orig_old,v2.id_origine orig_new + FROM ps.v_pelouseseches v1, ps.v_pelouseseches v2 + WHERE ST_OVERLAPS(v1.geom,v2.geom) = TRUE + AND v1.date_geom = v2.date_geom + AND v1.site_code <> v2.site_code;""" +dfx = gpd.read_postgis(sql,con,geom_col='geom_new') +print(dfx.shape[0]) +dfx.apply(lambda x: modif_geomcover(old_geom=x.site_code_old,new_geom=x.site_code_new,how='maj',rcv=0.999),axis=1) + +idem = df[(df.geom_old.area.round()/df.geom_new.area.round()).between(0.98,1.02) & 
(df.source_old=='cr_CHARTREUSE_habitats_CBNA_2000-2012')]
+idem.apply(lambda x: recreate_history(o_site=x.site_code_old,n_site=x.site_code_new),axis=1)
+
+df[df.source_old=='cr_VERCORS_habitats_CBNA_1999-2007']\
+    .apply(lambda x: modif_geomcover(old_geom=x.site_code_old,new_geom=x.site_code_new,how='maj',rcv=0.999),axis=1)
+df[df.source_old=='cr_VERCORS_habitats_CBNA_1999-2007']\
+    .apply(lambda x: modif_geomcover(old_geom=x.site_code_old,new_geom=x.site_code_new,how='insert',rcv=0.9),axis=1)
+# modif_geomcover('38VERC0654', '38VERC4644',how='insert',rcv=0.9)
+# recreate_history(o_site='38VERC0760',n_site='38VERC3866')
+# recreate_history(o_site='38VERC0712',n_site='38VERC3859')
+# recreate_history(o_site='38VERC0687',n_site='38VERC3756')
+# recreate_history(o_site='38VERC0779',n_site='38VERC3768')
+# recreate_history(o_site='38VERC4193',n_site='38VERC4635')
+# recreate_history(o_site='38VERC4204',n_site='38VERC4637')
+# recreate_history(o_site='38VERC4242',n_site='38VERC4642')
+# recreate_history(o_site='38VERC4253',n_site='38VERC4644')
+# recreate_history(o_site='38VERC4258',n_site='38VERC4645')
+# recreate_history(o_site='38CHAR0735',n_site='38CHAR0045')
+# recreate_history(o_site='38CHAR0766',n_site='38GRES0136')
+# recreate_history(o_site='38CHAR0694',n_site='38VERC0215')
+# recreate_history(o_site='38TRIE2441',n_site='38TRIE1337')
+# recreate_history(o_site='38VERC0651',n_site='38VERC3762')
+# recreate_history(o_site='38VERC0663',n_site='38VERC3846')
+# recreate_history(o_site='38VERC0671',n_site='38VERC3849')
+# recreate_history(o_site='38VERC0672',n_site='38VERC3851')
+# recreate_history(o_site='38VERC4260',n_site='38VERC4646')
+# recreate_history(o_site='38VERC4268',n_site='38VERC4647')
+# recreate_history(o_site='38VERC4270',n_site='38VERC4648')
+# recreate_history(o_site='38CHAR0677',n_site='38CHAR0100')
+# recreate_history(o_site='38CHAR0699',n_site='38CHAR0072')
+# recreate_history(o_site='38CREM0404',n_site='38CREM0104')
+# recreate_history(o_site='38CREM0405',n_site='38CREM0105')
+# recreate_history(o_site='38CREM0412',n_site='38CREM0178')
+# recreate_history(o_site='38CREM0417',n_site='38CREM0114')
+# recreate_history(o_site='38CREM0420',n_site='38CREM0064')
+# recreate_history(o_site='38VERC0735',n_site='38VERC3862')
+# recreate_history(o_site='38VERC0744',n_site='38VERC3764')
+# recreate_history(o_site='38VERC0753',n_site='38VERC3865')
+# recreate_history(o_site='38VERC1194',n_site='38VERC3735')
+# recreate_history(o_site='38VERC1198',n_site='38VERC3736')
+# recreate_history(o_site='38VERC1207',n_site='38VERC3738')
+# recreate_history(o_site='38TRIE1015',n_site='38VERC3714')
+# recreate_history(o_site='38TRIE2467',n_site='38TRIE2630')
+# recreate_history(o_site='38PCHA0994',n_site='38GRES0844')
+# recreate_history(o_site='38VERC1236',n_site='38VERC4705')
+# recreate_history(o_site='38VERC1233',n_site='38VERC4706')
+# recreate_history(o_site='38VERC1242',n_site='38VERC1237')
+# recreate_history(o_site='38VERC1243',n_site='38VERC4726')
+# recreate_history(o_site='38VERC1260',n_site='38VERC4724')
+# recreate_history(o_site='38VERC4027',n_site='38VERC4743')
+# recreate_history(o_site='38VERC4085',n_site='38VERC4084')
+
+# for i,j in df.iterrows():
+    # update_to_sql(gdf, con, 'r_sites_geom', 'sites', 'id')
+
+
+
+df.apply(lambda x: modif_geomcover(old_geom=x.site_code_old,new_geom=x.site_code_new),axis=1)
+# df.iloc[57:].apply(lambda x: modif_geomcover(old_geom=x.site_code_old,new_geom=x.site_code_new),axis=1)
+
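+# Hedged dry-run sketch (editor's addition, not part of the original run):
+# before applying modif_geomcover() to a whole source layer, preview how many
+# overlap pairs each source contributes, so a problematic layer can be
+# excluded up front. Only names defined above are used.
+# print(df.groupby('source_old').size().sort_values(ascending=False))
+# df[df.source_old=='PS_Beaumont_2018_Drac_Nature.shp']\
+#     .apply(lambda x: modif_geomcover(old_geom=x.site_code_old,new_geom=x.site_code_new,how='maj',rcv=0.999),axis=1)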
+df2 = df[df.source_old=='PS_Beaumont_2018_Drac_Nature.shp'] +df2 = df[df.source_old=='cr_VERCORS_habitats_CBNA_1999-2007'] +df2.apply(lambda x: modif_geomcover(old_geom=x.site_code_old,new_geom=x.site_code_new),axis=1) +# 38TRIE0678 38TRIE0721 + + +vrc = df[df.source_old=='cr_VERCORS_habitats_CBNA_1999-2007'].copy() +s_vrc = gpd.read_postgis('SELECT * FROM pelouse_seche."ps_TRIEVES+VERCORS_habitats_CBNA_2014";',con=con_bdcen) +p_vrc = gpd.read_postgis('SELECT v.*,ST_PointOnSurface(v.geom) geom_point FROM pelouse_seche."ps_TRIEVES+VERCORS_habitats_CBNA_2014" v;',con=con_bdcen, geom_col='geom_point') +pvrc = gpd.read_postgis("SELECT v.*,ST_PointOnSurface(v.geom) geom_point FROM ps.v_pelouseseches v WHERE v.site_code IN ('%s');"%("','".join(vrc.site_code_old)),con=con, geom_col='geom_point') +t1 = s_vrc.geom.intersects(pvrc.geom_point.unary_union) +t2 = p_vrc.geom_point.intersects(vrc.geom_old.unary_union) +INS = s_vrc[t1|t2].copy() +INS.to_postgis( + 'ps_covered',con,geom_col='geom',if_exists='replace' +) \ No newline at end of file diff --git a/3_AZALEE/update_psInf1200.py b/3_AZALEE/update_psInf1200.py new file mode 100644 index 0000000..deea964 --- /dev/null +++ b/3_AZALEE/update_psInf1200.py @@ -0,0 +1,63 @@ +from pycen import ( + v_pelouseseches as ps, + con, con_bdcen, + update_to_sql +) +import geopandas as gpd + + +def update_psInf1200(df): + + from rasterstats import zonal_stats + from pycen import update_to_sql + path0 = '/home/colas/Documents/9_PROJETS/2_PS/' + path2_mnt = path0+'MNT/' + file_mnt5 = 'MNT_5m.tif' + + zonal_tmp = zonal_stats(df,path2_mnt+file_mnt5,stats="max",nodata=0,all_touched=True) + zonal = gpd.pd.DataFrame(zonal_tmp) + zonal.columns = ['max_alti'] + tmp = gpd.pd.concat([df,zonal], axis=1) + tmp = tmp.set_geometry('geom', crs=2154) + tmp.infeq_1200 = tmp.max_alti <= 1200 + PS = tmp[['site_code','infeq_1200']]\ + .rename(columns={'site_code':'id_site'}) + + PS.to_sql('r_infeq_1200m',con,'ps',if_exists='append',index=False) + # update_to_sql(PS,con,'r_infeq_1200m','ps','id_site') + + +metro = gpd.read_file("/home/colas/Documents/tmp/PS_ACTU_METRO/Perimètre_GAM.geojson") +metro.rename_geometry('geom', inplace=True) +ps_agg = gpd.read_postgis('SELECT * FROM "pelouse_seche"."c_ps_inventaire_agreg"',con_bdcen) +ps_all = gpd.read_postgis('SELECT * FROM ps."v_pelouseseches_all"',con) +ps_vps = gpd.read_postgis('SELECT * FROM ps."v_pelouseseches"',con) +ps_noalti = gpd.read_postgis('SELECT * FROM ps."v_pelouseseches_noalti"',con) + +psdate_cols = ps_all.columns[ps_all.columns.str.startswith('date')] + +ps_metro = ps_vps[ps_vps.intersects(metro.unary_union)].copy() +ag_metro = ps_agg[ps_agg.intersects(metro.unary_union)].copy() + +is_cbna = ps_metro.source.str.contains('CBNA',na=False) +is_agg = ps_metro.id_origine.isin(ag_metro.id.tolist()) + +not_intersect = ~ps_metro.centroid.intersects(ag_metro.buffer(0).unary_union) + +ps_metro_cbna = ps_metro[is_cbna].copy() + + +termo = ps_metro[not_intersect & is_cbna].copy() + +ps_metro[psdate_cols] = ps_metro[psdate_cols].astype(str) +termo[psdate_cols] = termo[psdate_cols].astype(str) +termo.to_file('/home/colas/Documents/tmp/PS_ACTU_METRO/ps_actu_metro.gpkg',layer='ps_termo',driver='GPKG') +ps_metro.to_file('/home/colas/Documents/tmp/PS_ACTU_METRO/ps_actu_metro.gpkg',layer='ps_metro',driver='GPKG') +ag_metro.to_file('/home/colas/Documents/tmp/PS_ACTU_METRO/ps_actu_metro.gpkg',layer='ag_metro',driver='GPKG') + + +ps_termo = gpd.read_file('/home/colas/Documents/tmp/PS_ACTU_METRO/ps_actu_metro.gpkg',layer='ps_termo') 
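+# Hedged note (editor's addition): update_psInf1200() above is defined but
+# never called in this script as committed. A plausible invocation, assuming
+# the 'noalti' view lists the sites still missing altitude data, would be:
+# update_psInf1200(ps_noalti)
+# The block below then flags the retained 'termo' sites in sites.sites.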
+updt_termo = ps_termo[['site_code']].copy()
+updt_termo.columns = ['id']
+updt_termo['id_type_milieu'] = 99
+update_to_sql(updt_termo,con,'sites','sites','id')
\ No newline at end of file
diff --git a/4_CARMEN/send_view_to_carmen.py b/4_CARMEN/send_view_to_carmen.py
new file mode 100644
index 0000000..a455872
--- /dev/null
+++ b/4_CARMEN/send_view_to_carmen.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+from sqlalchemy import create_engine, text
+from sqlalchemy.engine import URL
+from datetime import datetime as dt
+import pandas as pd
+import geopandas as gpd
+
+
+# zones_humides / ps
+schema = 'zones_humides'
+
+# DB connection settings (azalee)
+user = 'cen_admin'
+pwd = '#CEN38@venir'
+adr = '91.134.194.221'
+port = '5432'
+base = 'azalee'
+
+url = URL.create('postgresql+psycopg2',
+    username=user,
+    password=pwd,
+    host=adr,
+    database=base,
+)
+con_azalee = create_engine(url)
+# DB connection settings (Carmen)
+user = 'Admin_CENI'
+pwd = 'yatIv5quoop+'
+adr = 'database.carmencarto.fr'
+port = '5432'
+base = 'CENI'
+
+url = URL.create('postgresql+psycopg2',
+    username=user,
+    password=pwd,
+    host=adr,
+    database=base,
+)
+con_carmen = create_engine(url)
+
+if schema == 'ps':
+    table = 'v_pelouseseches'
+elif schema == 'zones_humides':
+    table = 'v_zoneshumides'
+
+today = dt.now()
+
+sql = 'SELECT * FROM %s.%s' % (schema,table)
+vue = gpd.read_postgis(sql,con_azalee)
+vue.to_postgis(
+    name=table,
+    con=con_carmen,
+    # schema=schema,
+    if_exists='replace',
+    index=False,
+    # geom_col='geom'
+)
+
+comment = "COMMENT ON TABLE %s IS 'LAST UPDATE : %s'" % (table,today)
+with con_carmen.begin() as cnx:
+    cnx.execute(text(comment))
\ No newline at end of file
diff --git a/5_GEONATURE/GN_ZH/MEDWET2Geonat.py b/5_GEONATURE/GN_ZH/MEDWET2Geonat.py
new file mode 100755
index 0000000..429fdb7
--- /dev/null
+++ b/5_GEONATURE/GN_ZH/MEDWET2Geonat.py
@@ -0,0 +1,1337 @@
+#!/usr/bin/env python3
+
+# -*- coding: UTF-8 -*-
+#Name : Medwet_to_Geonature.py
+#Description :
+#Copyright : 2022, CEN 74
+#Author : Eric Lafon - largely inspired by the recup_zh_from_medwet.py script written by Colas Geier for CEN 38 (2021)
+#Version : 1.1
+
+'''Script meant to collect the information held in the tables of the ZHRMC database (Medwet data)
+   and store it as CSV sheets for their import into Geonature.'''
+
+#library imports
+import pandas as pd # to manipulate the medwet DB data
+import geopandas as gpd # to manipulate the postgis data
+import pandas_access as mdb # to read the medwet DB
+import numpy as np
+import os # to write files to a directory
+from sqlalchemy import create_engine # to read the Geonature DB
+from sqlalchemy.engine import URL # to build the Geonature connection URL
+from datetime import datetime as dt # to convert date types to datetime
+import uuid # to generate uuids
+from geoalchemy2 import Geometry # for geometry inserts
+
+import sys, inspect
+# realpath() will make your script run, even if you symlink it :)
+cmd_folder = os.path.realpath(os.path.abspath(os.path.split(inspect.getfile( inspect.currentframe() ))[0]))
+if cmd_folder not in sys.path:
+    sys.path.insert(0, cmd_folder)
+from tools.pr_zh import to_tzh,to_t_,to_cor_,add_bib_cb,update_t_ownership
+
+#read the ZHRMC DB and the shapefiles linked to it
+# DIR = '/home/colas/Documents/13_CEN74/medwet2gn_ZH/'
+DIR = os.path.dirname(cmd_folder+'/')
+os.chdir(DIR)
+db_file = './bdd/Inventaires ZHRMCvivante.mdb'
+shp_siteinfo =
'Cartographie_generee/CarteZHGénérée.shp' +#création d'une connection à la bd de géonature +usr = 'geonatadmin' +pdw='g;gCEN74' +host='178.33.42.38' +bd ='geonature2db' + +eng = URL.create('postgresql+psycopg2',username=usr,password=pdw,host=host,database=bd) +conn = create_engine(eng) + +#création d'une connection à la bd foncière +# Utile en cas d'utilisation de la fonction update_t_ownership() +usr_f = 'admin_ra' +pdw_f ='adminRAsig' +host_f ='91.134.194.223' +bd_f = 'bd_cen_74' +eng_f = URL.create('postgresql+psycopg2',username=usr_f,password=pdw_f,host=host_f,database=bd_f,) +con_f = create_engine(eng_f) + +LB_IGNORE = ['99','9','9_1'] +LB_DICT = { + '53_1':'53', + '89.26':'89.23', +} +############################################## +# fonctions essentielles pour requêter ZHRMC # +############################################## +def _aggregate_value__(column1,column2,sep='\n'): + """ + aggregation de la colonne 1 et 2 dans la colonne 1 + """ + df = pd.merge(column1,column2,left_index=True,right_index=True,how='left') + df['join'] = [ + sep.join([y for y in [row.iloc[0],row.iloc[1]] if not pd.isna(y)]) + if not (pd.isna(row.iloc[0]) and pd.isna(row.iloc[1])) else None + for x,row in df.iterrows() + ] + + return df['join'] + + +def t_nomenclature_ZH(bib_mnemo=None): + """@bib_mnemo : ['CRIT_DELIM', 'CRIT_DEF_ESP_FCT', 'ACTIV_HUM', 'LOCALISATION', + 'IMPACTS', 'EVAL_GLOB_MENACES', 'ENTREE_EAU', 'SORTIE_EAU', + 'PERMANENCE_ENTREE', 'PERMANENCE_SORTIE', 'SUBMERSION_FREQ', + 'SUBMERSION_ETENDUE', 'TYPE_CONNEXION', 'FONCTIONNALITE_HYDRO', + 'FONCTIONNALITE_BIO', 'FONCTIONS_HYDRO', 'FONCTIONS_QUALIF', + 'FONCTIONS_CONNAISSANCE', 'FONCTIONS_BIO', 'VAL_SOC_ECO', + 'INTERET_PATRIM', 'ETAT_CONSERVATION', 'STATUT_PROPRIETE', + 'PLAN_GESTION', 'INSTRU_CONTRAC_FINANC', 'NIVEAU_PRIORITE', + 'SDAGE', 'SAGE', 'STATUT_PROTECTION', 'IMPACT_TYP', + 'OCCUPATION_SOLS', 'NIV_PROTECTION', 'PROTECTION_TYP', + 'TYP_DOC_COMM', 'TYP_CLASS_COMM', 'STRAT_GESTION', 'HIERARCHY'] + """ + sch = 'ref_nomenclatures' + tab = ['t_nomenclatures','bib_nomenclatures_types'] + sql = """ + SELECT + a.id_nomenclature, + a.cd_nomenclature, + a.mnemonique mnemo, + a.label_default as label, + a.definition_default def, + b.mnemonique bib_mnemo, + b.label_default bib_label, + b.definition_default bib_def, + a.active + FROM {sch}.{tab0} a + JOIN {sch}.{tab1} b USING (id_type) + WHERE b."source" = 'ZONES_HUMIDES' + """.format(sch=sch,tab0=tab[0],tab1=tab[1]) + if bib_mnemo is not None: + sql += " AND b.mnemonique = '%s'"%bib_mnemo + return pd.read_sql_query(sql,conn).replace({r'[’]':"'"},regex=True) + +#Définition de Standardized_t_nomenclature_ZH(): permet de sélectionner les dictionnaires reliés à t_zh, et d'en faire un dataframe virtuel pour la fonction merge_dics(). 
+def Standardized_t_nomenclature_ZH(bib_mnemo=None): + sch = 'ref_nomenclatures' + tab = ['t_nomenclatures','bib_nomenclatures_types'] + sql = """ + SELECT + a.id_nomenclature, + --a.cd_nomenclature, + a.mnemonique mnemo, + a.label_default as label, + a.definition_default def, + b.mnemonique bib_mnemo, + b.label_default bib_label, + b.definition_default bib_def, + a.active + FROM {sch}.{tab0} a + JOIN {sch}.{tab1} b USING (id_type) + WHERE b."source" = 'ZONES_HUMIDES' + """.format(sch=sch,tab0=tab[0],tab1=tab[1]) + if bib_mnemo is not None: + sql += " AND b.mnemonique = '%s'"%bib_mnemo + df = pd.read_sql_query(sql,conn) + df['mnemo'] = df['mnemo'].str.lower() + df['mnemo'] = df['mnemo'].replace({ + r'[’]':"'", + r'[àáâãäå]':'a', + r'[èéêë]':'e', + r'[ìíîï]':'i', + r'[òóôõö]':'o', + r'[ùúûü]':'u', + r'[ ]':"", + r'[–]':"-" + },regex=True) + df = df[['mnemo','id_nomenclature']] + return df + +#Définition de la fonction merge_dics: permet la jointure d'un dictionnaire medwet donné avec la table t_nomenclature. Tout ce qu'il y a à faire est d'y entrer le nom du dictionnaire de medwet dans les paramètres, +#càd à la place de 'dic'. +#Attention cependant: si cette fonction marche avec la plupart des dictionnaires medwet, quelquefois des erreurs se produisent sans qu'elles vous soient signalées. Pensez à bien contrôler vos résultats issus de +#merge_dics pour être sûr qu'ils soient bons. +def merge_dics(dic,bib_mnemo=None,how='left'): + re_dic = { + r'[’]':"'", + r'[àáâãäå]':'a', + r'[èéêë]':'e', + r'[ìíîï]':'i', + r'[òóôõö]':'o', + r'[ùúûü]':'u', + r'[ ]':"", + } + if dic=='DicGen-SDA': + sd_dic = {r'versant':'',r'bas-fond ':'bas fonds '} + re_dic = {**sd_dic,**re_dic} + df1 = Standardized_t_nomenclature_ZH(bib_mnemo=bib_mnemo) + df2 = mdb.read_table(db_file,dic) + df2['DESCR'] = df2['DESCR']\ + .str.lower()\ + .replace(re_dic,regex=True) + + df = pd.merge(df1,df2,how=how,left_on='mnemo',right_on='DESCR') + if bib_mnemo is None: + df = df[df['DESCR'].notna()] + df.drop(columns=['DESCR']) + return df + +#Enlever les colonnes vides des tables Medwet +def _del_na_col(df): + for col in df.columns: + tmp = pd.notna(df[col]).unique() + if len(tmp) == 1 and False in tmp: + del df[col] + return df + +##################################################################### +# Récupérer, créer ou recréer des dictionnaires Medwet to Geonature # +##################################################################### + +# Création données bib_actions. +# Dictionnaire d'actions réalisé à partir des données homogénéïsées de SITE_RAP4, +# avec l'aide d'Aude Soureillat. 
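+# (get_bib_actions() below simply wraps this fixed list in a one-column
+# DataFrame ('name'); the __main__ block appends it to pr_zh.bib_actions.)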
+def get_bib_actions(): + dic_actions = {"name":["Zone humide à prendre en compte dans les documents d'urbanisme", + "Prospections faune/flore complémentaires nécessaires", + "Cartographie des habitats d'intérêt nécessaire", + "Pointage des espèces à enjeux nécessaire", + "Meilleure connaissance du fonctionnement hydraulique nécessaire", + "Contours de la zone humide à préciser", + "Maintien de la zone en l'état", + "Maintenir une gestion agricole compatible avec les enjeux du site", + "Gérer la végétation par des actions de bûcheronnage", + "Gérer la végétation par des actions de débroussaillage", + "Gérer la végétation par des actions de broyage", + "Gérer la végétation par des actions de fauche", + "Réduire les amendements agricoles en périphérie de la zone humide", + "Tamponner les apports du bassin versant amont en plantant des haies en périphérie et/ou des bandes enherbées", + "Limiter le pâturage et le sur-piétinement", + "Aménager des abreuvoirs déportés", + "Gérer la végétation par du pâturage extensif", + "Ne pas retourner les sols", + "Creuser des mares", + "Curage nécessaire", + "Adapter les pratiques sylvicoles", + "Gérer la fréquentation", + "Valorisation pédagogique à prévoir", + "Retirer les remblais", + "Gérer les espèces invasives", + "Restauration hydraulique nécessaire", + "Mieux gérer les aménagements et activités au sein du bassin versant"]} + df=pd.DataFrame(dic_actions) + return df + +######################################################################################## +# Remplissage des tables de correspondance de Géonature à partir des données de Medwet # +######################################################################################## + +#Récuparation données pour_cor_zh_ref +def get_cor_zh_ref(): + #lecture SITEREF + dfm = mdb.read_table(db_file,'SITEREF') + #lecture t_references + dfg= pd.read_sql_query("SELECT * FROM pr_zh.t_references",conn) + #jointure des tables t_reference & SITEREF + + df = pd.merge( + dfm, + dfg[['ref_number','id_reference']], + how='left',left_on='REF_NO',right_on='ref_number') + + return df\ + .drop(columns=['REF_NO','ref_number'])\ + .rename(columns={'id_reference':'id_ref'}) + +#Récupération données pour cor_zh_protection +#definition d'un dictionnaire spécial pour DicGen_CNS +def Standardized_dicgenCNS_dic(): # ne se joint pas avec les autres dictionnaires car les étiquettes sont spéciales: nécessite une retransformation individuelle. 
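+    """Return the DicGen-CNS dictionary as ['CODE', 'temp'], where 'temp' is
+    the lower-cased, de-accented, space-stripped 'CODE - DESCR' label built
+    to join against the standardized t_nomenclatures mnemonics."""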
+ df=mdb.read_table(db_file,'DicGen-CNS') + df['temp'] = df['CODE'] + ' - ' + df['DESCR'] + df = df[['CODE','temp']] + df['temp']=df['temp'].str.lower() + df['temp']=df['temp'].replace({ + r'[’]':"'", + r'[àáâãäå]':'a', + r'[èéêë]':'e', + r'[ìíîï]':'i', + r'[òóôõö]':'o', + r'[ùúûü]':'u', + r'[ ]':"", + },regex=True) + return df + +#jointure avec t_nomenclature +def merge_dicgenCNS(): + df1 = Standardized_t_nomenclature_ZH() + df2 = Standardized_dicgenCNS_dic() + df = pd.merge(df1,df2,how='left',left_on='mnemo',right_on='temp') + df = df[df['temp'].notna()] + df.drop(columns=['temp']) + return df + +#récupération donnnées cor_zh_protection +def get_cor_zh_protection(): + #lecture SITECONS + df = mdb.read_table(db_file,'SITECONS') + + # Sauvetage de données : rectification d'une erreur utilisateur + # présente dans la colonne status_cod pour sauver la donnée + df['STATUS_COD'] = df['STATUS_COD'].replace('3' , '36') + #lecture dicgen-CNS jointure avec t_nomenclature + dic = merge_dicgenCNS() + #jointure du dictionnaire avec SITEREF + df = pd.merge(df,dic, how='left',left_on='STATUS_COD',right_on='CODE') + #nettoyage colonnes en trop + df.drop(columns=['STATUS_COD', 'DESIGNATIO', 'LEGISLAT', 'COVER', + 'STATUS_MEMO', 'mnemo','CODE', 'temp'],inplace=True) + df.rename(columns={'id_nomenclature':'id_protection'}, inplace=True) + cor_ = pd.read_sql_table('cor_protection_level_type',conn,'pr_zh') + dic2 = dict(zip(cor_.id_protection_status,cor_.id_protection)) + df.id_protection.replace(dic2,inplace=True) + return df + + +def ref_habitats(typo=None): + sql = """ + SELECT cd_hab,lb_code,lb_hab_fr + FROM ref_habitats.habref + JOIN ref_habitats.typoref USING(cd_typo) + """ + if typo is not None: + sql += " WHERE lb_nom_typo = '%s'"%typo + sql += ';' + return pd.read_sql_query(sql, conn) + + +def __get_CBiot__(): + dicbio = mdb.read_table(db_file,'DicGen-CBio') + df = mdb.read_table(db_file,'CATCHCBiot') + #nettoyage des colonnes inutiles: + df.drop(columns=['MAPINFO_ID'],inplace=True) + df.rename(columns={'CB_COD':'lb_code'}, inplace=True) + return df + +def __filter_lb_error__(serie=None,ignore=None,replace=None): + """ + Identifie les codes habitats ne figurant pas + dans le référentiel CORINE biotope présents + dans le table `ref_habitats.habref` + """ + + cbio = __get_CBiot__() if serie is None else serie + if serie is not None : + cbio = pd.DataFrame({'lb_code':serie}) + if ignore is not None: + cbio = cbio[~cbio.lb_code.isin(ignore)] + if replace is not None: + cbio.lb_code.replace(replace,inplace=True) + ref = ref_habitats('CORINE_biotopes') + + not_cbio = cbio[~cbio.lb_code.isin(ref.lb_code)] + is_cbio = cbio[cbio.lb_code.isin(ref.lb_code)] + + return {'is_cbio':is_cbio,'not_cbio':not_cbio} + + +def __filter_lb_code__(serie, join_ch=False): + """ + Identifie les codes CORINE biotope présents + dans le table `pr_zh.bib_cb` + """ + ind = serie.index.name + to_corzhcb = serie.reset_index() + + bib_cb = pd.read_sql_table('bib_cb',conn,'pr_zh') + bib_cb.lb_code = bib_cb.lb_code.astype(str) + + not_bib = to_corzhcb[~to_corzhcb.lb_code.isin(bib_cb.lb_code)] + to_corzhcb.drop(not_bib.index,inplace=True) + not_bib = not_bib\ + .groupby(ind).agg(','.join) + + if join_ch: + to_corzhcb = to_corzhcb.merge( + bib_cb.drop(columns='humidity'),on='lb_code',how='left' + ) + + return to_corzhcb, not_bib + + + +#Récupération données cor_zh_cb +def get_cor_zh_cb(out_notbib=False,ignore=None,replace=None): + + cbio_filer = __filter_lb_error__(ignore=ignore,replace=replace) + cbio = 
cbio_filer['is_cbio'].set_index('SITE_COD') + to_corzhcb, not_bib = __filter_lb_code__(cbio) + to_corzhcb.drop_duplicates(inplace=True) + + if out_notbib: + return to_corzhcb,not_bib + else: + return to_corzhcb + + +#Récupération données cor_zh_corine_cover +def get_cor_zh_corine_cover(): + #lecture dicgen-clc + dic = merge_dics('DicGen-CLC') + #lecture catchclc + df = mdb.read_table(db_file,'CATCHCLC') + #jointure du dictionaire et de catchclc + df = pd.merge(df,dic,how="left",left_on="LC_COD",right_on="CODE") + #nettoyage des colonnes inutiles + df.drop(columns=['LC_COD','mnemo', 'CODE', + 'DESCR','COVER'],inplace=True) + df.rename(columns={'CATCH_COD':'SITE_COD','id_nomenclature':'id_cover'}, inplace=True) + return df + + +#Récupération données cor_zh_lim_fs #pas de données dans medwet apparently +def get_cor_zh_lim_fs(): + #lecture dictionnaire + dic = merge_dics('DicGen-LIM_1','CRIT_DEF_ESP_FCT') + #lecture sitelim + # df=mdb.read_table(db_file,'SITELIM') Destiné à t_zh + df = mdb.read_table(db_file,'EFLIM') + #correspondance des types des colonnes qui vont servir à la jointure + dic['CODE'] = dic['CODE'].astype(int) + df['LIM1_COD'] = df['LIM1_COD'].astype(int) + #jointure du dictionnaire et SITELIM + df = pd.merge(df,dic,how="left",left_on="LIM1_COD",right_on="CODE") + #nettoyage des colonnes inutiles + df.drop(columns=['LIM1_COD', 'mnemo','CODE', 'DESCR'],inplace=True) + df.rename(columns={'SIT_COD':'SITE_COD','id_nomenclature':'id_lim_fs'},inplace=True) + + return df + +######################################################### +#Recreate most important tables from Medwet to Geonature# +######################################################### + +#Récupération données utilisateurs.bib_organismes + +def get_utilisateurs_bib_organismes(): + table = 'bib_organismes' + return pd.read_sql_table(table,conn,'utilisateurs') + +def to_bib_organismes(): + # df = pd.DataFrame({ + # 'nom_organisme':['CORA Savoie', + # 'Apollon 74', + # 'CPNS', + # 'AGRESTIS', + # 'SIVM Haut-Giffre', + # 'TEREO', + # 'SAGE Environnement', + # 'ONEMA', + # 'Ecotope', + # 'ONF' + # ], + # 'id_organisme':['3','4','5','6','7','8','9','10','11','12']}) + table = 'bib_organismes' + df = pd.DataFrame({ + 'nom_organisme':[ + 'CORA Savoie', + 'Apollon 74', + 'CPNS', + 'AGRESTIS', + 'SIVM Haut-Giffre', + 'TEREO', + 'SAGE Environnement', + 'ONEMA', + 'Ecotope', + 'ONF' + ]}) + bib = get_utilisateurs_bib_organismes() + df = df[~df.nom_organisme.isin(bib.nom_organisme)] + df.to_sql(table,conn,'utilisateurs',if_exists='append',index=False) + + +#Récupération données utilisateurs.t_roles +def get_utilisateurs_t_roles(): + dic1 = { + 'id_role' : ['0','1_2','13','16','17','18','19','2_8','21','7','72','73', + 'ASTERS0002','ASTERS0003','ASTERS0004','ASTERS0005','ASTERS0006','ASTERS0007','ASTERS0008', + '24','25','27','28','29','30','31', '32','33','ASTERS0001','26','23','22'], + 'nom_role' : ['AUTRE','PIENNE','MERY','GUILLEMOT', + 'HERVIEUX','MINSSIEUX','DUBOIS','BOURON','ROUX-VOLLONS','CHOLET','BAL&PIENNE','AVRILIER&BAL' + ,'JORDAN','GAY','LOUIS','MARCHIONINI','MARTIN','ALTHEN','LOPEZ-PINOT','PUXEDDU','ROLLET' + ,'CAILLETTE','BOURGOIN','FAUCON-MOUTON','PERRET','MACCAUD','ONF','CHABERT','BAL','SOUREILLAT','DEGROOT','SOUQUET-BASIEGE'], + 'prenom_role' : ['-','Marc','Luc','Alexandre','Mathilde', 'Elise','Cécile','Manuel','Céline', + 'Jérémie','Bernard&Marc','Jean-Noël&Bernard','Denis','Nicolas','Jean-Claude', + 'Laurence','Thomas','Aurore','Dominique','Mathieu','Olivier','Mariane','Laurent','Philippe', + 
'Patrick','Alexandre','Agents','Chloé','Bernard','Aude','Marion','Jules']} #liste des personnes présentes dans MWDKC. dic à lui tout seul car, noms et prénoms mis dans la même colonne dans medwet. + df = mdb.read_table(db_file,'MWDKC') + dic = pd.DataFrame(dic1) + df = pd.merge(df,dic,how='left', left_on='CODE', right_on='id_role') + df.drop(columns=['DESCR','id_role'],inplace=True) + dic2 = { + 'ORGANISAT':{ #homogénéisation des personnes entrées dans la base medwet. + np.NaN:'Autre', + 'ASTERS':'Asters, CEN Haute-Savoie', + 'Asters':'Asters, CEN Haute-Savoie', + 'ASTERS - CEN74':'Asters, CEN Haute-Savoie', + 'CORA Savoie':'CORA Savoie', + 'CORA Savoie et ASTERS':'CORA Savoie', + 'ASTERS / CORA Savoie':'CORA Savoie', + 'Office National des Forêts':'ONF' + } + } + bib_org = get_utilisateurs_bib_organismes() + dic3 = dict(zip(bib_org.nom_organisme,bib_org.id_organisme)) + + df.replace(dic2,inplace=True) + df.ORGANISAT.replace(dic3,inplace=True) + df.drop(columns=['ADDRESS', 'CITY', 'COUNTRY', 'PHONE', 'FAX'],inplace=True) + df.rename(columns={'ORGANISAT':'id_organisme','E_MAIL':'email'},inplace=True) + + return df + + +def get_bib_oragnismes(): + return pd.read_sql_table('bib_organismes',conn,'utilisateurs') + + +def get_t_role(with_orga=False): + df = pd.read_sql_table('t_roles',conn,'utilisateurs') + if with_orga: + df = pd.merge(df,get_bib_oragnismes(),on='id_organisme') + return df + + +def keep_insert_role(df): + t_role = get_t_role() + cols = ['nom_role','prenom_role','id_organisme'] + df = pd.merge(df,t_role[cols+['id_role']],on=cols,how='left') + return df[df.id_role.isna()].drop(columns=['id_role']) + + +#Récupération données t_zh +def prepare_SITEINFO(): + #ouverture de SITEINFO: + df = mdb.read_table(db_file,'SITEINFO') + + #suppression des colonnes vides + df = _del_na_col(df) + df.drop(columns=['DEPT','NUM','CATCH_COD','COMPLE_COD','LENGTH','SITE_LOC','SITE_MANA'],inplace=True) + return df + +#ajouter les colonnes qui n'existent pas dans SITEINFO mais qui dépendent d'autres colonnes qui seront implémentées (remark_is_other_inventory par exemple) +def add_t_zh_cols(): + df = prepare_SITEINFO() + df['id_diag_hydro'] = 759 # corresp à "pas évalué" dans t_nomenc + df['id_diag_bio'] = 763 # corresp à "pas évalué" dans t_nomenc + return df + +#conversion des codes medwet en code geonature (via t_nomenclature ou dictionnaire définit sur-mesure) +# conversion id_frequency +def get_t_zh_id_frequency(freq,on_dict='CODE'): + + dic = merge_dics('DicGen-FREQ') + dic_freq = dict(zip(dic[on_dict],dic.id_nomenclature)) + + return freq.replace(dic_freq) + +# conversion id_spread +def get_t_zh_id_spread(spread,on_dict='CODE'): + + dic = merge_dics('DicGen-ETEND') + dic_sprea = dict(zip(dic[on_dict],dic.id_nomenclature)) + + return spread.replace(dic_sprea) + + # conversion id_connexion +def get_t_zh_id_connexion(connex,on_dict='CODE'): + + dic = merge_dics('DicGen-CONNEX') + dic_conex = dict(zip(dic[on_dict],dic.id_nomenclature)) + + return connex.replace(dic_conex) + + +# Récupération données t_zh +def get_t_zh(): + table = 't_zh' + + t_role = get_t_roles().sort_index() + org = get_id_org_przh() + dicsda = merge_dics('DicGen-SDA',bib_mnemo='SDAGE') + iddefaultsda = dicsda[dicsda.mnemo=='nondetermine'].id_nomenclature.values[0] + dic_sda = dict(zip(dicsda.CODE,dicsda.id_nomenclature)) + dic_org = dict(zip(org.abbrevation,org.id_org)) + dic_role = merge_dic_role() + + #renommage colonnes + dic_cols = { + 'SITE_COD':'code', + 'SITE_NAME':'main_name', + 'OTHER_NAME':'secondary_name', + 
'SITE_LIM':'remark_lim', + 'EF_REM':'remark_lim_fs', + 'AREA_WET':'area', + 'AREA_EF':'ef_area', + 'EDITDATE':'create_date', + 'SITE_DIAGNOS':'remark_diag', + 'UPDDATE':'update_date', + 'SITE_RAP1':'remark_eval_function', + 'SITE_RAP2':'remark_eval_heritage', + 'SITE_RAP3':'remark_eval_thread', + 'SITE_RAP4':'remark_eval_actions', + 'MEMO_ACTIV':'global_remark_activity', + 'ZH_REMARKS':'remark_pres', + } + siteinfo = prepare_SITEINFO()\ + .rename(columns=dic_cols) + crit_delim,_cor_lim_list = cor_lim_list(siteinfo.code) + siteinfo = siteinfo.merge(crit_delim, on='code') + + siteinfo['create_date'] = pd.to_datetime(siteinfo['create_date']) + siteinfo['update_date'] = pd.to_datetime(siteinfo['update_date']) + siteinfo.loc[siteinfo.update_date.isna(),'update_date'] = siteinfo[siteinfo.update_date.isna()].create_date + siteinfo['create_author'] = siteinfo.COMPILER.replace(dic_role) + siteinfo['update_author'] = siteinfo.COMPILER.replace(dic_role) + siteinfo['id_org'] = siteinfo.ORG.replace(dic_org) + siteinfo['id_sdage'] = siteinfo.SDAGE_COD.replace(dic_sda) + siteinfo.loc[ + ~siteinfo.id_sdage.isin(dicsda.id_nomenclature), + 'id_sdage'] = iddefaultsda + + siteinfo['id_frequency'] = get_t_zh_id_frequency(siteinfo.FREQUENCE) + siteinfo['id_spread'] = get_t_zh_id_spread(siteinfo.ETENDUE) + siteinfo['id_connexion'] = get_t_zh_id_connexion(siteinfo.CONNEX) + + # Récupération des "AUTRES INVENTAIRES" + siteinfo.loc[~siteinfo.OTHER_INV.str.startswith('ZNIEFF',na=False),'remark_is_other_inventory'] = siteinfo[ + ~siteinfo.OTHER_INV.str.startswith('ZNIEFF',na=False) + ].OTHER_INV + siteinfo['is_other_inventory'] = siteinfo['remark_is_other_inventory'].notna() + + # Récupération nb_hab d'Habitats patrimoniaux + cbio_filer = __filter_lb_error__() + cbio = cbio_filer['is_cbio'] + notcbio = cbio_filer['not_cbio']\ + .groupby('SITE_COD').agg(','.join) + cbio_ch, not_bib = __filter_lb_code__(cbio.set_index('SITE_COD'),join_ch=True) + siteinfo = pd.merge( + siteinfo, + cbio_ch.groupby('SITE_COD',dropna=False)\ + .agg({'is_ch':sum})\ + .rename(columns={'is_ch':'nb_hab'}), + left_on='code', + right_index=True, + how='left' + ) + + # Aggregation des habitats considérés NON-HUMIDE + not_bib.lb_code = 'Autre(s) habitat(s) CORINE BIOTOPE décrit(s) :\n' + not_bib.lb_code + notcbio.lb_code = 'Autre(s) habitat(s) décrit(s) :\n' + notcbio.lb_code + siteinfo.remark_pres = _aggregate_value__( + siteinfo.set_index('code').remark_pres, + not_bib.lb_code, + sep='\n' + ) + siteinfo.remark_pres = _aggregate_value__( + siteinfo.set_index('code').remark_pres, + notcbio.lb_code, + sep='\n' + ) + + # JOINDRE GEOM + carteGen_path = './bdd/CarteZHGénérée.shp' + carteGen = gpd.read_file(carteGen_path)\ + .drop(columns=['Site','Surface'])\ + .rename_geometry('geom')\ + .rename(columns={'NewNumZone':'code'}) + # Correction des géométries invalides + if not carteGen.is_valid.all(): + carteGen.loc[~carteGen.is_valid,'geom'] = \ + carteGen[~carteGen.is_valid].geom.buffer(0) + # Jointure des géometries + df = siteinfo.merge(carteGen,on='code')\ + .set_geometry('geom') + df['area'] = round(df.geom.area,2) + + # reprojection + if hasattr(df,'crs') and df.crs.srs != 'EPSG:4326': + df.to_crs(4326,inplace=True) + + tzh_cols = recup_cols_table(table,conn) + lst_cols = df.columns[df.columns.isin(tzh_cols)] + to_tzh = df[lst_cols].copy() + print('Columns non intégrés : %s'%str([x for x in tzh_cols if x not in lst_cols])) + + return to_tzh,_cor_lim_list + + +def recup_cols_table(table,con,schema='pr_zh'): + lst_cols = 
con.dialect.get_columns(con,table,schema)
+    return [x['name'] for x in lst_cols]
+
+
+def get_SITELIM():
+    #read the dictionary
+    dic = merge_dics('DicGen-LIM')
+
+    #read SITELIM
+    df = mdb.read_table(db_file,'SITELIM')
+    dic['CODE'] = dic['CODE'].astype(int)
+    df['LIM_COD'] = df['LIM_COD'].astype(int)
+
+    #join the dictionary with SITELIM
+    df = pd.merge(df,dic,how="left",left_on="LIM_COD",right_on="CODE")
+    df.drop(columns=['LIM_COD', 'mnemo','CODE', 'DESCR'],inplace=True)
+    df.rename(columns={'SIT_COD':'code','id_nomenclature':'id_lim'},inplace=True)
+
+    return df
+
+
+def cor_lim_list(SITE_COD):
+    """Build the rows for pr_zh.cor_lim_list and return the associated uuids
+    (the insert itself is done later through to_cor_()).
+    """
+
+    delim = t_nomenclature_ZH(bib_mnemo='CRIT_DELIM')
+    delim.mnemo = delim.mnemo.str.replace(r'.\(.*\)','',regex=True)
+    dict_delim = dict(zip(delim.mnemo.str.lower(),delim.id_nomenclature))
+
+
+    crit_delim = get_SITELIM()
+    app_site = SITE_COD[~SITE_COD.isin(crit_delim.code)]
+    if not app_site.empty:
+        app_site = pd.DataFrame({'code':app_site})
+        app_site['id_lim'] = delim[delim.mnemo=='Non déterminé'].id_nomenclature.values[0]
+        crit_delim = pd.concat([crit_delim,app_site])
+
+
+    genuuid = crit_delim.groupby('code').agg(list).reset_index()
+    genuuid['id_lim_list'] = [uuid.uuid4() for x in genuuid.index]
+
+    _cor_lim_list = genuuid.drop(columns=['code']).explode('id_lim')
+    uuidreturn = genuuid.drop(columns=['id_lim'])
+
+    return uuidreturn,_cor_lim_list
+
+
+def get_t_roles(id_role=None):
+    table = 't_roles'
+    t_roles = pd.read_sql_table(
+        table,conn,'utilisateurs',['id_role'],columns=['nom_role','prenom_role','id_organisme']
+    ).replace({r'[’]':"'"},regex=True).sort_index()
+    if id_role:
+        t_roles = t_roles.iloc[[id_role]]
+    return pd.merge(t_roles,get_bib_organismes(),'left',left_on='id_organisme',right_index=True)
+
+
+def get_bib_organismes(schema='utilisateurs'):
+    table = 'bib_organismes'
+    return pd.read_sql_table(
+        table,conn,schema,['id_organisme'],columns=['nom_organisme']
+    ).replace({r'[’]':"'"},regex=True)
+
+
+def get_id_org_przh():
+    return pd.read_sql_table('bib_organismes',conn,'pr_zh')
+
+def remove_special_char(obj,space=False):
+    dict_char = {
+        r'[’]':"'",
+        r'[àáâãäå]':'a',
+        r'[èéêë]':'e',
+        r'[ìíîï]':'i',
+        r'[òóôõö]':'o',
+        r'[ùúûü]':'u',
+        # r'[ ]':"",
+        r'[–]':"-"
+    }
+    if space:
+        dict_char = {**dict_char, **{r'[ ]':""}}
+    return obj.replace(dict_char,regex=True)
+
+
+def to_bib_organismes_przh(dicorga,first_time=False):
+    """Insert the organisations into the pr_zh table
+    @dicorga : dict. {'abbrev': 'name of the structure', ...}
+    """
+    table = 'bib_organismes'
+
+    if first_time:
+        sql = 'DELETE FROM pr_zh.%s'%table
+        with conn.begin() as cnx:
+            cnx.execute(sql)
+
+    isin_db = pd.read_sql_table(
+        table,conn,'pr_zh',['id_org'],columns=['name']
+    ).replace({r'[’]':"'"},regex=True)
+
+    insert_from = pd.DataFrame({'nom':dicorga.values(),'abbrev':dicorga.keys()})
+
+    to_insert = insert_from[~insert_from.nom.str.lower().isin(isin_db.name.str.lower())]
+    to_insert.abbrev = remove_special_char(to_insert.abbrev,space=True)\
+        .str.upper()\
+        .str[:6]\
+        .fillna('XXXXXX')
+
+    to_insert\
+        .rename(columns={'nom':'name','abbrev':'abbrevation'})\
+        .to_sql(name=table,con=conn,schema='pr_zh',if_exists='append',index=False)
+
+#Fetch data for t_actions
+#definition of a custom dictionary, needed because of its incompatibility with merge_dics()
+def Standardized_DICACT_dic(): # does not join like the other dictionaries because its labels are special: it needs its own individual re-transformation.
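+    """Return the DICACT dictionary as ['CODE', 'temp'], where 'temp' is the
+    lower-cased, de-accented, space-stripped 'CODE - DESCR' label used by
+    merge_dicact() to join against the standardized t_nomenclatures mnemonics."""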
+ df=mdb.read_table(db_file,'DICACT') + df['temp'] = df['CODE'] + ' - ' + df['DESCR'] + df = df[['CODE','temp']] + df['temp']=df['temp'].str.lower() + df['temp']=df['temp'].replace({ + r'[’]':"'", + r'[àáâãäå]':'a', + r'[èéêë]':'e', + r'[ìíîï]':'i', + r'[òóôõö]':'o', + r'[ùúûü]':'u', + r'[ ]':"", + },regex=True) + return df + +#jointure avec t_nomenclatures +def merge_dicact(): + df1 = Standardized_t_nomenclature_ZH() + df2 = Standardized_DICACT_dic() + df = pd.merge(df1,df2,how='left',left_on='mnemo',right_on='temp') + df = df[df['temp'].notna()] + df.drop(columns=['temp']) + return df + + +def get_cor_impact_types(): + return pd.read_sql_table('cor_impact_types',conn,'pr_zh') + + +def get_t_activity(): + #lecture du dictionnaire dicgen-pos + dicpos = merge_dics('DicGen-POS') + dicpos = dict(zip(dicpos.CODE,dicpos.id_nomenclature)) + #lecture du dictionnaire dicgen-imp + + no_imp = t_nomenclature_ZH('IMPACTS')\ + .merge(merge_dics('DicGen-IMP')[['id_nomenclature','CODE']],on='id_nomenclature')\ + .rename(columns={'id_nomenclature':'id_impact'}) + di_imp = pd.merge(get_cor_impact_types(),no_imp[['id_impact','mnemo','CODE']],on='id_impact') + di_imp.CODE = di_imp.CODE.str.strip() + dicimp = dict(zip(di_imp.CODE,di_imp.id_cor_impact_types)) + + #lecture du dictionnaire DIDACT + dicact = merge_dicact() + dicact = dict(zip(dicact.CODE,dicact.id_nomenclature)) + #jointure avec la table SITEACT + siteimp = mdb.read_table(db_file,'SITEIMP') # IMPACT_COD + # siteimp.reset_index(inplace=True) + siteimp = _del_na_col(siteimp) + + normimp = dict(zip([x+'.0' if x.find(r'.') == -1 else x for x in no_imp.CODE],no_imp.CODE)) + df = mdb.read_table(db_file,'SITEACT').drop(columns=['COORD_X','COORD_Y']) + df.IMPACT_COD = df.IMPACT_COD.astype(str) + df.IMPACT_COD.replace(normimp,inplace=True) + df = _del_na_col(df) + + # Drop SITEIMP_IMPACT_COD who is in SITEACT_IMPACT_COD + siteimp['ISIN'] = [False if df[(df.SITE_COD==siteimp.SITE_COD[x])&(df.ACTIV_COD==siteimp.ACTIV_COD[x])&(df.IMPACT_COD==siteimp.IMPACT_COD[x])].empty else True + for x in siteimp.index + ] + siteimp_filter = siteimp[~siteimp.ISIN].drop(columns=['ISIN']) + + + activ = df.merge(siteimp_filter,on=['SITE_COD','ACTIV_COD'],how='outer',suffixes=('','_y'))\ + .sort_values(['SITE_COD','ACTIV_COD']) + # Récupération des codes SITEIMP, précisant parfois SITEACT + activ['IMPACT_COD'] = [activ.IMPACT_COD_y[x] if (activ.IMPACT_COD[x] is np.NaN) else activ.IMPACT_COD[x] for x in activ.index] + activ['IMPACT_COD'] = [activ.IMPACT_COD_y[x] if (activ.IMPACT_COD[x]=='0') and (activ.IMPACT_COD_y[x] is not np.NaN) else activ.IMPACT_COD[x] for x in activ.index] + # Suppression des codes SITEIMP existants + activ['IMPACT_COD_y'] = [None if activ.IMPACT_COD_y[x] == activ.IMPACT_COD[x] else activ.IMPACT_COD_y[x] for x in activ.index] + activ_a = activ.drop(columns=['IMPACT_COD_y']).drop_duplicates() + activ_b = activ.copy().drop(columns=['POSITION','REMARKS','IMPACT_COD'])\ + .dropna(subset=['IMPACT_COD_y'])\ + .rename(columns={'IMPACT_COD_y':'IMPACT_COD'}) + # .drop_duplicates() + activ_all = pd.concat([activ_a,activ_b]) + + dict_imp = t_nomenclature_ZH('LOCALISATION') + iddefault = dict_imp[dict_imp.label=='Non déterminée'].id_nomenclature.values[0] + if not activ_all[activ_all.POSITION.isna()].empty: + activ_all[activ_all.POSITION.isna()].to_csv(path + '/t_activity_NA.csv',index=False) + activ_all.POSITION.fillna(iddefault,inplace=True) + + activ_all.POSITION.replace(dicpos,inplace=True) + activ_all.ACTIV_COD.replace(dicact,inplace=True) + 
activ_all.IMPACT_COD.replace(dicimp,inplace=True) + + + t_activ = activ_all\ + .groupby(['SITE_COD','ACTIV_COD'],dropna=False)\ + .agg(list)\ + .reset_index()\ + .rename(columns={ + 'ACTIV_COD': 'id_activity', + 'POSITION' : 'id_position', + 'IMPACT_COD' : 'id_cor_impact_types', + 'REMARKS' : 'remark_activity' + }) + t_activ.id_position = [list(set(x)) for x in t_activ.id_position ] + t_activ.id_position = [ + x[0] if len(x)==1 else [y for y in x if y != iddefault][0] + if len([y for y in x if y != iddefault]) == 1 else iddefault + for x in t_activ.id_position + ] + t_activ['remark_activity'] = [ + '\n'.join( + list(dict.fromkeys([item for item in x if not(pd.isna(item)) == True])) + ) for x in t_activ['remark_activity'] + ] + t_activ.remark_activity = t_activ.remark_activity\ + .str.strip()\ + .replace({' ',' '},regex=True)\ + .replace({'':None}) + t_activ['id_impact_list'] = [uuid.uuid4() for x in t_activ.index] + + + t_activity = t_activ[['SITE_COD','id_activity', 'id_position','id_impact_list','remark_activity']] + cor_impact_list = t_activ[['id_impact_list','id_cor_impact_types']]\ + .explode('id_cor_impact_types')\ + .drop_duplicates() + + return t_activity,cor_impact_list + + +#Récupération données t_fonctions +#dictionnaire fonctions hydrologiques +def Standardized_DicGen_FVI_dic(): # ne se joint pas avec les autres dictionnaires car les étiquettes sont spéciales: nécessite une retransformation individuelle. + df=mdb.read_table(db_file,'DicGen-FVI') + df['temp'] = df['FVI_CODE'] + ' - ' + df['DESCR'] + df = df[['FVI_CODE','temp']] + df['temp']=df['temp'].str.lower() + df['temp']=df['temp'].replace({ + r'[’]':"'", + r'[àáâãäå]':'a', + r'[èéêë]':'e', + r'[ìíîï]':'i', + r'[òóôõö]':'o', + r'[ùúûü]':'u', + r'[ ]':"", + },regex=True) + return df + +#jointure avec t_nomenclature +def merge_DicGen_FVI(): + df1 = Standardized_t_nomenclature_ZH() + df2 = Standardized_DicGen_FVI_dic() + df = pd.merge(df1,df2,how='left',left_on='mnemo',right_on='temp') + df = df[df['temp'].notna()] + df.drop(columns=['temp']) + return df + + +def _get_SITEFVI(func): + #lecture du dictionnaire DicGen-FVI + name_table = 'SITEFVI'+func.upper() + dicgenFVIH = merge_DicGen_FVI() + #jointure avec la table SITEVIH + df = mdb.read_table(db_file,name_table) + df = pd.merge(df,dicgenFVIH,how='left',left_on='FVI_COD',right_on='FVI_CODE') + df = _del_na_col(df) + df.drop(columns=['mnemo','temp','FVI_CODE','FVI_COD'],inplace=True) + #reste à transformer les codes avec les id_zh futurs id de get_t_zh + return df + + +def get_t_functions(clean=True): + #lecture du dictionnaire DicGen-FVI + fvih = _get_SITEFVI(func="H").rename(columns={'MEMO_HYD':'MEMO'}) + fvib = _get_SITEFVI(func="B").rename(columns={'MEMO_BIO':'MEMO'}) + fvis = _get_SITEFVI(func="S").rename(columns={'MEMO_SOCIO':'MEMO'}) + fvip = _get_SITEFVI(func="P").rename(columns={'MEMO_PATRI':'MEMO'}) + fvih['type_func'] = 'HYD' + fvib['type_func'] = 'BIO' + fvis['type_func'] = 'SOCIO' + fvip['type_func'] = 'PATRI' + df = pd.concat([fvih,fvib,fvis,fvip]) + qualif = t_nomenclature_ZH('FONCTIONS_QUALIF') + knowle = t_nomenclature_ZH('FONCTIONS_CONNAISSANCE') + df['id_qualification'] = qualif.loc[qualif.mnemo=='Non évaluée','id_nomenclature'].values[0] + df['id_knowledge'] = knowle.loc[knowle.mnemo=='Lacunaire ou nulle','id_nomenclature'].values[0] + df.rename(columns={'MEMO':'justification','id_nomenclature':'id_function'},inplace=True) + if clean: + df.drop(columns=['type_func','MAPINFO_ID'],inplace=True) + return df + +#Récupération données t_table_héritage +def 
get_t_table_heritage(): + #lecture dictionnaire dicgen-VIP et jointure avec bib_cb. /!\ ne se merge pas avec utilisteurs.t_nomenclatures, mais avec pr_zh.bib_cb ! + bib = pd.read_sql_query('SELECT * FROM pr_zh.bib_cb',conn) + dic = mdb.read_table(db_file,'DicGen-CBio') + + dict={'1':'10','2':'20','3':'30','4':'40','5':'50','6':'60','7':'70','8':'80','9':'90'} + dic =dic['CODE'].replace(dict) + dic = pd.merge(bib,dic,how='left',left_on='lb_code',right_on='CODE') + #jointure du dictionnaire avec SITEFVIP + df = mdb.read_table(db_file,'SITEFVIP') + df = pd.merge(df,dic,how='left',left_on='FVI_COD',right_on='CODE') + return df + +#Récupération données t_inflow +def get_t_inflow(): + #récupération dictionnaire dicgen-in1 + dicinf1 = merge_dics('DicGen-IN1',bib_mnemo='ENTREE_EAU') + #problème: se mélange avec dicgen-out1 dans t_nomenclatures. Sélection du bon dictionnaire sur les id_nomenclatures + # dicinf1 = dicinf1.loc[dicinf1['id_nomenclature']<=726] + #récupération dictionnaire dicgen-in2 + dicinf2 = merge_dics('DicGen-IN2',bib_mnemo='PERMANENCE_ENTREE') + #problème: se mélange avec dicgen-out2 dans t_nomenclatures. Sélection du bon dictionnaire sur les id_nomenclatures + # dicinf2 = dicinf2.loc[dicinf2['id_nomenclature']<=735] + #jointure des dicttionnaires avec SITEINF + df = mdb.read_table(db_file,'SITEINFL')\ + .rename(columns={'TOPOINFL':'topo'}) + df = pd.merge(df,dicinf1,how = 'left', left_on='INFLOW1', right_on='CODE')\ + .rename(columns={'id_nomenclature':'id_inflow'}) + df = pd.merge(df,dicinf2,how='left',left_on='INFLOW2',right_on='CODE')\ + .rename(columns={'id_nomenclature':'id_permanance'}) + #nettoyage des colonnes en trop + df.drop(columns=['INFLOW1', 'INFLOW2','COORD_X', 'COORD_Y', + 'mnemo_x','CODE_x', 'DESCR_x', 'mnemo_y','CODE_y', 'DESCR_y'],inplace=True) + return df + +#Récupération données t_instruments +def get_t_instruments(): + #récupération dictionnaire dicgen-INS. + dicins = merge_dics('DicGen-INS') + #jointure avec SITEINSTR + df = mdb.read_table(db_file,'SITEINSTR') + df['DATE_STA'] = pd.to_datetime(df['DATE_STA']) + df = pd.merge(df,dicins,how='left', left_on='INSTR_COD',right_on='CODE') + #nettoyage des colonnes en trop + df.drop(columns=['INSTR_COD','mnemo','CODE','DESCR','DATE_END','ORGA_INSTR'],inplace=True) + #renommage des colonnes + df.rename(columns={'id_nomenclature':'id_instrument','DATE_STA':'instrument_date'},inplace=True) + + return df + +#Récupération données t_management_structures +def get_t_management_structures(): # existe pê, à voir avec Aude demain + #lecture sitegest + df = mdb.read_table(db_file,'SITEGEST') + + #remplacement id_structure (manuellement, car ici il n'y a que 2 lignes.) + df['STRUC_COD'].replace('ASTERS0001','1',inplace=True) + df.rename(columns={'STRUC_COD':'id_org'},inplace=True) + return df + + +#Récupération données t_outflow +def get_t_outflow(): + #récupération dictionnaire dicgen-out1 + dicout1 = merge_dics('DicGen-OUT',bib_mnemo='SORTIE_EAU') + #problème: se mélange avec dicgen-in1 dans t_nomenclatures. Sélection du bon dictionnaire sur les id_nomenclatures + # dicout1 = dicout1.loc[dicout1['id_nomenclature']<=726] + #récupération dictionnaire dicgen-out2 + dicout2 = merge_dics('DicGen-OUT2',bib_mnemo='PERMANENCE_SORTIE') + dicout2.loc[dicout2.mnemo=='nondetermine','CODE'] = '0' + dicout2['CODE'] = dicout2['CODE'].astype(int) + #problème: se mélange avec dicgen-in2 dans t_nomenclatures. 
Sélection du bon dictionnaire sur les id_nomenclatures + # dicout2 = dicout2.loc[dicout2['id_nomenclature']>=735] + #problème: se mélange aussi avec autre dictionnaire dans t_nomenclatures. Sélection du bon dictionnaire sur les id_nomenclatures + # dicout2 = dicout2.loc[dicout2['id_nomenclature']<=942] + #jointure des dicttionnaires avec SITEOUTF + df = mdb.read_table(db_file,'SITEOUTF') + df['PERMANENCE'].fillna( + dicout2.loc[dicout2.mnemo=='nondetermine','CODE'].values[0], + inplace=True + ) + df = pd.merge(df,dicout1,how = 'left', left_on='OUTFLOW', right_on='CODE') + #/!\ attention: Colonne permanence en float64 et non en integer. Conversion en integer + # df['PERMANENCE'] = df['PERMANENCE'].fillna(0).astype(int) + df = pd.merge(df,dicout2,how='left',left_on='PERMANENCE',right_on='CODE') + #nettoyage des colonnes en trop + df.drop(columns=[ + 'OUTFLOW','COORD_X', 'COORD_Y', 'PERMANENCE', + 'mnemo_x','CODE_x', 'DESCR_x', 'mnemo_y','CODE_y','DESCR_y' + ],inplace=True) + #renommage colonnes + df.rename(columns={ + 'TOPOOUTFL':'topo','id_nomenclature_x':'id_outflow','id_nomenclature_y':'id_permanance' + },inplace=True) + return df + + +#Récupération données t_ownership +def get_t_ownership(): + #récupération dictionnaire DicGen-STAT + dicown = merge_dics('DicGen-STAT') + #jointure avec SITESTA + df = mdb.read_table(db_file, 'SITESTA') + df = pd.merge(df,dicown, how='left',left_on='TENUR_COD',right_on='CODE') + #nettoyage des colonnes en trop + df.drop(columns=['DESCR','mnemo','TENUR_COD','CODE'],inplace=True) + df.rename(columns={'id_nomenclature':'id_status','MEMO_TENUR':'remark'},inplace=True) + return df + +def merge_dic_role(): + df = pd.merge( + get_utilisateurs_t_roles(), + get_t_roles().reset_index(drop=False), + on=['nom_role','prenom_role','id_organisme'], + how='left' + ) + return dict(zip(df.CODE,df.id_role)) + + +def to_t_references(): + table = 't_references' + + dic_col_ref = { + 'REF_NO':'ref_number', + 'REFERENCE':'reference', + 'AUTHOR':'authors', + 'TITLE':'title', + 'YEAR':'pub_year', + 'PUBLISHER':'editor', + 'LOCATION':'editor_location', + } + df = mdb.read_table(db_file, 'MWDREF')\ + .rename(columns=dic_col_ref) + + df.loc[df.title.isna(),'title'] = df[df.title.isna()].reference + df.to_sql(name=table,con=conn,schema='pr_zh',if_exists='append',index=False) + + +def OTHERINV_to_tref(): + table = 't_references' + + sitinfo = prepare_SITEINFO().set_index('SITE_COD') + otinv = sitinfo[['OTHER_INV']]\ + .dropna() + otinv = otinv.OTHER_INV.str\ + .split('//',expand=True)\ + .stack().droplevel(-1)\ + .str.strip().to_frame() + otinv.columns = ['title'] + + cren = otinv[otinv.title.str.startswith('CREN')].copy() + cren['authors'] = cren.title.str.split(r' ',1,expand=True)[0] + cren['editor'] = cren.title.str.split(r' ',1,expand=True)[0] + cren['pub_year'] = cren.title.str.split(r'[ |,]',2,expand=True)[1].astype(int) + cren['ref_number'] = cren.title.str.rsplit(r' ',1,expand=True)[1] + cren.drop_duplicates(inplace=True) + + znif = otinv[otinv.title.str.startswith('ZNIEFF')].copy() + znif['ref_number'] = znif.title.str.rsplit(r'n°',1,expand=True)[1] + znif.drop_duplicates(inplace=True) + + cren.to_sql(name=table,con=conn,schema='pr_zh',if_exists='append',index=False) + znif.to_sql(name=table,con=conn,schema='pr_zh',if_exists='append',index=False) + + +def update_t_ownership(con_zh,con_fon): + table = 't_ownership' + + zh = gpd.read_postgis('SELECT id_zh, geom FROM pr_zh.t_zh',con_zh,crs=4326) + zh.to_crs(2154,inplace=True) + sql_fon = ''' + SELECT + pa.par_id, + pr.dnuper, + 
pr.ddenom, + ccogrm_lib, + pa.geom + FROM cadastre.parcelles_cen pa + JOIN cadastre.lots_cen USING (par_id) + JOIN cadastre.cadastre_cen USING (lot_id) + JOIN cadastre.cptprop_cen USING (dnupro) + JOIN cadastre.r_prop_cptprop_cen USING (dnupro) + JOIN cadastre.proprios_cen pr USING (dnuper) + JOIN cadastre.d_ccogrm USING (ccogrm) + ''' + fon = gpd.read_postgis(sql_fon,con_fon) + dic = { + 'Commune':'Collectivité territoriale (communal, départemental, régional, syndicat mixte)', + 'Personnes morales non remarquables':'Propriété privée', + 'Département':'Collectivité territoriale (communal, départemental, régional, syndicat mixte)', + 'Personnes morales représentant des sociétés':'Propriété privée', + 'État':"Domaine de l'Etat", + 'Établissements publics ou organismes assimilés':'Établissement public (conservatoire du littoral, parcs nationaux…)', + 'Copropriétaire':'Propriété privée', + } + fon['statut'] = fon.ccogrm_lib.replace(dic) + stat = t_nomenclature_ZH('STATUT_PROPRIETE') + dic2 = dict(zip(stat.mnemo,stat.id_nomenclature)) + fon.statut.replace(dic2,inplace=True) + gfon = gpd.sjoin(zh,fon[['geom','statut']])\ + .drop(columns=['geom','index_right'])\ + .drop_duplicates()\ + .rename(columns={'statut':'id_status'}) + + in_db = pd.read_sql_table(table,con_zh,'pr_zh') + gfon = gfon[~(gfon.id_zh.isin(in_db.id_zh) & gfon.id_status.isin(in_db.id_status))] + gfon.to_sql(table,con=con_zh,schema='pr_zh',if_exists='append',index=False) + print('Insert %s news rows !'%gfon.shape[0]) + + del_in_db = in_db[in_db.id_zh.isin(gfon.id_zh) & (~in_db.id_status.isin(gfon.id_status))] + if not del_in_db.empty: + print('Possible données obsolète dans la table %s'%table) + return del_in_db + print('Table %s à jour !'%table) + + +################################################ +# Conversion des fonctions de requêtage en CSV # +################################################ +if __name__ == "__main__": +# def HARVEST(): + + Dossier = "HARVEST" + path = os.path.join(DIR,Dossier) + if not os.path.exists(path): + os.mkdir(path) + dicorgaprzh = { + 'ASTERS':"Conservatoire D'Espaces Naturels De Haute-Savoie", + 'CPNS':"Conservatoire d'espaces naturels de Savoie", + } + # to_bib_organismes_przh(dicorga=dicorgaprzh,first_time=True) + # to_t_references() + + #récupération des dataFrames en listes + print("...début de la récupération des données ...") + # utilisateurs_bib_organismes = get_utilisateurs_bib_organismes() + bib_actions = get_bib_actions() + _cor_zh_cb,not_bib = get_cor_zh_cb(ignore=LB_IGNORE,replace=LB_DICT,out_notbib=True) # 609 rows where CB_COD not in bib_cb + _cor_zh_corine_cover = get_cor_zh_corine_cover() + _cor_zh_protection = get_cor_zh_protection() + _cor_zh_lim_fs = get_cor_zh_lim_fs() + t_activity, _cor_impact_list = get_t_activity() + t_functions = get_t_functions() + t_inflow = get_t_inflow() + t_outflow = get_t_outflow() + # t_table_heritage = get_t_table_heritage() + t_instruments = get_t_instruments() + t_management_structures = get_t_management_structures() + t_ownership = get_t_ownership() + t_roles = get_t_roles() + utilisateur = get_utilisateurs_t_roles() + insert_newrole = keep_insert_role(utilisateur.drop(columns=['CODE'])) + if not insert_newrole.empty: + insert_newrole.to_sql( + name='t_roles',con=conn,schema='utilisateurs',if_exists='append',index=False + ) + + t_zh,_cor_lim_list = get_t_zh() + _cor_zh_ref = get_cor_zh_ref() + + ######### COMPLET / RESOLVE auteur indéterminé + t_zh.loc[t_zh.create_author.isna(),['create_author']] = t_roles[t_roles.nom_role=='AUTRE'].index[0] + 
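+    # Same fallback for update_author just below: sites with no attributable
+    # author are assigned the generic 'AUTRE' role defined in utilisateurs.t_roles.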
t_zh.loc[t_zh.update_author.isna(),['update_author']] = t_roles[t_roles.nom_role=='AUTRE'].index[0] + int_col = ['create_author','update_author','id_sdage','id_connexion'] + t_zh[int_col] = t_zh[int_col].astype(int) + ######### COMPLET / RESOLVE create_date & update_date + t_zh.loc[t_zh.create_date.isna(),['create_date']] = dt.today().date().isoformat() + t_zh.loc[t_zh.update_date.isna(),['update_date']] = dt.today().date().isoformat() + ######### SPEC CEN74 + # Ammendement de la table lb_code + # A faire qu'une fois ! + to_bibcb = pd.DataFrame({'lb_code':not_bib.lb_code.str.split(',',expand=True).stack().unique().tolist()}) + to_bibcb['humidity'] = None + to_bibcb['is_ch'] = None + add_bib_cb(to_bibcb,con=conn,humidity='P',is_ch=False) + # Manip du champ remark_eval_heritage + rmk_her = t_zh[['code','remark_eval_heritage']].copy() + rmk_her.set_index('code',inplace=True) + rmk_her = rmk_her.remark_eval_heritage.str.split('FAUNE',expand=True) + rmk_her.columns = ['flore','faune'] + fau = rmk_her.faune.str.split("Espèce animale d'intérêt : |Espèces animales d'intérêt par ordre décroissant : ",expand=True) + flo = rmk_her.flore.str.split("Espèce végétale d'intérêt : |Espèces végétales d'intérêt par ordre décroissant : ",expand=True) + fau.columns = ['rmk_faune','ic_faune_desc'] + flo.columns = ['rmk_flore','ic_flore_desc'] + rmk_her = pd.merge(flo,fau,right_index=True,left_index=True) + # Suppression des valeurs inutiles + rmk_her.replace({ + r'[\r\n]':'', + 'FLORE':'', + 'Aucune espèce de valeur connue':'', + },regex=True,inplace=True) + for c in rmk_her.columns: + rmk_her[c] = rmk_her[c].str.strip().replace({'':None}) + # Replacement de certaines précisions + rmk_her.replace({ + 'Zone en apparence détruite avant destruction':'Zone en apparence détruite. Espèces observées avant destruction', + 'avant destruction':None + },inplace=True) + # Ecriture du tableau pour consultation + rmk_her.to_csv(path + '/remark_eval_heritage_especes.csv',index=True) + # Récupération des remarques pour t_zh + t_zh.drop(columns='remark_eval_heritage',inplace=True) + t_zh = t_zh.merge(rmk_her[['rmk_flore']],right_index=True,left_on='code').rename(columns={'rmk_flore':'remark_eval_heritage'}) + + ####### ERROR ####### + # Bloquant pour intégration. len(Remark_..) 
> 2000 characters + crmk = ['code',*t_zh.columns[t_zh.columns.str.contains('remark')]] + t_zh[crmk].iloc[:,5].str.len().max() + t_zh.loc[t_zh[crmk].iloc[:,5].str.len()>2000,['code','remark_diag']].iloc[:,1].values + t_zh.loc[t_zh[crmk].iloc[:,5].str.len()>2000,['code','remark_diag']].to_csv(path + '/remark_diag_TOO_LONG.csv',index=False) + t_zh.loc[t_zh[crmk].iloc[:,5].str.len()>2000,['remark_diag']] = 'Remarques disponibles auprès du référent ZH ou Géomatique' + + t_zh[crmk].iloc[:,9].str.len().max() + t_zh.loc[t_zh[crmk].iloc[:,9].str.len()>2000,['code','remark_eval_heritage']].iloc[:,1].values + t_zh.loc[t_zh[crmk].iloc[:,9].str.len()>2000,['code','remark_eval_heritage']].to_csv(path + '/remark_eval_heritage_TOO_LONG.csv',index=False) + ##################### + + bib_actions.to_sql('bib_actions',conn,'pr_zh','append',False) # Fait + to_tzh(t_zh,conn) + to_cor_(conn,_cor_lim_list,'cor_lim_list') + to_t_(conn,_cor_zh_lim_fs,'cor_zh_lim_fs') + to_t_(conn,t_activity,'t_activity') + to_cor_(conn,_cor_impact_list,'_cor_impact_list') + to_t_(conn,t_functions,'t_functions') + to_t_(conn,t_inflow,'t_inflow') + to_t_(conn,t_outflow,'t_outflow') + to_t_(conn,t_instruments,'t_instruments') + to_t_(conn,t_management_structures,'t_management_structures') + to_t_(conn,t_ownership,'t_ownership') + to_t_(conn,_cor_zh_cb,'cor_zh_cb') + to_t_(conn,_cor_zh_corine_cover,'cor_zh_corine_cover') + to_t_(conn,_cor_zh_protection,'cor_zh_protection') + to_t_(conn,_cor_zh_ref,'cor_zh_ref') + update_t_ownership(con_zh=conn,con_fon=con_f) + # print("fin de la récupération des données.") + # #création du dossier d'acceuil des csv + # print("...création du dossier parent...") + + #écriture des dataframes en csv + print("...écriture des tables en fichier.csv ...") + # get_utilisateurs_bib_organismes().to_csv(path + '/bib_organismes.csv',index=False) # Status : A intégrer dans pr_zh. + # get_bib_actions().to_csv(path + '/bib_actions.csv',index=False) # Status : Prêt à intégrer. + # get_cor_zh_cb().to_csv(path + '/_cor_zh_cb.csv',index=False) # 609 rows where CB_COD not in bib_cb + # get_cor_zh_corine_cover().to_csv(path + '/_cor_zh_corine_cover.csv',index=False) # Status : Prêt à intégrer. + # get_cor_zh_protection().to_csv(path + '/_cor_zh_protection.csv',index=False) # Status : Prêt à intégrer. + # get_cor_zh_lim_fs().to_csv(path + '/_cor_zh_lim_fs.csv',index=False) # Status : Prêt à intégrer. + # t_activity, _cor_impact_list = get_t_activity() + # t_activity.to_csv(path + '/t_activity.csv',index=False) # Status : Prêt à intégrer. + # _cor_impact_list.to_csv(path + '/_cor_impact_list.csv',index=False) # Status : Prêt à intégrer. + # get_t_functions().to_csv(path + '/t_functions.csv',index=False) # Status : Prêt à intégrer. + # get_t_inflow().to_csv(path + '/t_inflow.csv',index=False) + # get_t_outflow().to_csv(path + '/t_outflow.csv',index=False) # Status : Prêt à intégrer. + # get_t_table_heritage().to_csv(path + '/t_table_heritage.csv',index=False) # Status : Incompris # Status : Prêt à intégrer. + # get_t_instruments().to_csv(path + '/t_instruments.csv',index=False) # Status : Prêt à intégrer. + # get_cor_zh_ref().to_csv(path + '/_cor_zh_ref.csv',index=False) + # # liste des structures de gestion des zh + # get_t_management_structures().to_csv(path + '/t_management_structures.csv',index=False) # Status : Insatisfaisant, a revoir ! + # get_t_ownership().to_csv(path + '/t_ownership.csv',index=False) # Status : Prêt à intégrer. 
+    # keep_insert_role(get_utilisateurs_t_roles()).to_csv(path + '/t_roles.csv',index=False) # Status : Prêt à intégrer.
+    # get_t_zh().to_csv(path + '/t_zh.csv',index=False) # Status : Encore à Faire ..
+
+    # print("Fin de la transcription des tables en CSV.")
+    # print('Fin de HARVEST. Veuillez passer à TRANSFORM.')
+
+    # # Intégration des données de configs
+    # dicorgaprzh = {
+    #     'ASTERS':"Conservatoire D'Espaces Naturels De Haute-Savoie",
+    #     'CPNS':"Conservatoire d'espaces naturels de Savoie",
+    # }
+    # to_bib_organismes_przh(dicorga=dicorgaprzh,first_time=True)
+    # insert_newrole.to_sql(
+    #     name='t_roles',con=conn,schema='utilisateurs', index=False, if_exists='append')
diff --git a/5_GEONATURE/GN_ZH/Notes gn_ZH.md b/5_GEONATURE/GN_ZH/Notes gn_ZH.md
new file mode 100644
index 0000000..e1806fe
--- /dev/null
+++ b/5_GEONATURE/GN_ZH/Notes gn_ZH.md
@@ -0,0 +1,19 @@
+# Evolution des configurations dans le schéma `pr_zh`
+## table `bib_organismes`
+- Augmentation des contraintes data_type :
+abbreviation::varchar(9)
+## table `t_zh`
+- Possibilité d'enregistrer plusieurs auteurs :
+create_author::json
+update_author::json
+
+## table `cor_impact_types`
+- Les relations [`id_impact` vs `id_impact_type`] implémentées à l'installation du module omettent les relations homologues
+Ajout des relations manquantes avec un statut inactif
+exemple : [1012 vs 1250] == ['20 - Pollutions et nuisances' vs '20- POLLUTIONS ET NUISANCES']
+
+## table `bib_cb`
+- Incompréhension sur la liste recensée qui semble incomplète et qui considère une liste CORINE BIOTOPE restreinte.
+
+# Normalisation du référentiel Nomenclature
+
diff --git a/5_GEONATURE/GN_ZH/correct_geom.py b/5_GEONATURE/GN_ZH/correct_geom.py
new file mode 100644
index 0000000..b586b0e
--- /dev/null
+++ b/5_GEONATURE/GN_ZH/correct_geom.py
@@ -0,0 +1,19 @@
+from pycen import con_gn, update_to_sql
+import geopandas as gpd
+from geoalchemy2 import Geometry
+
+sql = 'SELECT id_zh, geom FROM pr_zh.t_zh'
+df = gpd.read_postgis(sql, con_gn)
+df.geom = df.buffer(0)
+
+update_to_sql(
+    # gpd.pd.DataFrame(df),
+    df,
+    con_gn,
+    't_zh',
+    'pr_zh',
+    'id_zh',
+    dtype={
+        'geometry': Geometry(srid=4326)
+    }
+)
\ No newline at end of file
diff --git a/5_GEONATURE/GN_ZH/tools/__init__.py b/5_GEONATURE/GN_ZH/tools/__init__.py
new file mode 100755
index 0000000..e5a0d9b
--- /dev/null
+++ b/5_GEONATURE/GN_ZH/tools/__init__.py
@@ -0,0 +1 @@
+#!/usr/bin/env python3
diff --git a/5_GEONATURE/GN_ZH/tools/pr_zh.py b/5_GEONATURE/GN_ZH/tools/pr_zh.py
new file mode 100755
index 0000000..41b1240
--- /dev/null
+++ b/5_GEONATURE/GN_ZH/tools/pr_zh.py
@@ -0,0 +1,263 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+import geopandas as gpd
+from geoalchemy2 import Geometry
+
+
+def _calc_recouvrmt(df1,df2,how='inner'):
+    '''
+    Calcule le recouvrement de df2 sur df1
+    pour chaque géométrie de df1:
+    Parameters
+    ----------
+    df1 : GeoDataFrame.
+    df2 : GeoDataFrame.
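+    how : str. Mode de jointure passé à gpd.sjoin (défaut 'inner').
+
+    Returns
+    -------
+    DataFrame à 3 colonnes : [df1.columns[0], df2.columns[0], 'perc_rcvmt'],
+    où 'perc_rcvmt' est le pourcentage de la géométrie de df1 recouvert par df2.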
+ ''' + iddf1 = df1.columns[0] + iddf2 = df2.columns[0] + + # Jointure spaciale + tmp = gpd.sjoin( + df1, + df2[['geom']], + predicate = 'intersects', + how = how) + + tmp.dropna(subset=['index_right'],inplace=True) + tmp.index_right = tmp.index_right.astype(int) + tmp.reset_index(inplace=True) + + tmp = tmp.join( + df2[['geom',iddf2]].rename(columns={'geom': 'right_geom'}), + on=['index_right'], how='left') + + tmp2 = tmp[['index_right','right_geom',iddf2]].copy() \ + .rename(columns={'right_geom': 'geom'}) \ + .set_geometry('geom') + + tmp1 = tmp[[iddf1,'geom']].copy() \ + .set_geometry('geom') + + if not tmp1.geom.values.is_valid.all(): + tmp1.loc[~tmp1.geom.values.is_valid,'geom'] = tmp1.loc[~tmp1.geom.values.is_valid,'geom'].buffer(0) + + if not tmp2.geom.values.is_valid.all(): + tmp2.loc[~tmp2.geom.values.is_valid,'geom'] = tmp2.loc[~tmp2.geom.values.is_valid,'geom'].buffer(0) + + tmp['perc_rcvmt'] = (tmp1.intersection(tmp2).area/tmp1.area)*100 + + return tmp[[iddf1,iddf2,'perc_rcvmt']] + + +def _cor_zh_area(con,tzh_code,typ,cover=False): + """ + @tzh : pd.Serie. Série de valeurs + correspondants à la colonne pr_zh.t_zh."code". + @typ : str. COM, DEP, ref_geo + """ + from math import ceil + + table = 'cor_zh_area' + + sqltzh = """ + SELECT zh.id_zh, zh.geom FROM pr_zh.t_zh zh WHERE zh."code" in {tzh_code} + """.format(tzh_code=tuple(tzh_code)) + tzh = gpd.read_postgis(sqltzh,con,crs=4326) + + if tzh.crs.srs=='epsg:4326': + tzh.to_crs(2154,inplace=True) + + if typ == 'COM': + cd2 = ' AND l."enable";' + else: cd2 = ';' + + # sql = ''' + # SELECT l.id_area,l.geom,zh.id_zh + # FROM ref_geo.l_areas l + # JOIN ref_geo.bib_areas_types bib USING (id_type), + # pr_zh.t_zh zh + # WHERE zh."code" in {tzh_code} + # AND bib.type_code='{typ}' + # AND ST_INTERSECTS( ST_SetSRID(l.geom,2154), ST_MakeValid(ST_Transform(ST_SetSRID(zh.geom,4326),2154)) ) + # AND (l.id_area,zh.id_zh) NOT IN (SELECT id_area,id_zh FROM pr_zh.cor_zh_area) + # {cd2} + # '''.format(tzh_code=tuple(tzh_code),typ=typ,cd2=cd2) + # # larea = gpd.pd.read_sql_query(sql,con) + # larea = gpd.read_postgis(sql,con) + sqllarea = """ + SELECT l.id_area, l.geom FROM ref_geo.l_areas l + JOIN ref_geo.bib_areas_types bib USING (id_type) + WHERE bib.type_code='{typ}' and l."enable" + """.format(typ=typ) + larea = gpd.read_postgis(sqllarea,con,crs=2154) + + df = _calc_recouvrmt(larea,tzh).rename(columns={'perc_rcvmt':'cover'}) + + if cover: + df['cover'] = [ceil(x) for x in df.cover] + else : + df.drop(columns=['cover'],inplace=True) + + + if not df.empty: + df.to_sql( + name=table, con=con, schema='pr_zh', + if_exists='append', index=False + ) + print('INSERT %i correspondances'%df.shape[0]) + else: + sql = ''' + SELECT l.id_area + FROM ref_geo.l_areas l + JOIN ref_geo.bib_areas_types bib USING (id_type) + WHERE bib.type_code='{typ}' + '''.format(typ=typ) + res = gpd.pd.read_sql_query(sql,con) + if not res.empty: + print('AUCUNE nouvelles correspondances identifiées') + else : + print('AUCUNE geometrie dans la table `ref_geo.l_areas` pour le `type_code` %s'%typ) + + + +def _cor_zh_(con,tzh_code,typ): + """ + @tzh : pd.Serie. Série de valeurs + correspondants à la colonne pr_zh.t_zh."code". + @typ : str. 
[hydro,rb] + """ + typ = typ.lower() + table = 'cor_zh_%s'%typ + tab_typ = 't_hydro_area' if typ == 'hydro' else 't_river_basin' + id_typ = 'id_hydro' if typ == 'hydro' else 'id_rb' + sql = ''' + SELECT h.{id_typ},zh.id_zh + FROM pr_zh.{tab_typ} h, pr_zh.t_zh zh + WHERE zh."code" in {tzh_code} + AND ST_INTERSECTS( ST_SetSRID(h.geom,4326),ST_MakeValid(ST_SetSRID(zh.geom,4326))) + AND (h.{id_typ},zh.id_zh) NOT IN (SELECT {id_typ},id_zh FROM pr_zh.{tab_to}) + ;'''.format( + tzh_code = tuple(tzh_code), + id_typ = id_typ, + tab_typ = tab_typ, + tab_to = table) + df = gpd.pd.read_sql_query(sql,con) + + if not df.empty: + df.to_sql( + name=table, con=con, schema='pr_zh', + if_exists='append', index=False + ) + print('INSERT %i correspondances'%df.shape[0]) + else: + print('AUCUNE nouvelles correspondances identifiées') + + + +def get_id_t_zh(con,code=None): + """@code : str, list, Serie, Index. Code à 12 characters maximum de la zone humide. + """ + sql = "SELECT id_zh,code FROM pr_zh.t_zh" # "SELECT id_zh,zh_uuid,code FROM pr_zh.t_zh" + if isinstance(code,str): + sql += " WHERE code='%s'"%code + elif isinstance(code,list) or isinstance(code,gpd.pd.Series) or isinstance(code,gpd.pd.Index): + sql += " WHERE code IN {lst_code}".format(lst_code=str(tuple(code))) + return gpd.pd.read_sql_query(sql,con) + + +def to_tzh(df,con): + table = 't_zh' + + if isinstance(df,gpd.GeoDataFrame): + epsg = df.crs.srs.split(':')[1] + df = df.to_wkt() + + df.to_sql( + name=table, con=con, schema='pr_zh', + if_exists='append', index=False, + dtype={ + 'geom': Geometry(srid=epsg) + # 'id_lim_list':uuid.SafeUUID + } + ) + + if isinstance(df,gpd.pd.DataFrame): + df.geom = gpd.GeoSeries.from_wkt(df.geom) + df = df.set_geometry('geom',crs=epsg) + + _cor_zh_area(con,tzh_code=df.code,typ='ZNIEFF1') + print('INSERT cor_zh_area ZNIEFF1 OK !') + _cor_zh_area(con,tzh_code=df.code,typ='ZNIEFF2') + print('INSERT cor_zh_area ZNIEFF2 OK !') + _cor_zh_area(con,tzh_code=df.code,typ='DEP') + print('INSERT cor_zh_area DEP OK !') + _cor_zh_area(con,tzh_code=df.code,typ='COM',cover=True) + print('INSERT cor_zh_area COM OK !') + _cor_zh_area(con,tzh_code=df.code,typ='ZPS') + print('INSERT cor_zh_area ZPS OK !') + _cor_zh_area(con,tzh_code=df.code,typ='SIC') + print('INSERT cor_zh_area SIC OK !') + _cor_zh_area(con,tzh_code=df.code,typ='SRAM') + print('INSERT cor_zh_area SRAM OK !') + _cor_zh_area(con,tzh_code=df.code,typ='ZICO') + print('INSERT cor_zh_area ZICO OK !') + + _cor_zh_(con,tzh_code=df.code,typ='hydro') + print('INSERT cor_zh_hydro OK !') + _cor_zh_(con,tzh_code=df.code,typ='rb') + print('INSERT cor_zh_rb OK !') + + + +def to_cor_(con,df,table): + + df.to_sql( + name=table,con=con,schema='pr_zh',if_exists='append',index=False + ) + + +def to_t_(con,df,table): + + res = gpd.pd.merge(df,get_id_t_zh(con,df.SITE_COD),left_on='SITE_COD',right_on='code')\ + .drop(columns=['SITE_COD','code']) + + res.to_sql( + name=table,con=con,schema='pr_zh',if_exists='append',index=False + ) + + +def add_bib_cb(df,con,humidity=None,is_ch=None): + """ + @df : pd.Dataframe. Tableau à 3 colonnes |lb_code|humidity|is_ch|. + @con : engine. Connexion PostgreSQL à la base de données cible. + @humidity : str. Caractère d'humidité a attribuer par défault + à chaque lb_code du tableau où la colonne est Null. + Défaut : None (désactivé) + Liste : P (Potentiellement), H (Humide). + @is_ch : bool. Caractère de présence/absence de carto d'habitat + à attribuer par défault à chaque lb_code du tableau où la colonne est Null. 
+ Défaut : None (désactivé) + Liste : True (existe), False (n'existe pas). + """ + table = 'bib_cb' + + if humidity is not None: + df.loc[df.humidity.isna(),'humidity'] = humidity.upper() + if is_ch is not None: + df.loc[df.is_ch.isna(),'is_ch'] = is_ch + + bib_cb = gpd.pd.read_sql_table(table,con,'pr_zh') + df = df[~df.lb_code.isin(bib_cb.lb_code)] + + if df.empty: + print('NO news data to insert !') + else: + df.to_sql( + name=table,con=con,schema='pr_zh',if_exists='append',index=False + ) + print('INSERT %s news rows !'%df.shape[0]) + + +if __name__ == '__main__': + from ..MEDWET2Geonat import get_t_zh + zh,_cor_lim_list = get_t_zh() diff --git a/5_GEONATURE/GN_ZH/tools/source_zone.py b/5_GEONATURE/GN_ZH/tools/source_zone.py new file mode 100755 index 0000000..3ac5476 --- /dev/null +++ b/5_GEONATURE/GN_ZH/tools/source_zone.py @@ -0,0 +1,222 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- + +from owslib.wfs import WebFeatureService +import geopandas as gpd +from geoalchemy2 import Geometry +from sqlalchemy import dialects + + + +def get_wfs(url, layer, bbox=None): + from geopandas import read_file + from requests import Request + + wfs = WebFeatureService(url=url) + item = dict(wfs.items())[layer] + crs = item.crsOptions[0].getcode() + + params = dict(service='WFS', version=wfs.version, request='GetFeature', + typeName=layer) + q = Request('GET', url, params=params).prepare().url + + data = read_file(q, bbox=bbox) + data.set_crs(crs=crs, inplace=True) + + if crs != 'EPSG:2154': + data.to_crs(epsg=2154, inplace=True) + + return data + + +def list_layer(url): + wfs = WebFeatureService(url=url) + lst = list(wfs.contents) + return lst + + +def Polygons_to_MultiPolygon(df,geom_col=None): + from shapely.geometry import MultiPolygon + from pandas import concat + + if not geom_col: + geom_col = df.geometry.name + + df = df.copy() + multi = df.loc[df.geom_type=='MultiPolygon'].copy() + poly = df.loc[df.geom_type=='Polygon'].copy() + poly[geom_col] = [MultiPolygon([geom]) for geom in df.loc[df.geom_type=='Polygon',geom_col] ] + df = concat([multi,poly]) + df.sort_index(inplace=True) + return df + + +def ref_geo(type_code,con): + sql = """ + SELECT l.* FROM ref_geo.l_areas l + JOIN ref_geo.bib_areas_types bib USING (id_type) + WHERE bib.type_code='{typ}' + """.format(typ=type_code) + return gpd.read_postgis(sql,con) + + +def to_lareas(df,dic,layer,con,dtypes={}): + + id_type = gpd.pd.read_sql_query(""" + SELECT id_type FROM ref_geo.bib_areas_types WHERE type_code='%s' + """%layer,con).id_type.values[0] + + ref = ref_geo(layer,con) + + df.rename(columns=dic, inplace=True) + df = Polygons_to_MultiPolygon( + df[~df.area_name.isin(ref.area_name)] + ) + df.rename_geometry('geom', inplace=True) + del_col = df.columns[~df.columns.isin(['geom',*[*set(dic.values())]])] + df.drop(columns=del_col, inplace=True) + + df['id_type'] = id_type + df['geojson_4326'] = df.to_crs(4326).geom.__geo_interface__['features'] + df['geojson_4326'] = [x['geometry'] for x in df['geojson_4326']] + df['centroid'] = 'SRID=2154;'+df.geom.centroid.to_wkt() + df['enable'] = True + + if df.empty: + print('AUCUN nouveaux zonages identifiés') + else: + df.to_postgis( + name='l_areas', + con=con, + schema='ref_geo', + if_exists='append', + index=False, + index_label=None, + chunksize=None, + dtype={ + 'centroid': Geometry(geometry_type='POINT',srid=2154), + **dtypes + }, + ) + print('INSERT %i zones'%df.shape[0]) + + + +def update_larea(con,layer,df,cols_updt=[],on='area_name'): + from pycen import update_to_sql + table = 'l_areas' 
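+    # Déroulé : récupère l'id_type du type_code `layer`, lit les lignes
+    # l_areas existantes de ce type, joint `df` sur la colonne `on`,
+    # puis met à jour `cols_updt` via pycen.update_to_sql (clef id_area).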
+ # idtyp = tuple(df.id_type.unique()) + + idtyp = gpd.pd.read_sql_query(""" + SELECT id_type FROM ref_geo.bib_areas_types WHERE type_code='%s' + """%layer,con).id_type.values[0] + + # if len(idtyp) > 1: + # where = 'in %s'%tuple(df.id_type.unique()) + # else : + where = '=%i'%idtyp + + sql = 'SELECT id_area, %s FROM ref_geo.l_areas WHERE id_type %s;'%(on,where) + larea = gpd.pd.read_sql_query(sql,con) + + to_updt = df.merge(larea, on=on) + update_to_sql( + df=to_updt[['id_area',*cols_updt]], + con=con, + table_name=table, + schema_name='ref_geo', + key_name='id_area', + ) + + +carmen = 'https://ws.carmencarto.fr/WFS/119/fxx_inpn?' +dic_layer = { + # ref_geo.bib_areas_types.type_code : layer_name from list_layer + 'ZNIEFF2':'Znieff2', + 'ZNIEFF1':'Znieff1', + 'APB':'Arretes_de_protection_de_biotope', + 'RNN':'Reserves_naturelles_nationales', + 'RNR':'Reserves_naturelles_regionales', + 'ZPS':'Zones_de_protection_speciale', + 'SIC':'Sites_d_importance_communautaire', + 'ZICO':'ZICO', + 'RNCFS':'Reserves_nationales_de_chasse_et_faune_sauvage', + 'RIPN':'Reserves_Integrales_de_Parcs_Nationaux', # NO zone in 74 + 'SCEN':'Terrains_acquis_des_Conservatoires_des_espaces_naturels', # Pas fait ; + # 'SCL':'', + # 'PNM':'', + 'PNR':'Parcs_naturels_regionaux', + 'RBIOL':'Reserves_biologiques', + 'RBIOS':'Reserves_de_la_biosphere', # NO zone in 74 + # 'RNC':'', + 'SRAM':'Sites_Ramsar', + # 'AA':'', + # 'ZSC':'', + # 'PSIC':'', + # 'PEC':'', + # ... +} + +if __name__ == "__main__": + from sqlalchemy import create_engine # pour lecture de la bd Géonature + from sqlalchemy.engine import URL + usr = 'geonatadmin' + pdw='g;gCEN74' + host='178.33.42.38' + bd ='geonature2db' + + eng = URL.create('postgresql+psycopg2',username=usr,password=pdw,host=host,database=bd) + conn = create_engine(eng) + # from pycen import con_gn as conn + + ref = ref_geo('DEP',conn) + ter = ref[ref.area_code=='74'] + + layer = 'Znieff1' + # layer_name = dic_layer[layer] + zon = get_wfs(carmen,layer,bbox=ter.unary_union) + zon = zon[zon.intersects(ter.unary_union)] + + dic_cols = { + 'ID_MNHN':'area_code', + 'NOM':'area_name', + 'URL':'source', + } + + dtyp = {} + if layer in ['ZPS','SIC']: + dic_cols = {**dic_cols,**{'SITECODE':'area_code','SITENAME':'area_name',}} + elif layer == 'RNN': + dic_cols = {**dic_cols,**{'URL_FICHE':'source','NOM_SITE':'area_name',}} + elif layer == 'ZICO': + dic_cols = {**dic_cols,**{'ID_SPN':'area_code'}} + elif layer == 'SCEN': + zon.NOM = zon.NOM.str.title() + elif layer == 'RBIOL': + dic_cols = { + **dic_cols, + **{'URL_FICHE':'source','NOM_SITE':'area_name', + 'comment':'comment','additional_data':'additional_data',} + } + zon['comment'] = 'Réserves biologiques ' + zon.CODE_R_ENP.replace( + {'I': 'intégrale','D' : 'dirigée'} + ) + zon['additional_data'] = [ + str({ zon.CODE_R_ENP.name : x }).replace("\'",'\"') + for x in zon.CODE_R_ENP + ] + dtyp = {'additional_data':dialects.postgresql.JSONB} + + + # IF UPDATE DATA + # update_larea(conn,layer,zon.rename(columns=dic_cols),['comment'],on='area_code') + + + to_lareas( + df=zon, + dic=dic_cols, + layer=layer, + con=conn, + dtypes=dtyp + ) + diff --git a/5_GEONATURE/GN_ZH/tools/zh2gn.py b/5_GEONATURE/GN_ZH/tools/zh2gn.py new file mode 100755 index 0000000..ff4a74f --- /dev/null +++ b/5_GEONATURE/GN_ZH/tools/zh2gn.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- + +import os +import pandas as pd +import geopandas as gpd +from geoalchemy2 import Geometry + +def move2filedir(): + ''' + Orientation vers le répertoire où est localiser le 
script zh2gn.py
+    '''
+    dir_path = os.path.dirname(os.path.realpath(__file__))
+    os.chdir(dir_path)
+
+    # print(os.getcwd())
+
+
+if __name__ == '__main__':
+
+    # move2filedir()
+    os.chdir('/home/colas/Documents/13_CEN74/medwet2gn_ZH/tools')
+
+    PATH = '../template/'
+    FILE_DATA = 'BDD_ZH_CCPR.xlsx'
+    GEO_DATA = 'ZH_CCPR_finalisées/ZH_asters_finalisées.shp'
+
+    data = pd.read_excel(PATH+FILE_DATA,'formulaire_type')
+    geo = gpd.read_file(PATH+GEO_DATA)
+
diff --git a/5_GEONATURE/MIGRATION/PLATIERE/reproject_data.py b/5_GEONATURE/MIGRATION/PLATIERE/reproject_data.py
new file mode 100644
index 0000000..7d27714
--- /dev/null
+++ b/5_GEONATURE/MIGRATION/PLATIERE/reproject_data.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+import geopandas as gpd
+from shapely import wkt
+from shapely.ops import nearest_points
+from os import path
+
+PATH = '/home/colas/Documents/9_PROJETS/6_GEONATURE/MIGRATION/PLATIERE/Envoi Colas Correction Données Mailles Par Les Ensembles Fonctionnels'
+FILE_DATA = 'Lépido_Mailles_2.csv'
+FILE_EF = 'Ensembles_Fonctionnels_Platière_Lambert93.shp'
+FILE_GRID = 'Mailles_Platière_Lambert93.shp'
+
+ef = gpd.read_file(path.join(PATH,FILE_EF),encoding='UTF-8')
+grid = gpd.read_file(path.join(PATH,FILE_GRID))
+data = gpd.pd.read_csv(path.join(PATH,FILE_DATA),sep=";",encoding='Windows 1252') # ['ASCII','Windows 1252']
+
+
+def remove_special_char(obj,space=False):
+    dict_char = {
+        r'[’]':"'",
+        r'[àáâãäå]':'.',
+        r'[èéêë]':'.',
+        r'[ìíîï]':'.',
+        r'[òóôõö]':'.',
+        r'[ùúûü]':'.',
+        r'[–?|!^]':"."
+    }
+    if space:
+        dict_char = {**dict_char, **{r'[ ]':""}}
+    return obj.replace(dict_char,regex=True)
+
+
+def nearest(row, geom_union, df1, df2, geom1_col='geometry', geom2_col='geometry', src_column=None):
+    """Find the nearest point and return the corresponding value from specified column."""
+
+    # Find the geometry that is closest
+    nearest = df2[geom2_col] == nearest_points(row[geom1_col], geom_union)[1]
+
+    # Get the corresponding value from df2 (matching is based on the geometry)
+    value = df2[nearest][src_column].values[0]
+
+    return value
+
+
+def near(point,df2,geom_union,src_column):
+    # find the nearest point and return the corresponding Place value
+    geom2_col = df2.geometry.name
+    nearest = df2[geom2_col] == nearest_points(point, geom_union)[1]
+    # print(nearest)
+    # print(df2[nearest][src_column])
+    return df2[nearest][src_column].values[0]
+
+
+if __name__ == "__main__":
+
+    # Si Long/Lat est un champ texte, transformation des colonnes en nombre.
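+    # (les virgules décimales sont remplacées par des points avant le cast en float)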
+ if data['Long autre'].dtype == object: + data['Long autre'] = data['Long autre'].str.replace(',','.').astype(float) + if data['Lat autre'].dtype == object: + data['Lat autre'] = data['Lat autre'].str.replace(',','.').astype(float) + if data['Longitude grade'].dtype == object: + data['Longitude grade'] = data['Longitude grade'].str.replace(',','.').astype(float) + if data['Latitude grade'].dtype == object: + data['Latitude grade'] = data['Latitude grade'].str.replace(',','.').astype(float) + + if grid['Champ1'].dtype == object: + grid['Champ1'] = grid['Champ1'].str.replace(',','.').astype(float) + + # Isolement des données précises + data_ok = data.loc[ + data['Long autre'].notna() & data['Lat autre'].notna() + ].copy() + + df = data.loc[ + data['Long autre'].isna()&data['Lat autre'].isna() + ].copy().sort_values('Numéro').reset_index() # 'Numero' + + # Data to GéoData + gdf_ok = gpd.GeoDataFrame( + data_ok, + geometry=gpd.points_from_xy(data_ok['Long autre'],data_ok['Lat autre']), + crs=2154 + ) + + + ## Traitement des données Non-Précises + # Re-construction de la grille + long = grid[grid.Champ1 < 25].copy() + lat = grid[grid.Champ1 > 25].copy() + grd = gpd.GeoSeries(list(long.unary_union.intersection(lat.unary_union).geoms),crs=2154)\ + .to_frame()\ + .rename_geometry('geom') + grd['x'] = grd.geom.x.copy() + grd['y'] = grd.geom.y.copy() + grd = grd.sort_values(['y','x'],ascending=[True,False]).reset_index(drop=True) + x0 = (grd.x[1]-grd.x[0])/2 + y0 = (grd.y[1]-grd.y[0])/2 + grd = grd.sort_values(['x','y'],ascending=[True,False]).reset_index(drop=True) + x1 = (grd.x[1]-grd.x[0])/2 + y1 = (grd.y[0]-grd.y[1])/2 + X = x0+x1 + Y = y0+y1 + + # test résultats sur grd + # grd = grd.sort_values(['y','x'],ascending=[True,False]).reset_index(drop=True) + # grd.x = grd.x + X + # grd.y = grd.y + Y + # grd.set_geometry(gpd.points_from_xy(grd.x,grd.y),inplace=True) + # grd.to_file(path.join(PATH,'result.gpkg'),driver='GPKG',layer='test_grid') + + # Correspondance [ Grades - Grid ] + DICT_GRID = dict(zip(grid.Champ1,grid.geometry.to_wkt())) + # Replace Grade by L93 + df[['Latitude grade','Longitude grade']] = df[['Latitude grade','Longitude grade']].replace(DICT_GRID) + + # Reconstruction du GéoDataframe + gdf = gpd.GeoDataFrame( + df.copy(), + geometry=gpd.GeoSeries.from_wkt(df['Latitude grade'])\ + .intersection(gpd.GeoSeries.from_wkt(df['Longitude grade'])), + crs=2154 + ) + gdf.rename_geometry('geom',inplace=True) + gdf['x'] = gdf.geom.x + X + gdf['y'] = gdf.geom.y + Y + gdf.set_geometry(gpd.points_from_xy(gdf.x,gdf.y),inplace=True) + + # + # Test + # ef_exp = ef.explode(index_parts=True) + # [ + # near(geom,ldt,ldt_union,'id_lieu_dit') + # for geom in df.geometrie + # ] + + # gdf['Code ensemble fonct'] + + gdf['esmb'] = remove_special_char(gdf['Ensemble fonctionnel']) + ef.nom_ensemb = remove_special_char(ef.nom_ensemb) + ef[ef.nom_ensemb.isin(gdf.esmb)].shape + # gdf[~gdf.esmb.isin(ef.nom_ensemb)] + # gdf['Code secteur fonctionnel'] + # df1.apply(nearest, geom_union=unary_union, df1=df1, df2=df2, geom1_col='centroid', src_column='id', axis=1) + # gdf.apply(nearest, geom_union=ef_exp.unary_union, df1=gdf, df2=ef_exp, geom1_col='centroid', src_column='code_ensem', axis=1) + + gdf.to_file(path.join(PATH,'result_2.gpkg'),driver='GPKG',layer='test_data') diff --git a/5_GEONATURE/MONITORINGS/get_data.py b/5_GEONATURE/MONITORINGS/get_data.py new file mode 100644 index 0000000..f056d52 --- /dev/null +++ b/5_GEONATURE/MONITORINGS/get_data.py @@ -0,0 +1,103 @@ +from pycen import con_gn +import geopandas 
as gpd
+import requests
+
+def get_list_observateur(code_liste,with_data=False):
+    """
+    @code_liste : str. Id ou code de la liste d'observateurs (utilisateurs.t_listes).
+    @with_data : bool.
+    """
+
+    if with_data is True:
+        sql = '''
+        SELECT * FROM utilisateurs.t_listes
+        JOIN (utilisateurs.cor_role_liste JOIN utilisateurs.t_roles t USING (id_role)) USING(id_liste)
+        '''
+    elif with_data is False:
+        sql = '''
+        SELECT code_liste list_name,id_role,nom_role||' '||prenom_role nom_complet FROM utilisateurs.t_listes
+        JOIN (utilisateurs.cor_role_liste JOIN utilisateurs.t_roles t USING (id_role)) USING(id_liste)
+        '''
+    else :
+        raise ValueError('Argument `with_data` does not accept value "%s"'%with_data)
+
+    wh = " WHERE (id_liste = {mod} OR code_liste = '{mod}')".format(mod=code_liste)
+
+    SQL = sql + wh
+    return gpd.pd.read_sql_query(sql=SQL,con=con_gn)
+
+
+
+def get_site(module,id_site=None,with_data=False):
+    """
+    @module : str. Nom du sous-module monitoring où chercher les sites.
+    @id_site : str. Nom du site au sein du sous-monitoring où chercher les sites.
+    @with_data : bool.
+    """
+
+    if with_data is True:
+        sql = '''
+        SELECT * FROM gn_monitoring.t_base_sites
+        JOIN (gn_monitoring.cor_site_module JOIN gn_commons.t_modules t USING (id_module)) USING (id_base_site)
+        '''
+    elif with_data is False:
+        sql = '''
+        SELECT id_base_site,base_site_name,
+        (ST_Y(geom)::numeric)::text||' '||(ST_X(geom)::numeric)::text geometry
+        FROM gn_monitoring.t_base_sites
+        JOIN (gn_monitoring.cor_site_module JOIN gn_commons.t_modules t USING (id_module)) USING (id_base_site)
+        '''
+    else :
+        raise ValueError('Argument `with_data` does not accept value "%s"'%with_data)
+
+    wh = " WHERE (t.module_code = '{mod}' OR t.module_label = '{mod}')".format(mod=module)
+
+    if id_site is not None:
+        wh = wh + " AND id_site = '%s'"%id_site
+
+    SQL = sql + wh
+    return gpd.pd.read_sql_query(sql=SQL,con=con_gn)
+    # return gpd.read_postgis(sql=SQL,con=con_gn,geom_col='geometry')
+
+
+def get_jdd(module):
+    url = 'https://geonature.cen-isere.fr/geonature/api/monitorings/util/init_data/{mod}'
+    r = requests.get(url.format(mod=module), auth=('user', 'pass'))
+    if r.status_code == 200:
+        d = r.json()
+        return gpd.pd.DataFrame(d['dataset'])[['id_dataset','dataset_name']]
+
+
+def get_taxon(id_list=100,args={}):
+    url = 'https://geonature.cen-isere.fr/taxhub/api/taxref/allnamebylist/{list}?limit=100000&offset=0'
+    if args is not None:
+        formArgs = '&'.join(['='.join([x,args[x]]) for x in args])
+        url = 'https://geonature.cen-isere.fr/taxhub/api/taxref/?'+formArgs
+    r = requests.get(url.format(list=id_list), auth=('user', 'pass'))
+    if r.status_code == 200:
+        d = r.json()
+        if args is not None:
+            d = r.json()['items']
+        df = gpd.pd.DataFrame(d)
+        return df[['cd_nom','lb_nom','nom_vern']].rename(columns={'lb_nom':'nom_complet'})
+
+
+if __name__ == "__main__":
+
+    module_name = "chiro"
+    id_tax_list = 100 # taxons occtax
+
+    PATH = '/media/colas/SRV/homer_geonat/home/geonatureadmin/protocoles_suivi/{mod}/odk_form/'.format(mod=module_name)
+    PATH = '/home/colas/Documents/9_PROJETS/6_GEONATURE/MODULES/ODK/test/CHIRO/'
+    csv_site_name = 'gn_sites.csv'
+    csv_jdd_name = 'gn_jdds.csv'
+    csv_tax_name = 'gn_taxons.csv'
+    get_site(module_name).to_csv(PATH+csv_site_name,sep=';',index=False)
+    get_jdd(module_name).to_csv(PATH+csv_jdd_name,sep=';',index=False)
+    get_taxon(args={'ordre':'Chiroptera'}).to_csv(PATH+csv_tax_name,sep=';',index=False)
+
+    code_liste = '2'
+    csv_obs_name = 'gn_observateurs.csv'
+    get_list_observateur(code_liste).to_csv(PATH+csv_obs_name,sep=';',index=False)
+
+
diff --git a/5_GEONATURE/create_listTaxon.py b/5_GEONATURE/create_listTaxon.py
new file mode 100644
index 0000000..31170b2
--- /dev/null
+++ b/5_GEONATURE/create_listTaxon.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+import geopandas as gpd
+import pandas as pd
+from sqlalchemy.engine import URL
+from sqlalchemy import create_engine
+
+# Parametres bdd HOMER - sicen2
+user_hom = 'cen_admin'
+pwd_hom = '#CEN38@venir'
+adr_hom = '91.134.194.221'
+port_hom = '5432'
+base_hom = 'sicen2'
+schema_hom = 'saisie'
+table_hom = 'saisie_observation'
+
+url_hom = URL.create('postgresql+psycopg2',
+    username = user_hom,
+    password = pwd_hom,
+    host = adr_hom,
+    database = base_hom,
+)
+con_sicen = create_engine(url_hom)
+
+# Parametres bdd HOMER - geonature
+user_geo = 'geonatadmin'
+pwd_geo = 'Ge0naT38*aDm1n'
+adr_geo = '91.134.194.221'
+port_geo = '5432'
+base_geo = 'geonature2db'
+schema_geo = 'taxonomie'
+table_geo = 'taxref'
+
+url_geo = URL.create('postgresql+psycopg2',
+    username = user_geo,
+    password = pwd_geo,
+    host = adr_geo,
+    database = base_geo,
+)
+con_geo = create_engine(url_geo)
+
+file = True
+path_file = '/home/colas/Documents/9_PROJETS/6_GEONATURE/MODULES/Macrophytes/Module_Macrophytes_Liste_Espèces.xlsx'
+
+taxref = False
+args = {
+    'sous_famille': 'Zygaeninae'
+}
+
+
+if file:
+    if path_file.rsplit('.',1)[-1] == 'xlsx':
+        f = pd.read_excel(path_file)
+        f.columns = f.columns.str.lower()
+        lst_cdnom = f.cd_nom.astype(str).unique()
+    elif path_file.rsplit('.',1)[-1] == 'csv':
+        f = pd.read_csv(path_file)
+        f.columns = f.columns.str.lower()
+        lst_cdnom = f.cd_nom.astype(str).unique()
+elif taxref:
+    sql = 'SELECT * FROM {sch}.{tab}'.format(sch = schema_geo,tab = table_geo)
+    sql += ' WHERE %s'%' AND '.join([
+        x+'='+"'%s'"%args[x] if isinstance(args[x],str) else x+' in %s'%str(tuple(args[x])) for x in args
+    ])
+    df_tax = pd.read_sql_query(sql,con_geo)
+    lst_cdnom = df_tax.cd_nom.astype(str).unique()
+else:
+    sql = 'SELECT * FROM {sch}.{tab}'.format(sch=schema_hom, tab=table_hom)
+    gdf_saisie = gpd.read_postgis(
+        sql = sql,
+        con = con_sicen,
+        geom_col = 'geometrie'
+    )
+    lst_cdnom = gdf_saisie.cd_nom.unique()
+
+# Récupération des taxons taxref non présents
+# dans la table bib_noms
+sql = """
+SELECT DISTINCT cd_nom,cd_ref,nom_vern,id_rang FROM {sch}.{tab}
+WHERE cd_nom IN ('{lst}')
+--AND cd_nom NOT IN (SELECT cd_nom FROM {sch}.bib_noms WHERE cd_nom IN ('{lst}'))
+""".format(
+    sch = schema_geo,
+    tab = table_geo,
+    lst = "','".join(lst_cdnom)
+)
+df_taxref = pd.read_sql_query(sql,con_geo)
+df_taxref.rename(columns={'nom_vern':'nom_francais'}, inplace=True)
+
+# Envoi de la liste dans la table taxonomie.bib_noms
+df_taxref.drop(columns=['id_rang']).to_sql(
+    name = 'bib_noms',
+    con = con_geo,
+    schema=schema_geo,
+    if_exists='append',
+    index=False
+)
+
+# récupération des IDs de la table taxonomie.bib_noms
+# sql = 'SELECT id_nom FROM {sch}.{tab} WHERE id_nom > {id_nom_max}'.format(
+id_liste = 104 # id_liste de la liste présente dans la table taxonomie.bib_listes
+sql = """
+SELECT id_nom FROM {sch}.{tab} WHERE cd_nom IN ({id_nom_max})
+AND id_nom NOT IN (SELECT id_nom FROM {sch}.cor_nom_liste WHERE id_liste = '{id_liste}')
+""".format(
+    sch = schema_geo,
+    tab = 'bib_noms',
+    id_nom_max = ",".join(lst_cdnom),
+    id_liste = id_liste
+)
+df = pd.read_sql_query(sql,con_geo)
+# df = pd.read_sql_table(
+#     table_name='bib_noms',
+#     con= con_geo,
+#     schema=schema_geo
+#
) +df['id_liste'] = id_liste # id_liste de la liste présente dans la table taxonomie.bib_listes +df = df[['id_liste','id_nom']] +# envoie de la liste dans la table taxonomie.bib_noms +df.to_sql( + name = 'cor_nom_liste', + con = con_geo, + schema=schema_geo, + if_exists='append', + index=False +) + +sql = 'SELECT * FROM {sch}.cor_nom_liste'.format( + sch = schema_geo) +cor_nom_liste = pd.read_sql_query(sql,con_geo) \ No newline at end of file diff --git a/5_GEONATURE/crsp_taxref_old.py b/5_GEONATURE/crsp_taxref_old.py new file mode 100644 index 0000000..330c228 --- /dev/null +++ b/5_GEONATURE/crsp_taxref_old.py @@ -0,0 +1,29 @@ + +import pandas as pd +import geopandas as gpd + +from pycen import con_gn + +path = '/home/colas/Documents/5_BDD/TAXON/TAXREF/TAXREF_V14_2020/' +cdnom_disparu = pd.read_excel(path+'CDNOM_DISPARUS.xls') +cdnom_disparu.columns = cdnom_disparu.columns.str.lower() + +def get_bib_list(name): + sql = """SELECT bn.* + FROM taxonomie.bib_noms bn + JOIN taxonomie.cor_nom_liste c USING (id_nom) + JOIN taxonomie.bib_listes bl USING (id_liste) + WHERE lower(bl.nom_liste) = '%s'; + """%name.lower() + return pd.read_sql_query(sql,con_gn) + +amp = get_bib_list('Amphibiens') +amp[amp.cd_nom.isin(cdnom_disparu.cd_nom_remplacement)] +odo = get_bib_list('Odonates') +odo[odo.cd_nom.isin(cdnom_disparu.cd_nom_remplacement)] +flo = get_bib_list('Flore') +flo[flo.cd_nom.isin(cdnom_disparu.cd_nom_remplacement)] + +cdnom_disparu[cdnom_disparu.cd_nom_remplacement.isin(amp.cd_nom)] +cdnom_disparu[cdnom_disparu.cd_nom_remplacement.isin(flo.cd_nom)] + diff --git a/5_GEONATURE/get_refNomencalture.py b/5_GEONATURE/get_refNomencalture.py new file mode 100644 index 0000000..2a82f5f --- /dev/null +++ b/5_GEONATURE/get_refNomencalture.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- + + +from pycen import con_gn as con +import pandas as pd + + +sch = 'ref_nomenclatures' +tab = ['t_nomenclatures','bib_nomenclatures_types'] +sql = """ +SELECT + a.id_nomenclature, + --a.cd_nomenclature, + a.mnemonique mnemo, + a.label_default as label, + a.definition_default def, + b.mnemonique bib_mnemo, + b.label_default bib_label, + b.definition_default bib_def, + a.active +FROM {sch}.{tab0} a + JOIN {sch}.{tab1} b USING (id_type) +""".format(sch=sch,tab0=tab[0],tab1=tab[1]) +df = pd.read_sql_query(sql,con) +df.set_index('id_nomenclature', inplace=True) + + +info_releve = [ + "Type de regroupement", # Type de relevé/regroupement + "Type d'information géographique", + "Nature d'objet géographique", + "Méthode de regroupement", +] +infos_occ_sens = [ + # "Techniques d'observation", # "Techniques d'observation" + "Statut biologique", + "Etat biologique de l'observation", # Etat biologique + "Statut biogéographique", + "Niveau de naturalité", # Naturalité + "Niveaux de sensibilité", # Sensibilité + "Niveaux de précision de diffusion souhaités", # Niveau de diffusion + "Statut d'observation", + "Existence d'un floutage sur la donnée", # Niveau de Floutage + "Statut de la source", + # "Référence bibliographique", + "Comportement des occurrences observées", # Comportement +] + +infos_denombremt = [ + "Stade de vie : stade de développement du sujet", # Stade de vie + "Sexe", + "Type de dénombrement", # Type du dénombrement + "Objet du dénombrement", +] +infos_determ_valid = [ + "Méthode de détermination", + # "Existence d'une preuve", + # "Preuve numérique", + # "Preuve non-numérique", + "Statut de validation", +] +df[df.bib_label.isin(info_releve)] +df[df.bib_label.isin(infos_occ_sens)] 
+df[df.bib_label.isin(infos_denombremt)] +df[df.bib_label.isin(infos_determ_valid)] + + +for i in df.sort_values('bib_label').bib_label.unique(): + if 'Télédétection' not in df[df.bib_label==i].label.tolist(): + continue + print() + print(i+' :') + print(df[df.bib_label==i].label.tolist()) + + +lst_champs = [ + "Type de regroupement", + # "Type d'information géographique", + # "Nature d'objet géographique", + # "Méthodes d'observation", + # "Statut biologique", + # "Etat biologique de l'observation", + # "Statut biogéographique", + # "Niveau de naturalité", + # "Niveaux de sensibilité", + # "Niveaux de précision de diffusion souhaités", + # "Statut d'observation", + # "Existence d'un floutage sur la donnée", + # "Statut de la source", + # "Comportement des occurrences observées", + # "Stade de vie : stade de développement du sujet", + # "Sexe", + # "Type de dénombrement", + # "Objet du dénombrement", + # "Méthode de détermination", + # "Statut de validation", +] +df[df.bib_label.isin(lst_champs)].bib_def.unique() \ No newline at end of file diff --git a/5_GEONATURE/insert_lareas.py b/5_GEONATURE/insert_lareas.py new file mode 100644 index 0000000..82c178a --- /dev/null +++ b/5_GEONATURE/insert_lareas.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- + + +import geopandas as gpd +from geoalchemy2 import Geometry + +from sqlalchemy.engine import URL +from sqlalchemy import create_engine +import pycen + +# Parametres bdd +user = 'geonatadmin' +pwd = 'Ge0naT38*aDm1n' +adr = '91.134.194.221' +port = '5432' +base = 'geonature2db' + +url = URL.create('postgresql+psycopg2', + username=user, + password=pwd, + host=adr, + database=base, +) +# con = conn +con = create_engine(url) + +def update_larea(df, cols_updt=[]): + from pycen import update_to_sql + table = 'l_areas' + idtyp = tuple(df.id_type.unique()) + if len(idtyp) > 1: + where = 'in %s'%tuple(df.id_type.unique()) + else : + where = '=%s'%tuple(df.id_type.unique()) + sql = 'SELECT id_area, area_name FROM ref_geo.l_areas WHERE id_type %s;'%where + larea = gpd.pd.read_sql_query(sql,con) + + to_updt = df.merge(larea, on='area_name') + update_to_sql( + df=to_updt[['id_area',*cols_updt]], + con=con, + table_name=table, + schema_name='ref_geo', + key_name='id_area', + ) + + + +# path0 = '/home/colas/Documents/5_BDD/BASSIN_VERSANT/' +path0 = '/home/colas/Documents/9_PROJETS/6_GEONATURE/AREA/' +dic = { + # 'ID_MNHN':'area_code', + 'NOM':'area_name', + 'URL':'source', +} +# path = 'BD_Topage_FXX_2022-shp/BD_TOPAGE_2022-shp/BassinVersantTopographique_FXX-shp/' +# file = 'BassinVersantTopographique_FXX.shp' +path = 'IN DATA BASE/znieff1/' +file = 'znieff1.shp' +id_type = 3 # id_type in bdd geonature : ref_geo.bib_areas_types +# 3 : 'ZNIEFF1', +# 4 : 'APB', +# 5 : 'RNN', +# 6 : 'RNR', +# 7 : 'ZPS', +# 8 : 'SIC', +# 9 : 'ZICO', +# 10 : 'RNCFS', +# 12 : 'SCEN', +# 15 : 'PNR', +# 16 : 'RBIOL', +# 20 : 'AA', +# 32 : 'Zones biogéographiques', + +# DIC for pr_zh +# dic = { +# # 'index' : 'id_hydro', +# 'index' : 'id_rb', +# 'TopoOH' : 'name', +# 'lib_ssbv' : 'name', +# } + +# sql = ''' +# SELECT a.* FROM ref_geo.l_areas a +# WHERE a.id_type = 25 +# ''' +sql = ''' +SELECT a.* FROM ref_geo.l_areas a +JOIN ref_geo.bib_areas_types b USING (id_type) +WHERE b.type_code = 'DEP' AND a.area_code = '38' +''' +reg = gpd.read_postgis(sql,con) + + +df = gpd.read_file(path0+path+file) +if file == 'N_ENP_PN_S_000.shp': + if id_type == 20: + df = df[df.CODE_R_ENP == 'AAPN'] + elif id_type == 1: + df = df[df.CODE_R_ENP == 'CPN'] + else: + raise 
Exception('ERROR N_ENP_PN_S_000.shp')
+if file == 'N_ENP_SCEN_S_000.shp':
+    df = df[df.ID_LOCAL.str.contains('CENIS')]
+if file != 'N_ENP_SCEN_S_000.shp':
+    intersect = gpd.sjoin(df,reg,op='intersects').index
+    df = df[df.index.isin(intersect)]
+
+
+# SPEC t_river_basin
+# df.reset_index(drop=True,inplace=True)
+# df.reset_index(drop=False,inplace=True)
+# df.loc[df['index']==0,'index'] = 28
+# df.loc[df['index']==1,'index'] = 29
+# df['lib_ssbv'] = df['lib_ssbv']+' - '+df['code_ssbv']
+
+df.rename_geometry('geom', inplace=True)
+df.dropna(how='all',axis=1,inplace=True)
+df = pycen.tools.Polygons_to_MultiPolygon(df)
+df.rename(columns=dic, inplace=True)
+# df['geom'] = df.geometry.to_wkt().copy()
+del_col = df.columns[~df.columns.isin(['geom',*[*dic.values()]])]
+del_col = df.columns[~df.columns.isin([*[*dic.values()]])]
+df.drop(columns=del_col, inplace=True)
+df['id_type'] = id_type
+
+update_larea(df,cols_updt=['area_code'])
+
+# SPEC t_river_basin / t_hydro_area
+# df.to_crs(4326,inplace=True)
+# df.geom = df.geom.to_wkt().copy()
+# df.to_wkt().to_sql(
+#     name='t_river_basin', con=con, schema='pr_zh',
+#     if_exists='append', index=False,
+#     dtype={
+#         'geom': Geometry(srid=4326)
+#         # 'id_lim_list':uuid.SafeUUID
+#     }
+# )
+
+df['geojson_4326'] = df.to_crs(4326).geom.__geo_interface__['features']
+df['geojson_4326'] = [x['geometry'] for x in df['geojson_4326']]
+# df['geojson_4326'] = df['geojson_4326'].astype(str)
+df['centroid'] = df.geom.centroid.to_wkt()
+df['enable'] = True
+df['source'] = 'IGN'
+
+df.to_postgis(
+    name='l_areas',
+    con=con,
+    schema='ref_geo',
+    if_exists='append',
+    index=False,
+    index_label=None,
+    chunksize=None,
+    dtype={
+        'centroid': Geometry(geometry_type='POINT',srid=2154)
+    },
+)
+
+
+sql = '''
+SELECT a.id_area,a.area_name,a.geom FROM ref_geo.l_areas a
+JOIN ref_geo.bib_areas_types b USING (id_type)
+WHERE b.type_code = 'COM'
+'''
+tu = gpd.read_postgis(sql,con)
+
+sql = '''
+SELECT a.id_zh,a.code,
+ST_MakeValid(ST_Transform(ST_SetSRID(a.geom,4326),2154)) geom
+FROM pr_zh.t_zh a
+--JOIN ref_geo.bib_areas_types b USING (id_type)
+--WHERE b.type_code = 'COM'
+'''
+to = gpd.read_postgis(sql,con)
\ No newline at end of file
diff --git a/5_GEONATURE/insert_utilisateurs.py b/5_GEONATURE/insert_utilisateurs.py
new file mode 100644
index 0000000..7b9389a
--- /dev/null
+++ b/5_GEONATURE/insert_utilisateurs.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+from sqlalchemy import dialects
+from pycen import con_gn
+import pandas as pd
+import json
+import ast
+import os
+
+DIR = '/home/colas/Documents/9_PROJETS/6_GEONATURE'
+FILE = 'IMPORTS/OBSERVATEURS/Table Observateurs Platière.csv'
+
+
+def to_organismes(df):
+    sch = 'utilisateurs'
+    tab = 'bib_organismes'
+    org = pd.read_sql_table(tab,con_gn,sch)
+    df = df[~df.nom_organisme.isin(org.nom_organisme)]
+    df.to_sql(
+        tab,con_gn,sch,
+        if_exists='append',
+        index=False)
+
+
+def get_organismes(LIST):
+    sch = 'utilisateurs'
+    tab = 'bib_organismes'
+    formlist = LIST.replace({"'":"''"},regex=True)
+    formlist = str(tuple(formlist)).replace('"',"'")
+    sql = '''
+    SELECT id_organisme,nom_organisme FROM {sch}.{tab}
+    WHERE nom_organisme IN {n_org}
+    ;'''.format(sch=sch,tab=tab,n_org=formlist)
+
+    return pd.read_sql_query(sql, con_gn)
+
+
+def to_roles(df):
+    sch = 'utilisateurs'
+    tab = 't_roles'
+    dtypes = {}
+    if 'champs_addi' in df.columns:
+        dtypes = {**dtypes,'champs_addi':dialects.postgresql.JSONB}
+
+    df.to_sql(
+        tab,con_gn,sch,
+        if_exists='append',
+        index=False,
+        dtype=dtypes)
+
+
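+# Déroulé du main ci-dessous : exclusion des lignes marquées 'à ne pas importer',
+# insertion des organismes manquants, remplacement du nom d'organisme par son id,
+# puis insertion des rôles avec statut/poste regroupés dans le champ JSONB champs_addi.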
+if __name__ == "__main__": + + df = pd.read_csv(os.path.join(DIR,FILE)) + df = df[df.rqe!='à ne pas importer'] + obs = df[['organisme_long']]\ + .drop_duplicates()\ + .rename(columns={'organisme_long':'nom_organisme'}) + to_organismes(obs) + + org = get_organismes(obs.nom_organisme) + dic_org = dict(zip(org.nom_organisme,org.id_organisme)) + df.organisme_long.replace(dic_org,inplace=True) + + role = df[['nom','prenom','organisme_long','statut','poste']]\ + .rename(columns={ + 'nom':'nom_role', + 'prenom':'prenom_role', + 'organisme_long':'id_organisme', + }) + role['champs_addi'] = json.loads( + role[['statut','poste']].to_json(orient="records",force_ascii=False) + ) + role.drop(columns=['statut','poste'],inplace=True) + to_roles(role) diff --git a/5_GEONATURE/pivot_bdc_status.py b/5_GEONATURE/pivot_bdc_status.py new file mode 100644 index 0000000..850d5c0 --- /dev/null +++ b/5_GEONATURE/pivot_bdc_status.py @@ -0,0 +1,78 @@ +import pandas as pd +import numpy as np +from pycen import con_gn + + +NAME_OUT = '/home/colas/Documents/tmp/v_bdc_status2.xlsx' +FileIn = '/media/colas/SRV/FICHIERS/TRANSFERTS-EQUIPE/CG/BDC_STATUTS_15.xls' +db = False + +if db : + sql = "SELECT * FROM taxonomie.v_bdc_status" + df = pd.read_sql_query(sql,con_gn) +if FileIn is not None : + df = pd.read_excel(FileIn) + df.columns = df.columns.str.lower() + geo = ['Isère','Rhône-Alpes','France','France métropolitaine',np.nan] + df = df[df.lb_adm_tr.isin(geo)] + + + +df.drop(columns=['full_citation'],inplace=True) +df.replace({ + r'[’]':"'", + r'[àáâãäå]':'a', + r'[èéêë]':'e', + r'[ìíîï]':'i', + r'[òóôõö]':'o', + r'[ùúûü]':'u', + r'[–]':"-" +},regex=True,inplace=True) + +DF = df.copy() + +# ['cd_nom', 'cd_ref', 'rq_statut', 'code_statut', 'label_statut', +# 'cd_type_statut', 'thematique', 'lb_type_statut', 'regroupement_type', +# 'cd_st_text', 'cd_sig', 'cd_doc', 'niveau_admin', 'cd_iso3166_1', +# 'cd_iso3166_2', 'doc_url', 'type_value'] +pivot = pd.pivot_table( + DF, + values='code_statut', + index=['cd_nom', 'cd_ref'#,'niveau_admin','lb_adm_tr' + ], + columns=['cd_type_statut'], + aggfunc=list,fill_value=None) +for c in pivot.columns: + pivot[c] = [x[0] if x is not np.NaN and len(x)==1 else x for x in pivot[c]] +pivot['DH'] = [','.join(x) if (x is not np.NaN) and (len(x)==2) else x for x in pivot['DH']] +pivot.DH.replace({'CDH':''},regex=True,inplace=True) + +pivlib = pd.pivot_table( + DF, + values='label_statut', + index=['cd_nom', 'cd_ref'#,'niveau_admin','lb_adm_tr' + ], + columns=['cd_type_statut'], + aggfunc=list,fill_value=None) +for c in pivlib.columns: + pivlib[c] = [x[0] if x is not np.NaN and len(x)==1 else x for x in pivlib[c]] +pivlib['DH'] = [','.join(x) if (x is not np.NaN) and (len(x)==2) else x for x in pivlib['DH']] +pivlib.DH.replace({'CDH':''},regex=True,inplace=True) + +print('INIT writer') +with pd.ExcelWriter(NAME_OUT) as writer: + DF.to_excel( + writer,sheet_name='v_bdc_status' + ) + # writer.save() + print('v_bdc_status OK !') + pivot.to_excel( + writer,sheet_name='pivot_table' + ) + # writer.save() + print('pivot_table OK !') + pivlib.to_excel( + writer,sheet_name='pivot_libel' + ) + writer.save() + print('pivot_libel OK !') diff --git a/5_GEONATURE/pivot_bdc_status_v2.py b/5_GEONATURE/pivot_bdc_status_v2.py new file mode 100644 index 0000000..46edf58 --- /dev/null +++ b/5_GEONATURE/pivot_bdc_status_v2.py @@ -0,0 +1,78 @@ +from pycen import con_gn +import numpy as np +import pandas as pd + +# Liste des CD_NOM en entrée +taxlist = 
pd.read_excel('/media/colas/SRV/FICHIERS/TRANSFERTS-EQUIPE/CG/BOCA_CD_nom.xlsx') + +sql = """ + SELECT + t.cd_nom, + t.cd_ref, + t.regne, + t.phylum, + t.classe, + t.ordre, + t.famille, + t.group1_inpn, + t.group2_inpn, + t.nom_vern, + t.nom_complet, + t.nom_valide, + t.lb_nom, + --s.* + s.code_statut, + s.cd_type_statut, + s.label_statut + FROM taxonomie.v_taxref_all_listes t + JOIN taxonomie.v_bdc_status s USING (cd_nom) + WHERE t.cd_nom IN {cd_nom} +;""".format(cd_nom = tuple(taxlist.CD_NOM.astype(str)) ) +df = pd.read_sql_query(sql,con_gn) +df.to_csv('/media/colas/SRV/FICHIERS/TRANSFERTS-EQUIPE/LC/BOCA_CD_NOM_STATUS.csv') + +pivot = pd.pivot_table( + df, + values='code_statut', + index=['cd_nom', 'cd_ref','lb_nom'#,'niveau_admin','lb_adm_tr' + ], + columns=['cd_type_statut'], + aggfunc=list,fill_value=None) + +for c in pivot.columns: + pivot[c] = [x[0] if x is not np.NaN and len(x)==1 else x for x in pivot[c]] +pivot['DH'] = [','.join(x) if (x is not np.NaN) and (len(x)==2) else x for x in pivot['DH']] +pivot.DH.replace({'CDH':''},regex=True,inplace=True) + + +pivlib = pd.pivot_table( + df, + values='label_statut', + index=['cd_nom', 'cd_ref','lb_nom'#,'niveau_admin','lb_adm_tr' + ], + columns=['cd_type_statut'], + aggfunc=list,fill_value=None) +for c in pivlib.columns: + pivlib[c] = [x[0] if x is not np.NaN and len(x)==1 else x for x in pivlib[c]] +pivlib['DH'] = [','.join(x) if (x is not np.NaN) and (len(x)==2) else x for x in pivlib['DH']] +pivlib.DH.replace({'CDH':''},regex=True,inplace=True) + + +print('INIT writer') +NAME_OUT = '/media/colas/SRV/FICHIERS/TRANSFERTS-EQUIPE/LC/BOCA_CD_NOM_STATUS.xlsx' +with pd.ExcelWriter(NAME_OUT) as writer: + df.to_excel( + writer,sheet_name='v_bdc_status' + ) + # writer.save() + print('v_bdc_status OK !') + pivot.to_excel( + writer,sheet_name='pivot_table' + ) + # writer.save() + print('pivot_table OK !') + pivlib.to_excel( + writer,sheet_name='pivot_libel' + ) + writer.save() + print('pivot_libel OK !') diff --git a/5_GEONATURE/source_zone.py b/5_GEONATURE/source_zone.py new file mode 100644 index 0000000..3ac5476 --- /dev/null +++ b/5_GEONATURE/source_zone.py @@ -0,0 +1,222 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- + +from owslib.wfs import WebFeatureService +import geopandas as gpd +from geoalchemy2 import Geometry +from sqlalchemy import dialects + + + +def get_wfs(url, layer, bbox=None): + from geopandas import read_file + from requests import Request + + wfs = WebFeatureService(url=url) + item = dict(wfs.items())[layer] + crs = item.crsOptions[0].getcode() + + params = dict(service='WFS', version=wfs.version, request='GetFeature', + typeName=layer) + q = Request('GET', url, params=params).prepare().url + + data = read_file(q, bbox=bbox) + data.set_crs(crs=crs, inplace=True) + + if crs != 'EPSG:2154': + data.to_crs(epsg=2154, inplace=True) + + return data + + +def list_layer(url): + wfs = WebFeatureService(url=url) + lst = list(wfs.contents) + return lst + + +def Polygons_to_MultiPolygon(df,geom_col=None): + from shapely.geometry import MultiPolygon + from pandas import concat + + if not geom_col: + geom_col = df.geometry.name + + df = df.copy() + multi = df.loc[df.geom_type=='MultiPolygon'].copy() + poly = df.loc[df.geom_type=='Polygon'].copy() + poly[geom_col] = [MultiPolygon([geom]) for geom in df.loc[df.geom_type=='Polygon',geom_col] ] + df = concat([multi,poly]) + df.sort_index(inplace=True) + return df + + +def ref_geo(type_code,con): + sql = """ + SELECT l.* FROM ref_geo.l_areas l + JOIN ref_geo.bib_areas_types bib USING 
(id_type) + WHERE bib.type_code='{typ}' + """.format(typ=type_code) + return gpd.read_postgis(sql,con) + + +def to_lareas(df,dic,layer,con,dtypes={}): + + id_type = gpd.pd.read_sql_query(""" + SELECT id_type FROM ref_geo.bib_areas_types WHERE type_code='%s' + """%layer,con).id_type.values[0] + + ref = ref_geo(layer,con) + + df.rename(columns=dic, inplace=True) + df = Polygons_to_MultiPolygon( + df[~df.area_name.isin(ref.area_name)] + ) + df.rename_geometry('geom', inplace=True) + del_col = df.columns[~df.columns.isin(['geom',*[*set(dic.values())]])] + df.drop(columns=del_col, inplace=True) + + df['id_type'] = id_type + df['geojson_4326'] = df.to_crs(4326).geom.__geo_interface__['features'] + df['geojson_4326'] = [x['geometry'] for x in df['geojson_4326']] + df['centroid'] = 'SRID=2154;'+df.geom.centroid.to_wkt() + df['enable'] = True + + if df.empty: + print('AUCUN nouveaux zonages identifiés') + else: + df.to_postgis( + name='l_areas', + con=con, + schema='ref_geo', + if_exists='append', + index=False, + index_label=None, + chunksize=None, + dtype={ + 'centroid': Geometry(geometry_type='POINT',srid=2154), + **dtypes + }, + ) + print('INSERT %i zones'%df.shape[0]) + + + +def update_larea(con,layer,df,cols_updt=[],on='area_name'): + from pycen import update_to_sql + table = 'l_areas' + # idtyp = tuple(df.id_type.unique()) + + idtyp = gpd.pd.read_sql_query(""" + SELECT id_type FROM ref_geo.bib_areas_types WHERE type_code='%s' + """%layer,con).id_type.values[0] + + # if len(idtyp) > 1: + # where = 'in %s'%tuple(df.id_type.unique()) + # else : + where = '=%i'%idtyp + + sql = 'SELECT id_area, %s FROM ref_geo.l_areas WHERE id_type %s;'%(on,where) + larea = gpd.pd.read_sql_query(sql,con) + + to_updt = df.merge(larea, on=on) + update_to_sql( + df=to_updt[['id_area',*cols_updt]], + con=con, + table_name=table, + schema_name='ref_geo', + key_name='id_area', + ) + + +carmen = 'https://ws.carmencarto.fr/WFS/119/fxx_inpn?' +dic_layer = { + # ref_geo.bib_areas_types.type_code : layer_name from list_layer + 'ZNIEFF2':'Znieff2', + 'ZNIEFF1':'Znieff1', + 'APB':'Arretes_de_protection_de_biotope', + 'RNN':'Reserves_naturelles_nationales', + 'RNR':'Reserves_naturelles_regionales', + 'ZPS':'Zones_de_protection_speciale', + 'SIC':'Sites_d_importance_communautaire', + 'ZICO':'ZICO', + 'RNCFS':'Reserves_nationales_de_chasse_et_faune_sauvage', + 'RIPN':'Reserves_Integrales_de_Parcs_Nationaux', # NO zone in 74 + 'SCEN':'Terrains_acquis_des_Conservatoires_des_espaces_naturels', # Pas fait ; + # 'SCL':'', + # 'PNM':'', + 'PNR':'Parcs_naturels_regionaux', + 'RBIOL':'Reserves_biologiques', + 'RBIOS':'Reserves_de_la_biosphere', # NO zone in 74 + # 'RNC':'', + 'SRAM':'Sites_Ramsar', + # 'AA':'', + # 'ZSC':'', + # 'PSIC':'', + # 'PEC':'', + # ... 
+} + +if __name__ == "__main__": + from sqlalchemy import create_engine # pour lecture de la bd Géonature + from sqlalchemy.engine import URL + usr = 'geonatadmin' + pdw='g;gCEN74' + host='178.33.42.38' + bd ='geonature2db' + + eng = URL.create('postgresql+psycopg2',username=usr,password=pdw,host=host,database=bd) + conn = create_engine(eng) + # from pycen import con_gn as conn + + ref = ref_geo('DEP',conn) + ter = ref[ref.area_code=='74'] + + layer = 'Znieff1' + # layer_name = dic_layer[layer] + zon = get_wfs(carmen,layer,bbox=ter.unary_union) + zon = zon[zon.intersects(ter.unary_union)] + + dic_cols = { + 'ID_MNHN':'area_code', + 'NOM':'area_name', + 'URL':'source', + } + + dtyp = {} + if layer in ['ZPS','SIC']: + dic_cols = {**dic_cols,**{'SITECODE':'area_code','SITENAME':'area_name',}} + elif layer == 'RNN': + dic_cols = {**dic_cols,**{'URL_FICHE':'source','NOM_SITE':'area_name',}} + elif layer == 'ZICO': + dic_cols = {**dic_cols,**{'ID_SPN':'area_code'}} + elif layer == 'SCEN': + zon.NOM = zon.NOM.str.title() + elif layer == 'RBIOL': + dic_cols = { + **dic_cols, + **{'URL_FICHE':'source','NOM_SITE':'area_name', + 'comment':'comment','additional_data':'additional_data',} + } + zon['comment'] = 'Réserves biologiques ' + zon.CODE_R_ENP.replace( + {'I': 'intégrale','D' : 'dirigée'} + ) + zon['additional_data'] = [ + str({ zon.CODE_R_ENP.name : x }).replace("\'",'\"') + for x in zon.CODE_R_ENP + ] + dtyp = {'additional_data':dialects.postgresql.JSONB} + + + # IF UPDATE DATA + # update_larea(conn,layer,zon.rename(columns=dic_cols),['comment'],on='area_code') + + + to_lareas( + df=zon, + dic=dic_cols, + layer=layer, + con=conn, + dtypes=dtyp + ) + diff --git a/6_SQL/foreign_table.py b/6_SQL/foreign_table.py new file mode 100644 index 0000000..7e148d1 --- /dev/null +++ b/6_SQL/foreign_table.py @@ -0,0 +1,121 @@ +from pycen import con + + +url = 'https://wxs.ign.fr/agriculture/geoportail/wfs?SERVICE=WFS&VERSION=2.0.0&REQUEST=GetCapabilities' +layer = 'RPG.2020:parcelles_graphiques.title' +wpr_name = 'rpgwfs' +tab_name = 'rpg2020_parcellaire_graphique' + +drop_fgn = 'DROP SERVER IF EXISTS %s CASCADE;'%wpr_name +fgn_data_wrapper = ''' +CREATE SERVER IF NOT EXISTS %s + FOREIGN DATA WRAPPER ogr_fdw + OPTIONS (datasource 'WFS:%s', format 'WFS') +'''%(wpr_name,url) + +table = ''' +DROP FOREIGN TABLE IF EXISTS %s; +CREATE FOREIGN TABLE IF NOT EXISTS flux_geo.%s ( + id_parcel varchar, + surf_parcel float, + code_cultu varchar, + code_group varchar, + culture_d1 varchar, + culture_d2 varchar, + geom public.geometry(multipolygon, 4326) +) +SERVER %s +OPTIONS (layer '%s'); +'''%(tab_name,tab_name,wpr_name,layer) + +with con.begin() as cnx: + cnx.execute(drop_fgn) +with con.begin() as cnx: + cnx.execute(fgn_data_wrapper) + +with con.begin() as cnx: + cnx.execute(table) + +####### +####### +from pycen import con_fon as con +from sqlalchemy import text +db = 'azalee' +host = '91.134.194.221' +port = '5432' +srv_name = 'fdw_azalee' +tab_name = 'inventaire_zh' +sch_name = 'inventaires' +user = 'cen_admin' +pwd = '#CEN38@venir' + + +create_ext = 'CREATE EXTENSION IF NOT EXISTS postgres_fdw;' +drop_fgn = 'DROP SERVER IF EXISTS %s CASCADE;'%srv_name +fgn_data_wrapper = ''' +CREATE SERVER IF NOT EXISTS %s + FOREIGN DATA WRAPPER postgres_fdw + OPTIONS (dbname '%s', host '%s', port '%s') +'''%(srv_name,db,host,port) + +fgn_user_wrapper = ''' +CREATE USER MAPPING IF NOT EXISTS FOR %s + SERVER %s + OPTIONS (user '%s', password '%s'); +'''%(user,srv_name,user,pwd) + +table = ''' +CREATE FOREIGN TABLE IF NOT EXISTS 
{sch_name}.{tab_name} ( + site_code varchar(10), + nom varchar, + auteur_site varchar, + auteur_geom varchar, + auteur_last_maj varchar, + date_site date, + date_geom date, + date_last_maj date, + type_milieu varchar, + type_site varchar, + typo_sdage varchar, + rmq_site text, + rmq_fct_majeur text, + rmq_interet_patri text, + rmq_bilan_menace text, + rmq_orient_act text, + rmq_usage_process text, + code_cb varchar, + lib_cb text, + activ_hum varchar, + impact varchar, + "position" varchar, + rmq_activ_hum text, + connexion varchar, + subm_orig varchar, + subm_freq varchar, + subm_etend varchar, + fct_bio varchar, + fct_hydro varchar, + int_patri varchar, + "val_socioEco" varchar, + crit_delim varchar, + crit_def_esp varchar, + entree_eau_reg varchar, + entree_eau_perm varchar, + entree_eau_topo varchar, + sortie_eau_reg varchar, + sortie_eau_perm varchar, + sortie_eau_topo varchar, + geom geometry(geometry,2154) +) + SERVER {fgn_server} + OPTIONS (schema_name 'zones_humides', table_name 'v_zoneshumides'); +'''.format( +sch_name=sch_name, tab_name=tab_name, fgn_server=srv_name +) + +with con.begin() as cnx: + cnx.execute(create_ext) + cnx.execute(drop_fgn) + cnx.execute(fgn_data_wrapper) + cnx.execute(fgn_user_wrapper) + cnx.execute(text(table)) \ No newline at end of file diff --git a/6_SQL/manage_user.py b/6_SQL/manage_user.py new file mode 100644 index 0000000..aa2c3ca --- /dev/null +++ b/6_SQL/manage_user.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- + +from sqlalchemy.engine import URL +from sqlalchemy import create_engine,text +import pandas as pd +from pycen import con_bdcen,con_fon + +# Parametres bdd +# user = 'cen_admin' +# pwd = '#CEN38@venir' +# user = 'admin_ra' +# pwd = 'adminRAsig' + +# adr = '91.134.194.221' +# port = '5432' +# base = 'bd_cen38' + +# url = URL.create('postgresql+psycopg2', +# username=user, +# password=pwd, +# host=adr, +# database=base, +# ) +# con = create_engine(url) + +usr = { + 'usr_siege': ['abavarot','aguedou','apagano','cgeier','dlopez-pinot','dmichallet','jlgrossi','mjuton','msimean','nbiron'], + 'usr_glps': ['lquay','gmaillet','jlucas'], + 'usr_plt' : ['bpont','yprat-mairet','clebreton','cfregat','mbounous'] +} + + +def revoke_group(con,user,group): + alter = 'ALTER GROUP "{grp}" DROP USER "{usr}";' + with con.begin() as cnx: + cnx.execute(alter.format(usr=user,grp=group)) + + +def add_group(con,user,group): + alter = 'ALTER GROUP "{grp}" ADD USER "{usr}";' + with con.begin() as cnx: + cnx.execute(alter.format(usr=user,grp=group)) + + +def deactivate_user(con,user): + alter = 'ALTER USER "{usr}" WITH NOLOGIN;' + with con.begin() as cnx: + cnx.execute(alter.format(usr=user)) + + +def add_limit_user(con,user,date_limit): + alter = """ALTER USER "{usr}" VALID UNTIL '{date} 00:00:00';""" + with con.begin() as cnx: + cnx.execute(alter.format(usr=user,date=date_limit)) + + +def create_grp(con,grp): + sql = """CREATE ROLE {grp} WITH + NOSUPERUSER + NOCREATEDB + NOCREATEROLE + INHERIT + NOLOGIN + NOREPLICATION + NOBYPASSRLS + CONNECTION LIMIT -1;""".format(grp=grp) + with con.begin() as cnx: + cnx.execute(sql) + +# sql = "select * from pg_catalog.pg_user" # where tableowner = 'gpasquier'" + +for grp in usr.keys(): + # create grp_role + create_grp(con_fon,grp) + + # create usr + for user in usr[grp]: + sql = """ + DO + $do$ + BEGIN + IF NOT EXISTS ( + SELECT FROM pg_catalog.pg_roles + WHERE rolname = '{usr}') THEN + + CREATE ROLE "{usr}" LOGIN PASSWORD '{usr}'; + END IF; + GRANT {grp} TO "{usr}" ; + END 
$do$""".format(usr=user,grp=grp)
+        # NB: 'con' was not defined in this script; con_fon (imported from
+        # pycen above) is the engine already used by create_grp().
+        with con_fon.begin() as cnx:
+            cnx.execute(text(sql))
+
+
+# grant grp_role
+for grp in usr.keys():
+    sql = """GRANT grp_consult TO {usr} ;""".format(usr=grp)
+    with con_fon.begin() as cnx:
+        cnx.execute(sql)
+
diff --git a/6_SQL/memo.py b/6_SQL/memo.py
new file mode 100644
index 0000000..e8dfc79
--- /dev/null
+++ b/6_SQL/memo.py
@@ -0,0 +1,13 @@
+from pycen import con  # missing import added: engine used below
+
+sql = '''
+CREATE TABLE IF NOT EXISTS sites.hist_id_site (
+    id serial4 NOT NULL,
+    id_site_old varchar(10) NOT NULL,
+    id_site_new varchar(10) NOT NULL,
+    "date" timestamp NOT NULL DEFAULT NOW(),
+    CONSTRAINT hist_id_site_pkey PRIMARY KEY (id),
+    CONSTRAINT hist_id_site_un UNIQUE (id_site_old, id_site_new)
+);'''
+
+with con.begin() as cnx:
+    cnx.execute(sql)
\ No newline at end of file
diff --git a/6_SQL/trigger.py b/6_SQL/trigger.py
new file mode 100644
index 0000000..a282f87
--- /dev/null
+++ b/6_SQL/trigger.py
@@ -0,0 +1,50 @@
+
+from pycen import con
+# import pycen
+
+s1 = '''create trigger get_communes_intersects after
+insert
+    or
+delete
+    or
+update
+    on
+    sites.r_sites_geom for each row execute function sites.refresh_site_comm()'''
+
+s2 = '''CREATE OR REPLACE FUNCTION sites.refresh_site_comm()
+ RETURNS trigger
+ LANGUAGE plpgsql
+AS $function$
+    BEGIN
+        with t1 as (
+            SELECT
+                id,
+                nom_com,
+                insee_com,
+                geom
+            FROM flux_geo.vm_communes_isere
+        ),
+        t2 as (SELECT
+            s.id_site,
+            json_object_agg(t1.insee_com,t1.nom_com) all_comm,
+            max(s.date) date,
+            s.geom
+            -- FROM s1 s, t1
+            FROM sites.r_sites_geom s, t1
+            WHERE st_intersects(s.geom,t1.geom)
+            and s.id_site = NEW.id_site
+            GROUP BY s.id_site,s.geom,s.date
+            ORDER BY s.id_site, s.date DESC)
+        UPDATE sites.sites SET nom_com_json = t2.all_comm
+        FROM t2
+        WHERE id = t2.id_site;
+        IF NOT FOUND THEN
+            -- t2 is out of scope after the UPDATE above; reset the row directly
+            UPDATE sites.sites SET nom_com_json = null WHERE id = NEW.id_site;
+        END IF;
+        RETURN NEW;
+    END;
+$function$
+;'''
+with con.begin() as cnx:
+    # create the function first: CREATE TRIGGER fails if it does not exist yet
+    cnx.execute(s2)
+    cnx.execute(s1)
\ No newline at end of file
diff --git a/6_SQL/vm.py b/6_SQL/vm.py
new file mode 100644
index 0000000..365f7d2
--- /dev/null
+++ b/6_SQL/vm.py
@@ -0,0 +1,22 @@
+from pycen import con
+
+tab = 'rpg2020_ilots_anonymes_reg'
+vm_tab = 'vm_'+tab[:-3]+'isere'
+
+
+vm = '''
+CREATE MATERIALIZED VIEW ref_territoire.{vm_name}
+TABLESPACE pg_default
+AS SELECT s.*
+    FROM ref_territoire.{table} s, ref_territoire.dept_isere reg
+    WHERE ST_INTERSECTS(s.geom,reg.geom)
+WITH DATA;
+
+-- View indexes:
+CREATE UNIQUE INDEX {vm_name}_idx ON ref_territoire.{vm_name} USING btree (id_ilot);
+CREATE INDEX {vm_name}_geom ON ref_territoire.{vm_name} USING gist (geom);
+'''.format(vm_name=vm_tab,table=tab)
+
+with con.begin() as cnx:
+    cnx.execute(vm)
+
diff --git a/7_LOGEPROJ/funcs_macro (pyxll).py b/7_LOGEPROJ/funcs_macro (pyxll).py
new file mode 100644
index 0000000..6fb8802
--- /dev/null
+++ b/7_LOGEPROJ/funcs_macro (pyxll).py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+from pyxll import xl_func
+
+@xl_func
+def python_func():
+    return "Hello from Python !"
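+
+# A typed variant, sketched here for reference (assumes the documented
+# pyxll signature-string syntax; adjust to your pyxll.cfg setup):
+@xl_func("float x, float y: float")
+def python_add(x, y):
+    # exposed in Excel as =python_add(x, y)
+    return x + y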
+ diff --git a/7_LOGEPROJ/funcs_macro (xlwings).py b/7_LOGEPROJ/funcs_macro (xlwings).py new file mode 100644 index 0000000..f351301 --- /dev/null +++ b/7_LOGEPROJ/funcs_macro (xlwings).py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +import xlwings as xw +import pandas as pd + +wb = xw.Book() # this will open a new workbook +wb = xw.Book('FileName.xlsx') # connect to a file that is open or in the current working directory +sheet = wb.sheets['Sheet1'] + + +df = pd.DataFrame([[1,2], [3,4]], columns=['a', 'b']) +sheet['A1'].value = df +sheet['A1'].options(pd.DataFrame, expand='table').value + + +@xw.func +@xw.arg('x', pd.DataFrame) +def correl2(x): + # x arrives as DataFrame + return x.corr() \ No newline at end of file diff --git a/7_LOGEPROJ/recup_backups.py b/7_LOGEPROJ/recup_backups.py new file mode 100644 index 0000000..f0d1c4b --- /dev/null +++ b/7_LOGEPROJ/recup_backups.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python3 + +# PLT 2022-03-29 +# Script de téléchargement des sauvegardes des bases de données +# logeproj sur le serveur SFTP + +from contextlib import nullcontext +import argparse +import subprocess +import shutil +import datetime +from time import strftime +from uuid import SafeUUID +import paramiko +import getpass + + +def demande_nom_utilisateur(p_hote): + return str(input(f"Entrer le nom d'utilisateur de connexion à {p_hote} :")) + + +def demande_mot_de_passe(p_hote, p_utilisateur): + return getpass.getpass( + f"Entrer le mot de passe de l'utilisateur {p_utilisateur} pour le serveur {p_hote} :" + ) + + +def verifie_ping_host(p_hote): + commande_ping = ["ping", f"{p_hote}", "-c1"] + commande_ping_run = subprocess.run(commande_ping, stdout=None) + commande_returncode = commande_ping_run.returncode + if commande_returncode != 0: + return False + else: + return True + + +def verifie_presence_commande_sftp(): + return shutil.which("sftp") + + +def effectue_verifications(p_hote): + if not verifie_ping_host(p_hote): + print("Erreur : hôte injoignable") + exit(0) + if not verifie_presence_commande_sftp(): + print("Erreur : commande sftp introuvable") + exit(0) + + +def demande_infos_utilisateur(p_hote): + utilisateur = demande_nom_utilisateur(p_hote) + mdp = "" + if not utilisateur: + print("Erreur : aucun nom d'utilisateur saisi") + exit(0) + else: + mdp = demande_mot_de_passe(p_hote, utilisateur) + return utilisateur, mdp + + +def etablit_connexion_SFTP(p_hote, p_port, p_utilisateur, p_motdepasse): + connexion_ssh = paramiko.SSHClient() + connexion_ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + connexion_ssh.connect(p_hote, p_port, p_utilisateur, p_motdepasse) + return connexion_ssh.open_sftp() + + +def clos_connexion_SFTP(p_connexion_SFTP): + p_connexion_SFTP.close() + + +def cherche_chemin_rep_sauvegardes_du_jour(p_connexion_sftp, p_rep_racine, p_date): + liste_rep_backups_du_jour = [] + date_backup = "1900-01-01" + rep_split = ["1900", "01", "01"] + + p_connexion_sftp.chdir(p_rep_racine) + + for rep in p_connexion_sftp.listdir(): + rep_split = rep.split("-") + date_backup = f"{rep_split[0]}-{rep_split[1]}-{rep_split[2]}" + + if date_backup == p_date.strftime("%Y-%m-%d"): + liste_rep_backups_du_jour.append(rep) + + nombre_de_rep = len(liste_rep_backups_du_jour) + + if nombre_de_rep > 1: + print( + f"Le serveur de sauvegarde dispose de plusieurs répertoires de sauvegardes datés du {p_date.strftime('%d/%m/%Y')} :" + ) + for indice in range(0, nombre_de_rep): + numero_rep = indice + 1 + print(f"- {numero_rep} : {liste_rep_backups_du_jour[indice]}") + choix = int( + input( + 
"Entrer le numéro du répertoire dont vous souhaitez télécharger les sauvegardes : " + ) + ) + rep_sauvegarde_du_jour = liste_rep_backups_du_jour[choix - 1] + else: + rep_sauvegarde_du_jour = liste_rep_backups_du_jour[0] + + chemin_sauvegardes_du_jour = f"/{p_rep_racine}/{rep_sauvegarde_du_jour}" + return chemin_sauvegardes_du_jour + + +def telecharge_un_fichier( + p_chemin_fichier_distant, p_chemin_fichier_local, p_connexionSFTP +): + print( + f"- Téléchargement de {p_chemin_fichier_distant} dans {p_chemin_fichier_local}" + ) + p_connexionSFTP.get(p_chemin_fichier_distant, p_chemin_fichier_local) + + +def telecharge_toutes_sauvegardes( + p_chemin_rep_distant_sauvegardes, p_chemin_rep_local, p_connexionSFTP +): + print( + f"Téléchargement de toutes les sauvegardes de {p_chemin_rep_distant_sauvegardes}" + ) + liste_fichiers = p_connexionSFTP.listdir(p_chemin_rep_distant_sauvegardes) + for sauvegarde in liste_fichiers: + sauvegarde_split = sauvegarde.split("-") + if len(sauvegarde_split) > 1: + if sauvegarde_split[1] == "logeproj": + chemin_sauvegarde_locale = f"{p_chemin_rep_local}/{sauvegarde}" + chemin_sauvegarde_distante = ( + f"{p_chemin_rep_distant_sauvegardes}/{sauvegarde}" + ) + telecharge_un_fichier( + chemin_sauvegarde_distante, + chemin_sauvegarde_locale, + p_connexionSFTP, + ) + + +def main(): + print("recup_base.py : téléchargement des bases sur le serveur SFTP") + + aujourdhui = datetime.date.today() + chemin_rep_local_sauvegarde = "./backups" + sftp_hote = "ftpmdv.myriadev.fr" + sftp_port = "22060" + sftp_utilisateur = "cen-isere" + sftp_mot_de_passe = "gaic4Ao'Do1O" + sftp_rep_racine_sauvegardes = "pg_db_backup" + + parser = argparse.ArgumentParser() + parser.add_argument( + "-b", + "--base", + required=False, + help="nom de la base à télécharger (si non fourni, le script télécharge toutes les bases", + ) + + arguments = parser.parse_args() + + effectue_verifications(sftp_hote) + sftp_utilisateur, sftp_mot_de_passe = demande_infos_utilisateur(sftp_hote) + + connexion_sftp = etablit_connexion_SFTP( + sftp_hote, sftp_port, sftp_utilisateur, sftp_mot_de_passe + ) + chemin_rep_sauvegardes_du_jour = cherche_chemin_rep_sauvegardes_du_jour( + connexion_sftp, sftp_rep_racine_sauvegardes, aujourdhui + ) + + if arguments.base is None: + # Télécharger toutes les sauvegardes + telecharge_toutes_sauvegardes( + chemin_rep_sauvegardes_du_jour, chemin_rep_local_sauvegarde, connexion_sftp + ) + else: + formats = ["custom", "sql.gz"] + for format in formats: + # Télécharger la base fournie en argument au format custom + chemin_sauvegarde_distante = ( + f"{chemin_rep_sauvegardes_du_jour}/{arguments.base}-logeproj-centrale.{format}" + ) + chemin_sauvegarde_locale = ( + f"{chemin_rep_local_sauvegarde}/{arguments.base}-logeproj-centrale.{format}" + ) + telecharge_un_fichier( + chemin_sauvegarde_distante, chemin_sauvegarde_locale, connexion_sftp + ) + + clos_connexion_SFTP(connexion_sftp) + + +if __name__ == "__main__": + main() diff --git a/7_LOGEPROJ/restaure_sauvegarde.py b/7_LOGEPROJ/restaure_sauvegarde.py new file mode 100644 index 0000000..0fd0873 --- /dev/null +++ b/7_LOGEPROJ/restaure_sauvegarde.py @@ -0,0 +1,242 @@ +#!/usr/bin/env python3 + +import argparse +import pathlib +import subprocess +import os +import psycopg2 +from pathlib import Path + +from sqlalchemy.engine import URL +from sqlalchemy import create_engine +import paramiko +import datetime + + +def etablit_connexion_SFTP(p_hote, p_port, p_utilisateur, p_motdepasse): + connexion_ssh = paramiko.SSHClient() + 
connexion_ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + connexion_ssh.connect(p_hote, p_port, p_utilisateur, p_motdepasse) + return connexion_ssh.open_sftp() + + +def creer_chaine_connexion(p_utilisateur, p_pass, p_serveur, p_port,p_base): + # return f"postgresql://{p_utilisateur}@{p_serveur}:{p_port}" + url_geo = URL.create('postgresql+psycopg2', + username = p_utilisateur, + password = p_pass, + host = p_serveur, + port = p_port, + database = p_base, + ) + return create_engine(url_geo) + + +def creer_base_de_donnee(p_base_a_creer, p_base_connexion, p_chaine_connexion): + requete_SQL = f'CREATE DATABASE "{p_base_a_creer}" WITH OWNER logeproj' + commande = [ + "psql", + f"{p_chaine_connexion}/{p_base_connexion}", + "-c", + f"{requete_SQL}", + ] + commande_run = subprocess.run(commande) + code_retour = commande_run.returncode + if code_retour != 0: + return False + else: + return True + + +def supprimer_base_de_donnee(p_base_a_supprimer, p_base_connexion, p_chaine_connexion): + requete_SQL = f'DROP DATABASE "{p_base_a_supprimer}"' + commande = [ + "psql", + f"{p_chaine_connexion}/{p_base_connexion}", + "-c", + f"{requete_SQL}", + ] + commande_run = subprocess.run(commande) + code_retour = commande_run.returncode + if code_retour != 0: + return False + else: + return True + + +def lister_bases_de_donnees(p_connexion): + tableau_bases = [] + requete_SQL = "SELECT datname FROM pg_database WHERE datistemplate = false;" + curseur = p_connexion + curseur.execute(requete_SQL) + for enregistrement in curseur: + tableau_bases.append(enregistrement[0]) + return tableau_bases + + +def restaurer_sauvegarde( + p_chemin_sauvegarde, p_base, p_base_connexion, p_chaine_de_connexion +): + # connexion = psycopg2.connect(p_chaine_de_connexion) + connexion = p_chaine_de_connexion + if p_base in lister_bases_de_donnees(connexion): + supprimer_base_de_donnee(p_base, p_base_connexion, p_chaine_de_connexion) + creer_base_de_donnee(p_base, p_base_connexion, p_chaine_de_connexion) + connexion.close() + commande = [ + "pg_restore", + "-d", + f"{p_chaine_de_connexion}/{p_base}", + p_chemin_sauvegarde, + ] + commande_run = subprocess.run(commande) + code_retour = commande_run.returncode + if code_retour != 0: + return False + else: + return True + + +def restaurer_toutes_sauvegardes( + p_chemin_sauvegarde, p_base_de_connexion, p_chaine_connexion +): + repertoire_des_sauvegardes = pathlib.Path(p_chemin_sauvegarde) + for sauvegarde in repertoire_des_sauvegardes.glob("*-logeproj-centrale.custom"): + sauvegarde.resolve() + chemin = str(sauvegarde) + fichier = sauvegarde.name + fichier_split = fichier.split("-") + base = f"{fichier_split[0]}-logeproj-test" + restaurer_sauvegarde(chemin, base, p_base_de_connexion, p_chaine_connexion) + + +def cherche_chemin_rep_sauvegardes_du_jour(p_connexion_sftp, p_rep_racine, p_date, p_format=None): + liste_rep_backups_du_jour = [] + date_backup = "1900-01-01" + rep_split = ["1900", "01", "01"] + + p_connexion_sftp.chdir("/"+p_rep_racine) + + for rep in p_connexion_sftp.listdir(): + rep_split = rep.split("-") + + if len(rep_split) < 3 : continue + if p_format is not None and rep.find(p_format) == -1: continue + + date_backup = f"{rep_split[0]}-{rep_split[1]}-{rep_split[2]}" + + if date_backup == p_date.strftime("%Y-%m-%d"): + liste_rep_backups_du_jour.append(rep) + + nombre_de_rep = len(liste_rep_backups_du_jour) + + + if nombre_de_rep > 1: + print( + f"Le serveur de sauvegarde dispose de plusieurs répertoires de sauvegardes datés du {p_date.strftime('%d/%m/%Y')} :" + ) + for 
indice in range(0, nombre_de_rep): + numero_rep = indice + 1 + print(f"- {numero_rep} : {liste_rep_backups_du_jour[indice]}") + choix = int( + input( + "Entrer le numéro du répertoire dont vous souhaitez télécharger les sauvegardes : " + ) + ) + rep_sauvegarde_du_jour = liste_rep_backups_du_jour[choix - 1] + else: + rep_sauvegarde_du_jour = liste_rep_backups_du_jour[0] + + chemin_sauvegardes_du_jour = f"/{p_rep_racine}/{rep_sauvegarde_du_jour}" + return chemin_sauvegardes_du_jour + + +def telecharge_un_fichier( + p_chemin_fichier_distant, p_chemin_fichier_local, p_connexionSFTP +): + print( + f"- Téléchargement de {p_chemin_fichier_distant} dans {p_chemin_fichier_local}" + ) + p_connexionSFTP.get(p_chemin_fichier_distant, p_chemin_fichier_local) + + +def clos_connexion_SFTP(p_connexion_SFTP): + p_connexion_SFTP.close() + + +def main(): + aujourdhui = datetime.date.today() + chemin_rep_local_sauvegarde = "./" + sftp_hote = "ftpmdv.myriadev.fr" + sftp_port = "22060" + sftp_utilisateur = "cen-isere" + sftp_mot_de_passe = "gaic4Ao'Do1O" + sftp_rep_racine_sauvegardes = 'cen-isere' + + repertoire_sauvegardes = "/cen-isere" + serveur_postgresql = "91.134.194.221" + port_postgresql = "5432" + utilisateur_postgresql = "cen_admin" + mot_de_passe_postgresql = "#CEN38\@venir" + base_de_connexion = "logeproj" + + # Si le mot de passe de postgre est stocké dans une variable d'environnement, plus + # besoin de le fournir par la suite + os.environ["PGPASSWORD"] = mot_de_passe_postgresql + + parser = argparse.ArgumentParser() + parser.add_argument( + "-b", "--base", + required=False, + help="nom de la base à restaurer (si non fourni, le script restaure toutes les bases", + ) + + + arguments = parser.parse_args() + arguments.base = 'logeproj' + + connexion_sftp = etablit_connexion_SFTP( + sftp_hote, sftp_port, sftp_utilisateur, sftp_mot_de_passe + ) + chemin_rep_sauvegardes_du_jour = cherche_chemin_rep_sauvegardes_du_jour( + connexion_sftp, sftp_rep_racine_sauvegardes, aujourdhui,'gz' + ) + + + # Télécharger la base fournie en argument au format custom + chemin_sauvegarde_distante = chemin_rep_sauvegardes_du_jour + chemin_sauvegarde_locale = ( + f"{chemin_rep_local_sauvegarde}"+chemin_rep_sauvegardes_du_jour.rsplit('/',1)[-1] + ) + telecharge_un_fichier( + chemin_sauvegarde_distante, chemin_sauvegarde_locale, connexion_sftp + ) + + clos_connexion_SFTP(connexion_sftp) + + chaine_de_connexion = creer_chaine_connexion( + utilisateur_postgresql,mot_de_passe_postgresql, serveur_postgresql, port_postgresql,base_de_connexion + ) + + if arguments.base is None: + # Restaurer toutes les sauvegardes + print(f'Restauration de toutes les sauvegardes') + restaurer_toutes_sauvegardes( + repertoire_sauvegardes, base_de_connexion, chaine_de_connexion + ) + else: + # Restaurer la base fournie en argument + base=f'{arguments.base}' + print(f'Restauration de {base}') + fichier_sauvegarde = chemin_sauvegarde_locale + path_sauvegarde = pathlib.Path(fichier_sauvegarde) + restaurer_sauvegarde( + p_chemin_sauvegarde = path_sauvegarde, + p_base = base, + p_base_connexion = base_de_connexion, + p_chaine_de_connexion = chaine_de_connexion, + ) + + +if __name__ == "__main__": + main() diff --git a/7_LOGEPROJ/test_xlwings.xlsx b/7_LOGEPROJ/test_xlwings.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..d3e7a2991d87d58b13e6b314ccacef168aba9d27 GIT binary patch literal 4353 zcmaJ^2T+sS)(r%t1wm;7qEUKBqzNcUlh8$auZp3XP%gcO5~OzsC{3Dhk={Xy)KCI~ z^dd+RL_~>F!k=8X;srm<{58BkOSzblaW6x4XIzkFI$EffdBqq#IWE;{N*J4lN&Er 
[... remainder of the base85 GIT binary patch payload for 7_LOGEPROJ/test_xlwings.xlsx omitted: non-text data. The extracted patch text resumes mid-hunk in a later file whose header was lost ...]
 1200].copy()
+PSinfeq1200.rename(columns={'bilan':'max_alti'}, inplace=True)
+PSsup1200.rename(columns={'bilan':'max_alti'}, inplace=True)
+PSinfeq1200.to_postgis(
+    name='cr_PS_CBNA_habitats_inf-eq_1200m',
+    con=con,
+    schema='pelouse_seche',
+    if_exists='replace',
+    index=False,
+    index_label='id',
+    geom_col='geom'
+)
+PSsup1200.to_postgis(
+    name='cr_PS_CBNA_habitats_sup_1200m',
+    con=con,
+    schema='pelouse_seche',
+    if_exists='replace',
+    index=False,
+    index_label='id',
+    geom_col='geom'
+)
diff --git a/get_obs_znieff.py b/get_obs_znieff.py
new file mode 100644
index 0000000..fcaa615
--- /dev/null
+++ b/get_obs_znieff.py
@@ -0,0 +1,118 @@
+from pycen import con_gn, con_sicen
+import geopandas as gpd
+from datetime import datetime as dt
+import os
+
+cols_sicen_out = {
+    'id_origine':'id_origine',
+    '':'organisme_gestionnaire',
+    'cd_nom':'cd_nom',
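+    # NB: the placeholder entries with an empty-string key ('') in this
+    # dict all collide: Python keeps only the last one, so those target
+    # columns are never produced by the rename() further down.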
'nom_complet':'info_nomcomplet', + 'nom_vern':'info_nomvern', + 'date_debut_obs':'date_debut_obs', + 'date_fin_obs':'date_fin_obs', + 'date_obs':'date_obs', + 'date_textuelle':'date_textuelle', + 'effectif':'effectif', + 'effectif_min':'effectif_min', + 'effectif_max':'effectif_max', + 'effectif_textuel':'effectif_textuel', + 'remarque_obs':'remarque_obs', + 'localisation':'remarque_loc', + 'determination':'determination', + 'type_effectif':'type_effectif', + 'phenologie_age':'phenologie_age', + 'observateur':'observateur', + 'structure':'structure', + 'etude':'nom_etude', + 'protocole':'nom_protocole', + '':'id_mnhn', + '':'info_nomznieff', + '':'statut', +} +cols_geom_out = 'geometrie' + +def get_zonage(type_code): + sql = ''' + SELECT a.* FROM ref_geo.l_areas a + JOIN ref_geo.bib_areas_types b USING (id_type) + WHERE b.type_code = 'DEP' AND a.area_code = '38' + ''' + return gpd.read_postgis(sql,con_gn) + + +def get_obs_gn(date_min = None): + from shapely.geometry import Point + sql = ''' + SELECT * FROM gn_synthese.v_synthese_for_export + ''' + if date_min: + sql += " WHERE date_debut > '{date_min}'".format(date_min=date_min) + res = gpd.pd.read_sql_query(sql,con_gn) + XY = tuple(zip(res.x_centroid_4326,res.y_centroid_4326)) + res['geom'] = [Point(xy) for xy in XY] + + return res.set_geometry('geom',crs=4326)\ + .to_crs(2154) + + +def get_obs_sicen(date_min = None,intersects=False): + sql = ''' + SELECT v.* FROM saisie.v_saisie_observation v + JOIN saisie.suivi_saisie_observation s USING (id_obs) + ''' + if date_min or intersects: + sql += ' WHERE ' + if date_min: + dtm = """ + v.date_obs > '{date_min}' + -- OR s.date_operation > '{date_min}') AND s.operation = 'INSERT' + """.format( + date_min=date_min + ) + sql += dtm + if intersects : + '' + + sql += ';' + return gpd.read_postgis( + sql,con_sicen,geom_col='geometrie',#parse_dates=['date_obs','date_debut_obs','date_fin_obs'], + )\ + .rename_geometry('geom') + +if __name__ == "__main__": + + today = dt.today().date().strftime('%Y%m%d') + path = '/home/colas/Documents/tmp' + file_name = os.path.join(path,'export_znieff1_cen38_%s.gpkg'%today) + + date_min = '2021-12-31' # Extraction des données observées ou intégrées après cette date. + date_max = '2023-01-01' # Extraction des données observées avant cette date. 
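+
+    # NB: get_zonage() ignores its type_code argument and always returns
+    # the Isère department outline (type_code 'DEP', area_code '38'), so
+    # get_zonage('ZNIEFF1') below does not actually select ZNIEFF1 zones.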
+ + zon = get_zonage('ZNIEFF1').unary_union + gn = get_obs_gn(date_min=date_min) + sicen = get_obs_sicen(date_min=date_min) + sicen['type_effectif'] = sicen['strate_flore'] + sicen['age_faune'] + sicen['phenologie_age'] = sicen['phenologie_flore'] + sicen['sexe_faune'] + + gn.date_debut = gpd.pd.to_datetime(gn.date_debut) + sicen.date_obs = gpd.pd.to_datetime(sicen.date_obs) + + obs_gn = gn[gn.date_debut < dt.strptime(date_max,'%Y-%m-%d')] + obs_sicen = sicen[sicen.date_obs < dt.strptime(date_max,'%Y-%m-%d')] + + gn_date = obs_gn.columns[obs_gn.columns.str.contains('date_|time|heure|uuid')] + sicen_date = obs_sicen.columns[obs_sicen.columns.str.contains('date_|time|heure')] + + obs_gn[gn_date] = obs_gn[gn_date].astype(str) + obs_sicen[sicen_date] = obs_sicen[sicen_date].astype(str) + + obs_sicen.rename(columns=cols_sicen_out,inplace=True) + obs_sicen.rename_geometry(cols_geom_out,inplace=True) + keep_col = cols_sicen_out.values() + si_cols = obs_sicen.columns[obs_sicen.columns.isin([*keep_col,cols_geom_out])] + + obs_sicen.loc[obs_sicen.intersects(zon),si_cols].to_file(file_name,driver='GPKG',layer='sicen_obs') + obs_gn[obs_gn.intersects(zon)].to_file(file_name,driver='GPKG',layer='geonature_obs') + + \ No newline at end of file diff --git a/hydro_analyse.py b/hydro_analyse.py new file mode 100644 index 0000000..5ef63aa --- /dev/null +++ b/hydro_analyse.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +# from osgeo import gdal, gdalconst +# from osgeo_utils import gdal_polygonize +# import rasterio +# import numpy as np +# from pcraster import setclone,lddcreate,catchment,readmap,__version__,streamorder,spatial,threading,report +# from pcraster.multicore._operations import set_nr_worker_threads,nr_worker_threads + +reclassif_flux = { + 5:{'min':1,'max':1}, + 4:{'min':2,'max':2}, + 3:{'min':3,'max':3}, + 6:{'min':4,'max':4}, + 255:{'min':5,'max':5}, + 2:{'min':6,'max':6}, + 7:{'min':7,'max':7}, + 0:{'min':8,'max':8}, + 1:{'min':9,'max':9}, +} + +def createldd( + src_ds:str,ldd_file:str,outflowdepth:int=9999999,corevolume:int=9999999,corearea:int=9999999, + catchmentprecipitation:int=9999999,lddin=False,unitcell=False): + """ + Transformation du GeoTIFF au format PCRaster + par l'utilisation de la fonction gdal.Translate . + Calcul de la direction des flux. + Création d'un fichier .map représentant + la direction des flux. + + Parameters + ---------- + src_ds : Elevation (MNT/DEM) au format GeoTIFF. + ldd_file : Fichier de sortie de direction des flux + au format ``.map`` . 
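+        (the '.map' suffix is appended automatically when missing)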
+ outflowdepth : spatial, non spatial scalar + corevolume : spatial, non spatial scalar + corearea : spatial, non spatial scalar + catchmentprecipitation : spatial, non spatial scalar + lddin : spatial, non spatial scalar + unitcell : spatial, non spatial scalar + + See Also + -------- + https://pcraster.geo.uu.nl/pcraster/4.4.0/documentation/pcraster_manual/sphinx/op_lddcreate.html#index-0 + """ + from pcraster import lddcreate,report,setglobaloption,setclone + from osgeo import gdalconst,gdal + from osgeo_utils import gdal_calc + from datetime import datetime as dt + from os import sched_getaffinity,listdir,path + from pcraster.multicore._operations import set_nr_worker_threads,nr_worker_threads + + def _lddcreate(dst_file,outflowdepth,corevolume,corearea,catchmentprecipitation): + yield lddcreate(dst_file,outflowdepth,corevolume,corearea,catchmentprecipitation) + + cpu_dipo = len(sched_getaffinity(0))-1 + # if nr_worker_threads() != cpu_dipo: + # set_nr_worker_threads(cpu_dipo) + + timeinit = dt.now() + print(timeinit) + + OUTPATH = ldd_file.rsplit('/',1)[0] + pcr_temp = 'flow_direction_'+ldd_file.rsplit('/',1)[1] + dst_file = path.join(OUTPATH,pcr_temp) + + if pcr_temp in listdir(OUTPATH): + print(" PCRaster format already exist.") + else: + # Convertir GeoTIFF au format PCRaster + print(" INIT gdal.Translate to format PCRaster.") + src_ds = gdal.Open(raster50) + gdal.Translate(dst_file, src_ds, format='PCRaster', outputType=gdalconst.GDT_Float32, + metadataOptions="VS_SCALAR",callback=gdal.TermProgress) + print(" END gdal.Translate to format PCRaster :",dt.now()-timeinit) + # gdal_calc.py -A input_raster.tif --calc="(A==max(A.flatten()))*1" --outfile=max_pixel.tif --overwrite + + if ldd_file.rsplit('.',1)[1] != 'map': + ldd_file += '.map' + # Calcule de la direction du flux + setglobaloption("lddin") if lddin else setglobaloption("lddout") + setglobaloption("unitcell") if unitcell else setglobaloption("unittrue") + setclone(dst_file) + print(" INIT create local drain direction map with flow directions.") + ldd = _lddcreate(dst_file, + outflowdepth, + corevolume, + corearea, + catchmentprecipitation) + + # Save result + try : + report(next(ldd),ldd_file) + print(" END create local drain direction map with flow directions :",dt.now()-timeinit) + print(" END :",dt.now()-timeinit) + return ldd + except Exception as e: + print(" END :",dt.now()-timeinit) + print(e) + return ldd + + + +if __name__ == "__main__": + + # if nr_worker_threads != 7: + # set_nr_worker_threads(7) + + PATH = "/media/colas/SRV/FICHIERS/SITES/DISTRICTS NATURELS/BASSE VALLEE DE L'ISERE/SONE/TUFI_Sone-à-Soi/SIG/" + raster = PATH+"MNT_1m.tif" + dst_filename = '/home/colas/Documents/tmp/OUTPT.map' + xyz_filename = '/home/colas/Documents/tmp/OUTPT.xyz' + ldd_filename = '/home/colas/Documents/tmp/ldd.map' + strahler_file = '/home/colas/Documents/tmp/strahler.map' + + localPATH = "/home/colas/Documents/tmp/LOUIS/sone_tuff/" + raster50 = PATH+"MNT_50cm.tif" + dst_file50 = localPATH+'ldd_50cm.map' + createldd( + src_ds = raster50, + ldd_file = dst_file50 + ) + + + + # # src = rasterio.open(raster) + # src_ds = gdal.Open(raster50) + # srcband = src_ds.GetRasterBand(1) + # # srcdata = srcband.ReadAsArray() + # stats = srcband.GetStatistics(True, True) + # min_alti = srcband.ComputeRasterMinMax()[0] + # print("[ STATS ] = Minimum=%i, Maximum=%i, Mean=%i, StdDev=%i"%(stats[0], stats[1], stats[2], stats[3])) + + # # Calcule du pixel ayant la plus basse altitude + # # 
gdal_calc.Calc(A=dst_file,calc="A==min(A.flatten())",outfile=OUTPATH+'/min_pixel.tif',overwrite=True) + # gdal_calc.Calc(A=dst_file,calc="(A==numpy.min(A.flatten()))*1",outfile=OUTPATH+'/min_pixel.tif',overwrite=True) + + + + + + + # # Convertir GeoTIFF au format PCRaster + # gdal.Translate(dst_file50, src_ds, format='PCRaster', outputType=gdalconst.GDT_Float32, + # metadataOptions="VS_SCALAR") + # # gdal.Translate(xyz_filename, src_ds, format='XYZ') + # # gdal.Translate(xyz_filename, src_ds, format='XYZ') + # # gdal.Info(src_ds) + # # gdal.Info(raster) + + # setclone(dst_filename) + # Dem = readmap(dst_filename) + # # Calculer la direction du flux + # ldd = lddcreate(Dem, 9999999,9999999,9999999,9999999) + # ldd = lddcreate(dst_filename, 1e31, 1e31, 1e31, 1e31) # 4h + # ldd = readmap(ldd_filename) + + # # Détermination des flux / Calculer les commandes Strahler + # # Plus l'ordre est élevé, plus le flux est important. + # strahler = streamorder(ldd) + # report(strahler,strahler_file) # save result + + # # Calculer le réseau de canaux + # from pcraster import spatial,boolean,ordinal + # input_nonspatial = 8 + # setclone(strahler_file) + # SpatialResultB = spatial(boolean(input_nonspatial)) + # SpatialResultO = spatial(ordinal(input_nonspatial)) + # strahler = readmap(strahler_file) + # ResultComparison = strahler >= SpatialResultO + + + + # # Définir le point de sortie + # points = '' + # catchm = catchment(ldd, points) + + + # # processing.run("native:reclassifybytable", {'INPUT_RASTER':'/home/colas/Documents/tmp/ldd.map','RASTER_BAND':1,'TABLE':['1','1','5','2','2','4','3','3','3','4','4','6','5','5','255','6','6','2','7','7','7','8','8','0','9','9','1'],'NO_DATA':-9999,'RANGE_BOUNDARIES':2,'NODATA_FOR_MISSING':False,'DATA_TYPE':5,'OUTPUT':'TEMPORARY_OUTPUT'}) + + # # processing.run("pcraster:spatial", {'INPUT':8,'INPUT1':0,'INPUT2':'/home/colas/Documents/tmp/ldd.map','OUTPUT':'TEMPORARY_OUTPUT'}) + + # # qgis_process run pcraster:spatial --distance_units=meters --area_units=m2 --ellipsoid=EPSG:7019 --INPUT=8 --INPUT1=0 --INPUT2=/home/colas/Documents/tmp/ldd.map --OUTPUT=TEMPORARY_OUTPUT + + # # qgis_process run pcraster:comparisonoperators --distance_units=meters --area_units=m2 --ellipsoid=EPSG:7019 --INPUT='%3Fcrs%3DEPSG%3A2154%26extent%3D868999.5%2C6447000.5%2C885999.5%2C6467000.5%26width%3D17000%26height%3D20000%26formula%3D%2522strahler%401%2522%2520%253E%253D5%2520%26strahler%3Auri%3D%2Fhome%2Fcolas%2FDocuments%2Ftmp%2Fstrahler.map%26strahler%3Aprovider%3Dgdal' --INPUT1=0 --INPUT2='%3Fcrs%3DEPSG%3A2154%26extent%3D868999.5%2C6447000.5%2C885999.5%2C6467000.5%26width%3D17000%26height%3D20000%26formula%3D%2522strahler%401%2522%2520%253E%253D5%2520%26strahler%3Auri%3D%2Fhome%2Fcolas%2FDocuments%2Ftmp%2Fstrahler.map%26strahler%3Aprovider%3Dgdal' --OUTPUT=TEMPORARY_OUTPUT + + # # processing.run("pcraster:comparisonoperators", {'INPUT':'%3Fcrs%3DEPSG%3A2154%26extent%3D868999.5%2C6447000.5%2C885999.5%2C6467000.5%26width%3D17000%26height%3D20000%26formula%3D%2522strahler%401%2522%2520%253E%253D5%2520%26strahler%3Auri%3D%2Fhome%2Fcolas%2FDocuments%2Ftmp%2Fstrahler.map%26strahler%3Aprovider%3Dgdal','INPUT1':0,'INPUT2':'%3Fcrs%3DEPSG%3A2154%26extent%3D868999.5%2C6447000.5%2C885999.5%2C6467000.5%26width%3D17000%26height%3D20000%26formula%3D%2522strahler%401%2522%2520%253E%253D5%2520%26strahler%3Auri%3D%2Fhome%2Fcolas%2FDocuments%2Ftmp%2Fstrahler.map%26strahler%3Aprovider%3Dgdal','OUTPUT':'TEMPORARY_OUTPUT'}) + + # # processing.run("pcraster:col2map", 
{'INPUT':'/home/colas/Documents/tmp/bilan_sites.csv','INPUT1':"/media/colas/SRV/FICHIERS/SITES/DISTRICTS NATURELS/BASSE VALLEE DE L'ISERE/SONE/TUFI_Sone-à-Soi/SIG/MNT_1m.tif",'INPUT2':0,'OUTPUT':'TEMPORARY_OUTPUT'}) \ No newline at end of file diff --git a/intersection.py b/intersection.py new file mode 100644 index 0000000..4d09d52 --- /dev/null +++ b/intersection.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +#Nom : view_data.py +#Description : Test de connection en base et d'accès aux données +#Copyright : 2021, CEN 38 +#Auteur : Colas Geier + + +import geopandas as gpd + + +PATHIN = '/home/colas/Documents/5_BDD' +coucheA = '/ZONES_HUMIDES/TronconHydrographique/TronconHydrographique_FXX.shp' +coucheB = '/1_QGIS/dept_isere.shp' +coucheSortie = 'TronconHydrographique_Isere2' + +A = gpd.read_file(PATHIN + coucheA) +B = gpd.read_file(PATHIN + coucheB) + + +C = gpd.sjoin(A, B, op='intersects') +C.to_file(PATHIN + 'tmp/{0}.shp' %(model,dateEch), driver='ESRI Shapefile', layer='{0}'.format(coucheSortie)) \ No newline at end of file diff --git a/maj_site.py b/maj_site.py new file mode 100644 index 0000000..5894291 --- /dev/null +++ b/maj_site.py @@ -0,0 +1,57 @@ +from pycen import con_bdcen,con_fon,update_to_sql +from pycen.tools import Polygons_to_MultiPolygon +import geopandas as gpd + +PATHIN = '/media/colas/SRV/FICHIERS/OUTILS/CARTOGRAPHIE/ESPACE DE TRAVAIL/SITES/LEZE/PG2022/' +FILEIN = 'LEZE_zonage_2022.shp' +SITE_NAME = 'LEZE' +SITE_TYPE = '' +site = gpd.read_file(PATHIN+FILEIN) +tab = 'sites' +sch = 'sites' +key = 'site_id' +con = con_fon +geom_name = 'geom_ecolo' + +if FILEIN == 'LEZE_zonage_2022.shp': + site = gpd.GeoDataFrame( + data={key:SITE_NAME,geom_name:gpd.GeoSeries(site.unary_union)},geometry=geom_name,crs=2154 + ) + +if site.geometry.name != geom_name: + site.rename_geometry(geom_name, inplace=True) + +if 'Polygon' in site.geom_type.unique(): + site = Polygons_to_MultiPolygon(site,geom_name) + +# if SITE_TYPE == 'ZO' and con.url.database == 'bd_cen': +# zi = gpd.read_postgis("SELECT * FROM sites.sites WHERE site_id = '%s_ZI'"%SITE_NAME,con) +# site = site.overlay(zi, how='difference') + # site.rename_geometry(geom_name, inplace=True) + + +if con.url.database == 'bd_cen': + if SITE_TYPE != '': + site[key] = '_'.join([SITE_NAME,SITE_TYPE]) + else : + site[key] = SITE_NAME + update_to_sql( + df= site[[key,geom_name]], + con=con, + table_name=tab, + schema_name=sch, + key_name=key + ) +elif con.url.database == 'bd-cen-38': + from datetime import datetime as dt + site[key] = SITE_NAME + site['type_zonage'] = SITE_TYPE + site['date_maj'] = dt.now().date().isoformat() + update_to_sql( + df= site[[key,'type_zonage','date_maj',geom_name]], + con=con, + table_name=tab, + schema_name=sch, + key_name=[key,'type_zonage'] + ) + diff --git a/multi_auteurTOrelation_table.py b/multi_auteurTOrelation_table.py new file mode 100644 index 0000000..9233c77 --- /dev/null +++ b/multi_auteurTOrelation_table.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +#Nom : multi_auteurTOrelation_table.py +#Description : +#Copyright : 2021, CEN38 +#Auteur : Colas Geier +#Version : 1.0 + +import pycen +import pandas as pd +import geopandas as gpd +from sqlalchemy import create_engine +from geoalchemy2 import Geometry + +# Parametres bdd OUT +user = 'colas_g' +pwd = 'adm1n*38*' +adr = '192.168.60.10' +base = 'bd_cen' +con = create_engine('postgresql+psycopg2://{0}:{1}@{2}/{3}'.format(user,pwd,adr,base), echo=False) + +aut = pycen.sites().auteur 
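+
+# The two blocks below unpivot multi-author strings ('NOM1 & NOM2') into
+# one row per (site, author) pair. For reference, a more direct pandas
+# sketch of the same unpivot (not called below; assumes the same
+# 'auteur' column as the input frames):
+def explode_auteurs(df, id_col):
+    # one list of names per row, then one exploded row per name
+    s = df.set_index(id_col).auteur.str.split(' & ').explode()
+    return s.str.strip().reset_index()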
+aut.loc[aut.prenom.isna(),'auteur'] = aut.nom +aut.loc[~aut.prenom.isna(),'auteur'] = aut.nom + ' ' + aut.prenom + + +df = pycen.sites().get_sitesInfos() +df_aut = df[['id', 'auteur']] +df_aut.set_index('id', inplace=True) +df_aut = df_aut.auteur.str.split(' \& ',expand=True) + +df_rSitAut = pd.DataFrame(columns=['auteur']) +for col in df_aut.columns: + tmp = pd.DataFrame(data=df_aut.loc[~df_aut[col].isna(),col], copy=True) + tmp.columns = ['auteur'] + df_rSitAut = pd.concat([df_rSitAut, tmp]) + +df_rSitAut.auteur.replace(aut.auteur.tolist(),aut.id.tolist(), inplace=True) +df_rSitAut.columns = ['id_auteur'] +df_rSitAut.index.name = 'id_site' +df_rSitAut.reset_index(inplace=True) +df_rSitAut.to_sql( + name='r_sites_auteur', + con = con, + schema='sites', + index=False, + if_exists='append', +) + + + +df = pycen.sites().get_sitesGeom() +df_aut = df[['id', 'auteur']] +df_aut.set_index('id', inplace=True) +df_aut = df_aut.auteur.str.split(' \& ',expand=True) + +df_rSitAut = pd.DataFrame(columns=['auteur']) +for col in df_aut.columns: + tmp = pd.DataFrame(data=df_aut.loc[~df_aut[col].isna(),col], copy=True) + tmp.columns = ['auteur'] + df_rSitAut = pd.concat([df_rSitAut, tmp]) + +df_rSitAut.auteur.replace(aut.auteur.tolist(),aut.id.tolist(), inplace=True) +df_rSitAut.columns = ['id_auteur'] +df_rSitAut.index.name = 'id_geom_site' +df_rSitAut.reset_index(inplace=True) +df_rSitAut.to_sql( + name='r_geomsites_auteur', + con = con, + schema='sites', + index=False, + if_exists='append', +) diff --git a/raster2pgsql_multithread.py b/raster2pgsql_multithread.py new file mode 100644 index 0000000..9203906 --- /dev/null +++ b/raster2pgsql_multithread.py @@ -0,0 +1,59 @@ +import logging +import threading +import time +from subprocess import run +from os import listdir,getcwd,chdir,system + + +def thread_function(name): + logging.info("Thread %s: starting", name) + time.sleep(2) + if not isinstance(name,list) : name = [name] + cmd = 'export PGPASSWORD=#CEN38@venir;raster2pgsql -s 2154 -a -t 5x5 {} ref_territoire.mnt_5m | psql -h 91.134.194.221 -U cen_admin -d azalee'.format(" ".join(name)) + system(cmd) + logging.info("Thread %s: finishing", name) + +def last_thread_function(name): + logging.info("Thread %s: starting", name) + time.sleep(2) + if not isinstance(name,list) : name = [name] + cmd = 'export PGPASSWORD=#CEN38@venir;raster2pgsql -s 2154 -a -C -I -M -t 5x5 {} ref_territoire.mnt_5m | psql -h 91.134.194.221 -U cen_admin -d azalee'.format(" ".join(name)) + system(cmd) + logging.info("Thread %s: finishing", name) + + +if __name__ == "__main__": + + format = "%(asctime)s: %(message)s" + logging.basicConfig(format=format, level=logging.INFO, + datefmt="%H:%M:%S") + # reste 38 + workspace = r'/home/colas/Documents/8_SIG/MNT/IGN - RGE Alti 5M/38' + chdir(workspace) + list_f = listdir() + list_asc = ["/vsizip/"+f+"/"+f.split('.')[0]+"_MNT_LAMB93_IGN69.asc" for f in list_f] + + # Création d'une séquence au pas de 70 + seq = list(range(0,len(list_asc),70)) + end_seq = len(list_asc) + + for i,j in enumerate(seq): + k = end_seq if i == len(seq)-1 else seq[i+1] + print(len(list_asc[j:k])) + + threads = list() + for file in list_asc[j:k]: + logging.info("Main : create and start thread %s.", file) + if file == list_asc[-1]: + x = threading.Thread(target=last_thread_function, args=(file,)) + else: + x = threading.Thread(target=thread_function, args=(file,)) + threads.append(x) + x.start() + # [t.start() for t in threads] + [t.join() for t in threads] + + # for file, thread in enumerate(threads): + # 
logging.info("Main : before joining thread %s.", file) + # thread.join() + # logging.info("Main : thread %s done", file) diff --git a/search_col.py b/search_col.py new file mode 100644 index 0000000..8027a9f --- /dev/null +++ b/search_col.py @@ -0,0 +1,31 @@ +from pycen import con_gn as con +import pandas as pd +from sqlalchemy import create_engine + +user = 'cgeier' +pwd = 'adm1n*bdCen' +adr = '91.134.194.221' +base = 'cadastre' +con = create_engine('postgresql+psycopg2://{0}:{1}@{2}/{3}'.format(user,pwd,adr,base), echo=False) + +lst_sch = con.dialect.get_schema_names(con) + +for s in lst_sch: + lst_tab = con.dialect.get_table_names(con,s) + for t in lst_tab: + lst_col = con.dialect.get_columns(con,t,s) + lst_col = [x['name'] for x in lst_col ] + if 'dnulot' in lst_col: + sql = '''SELECT * FROM "%s".%s WHERE TRIM(dnulot) <> '' LIMIT 5;'''%(s,t) + res = pd.read_sql_query(sql = sql, con=con) + if not res.empty: + print('Schema : %s ; Table : %s ; nrows : %s'%(s,t,str(res.shape[0]))) + + +# before +obs_op = 880 +veille = 1202 + +# after +obs_op = 2082 +veille = 0 diff --git a/taxref.py b/taxref.py new file mode 100644 index 0000000..ae82748 --- /dev/null +++ b/taxref.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +#Nom : : taxref.py +#Description : +#Copyright : 2021, CEN38 +#Auteur : Colas Geier +#Version : 1.0 + + +import pandas as pd + +PATHIN = '~/Documents/5_BDD/TAXON/TAXREF/TAXREF_V14_2020/' +FILE = 'TAXREFv14.txt' +sep = '\t' +LB_NOM = 'Taraxacum bulbosum' + +df = pd.read_csv(PATHIN+FILE, sep = sep) + +# tar = df[df.LB_NOM.str.contains('Taraxacum')].iloc[:,:15] +# tarbulb = tar[tar.LB_NOM == 'Taraxacum bulbosum'] +# taxref = df[df.CD_NOM == tarbulb.CD_REF] + +# lst_taxon = df[df.LB_NOM.str.contains('Taraxacum') & df.RANG.str.fullmatch('ES')].iloc[:,:15].CD_NOM + + +esp2 = df[df.LB_NOM == LB_NOM] +df2 = df3 = esp3 = esp2 +rang3 = rang2 = esp2.RANG.values[0] + + +def get_taxonomie (taxon, taxref, detail = False): + # Définition du niveau de détail + if detail: + cd_taxsup = 'CD_SUP' + else: + cd_taxsup = 'CD_TAXSUP' + + esp = taxref[taxref.CD_NOM == taxon] + esp_nom = esp.LB_NOM.values[0] + + # Récupértation de l'espèce de référence + if esp.CD_NOM.equals(esp.CD_REF) : + print('OK') + espref = esp + else: + espref = taxref[taxref.CD_NOM == esp.CD_REF.values[0]] + + esptemp = esp + rang = esp.RANG.values[0] + + # Récupération de la taxonomie + while (rang != 'KD'): + reftemp = taxref[taxref.CD_NOM == esptemp.CD_REF.values[0]] + taxsup = taxref[taxref.CD_NOM == reftemp[cd_taxsup].values[0]] + esptemp = taxsup + rang = esptemp.RANG.values[0] + esp = esp.combine_first(taxsup) + + # Transpose les noms de rangs en ligne + tmp = esp[['RANG', 'LB_NOM']] + tmp.set_index('RANG', inplace=True) + tmp = tmp.T + tmp.columns.name = None + tmp.reset_index(drop=True, inplace=True) + id_cdnom = esp.columns.to_list().index('CD_NOM') + esp = esp[esp.LB_NOM == esp_nom].iloc[:,id_cdnom:] + esp.reset_index(drop=True, inplace=True) + tmp.join(esp, how='outer') + + + + + + + + +while (rang2 != 'KD'): + print(rang2) + espref = df[df.CD_NOM == esp2.CD_REF.values[0]] + taxsup = df[df.CD_NOM == espref.CD_TAXSUP.to_list()[0]] + esp2 = taxsup + rang2 = esp2.RANG.to_list()[0] + df2 = pd.concat([df2, taxsup]) + +df2.iloc[:,:15] + +while (rang3 != 'KD'): + print(rang3) + espref = df[df.CD_NOM == esp3.CD_REF.to_list()[0]] + taxsup = df[df.CD_NOM == espref.CD_SUP.to_list()[0]] + esp3 = taxsup + rang3 = esp3.RANG.to_list()[0] + df3 = pd.concat([df3, taxsup]) + +df3.iloc[:,:15] diff --git 
a/tmp_save/README.md b/tmp_save/README.md
new file mode 100644
index 0000000..6460d8d
--- /dev/null
+++ b/tmp_save/README.md
@@ -0,0 +1,58 @@
+# PGSZH
+Script computing indices that characterize the wetlands of a territory.
+
+## Install
+pip install git+https://framagit.org/cen-colas/pycen.git
+
+## Criteria considered
+
+- Functions:
+    - Biological and ecological functions:
+        - **znieff_1:** Computes the presence/absence of wetlands on ZNIEFF1 zones by intersection. (Presence 2 / Absence 0)
+        - **redi:** Computes wildlife movement capacity from the Isère departmental ecological network (REDI) by intersection, using a 100 m buffer. (Presence 2 / Absence 0)
+        - **fct_bio:** When a wetland does not intersect a ZNIEFF1, assigns each wetland a weight for the biological and ecological functions described in the wetland inventories. (Single function 1 / Multiple functions 2)
+        - **fct_hab:** Assigns each wetland a weight based on the habitat type described in the wetland inventories. ('prioritaire|très rare' 2)
+        - **fct_esppatri:** Assigns each wetland a weight based on the protected species (DH|PR|DO|LR|PN|LRR|LRN|LRM|LRF|LRD|LRE) described in the wetland inventories. (0 < n species < 3: 0.5 / n species >= 3: 1)
+
+    - Hydrological functions:
+        - **zone_inond:** Computes the presence/absence of wetlands on flood-prone zones by intersection. (Presence 1 / Absence 0)
+        - **eabf:** Computes the well-functioning alluvial margin (EABF) or valley bottom by intersection. (Presence 1 / Absence 0)
+        - **dist_reso_hydro:** If the wetland has no well-functioning alluvial margin according to "eabf()", computes the distance to the nearest linear hydrographic network and assigns a weight based on that distance. If the wetland has no EABF and does not appear close to a hydrographic network, searches the wetlands database for the presence of a watercourse. (_> 50 m_: 0 / _]10 m – 50 m]_: 0.5 / _<= 10 m_: 1)
+        - **reghydro_out:** For each wetland more than 50 m from the linear hydrographic network and without a well-functioning alluvial margin, checks the wetlands database for a "Cours d'eau" (watercourse) water outlet and assigns a weight accordingly. (If "Cours d'eau": 1)
+        - **connex_molasse:** Assigns each wetland a weight based on whether its connection to the molasse aquifer is established. (Presence 1 / Absence 0)
+        - **idpr:** Computes the IDPR (network development and persistence index), only when connex_molasse = 0. (If coverage > 25%: 1)
+        - **fct_hydro:** Assigns each wetland a weight based on the number of hydraulic and hydrological hydro-biological roles it fulfils. (Single function 0.5 / Multiple functions 1)
+        - **zse_zsnea:** Assigns each wetland a weight based on whether it belongs to a currently exploited (zse) or future (zsnea) water-resource safeguard zone. (Presence 1 / Absence 0)
+
+    - Physical and biochemical functions:
+        - **perim_captage:** Identifies the presence/absence of water catchment zones near the wetlands by intersection. (If intersection: 2)
+        - **fct_hydrobio:** Assigns each wetland a weight based on the number of physical and biochemical hydro-biological roles it fulfils. (Single function 1 / Multiple functions 2)
+        - **occup_sol:** For each wetland, identifies the land-cover type and its overall coverage, deduces the natural-area surface concerned by the zonings, and assigns a weight based on the covered surface. (_[25%; 50%[_: 1 / _[50%; 75%[_: 1.5 / _[75%; 100%]_: 2)
+
+    - "Multi-function" criteria:
+        - **surface:** Computes the total surface of the wetlands and assigns a weight based on the result. (_[1 ha; 20 ha[_: 0.5 / _[20 ha; 100 ha[_: 1 / _>= 100 ha_: 1.5)
+        - **pente:** Computes the mean slope of the wetlands from the DEM and assigns a weight based on the result. (mean(slope) < 5%: 1)
+        - **dir_exp:** Adds a dir_exp field to the output table, to be filled in manually by the expert.
+
+- Pressures:
+    - Direct:
+        - **artif_directe:** Retrieves the Rhoméo I12 direct artificialization pressure results and applies Jenks natural breaks to categorize them into 3 classes [0, 0.5, 1].
+        - **urbani_directe:** Retrieves the Rhoméo I12 direct urbanization pressure results and applies Jenks natural breaks to categorize them into 4 classes [0, 0.5, 1, 1.5].
+        - **pressAgri_directe:** Retrieves the Rhoméo I13 direct agricultural pressure results and applies Jenks natural breaks to categorize them into 3 classes [0, 0.5, 1].
+        - **projet_plu_U:** Intersects the urban-planning (PLU) zones with the study polygons, considering the field Typezone == 'U', and assigns points on intersection. (Presence 1 / Absence 0)
+        - **conflit_redi:** Intersects the REDI conflict zones (points, lines) with the study polygons, using a 100 m buffer. (Presence 2 / Absence 0)
+        - **prelev_eau:** Identifies wetlands close to water catchment sources, using a 50 m buffer and intersection. (Presence 1 / Absence 0)
+        - **icpe:** Identifies wetlands close to classified industrial facilities, using a 500 m buffer and intersection. (Presence 1 / Absence 0)
+        - **ouvrage:** Identifies the presence of structures and deposits within the wetlands, by intersection. (Presence 1 / Absence 0)
+        - **vulnerabilite:** Identifies wetlands close to invasive alien species, using a 100 m buffer and intersection. (Presence 1 / Absence 0)
+    - Indirect:
+        - **artif_indir:** Retrieves the Rhoméo I12 indirect artificialization pressure results and applies Jenks natural breaks to categorize them into 3 classes [0, 0.5, 1].
+        - **urbani_indir:** Retrieves the Rhoméo I12 indirect urbanization pressure results and applies Jenks natural breaks to categorize them into 4 classes [0, 0.5, 1, 1.5].
+        - **pressAgri_indir:** Retrieves the Rhoméo I13 indirect agricultural pressure results and applies Jenks natural breaks to categorize them into 3 classes [0, 0.5, 1].
+        - **projet_plu_AU:** Intersects the urban-planning (PLU) zones with the study polygons, considering the field Typezone == 'AU', and assigns points on intersection. (Presence 1 / Absence 0)
+        - **projet_scot:** When no PLU is available, searches for potential development areas around the sites (SCOT) and assigns points on intersection. (Presence 1 / Absence 0)
\ No newline at end of file
diff --git a/tmp_save/agreg_zone.py b/tmp_save/agreg_zone.py
new file mode 100644
index 0000000..44b1e8e
--- /dev/null
+++ b/tmp_save/agreg_zone.py
@@ -0,0 +1,31 @@
+from os.path import join
+from os import listdir
+import geopandas as gpd
+
+SRV = '/media/colas/SRV/FICHIERS'
+PATH = 'OUTILS/CARTOGRAPHIE/ESPACE DE TRAVAIL/ETUDES/PGSZH_Belledonne/Pressions/PLU'
+
+DICT_COLS = {
+    'TYPESECT':'TYPEZONE'
+}
+KEEP_ZONE = ['U','AU','AUs','AUc']
+
+DF = gpd.GeoDataFrame()
+for f in listdir(join(SRV,PATH)):
+
+    if f[-3:] != 'shp' : pass
+    else:
+        df = gpd.read_file(join(SRV,PATH,f))
+        df.columns = df.columns.str.upper()
+        df.rename(columns=DICT_COLS, inplace=True)
+        df['SOURCE'] = f  # fixed: tag the rows of the file just read (was DF['SOURCE'] = f)
+        print(df.TYPEZONE.unique())
+        tmp = df[df.TYPEZONE.isin(KEEP_ZONE)]
+        DF = gpd.pd.concat([DF,tmp])
+
+DF.set_geometry('GEOMETRY',inplace=True)
+DF.rename_geometry('geom', inplace=True)
+DF.set_crs(2154,inplace=True)
+
+DF.to_file(join(SRV,PATH,'COUCHES_AGREGEES','FILTREZONE_URBA_U&AU.gpkg'),options={'OVERWRITE=YES'})
+DF.to_file(join('~/Documents/tmp','FILTREZONE_URBA_U&AU.gpkg'))
\ No newline at end of file
diff --git a/tmp_save/pgszh_Belledo.py b/tmp_save/pgszh_Belledo.py
new file mode 100644
index 0000000..c505b39
--- /dev/null
+++ b/tmp_save/pgszh_Belledo.py
@@ -0,0 +1,2475 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+# from binascii import Incomplete
+# from lib2to3.pgen2 import driver  # unused stray import, commented out
+from warnings import filterwarnings
+import geopandas as gpd
+# from shapely import geometry
+from shapely.geometry import MultiPolygon, MultiLineString, MultiPoint, Polygon, LineString #, collection
+from shapely import ops
+import pycen
+filterwarnings("ignore",category=RuntimeWarning)
+zh = pycen.zh()
+
+# Chemin
+path0 = '/home/colas/Documents/9_PROJETS/3_PGZH/'
+PATH_OUT = path0 + 'RESULTATS/Résultats_etude_PGSZH_Belledonne.xlsx'
+
+Path0 = '/media/colas/SRV/FICHIERS/'
+Path_tmp = 'OUTILS/CARTOGRAPHIE/ESPACE DE TRAVAIL/ETUDES/PGSZH_Belledonne/Fonctions/'
+p_bio_eco = Path_tmp+'Biologique/'
+p_hydro = Path_tmp+'Hydrologique/'
+p_phybio = Path_tmp+'Physico-chimique/'
+p_mltifct = Path_tmp+'multi_fonctions/IGN - BD Alti 25M/'
+p_press = Path_tmp+'../Pressions/'
+P_expert = path0 + 'DIRE_EXPERT/'
+Path_alti = '/home/colas/Documents/8_SIG/MNT'
+p_mltifct = '/IGN - BD Alti 25M/'
+
+# Couche des fonctions biologiques et écologiques
+c_znieff = 'PGSZH_Bell_ZNIEFF-I.gpkg'
+# c_zico = 'PGZSH_zico.gpkg'
+c_redi = 'PGSZH_Bell_axe_faune_REDI.gpkg'
+
+# Couche des fonctions hydrauliques et hydrologiques
+c_alea_inond = 'alea_inondation/utsg_gpu.gpkg'
+c_ebf_crseau = ['Breda_EBF_Optimal.shp','Salin_EBF_Optimal_2021.shp','Sonnant_EBF_Optimal.shp']
+c_connex_molasse = 'ZH_CONNECT_ESO.shp'
+c_idpr = 'BRGM_IDPR/IDPR_2154_CLIP.tif'
+c_idpr2 = 'BRGM_IDPR/IDPR_2154_CLIP.gpkg'
+c_piezo = 'Piezo_SAGE BDPV/carte_piézo_HE_2021.shp'
+c_piezo_interp = 'Piezo_SAGE BDPV/piezo_interpoler.tif'
+c_captage_ppi = [
+    'captage PPI/38350_INFO_SURF_20200219.shp',
+    'captage PPI/38501_INFO_SURF_20200311.shp',
+    'captage PPI/38181_PRESCRIPTION_SURF_20180220.shp',
+    'captage PPI/38206_PRESCRIPTION_SURF_20200218.shp',
+    'captage PPI/38314_PRESCRIPTION_SURF_20220922.shp',
+    'captage PPI/38439_PRESCRIPTION_SURF_20180628.shp',
+    'captage PPI/38567_PRESCRIPTION_SURF_20191125.shp'
+]
+
+# Couche des fonctions physiques et biochimiques
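+# (NB: c_artif below is assigned the same path again in the
+# "Couche des pressions" section further down.)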
+
+
+class check_dtypeInList(list):
+    def __contains__(self, typ):
+        return any(isinstance(val, typ) for val in self)
+
+# get_flux
+def open_gpkg(Path0, layer=None,bbox=None):
+    '''
+    Ouverture des couches Shapefile et Geopackage et
+    mise au format du script :
+
+    Parameters
+    ----------
+    Path0 : str. Chemin/fichier.
+    layer : str. Si Geopackage, nom de la couche dans le
+        cas où il y en a plusieurs.
+    '''
+    df = gpd.read_file(Path0,layer=layer, bbox=bbox)
+    if df.geometry.name != 'geom':
+        df.rename_geometry('geom',inplace=True)
+    # Reprojection systématique vers le Lambert-93 (EPSG:2154)
+    if df.crs.srs.upper() != 'EPSG:2154':
+        print('Projection : %s - %s'%(df.crs.name,df.crs.srs[:20]) )
+        print('Modification de la projection ...')
+        df.to_crs(epsg=2154,inplace=True)
+
+    return df
+
+
+def to_geoms(geometries):
+    for geometry in geometries:
+        if isinstance(geometry, (Polygon,LineString)):
+            yield geometry
+        else:
+            yield from geometry
+
+
+def _union_polygons_geometry(df):
+    '''
+    Transforme un GeoDataFrame de Polygons
+    et/ou MultiPolygons en un MultiPolygon unique :
+
+    Parameters
+    ----------
+    df : GeoDataFrame.
+ ''' + df = df.copy() + name_geom = df.geometry.name + + # poly = df[df.geom_type=='Polygon'][name_geom] + poly = df.loc[df.geom_type=='Polygon',name_geom] + multipoly = df.loc[df.geom_type=='MultiPolygon',name_geom] + poly = [*poly] + multipoly = [*multipoly] + + if poly: + mp2 = MultiPolygon(poly) + if poly and multipoly: + res = MultiPolygon(to_geoms([*mp2, *multipoly])) + elif not poly and multipoly: + res = MultiPolygon(to_geoms(multipoly)) + elif not multipoly and poly: + res = MultiPolygon(poly) + + return res + + +def _union_lines_geometry(df): + name_geom = df.geometry.name + + line = df.loc[df.geom_type=='LineString',name_geom].tolist() + multiline = df.loc[df.geom_type=='MultiLineString',name_geom].tolist() + + if line: + mp2 = MultiLineString(line) + if line and multiline: + res = MultiLineString(to_geoms([*mp2, *multiline])) + elif not line and multiline: + res = MultiLineString(to_geoms([*multiline])) + elif not multiline and line: + res = MultiLineString(line) + + return res + + +def _calc_recouvrmt(df1,df2): + ''' + Calcule le recouvrement de df2 sur df1 + pour chaque géométrie de df1: + + Parameters + ---------- + df1 : GeoDataFrame. + df2 : GeoDataFrame. + ''' + tmp = gpd.sjoin( + df1, + df2[['geom']], + predicate = 'intersects', + how = 'left') + tmp.dropna(subset=['index_right'],inplace=True) + tmp.index_right = tmp.index_right.astype(int) + tmp.reset_index(inplace=True) + tmp = tmp.join( + df2[['geom']].rename(columns={'geom': 'right_geom'}), + on=['index_right'], how='left') + tmp2 = tmp[['index_right','right_geom']].copy() \ + .rename(columns={'right_geom': 'geom'}) \ + .set_geometry('geom') + tmp1 = tmp[['id_site','geom']].copy() \ + .set_geometry('geom') + + if not tmp1.geom.values.is_valid.all(): + tmp1.loc[~tmp1.geom.values.is_valid,'geom'] = tmp1.loc[~tmp1.geom.values.is_valid,'geom'].buffer(0) + if not tmp2.geom.values.is_valid.all(): + tmp2.loc[~tmp2.geom.values.is_valid,'geom'] = tmp2.loc[~tmp2.geom.values.is_valid,'geom'].buffer(0) + + tmp['perc_rcvmt'] = (tmp1.intersection(tmp2).area/tmp1.area)*100 + tmp = tmp.groupby(['id_site']).sum().reset_index() + df1 = df1.merge(tmp[['id_site','perc_rcvmt']], on=['id_site'], how='left') + df1.perc_rcvmt.fillna(0, inplace=True) + df1.perc_rcvmt = df1.perc_rcvmt.round(2) + + return df1 + + +def jenks(data,col,labels): + import jenkspy + data = data.copy() + c = col + tmp = data[c].unique() + tmp = gpd.pd.DataFrame({'val':tmp}) + ddf = gpd.pd.DataFrame() + # Si str in labels + labs_copy = None + if str in check_dtypeInList(labels): + labs_copy = labels.copy() + labels = range(len(labels)) + labels = list(labels) + + + + tmp['jenks'] = gpd.pd.cut(tmp['val'], + bins=jenkspy.jenks_breaks(tmp['val'], n_classes=len(labels)), + # bins=list(set(jenkspy.jenks_breaks(tmp['val'], n_classes=len(labels)))), + labels=labels, + include_lowest=False) + ddf[c] = data[c].copy() + ddf[c] = ddf[c].replace([*tmp.val],[*tmp.jenks]) + ddf[c+'1'] = data[c] + # ddf[c+'1'] = ddf[c+'1'].replace([*tmp.val],[*tmp.jenks]) + # ddf[c] = ddf[c].astype(float) + + if labs_copy: + ddf[c] = ddf[c].replace([*labels],[*labs_copy]) + # ddf[c+'1'] = ddf[c+'1'].replace([*labels],[*labs_copy]) + + + return ddf[c] + + +def get_rhomeo_indicateur(table,cols): + return gpd.pd.read_sql_table(table,pycen.con,'zh_analyse',columns=cols) + + +def intersects_rpg(df,code_cultu:list=None): + + # RPG + from pycen.ref import territoire as ter + # rpg = open_gpkg(Path0+p_phybio+c_rpg, bbox=df) + + lst_code = ['BOP','SPH','SPL'] + + if code_cultu: + code_cultu = 
{'code_cultu':code_cultu} + elif lst_code: + code_cultu = {'code_cultu':lst_code} + + rpg = ter.rpg2021_dep_parc(args=code_cultu,bbox=df.unary_union) + q = df.intersects(rpg.unary_union) + df['rpg'] = q.astype(int) + + return df + + +class fct_bio_eco: + ''' + Calcule l'indice et les sous-indices des fonctions biologiques + et écologiques des zones humides: + + Liste des fonctions + ---------- + znieff_1 : Calcul la présence/absence + des zones humides sur des ZNIEFF1 par intersection. + redi : Calcul la capacité de déplacement de la faune + du réseau écologique départemental de l'Isère (REDI) + par intersection. Utilisation d'une couche SIG REDI + (PolyLignes) avec un buffer de 100m. + fct_bio : Dans le cas où un zone humide n'intersecte pas + une ZNIEFF1, attribution d'un poids à chaque zone humide + pour ses fonctions Biologiques et Ecologiques. + Calcul dépendant du nombre de fonctions recensées en BDD. + fct_hab : Attribution d'un poids à chaque zone humide + en fonction des types d'habitat présents sur site. + fct_intpatri : Attribution d'un poids à chaque zone humide + en fonction des espèces protégées présentes sur site. + bilan : Somme des sous-indices des fonctions biologiques + et écologiques des zones humides. + ''' + def znieff_1(df): + ''' + Calcul la présence/absence des zones + humides sur des ZNIEFF 1 par intersection: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_znieff1' : Présence = 2 / Absence = 0 + ''' + print('INIT : Localisation de la zone en territoire ZNIEFF 1 ...') + df = df.copy() + data = open_gpkg(Path0+p_bio_eco+c_znieff,bbox=df) + geom = _union_polygons_geometry(data) + df['ssind_znieff1'] = df.geom.intersects(geom) \ + .astype(int) \ + .replace(1,2) + + return df + + + def redi(df,buffer:int=100): + ''' + Calcul la capacité de déplacement de la faune + dans le réseau écologique départemental de l'Isère (REDI) + par intersection. Utilisation d'une couche SIG REDI + (PolyLignes) avec un buffer de 100m: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_redi' : Présence = 2 / Absence = 0 + ''' + print('INIT : Axe de déplacement de la faune du REDI ...') + df = df.copy() + data = open_gpkg(Path0+p_bio_eco+c_redi,bbox=df) + data.geometry = data.geometry.map(ops.linemerge) + geom = _union_lines_geometry(data).buffer(buffer) + df['ssind_redi'] = df.geom.intersects(geom) \ + .astype(int) \ + .replace(1,2) + + return df + + + def fct_bio(df): + ''' + Dans le cas où un zone humide n'intersecte pas + une ZNIEFF 1 (ssind_znieff1 = 0), attribution d'un poids + à chaque zone humide pour ses fonctions Biologiques et + Ecologiques. Calcul dépendant du nombre de fonctions + recensées en BDD: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. 
+ + Return + ---------- + 'ssind_znieff1' : Présence = 2 / Absence = 0 + 'ssind_fctbio' : si (ssind_znieff1 = 2) = 0 / + 1 fonction = 1 / multiple fonctions = 2 / + ''' + print('INIT : biologiques et écologiques ...') + df = df.copy() + data = zh.get_fct(id_site=df.id_site.tolist()) + data = data[(data.type == 'fct_bio') & (data.nom_fct != 'non documenté')] + df['ssind_fctbio'] = df.apply( + lambda x: data[data.id_site == x['id_site']].shape[0], + axis=1) + if 'ssind_znieff1' not in df.columns: + df = fct_bio_eco.znieff_1(df) + df.loc[df.ssind_znieff1==2, 'ssind_fctbio'] = 0 + # df.loc[(df.ssind_znieff1==0) & (df.ssind_fctbio==1), 'ssind_fctbio'] = 1 + df.loc[(df.ssind_znieff1==0) & (df.ssind_fctbio > 1), 'ssind_fctbio'] = 2 + + return df + + + def fct_hab(df): + ''' + Attribution d'un poids à chaque zone humide + en fonction des types d'habitat et des espèces protégées + présents sur site: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_hab' : + Habitat 'prioritaire|très rare' = 2 + ''' + print('INIT : Habitats (prioritaire|très rare) ...') + df = df.copy() + data = zh.get_fct(id_site=df.id_site.tolist()) + data = data[(data.type == 'int_patri') & (data.nom_fct != 'non documenté')] + + # 2 pt si habitat prioritaire + lst_termep = 'prioritaire|communautaire|DH|très rare' + lst_termeC = 'communautaire' + lst_termeP = 'prioritaire|très rare' + datap = data[data.nom_fct == 'habitats'] + lst_siteh = datap.loc[datap.description.str.contains(lst_termep,na=False), 'id_site'] + lst_sitehC = datap.loc[datap.description.str.contains(lst_termeC,na=False), 'id_site'] + lst_sitehP = datap.loc[datap.description.str.contains(lst_termeP,na=False), 'id_site'] + df['ssind_hab'] = 0 + df.loc[df.id_site.isin(lst_sitehP),'ssind_hab'] = 2 + + return df + + + def readGN_espPatri(df:gpd.GeoDataFrame=None): + from pycen import con_gn + sql = ''' + with r1 as ( + SELECT + id_synthese, date_debut, cd_ref, ST_GeomFromText(geometrie_wkt_4326,4326) geom, + json_build_object(cd_type_statut,json_agg(code_statut)) statut_code, + array_agg(distinct cd_type_statut)::text[] type_statut + FROM gn_synthese.v_synthese_for_export + JOIN taxonomie.bdc_statut USING (cd_ref) + WHERE (regroupement_type = 'Liste rouge' + AND code_statut in ('VU','CR','EN')) + OR (regroupement_type = 'Protection' + AND (cd_type_statut in ('PN','PNA') + OR (cd_type_statut = 'PD' AND lb_adm_tr = 'Isère') + OR (cd_type_statut = 'PR' AND lb_adm_tr IN ('Auvergne-Rhône-Alpes','Rhône-Alpes')) + ) + ) + OR regroupement_type = 'Directives européennes' + GROUP BY 1,2,3,4,"cd_type_statut") + SELECT id_synthese, date_debut, cd_ref, geom, + json_agg(type_statut) type_statut, + json_agg(statut_code) statut_code + FROM r1 + ''' + + if not df.empty : + sql += "WHERE ST_INTERSECTS(geom,'SRID={epsg};{poly}')".format( + epsg=4326, + poly = df.to_crs(4326).unary_union + ) + sql += 'GROUP BY 1,2,3,4' + return gpd.read_postgis(sql,con_gn) + + + def esp_patrim(df): + + gn_sp = fct_bio_eco.readGN_espPatri(df) + + if not gn_sp.empty: + gn_sp.to_crs(2154,inplace=True) + gn_sp['type_statut'] = [[item for sublist in TUTU for item in sublist] for TUTU in gn_sp.type_statut] + gn_sp['typ_count'] = gn_sp.type_statut.str.len() + + spa_join = (gpd.sjoin( + df.set_index('id_site')[['geom']], + gn_sp, + predicate='intersects', + how = 'left' + )) + df_test = (spa_join + .groupby("id_site")['type_statut'] + .apply(lambda x: [*x]) + .reset_index() + ) + + df_test['patri'] = [ + set([item for sublist in TUTU for 
item in sublist])
+            if gpd.pd.notna(TUTU).all() else None
+            for TUTU in df_test['type_statut']
+        ]
+        df_test['count_typ'] = df_test.patri.str.len()
+        df = df.merge(df_test[['id_site','count_typ']],on='id_site',how='left')
+
+        return df
+
+
+    def fct_intpatri(df):
+        '''
+        Attribution d'un poids à chaque zone humide
+        en fonction des espèces protégées présentes
+        sur site :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+            de l'étude.
+
+        Return
+        ----------
+        'ssind_esppatri' :
+            Faune/Flore nb(< 3) 'DH|PR|DO|LR|PN|LRR|LRN|LRM|LRF|LRD|LRE' = 0.5
+            Faune/Flore nb(>= 3) ou 'nombreuses|plusieurs' = 1
+        '''
+        print('INIT : Faune - Flore (PN – PR – P38) ...')
+        df = df.copy()
+        data = zh.get_fct(id_site=df.id_site.tolist())
+        data = data[(data.type == 'int_patri') & (data.nom_fct != 'non documenté')]
+
+        # 1 pt si liste terme
+        lst_terme = 'DH|PR|DO|LR|PN|LRR|LRN|LRM|LRF|LRD|LRE'
+        datat = data[data.nom_fct != 'habitats'].copy()
+        datat.quantite = datat.quantite.astype(float)
+        lst_sitet = datat.loc[datat.description.str.contains(lst_terme,na=False), 'id_site']
+        lst_site1 = datat.loc[(datat.id_site.isin(lst_sitet))&(datat.quantite < 3), 'id_site']
+        lst_site2 = datat.loc[(datat.id_site.isin(lst_sitet))&(datat.quantite >= 3),'id_site']
+        lst_site3 = datat.loc[
+            (datat.id_site.isin(lst_sitet)) &
+            (datat.quantite == 0) &
+            (datat.description.str.contains('nombreuses|plusieurs')),'id_site']
+
+        df['ssind_esppatri'] = 0
+        df.loc[df.id_site.isin(lst_site1),'ssind_esppatri'] = 0.5
+        df.loc[df.id_site.isin(lst_site2),'ssind_esppatri'] = 1
+        df.loc[df.id_site.isin(lst_site3),'ssind_esppatri'] = 1
+
+        return df
+
+
+    def bilan(df):
+        '''
+        Somme des sous-indices des fonctions biologiques
+        et écologiques des zones humides :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+            de l'étude.
+
+        Return
+        ----------
+        'ind_bioeco' :
+            sum(
+                ssind_znieff1 + ssind_redi +
+                ssind_fctbio + ssind_hab + ssind_esppatri
+            )
+        '''
+        df = fct_bio_eco.znieff_1(df)
+        df = fct_bio_eco.redi(df)
+        df = fct_bio_eco.fct_bio(df)
+        df = fct_bio_eco.fct_hab(df)
+        df = fct_bio_eco.fct_intpatri(df)
+        ssind = df.columns[df.columns.str.contains('ssind_')]
+        df['ind_bioeco'] = df[
+            ['ssind_znieff1','ssind_redi','ssind_fctbio','ssind_hab','ssind_esppatri']
+            # ssind
+        ].sum(axis=1)
+        print(df['ind_bioeco'].dtype)
+        df.name = 'Fct_bio_eco'
+
+        return df
+
+
+
+class fct_hyd:
+    '''
+    Calcule l'indice et les sous-indices des fonctions hydrauliques
+    et hydrologiques des zones humides :
+
+    Liste des fonctions
+    ----------
+    zone_inond : Calcule la présence/absence des zones humides
+        sur des zones inondables par intersection.
+    eabf : Calcul de l'espace alluvial de bon fonctionnement (EABF)
+        ou de fond de vallée par intersection.
+    dist_reso_hydro : Si la zone humide ne possède pas d'espace
+        alluvial de bon fonctionnement d'après la fonction "eabf()",
+        calcule la distance au réseau hydrographique linéaire
+        (le plus proche). Attribution d'un poids en fonction de
+        la distance. Si la zone ne possède pas d'eabf et ne semble
+        pas à proximité d'un réseau hydrique, recherche de la
+        présence d'un cours d'eau dans la base de données zones humides.
+    reghydro_out : Pour chaque zone humide, en cas de distance
+        au réseau hydrographique linéaire > 50 et d'absence
+        d'espace alluvial de bon fonctionnement, recherche
+        dans la base de données des zones humides si une sortie
+        d'eau "Cours d'eau" est définie. Attribution d'un poids
+        en fonction.
+ connex_molasse : Attribution d'un poids à chaque zone humide + en fonction de sa connexion avérée à la molasse ou non. + idpr : Calcul de l'Indice de Développement et + de Persistance des Réseaux. Calcul réalisé dans le cas où + connex_molasse = 0 . + fct_hydro : Attribution d'un poids à chaque zone humide + en fonction du nombre de rôles hydro-biologiques à caractères + hydrauliques et hydrologiques qu'elle remplie. + zse_zsnea : Attribution d'un poids à chaque zone humide + en fonction de sont appartenance à une zone de sauvegarde + exploitée actuelle (zse) ou future (zsnea). + bilan : Somme des sous-indices des fonctions hydrauliques + et hydrologiques des zones humides. + ''' + def zone_inond(df): + ''' + Calcul la présence/absence des zones humides + sur des zones inondables par intersection: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_zoneinond' : None + ''' + print('INIT : Zone inondable ...') + df = df.copy() + data = open_gpkg( + Path0+p_hydro+c_alea_inond,bbox=df, + layer='prescription_surf') + data = data[data['PGSZH_alea_inondation']==1] + tmp = gpd.sjoin(df,data[['geom']],predicate='intersects', how='left') + lst = tmp.loc[~tmp.index_right.isna(),'id_site'].tolist() + df['ssind_zoneinond'] = None + df.loc[df.id_site.isin(lst),'ssind_zoneinond'] = 1 + return df + + + def eabf(df): + ''' + Si la zone humide n'est pas en zone inondable d'après + la fonction "zone_inond()", calcul de l'espace alluvial de bon + fonctionnement (EABF) ou de fond de vallée par intersection: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_eabf' : + if ZH.intersects(EABF): + 'ssind_eabf' = 2 + else : + 'ssind_eabf' = 0 + ''' + print('INIT : Espace alluvial de bon fonctionnement (EABF) ou fond de vallée ...') + df = df.copy() + if isinstance(c_ebf_crseau,str): + data = open_gpkg(Path0+p_hydro+c_ebf_crseau,bbox=df) + else : + data = gpd.GeoDataFrame() + for c in c_ebf_crseau: + data = gpd.pd.concat([ + data, + open_gpkg(Path0+p_hydro+c,bbox=df)[['geom']] + ], + ignore_index=True) + + tmp = gpd.sjoin(df,data[['geom']],predicate='intersects', how='left') + lst = tmp.loc[~tmp.index_right.isna(),'id_site'].tolist() + df['ssind_eabf'] = 0 + df.loc[df.id_site.isin(lst),'ssind_eabf'] = 2 + return df + + + def dist_reso_hydro(df): + ''' + Si la zone humide ne possède pas d'espace alluviale + de bon fonctionnement d'après la fonction "eabf()", + calcul de la distance au réseau hydrographique + linéaire (le plus proche).Attribution d'un poids + en fonction de la distance. + Si la zone ne possède pas d'eabf et ne semble pas + à proximité d'un réseau hydrique, recherche de la + présence d'un cours d'eau dans la base de données + zones humes: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. 
+ + Return + ---------- + 'ssind_distHydro' : + Si > 50m = 0 + Si ]10 m – 50 m] = 1 + Si <= 10m = 2 + ''' + from pycen import wfs + print('INIT : Distance au réseau hydrographique linéaire (le plus proche) ...') + df = df.copy() + # data = pycen.ref_hydro().get_troncon() + list_layer = wfs.list_layer('https://datacarto.datara.gouv.fr/wfs','1.1.0') + layer = 'l_inventaire_cours_eau_l_v3_038' + + if layer in list_layer: + data = wfs.get_wfs( + url='https://datacarto.datara.gouv.fr/wfs', + layer=layer, + version='1.1.0', bbox=df.unary_union) + else : + sql = "SELECT gid,geom FROM ref_hydro.l_inventaire_cours_eau_l_v3_038 WHERE ST_INTERSECTS(geom,'SRID={epsg};{poly}')".format( + epsg = df.crs.srs.split(':')[1], + poly = df.unary_union + ) + data = gpd.read_postgis(sql,con=pycen.con) + # elif 'cours_d_eau' in list_layer: + # print('\n\tUTILISATION DE LA COUCHE IN BDD : l_inventaire_cours_eau_l_v3_038') + # layer = 'cours_d_eau' + # else : + # raise("Couche des cours d'eau non disponible ... ") + + + if 'ssind_eabf' not in df.columns: + df = fct_hyd.eabf(df) + # if 'MultiLineString' in data.geom_type: + # data.loc[data.geom_type=='MultiLineString'] = data.loc[data.geom_type=='MultiLineString'].geometry.map(ops.linemerge) + # if True in data.has_z.unique(): + # import shapely.wkb + # data.loc[data.has_z,'geom'] + # data.geom = [shapely.wkb.loads(shapely.wkb.dumps(g, output_dimension=2)) for g in data.geom] + # df10 = df[['id_site','geom']].copy() + # df50 = df[['id_site','geom']].copy() + # df10.geom = df10.buffer(10) + # df50.geom = df50.buffer(50) + isin10 = df.buffer(10).intersects(data) + isin50 = df.buffer(50).intersects(data) + noteabf = df.ssind_eabf == 0 + # df10 = gpd.sjoin(df10,data[['geom']],predicate='intersects', how='left') + # df50 = gpd.sjoin(df50,data[['geom']],predicate='intersects', how='left') + # lst10 = df10.loc[~df10.index_right.isna(),'id_site'].tolist() + # lst50 = df50.loc[~df50.index_right.isna(),'id_site'].tolist() + df['ssind_distHydro'] = 0 + df.loc[noteabf & isin50,'ssind_distHydro'] = 1 + df.loc[noteabf & isin10,'ssind_distHydro'] = 2 + + # df.loc[df.dist_min <= 10, 'ssind_distHydro'] = 2 + # Si 0, check entree/sortie regime hydro. + # Si cours d'eau ou eaux de crues ==> 1 + # union = data.geometry.unary_union + # df['buff10'] = df.buffer(10).intersects(union).astype(int) + # df['buff50'] = df.buffer(50).intersects(union).astype(int) + # df['ssind_distHydro'] = None + # df.loc[df.buff50 == 0, 'ssind_distHydro'] = 0 + # df.loc[df.buff50 == 1, 'ssind_distHydro'] = 1 + # df.loc[df.buff10 == 1, 'ssind_distHydro'] = 2 + # df.drop(columns=['buff10', 'buff50'], inplace=True) + + return df + + + def reghydro_out(df): + ''' + Pour chaque zone humide, en cas de distance + au réseau hydrographique linéaire > 50 et d'absence + d'espace alluviale de bon fonctionnement, recherche + dans la base de données des zones humides si une sortie + d'eau "Cours d'eau"est définie. Attribution d'un poids + en fonction: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. 
+ + Return + ---------- + 'ssind_hydrout' : + Si ssind_distHydro = 0 + & ssind_eabf = 0 + & regime_hydri_out = "Cours d'eau" : + 'ssind_distHydro' = 2 + ''' + df = df.copy() + if 'ssind_distHydro' not in df.columns: + df = fct_hyd.dist_reso_hydro(df) + + df['ssind_hydrout'] = 0 + if not df.loc[df.ssind_distHydro == 0].empty : + lst_zh = df.loc[df.ssind_distHydro == 0].id_site.tolist() + tmp = zh.get_regHydro(id_site=lst_zh) + # tmp = tmp.loc[tmp.regime_hydri.isin(["Cours d'eau", "Eaux de crues"])] + tmp = tmp.loc[(tmp.in_out=='sortie')&(tmp.regime_hydri=="Cours d'eau")] + # in_out ??????????? + # permanance ??????? + lsttmp = tmp.id_site + df.loc[(df.ssind_eabf==0)&(df.id_site.isin(lsttmp)),'ssind_hydrout'] = 2 + return df + + + def connex_molasse(df): + ''' + Attribution d'un poids à chaque zone humide + en fonction de sa connexion avérée + à la molasse ou non : + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_molasse' : + (intersects == False) = 0 + (intersects == True) = 1 + ''' + print('INIT : Connexion à la molasse ...') + df = df.copy() + data = open_gpkg(Path0+p_hydro+c_connex_molasse,bbox=df) + tmp = gpd.sjoin(df,data[['geom']],predicate='intersects', how='left') + tmp = tmp[~tmp.index_right.isna()] + df['ssind_molasse'] = 0 + df.loc[df.id_site.isin(tmp.id_site),'ssind_molasse'] = 1 + return df + + def piezo(df): + import rasterio as rio + from os import system + print('INIT : Cross piézométrie ...') + df = df.copy() + # Polygoniser (raster vers vecteur) + # """gdal_polygonize.py + # "/home/colas/Documents/9_PROJETS/3_PGZH/SIG/multi_fonctions/IGN - BD Alti 25M/BDALTIV2_25M.tif" + # -b 1 -f "GPKG" + # /tmp/processing_sSlfcG/96533f9ad23e4c10992caa807da01bf1/OUTPUT.gpkg + # OUTPUT alti + # """ + + mnt_in = '' + piezo_in = 'my_interpolate_piezo.tif' + piezo_out = 'piezo.tif' + gpkg_out = 'out.gpkg' + mnt_out = 'mnt.tif' + piezoVSmnt = 'piezoVSmnt_out.tif' + poly_connect_nape = 'poly_connect_nape.gpkg' + # Découper un raster selon une couche de masque + # Découpage du MNT par les polygones d'étude + op = ''' + gdalwarp -overwrite -s_srs EPSG:2154 -t_srs EPSG:2154 -co FORMAT=GPKG -of GTiff -tr 25.0 -25.0 -tap -cutline \ + "PG:dbname='azalee' host=91.134.194.221 port=5432 sslmode=disable user='cgeier' password='adm1n*bdCen'" \ + -csql "SELECT site_code, geom FROM zones_humides.v_zoneshumides WHERE site_code in ('{liste_site}')" \ + "{mnt}" {out} + '''.format(vector = Path_tmp+gpkg_out, mnt=Path0+p_mltifct+c_mnt, out=Path_tmp+mnt_out, liste_site="','".join(df.id_site.tolist())) + system(op) + mnt = rio.open(Path_tmp+mnt_out) + xmin, ymin, xmax, ymax = mnt.bounds + + # Découpage du PIEZO rasterizé interpolé par les polygones d'étude + op = ''' + gdalwarp -overwrite -s_srs EPSG:2154 -t_srs EPSG:2154 -co FORMAT=GPKG -of GTiff -tr 25.0 -25.0 \ + -te {xmin} {ymin} {xmax} {ymax} \ + -tap -cutline "PG:dbname='azalee' host=91.134.194.221 port=5432 sslmode=disable user='cgeier' password='adm1n*bdCen'" \ + -csql "SELECT site_code, geom FROM zones_humides.v_zoneshumides WHERE site_code in ('{liste_site}')" \ + "{mnt}" {out} + '''.format( + xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax, + mnt=Path_tmp+piezo_in, + out=Path_tmp+piezo_out, + liste_site="','".join(df.id_site.tolist()) + ) + system(op) + + # Soustraction de la valeur des mailles + op = ''' + gdal_calc.py --overwrite -A {mnt} -B {piezo} --outfile={out} --calc="(A-B)<=0" + '''.format(mnt=Path_tmp+mnt_out, piezo=Path_tmp+piezo_out, out=Path_tmp+piezoVSmnt) + system(op) 
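+        # The gdal_calc step above yields a binary raster: a cell equals 1 where
+        # MNT - piezo <= 0, i.e. where the interpolated water table reaches or
+        # exceeds ground level (potential connection to the aquifer), 0 elsewhere.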
+ # polygonisation du raster + op = ''' + gdal_polygonize.py {mnt} -b 1 -f "GPKG" {out} + '''.format(mnt=Path_tmp+piezoVSmnt, out=Path_tmp+poly_connect_nape) + system(op) + + # data = rio.open(Path_tmp+piezoVSmnt) + data = open_gpkg(Path_tmp+poly_connect_nape) + data.rename(columns={'DN':'connect_nappe'}, inplace=True) + data = data[data.connect_nappe > 0].copy() + # IDEM : + # gpd.sjoin(df,data).sort_values('id_site').id_site.unique() == \ + # df[df.intersects(data.unary_union)].sort_values('id_site').id_site.tolist() + + tmp = gpd.sjoin(df,data,how='left') + del tmp['index_right'] + tmp.drop_duplicates(inplace=True) + df = tmp.copy() + + df.to_file(Path_tmp+'zh_connect_nappe.gpkg',driver='GPKG') + + # Import des courbe des niveau rasteriser par interpolation + # by QGIS : Outils de traitements > Interpolation > Interpolation TIN + # piezo = rio.open(Path0+p_hydro+c_piezo_interp) + + + # from geocube.api.core import make_geocube + # piézo = open_gpkg(Path0+p_hydro+c_piezo) + # piézo = piézo[~piézo.geom.isna()] + # piézo.rename_geometry('geometry', inplace=True) + # out_grid = make_geocube( + # vector_data=piézo, + # measurements=["id"], + # resolution=(-25, 25) + # ) + # out_grid["id"].rio.to_raster(Path_tmp+"my_rasterized_column.tif") + # import xarray + # xds = xarray.open_dataarray(Path_tmp+"my_rasterized_column.tif") + # filled = xds.rio.interpolate_na(method='linear') + # filled.rio.to_raster(Path_tmp+"my_interpolate_raster.tif") + + + return df + + def idpr(df): + ''' + Calcul réalisé dans le cas où connex_molasse = 0. + Calcul de l'Indice de Développement et + de Persistance des Réseaux : + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_idpr' : + if [ %_recouvrement(idpr < 1000) > 25% ] : + ssind_idpr = 1 + else : + ssind_idpr = 0 + ''' + print('INIT : IDPR ...') + df = df.copy() + if 'ssind_molasse' not in df.columns: + df = fct_hyd.connex_molasse(df) + + import rasterio + from rasterio.features import shapes + mask = None + with rasterio.Env(): + with rasterio.open(Path0+p_hydro+c_idpr2) as src: + image = src.read(1) # first band + image[(image < 1000) & (image > -1)] = 1 + image[image >= 1000] = 0 + data = gpd.GeoDataFrame.from_features( + features = [{'properties': {'raster_val': v}, 'geometry': s} + for i, (s, v) + in enumerate( + shapes(image, mask=mask, transform=src.transform)) if v >= 0], + crs = 'EPSG:2154') + data.rename_geometry('geom', inplace=True) + + lst_data = [] + if not df[df.ssind_molasse == 0].empty: + perc = _calc_recouvrmt( + df[df.ssind_molasse == 0], + data[data.raster_val == 1] ) + lst_data = perc.loc[perc.perc_rcvmt.round(2) > 25,'id_site'] + + df['ssind_idpr'] = 0 + df.loc[df.id_site.isin(lst_data), 'ssind_idpr'] = 1 + + return df + + + def fct_hydro(df): + ''' + Attribution d'un poids à chaque zone humide + en fonction du nombre de rôles hydro-biologiques à caractères + hydrauliques et hydrologiques qu'elle remplie : + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. 
+ + Return + ---------- + 'ssind_fcthydro' : + 0 fonction = 0 + 1 fonction = 0.5 + 2 fonctions = 1 + 3 fonctions = 2 + ''' + print('INIT : Fonctions hydro-biologiques à caractères hydrauliques et hydrologiques ...') + df = df.copy() + data = zh.get_fct(id_site=df.id_site.tolist()) + data = data[(data.type == 'fct_hydro') & (data.nom_fct != 'non documenté')] + + lst_terme = ["soutien naturel d'étiage",'ralentissement du ruissellement','expansion naturelle des crues'] + d = data.loc[data.nom_fct.isin(lst_terme),['id_site','nom_fct']] + d['nb_fct'] = 1 + d = d.groupby(['id_site']).sum().reset_index() + + df['ssind_fcthydro'] = 0 + lst_data1 = d.loc[d.nb_fct == 1, 'id_site'] + lst_data2 = d.loc[d.nb_fct == 2, 'id_site'] + lst_dataSup = d.loc[d.nb_fct > 2,'id_site'] + df.loc[df.id_site.isin(lst_data1), 'ssind_fcthydro'] = 0.5 + df.loc[df.id_site.isin(lst_data2), 'ssind_fcthydro'] = 1 + df.loc[df.id_site.isin(lst_dataSup), 'ssind_fcthydro'] = 2 + + return df + + + def zse_zsnea(df): + ''' + Attribution d'un poids à chaque zone humide + en fonction de sont appartenance à une zone de sauvegarde + exploitée actuelle (zse) ou future (zsnea) : + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_fcthydro' : + if ZH.intersects(ZSNEA|ZSE): + 'ssind_fcthydro' = 1 + else : + 'ssind_fcthydro' = 0 + ''' + print('INIT : Zones de sauvegardes actuelles et futures (ZSE / ZSNEA) ...') + df = df.copy() + data1 = open_gpkg(Path0+p_phybio+c_zse,bbox=df) + data2 = open_gpkg(Path0+p_phybio+c_zsea,bbox=df) + data1 = _union_polygons_geometry(data1) + data2 = _union_polygons_geometry(data2) + if not data1.is_valid: + data1 = data1.buffer(0) + if not data2.is_valid: + data2 = data2.buffer(0) + df['zse'] = df.intersects(data1).astype(int) #.replace(1,2) + df['zsnea'] = df.intersects(data2).astype(int) + df['ssind_zse_zsnea'] = df[['zse', 'zsnea']].max(axis=1) + df.drop(columns=['zse','zsnea'], inplace=True) + return df + + + def captage_ppi(df): + ''' + Attribution d'un poids à chaque zone humide + en fonction de sa présence ou non au sein d'un périmêtre de prélèvement d'eau potable : + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_capt_ppi' : + if ZH.intersects(Captage_ppi): + 'ssind_capt_ppi' = 1 + else : + 'ssind_capt_ppi' = 0 + ''' + if isinstance(c_captage_ppi,str): + data = open_gpkg(Path0+p_hydro+c_captage_ppi,bbox=df) + else : + data = gpd.GeoDataFrame() + for c in c_captage_ppi: + data = gpd.pd.concat([ + data, + open_gpkg(Path0+p_hydro+c,bbox=df)[['geom']] + ],ignore_index=True) + df['ssind_capt_ppi'] = (df.intersects(data.unary_union) + .astype(int)) + return df + + + def bilan(df): + ''' + Somme des sous-indices des fonctions hydrauliques + et hydrologiques des zones humides : + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. 
+ + Return + ---------- + 'ind_hydro' : + sum( + ssind_eabf + ssind_distHydro + ssind_hydrout + ssind_fcthydro + ssind_pente + ssind_capt_ppi + ) + ''' + # df = fct_hyd.zone_inond(df) + df = fct_hyd.eabf(df) + df = fct_hyd.dist_reso_hydro(df) + df = fct_hyd.reghydro_out(df) + # df = fct_hyd.connex_molasse(df) + # df = fct_hyd.idpr(df) + df = fct_hyd.fct_hydro(df) + df = crit_multi_fct.pente2(df) + df = fct_hyd.captage_ppi(df) + # df = fct_hyd.zse_zsnea(df) + df['ind_hydro'] = df[ + [#'ssind_zoneinond', + 'ssind_eabf','ssind_distHydro', + 'ssind_hydrout', #'ssind_molasse','ssind_idpr', + 'ssind_fcthydro', #'ssind_zse_zsnea' + 'ssind_pente','ssind_capt_ppi' + ] + ].sum(axis=1) + df.name = 'Fct_hyd' + return df + + + +class fct_phy_bio: + ''' + Calcule l'indice et les sous-indices des fonctions physiques + et biochimiques des zones humides : + + Liste des fonctions + ---------- + captage_aep : Identification de la présence/absence + de zones de captages AEP à proximité des zones humides + par intersection. + fct_hydrobio : Attribution d'un poids à chaque zone humide + en fonction du nombre de rôles hydro-biologiques à caractères + physiques et biochimiques qu'elle remplie. + occup_sol : Pour chaque zone humide, identification de la nature + d'occupation du sol et de sa surface de recouvrement. Déduction + de la surface d'espace naturel concernée par les zonnages. + Attribution d'un poids en fonction de la surface de recouverte. + bilan : Somme des sous-indices des fonctions physiques + et biochimiques des zones humides. + ''' + + def captage_aep(df): + ''' + Identification de la présence/absence + de zones de captages à proximité des zones humides + par intersection. + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_perimcaptage' : + if ZH.intersects(captage): + 'ssind_perimcaptage' = 1 + else : + 'ssind_perimcaptage' = 0 + ''' + print('INIT : Périmètre de protection des captages AEP ...') + from pandas import concat + df = df.copy() + if isinstance(c_captage_aep,str): + data = open_gpkg(Path0+p_phybio+c_captage_aep,bbox=df) + else : + data = gpd.GeoDataFrame() + for c in c_captage_aep: + data = gpd.pd.concat([ + data, + open_gpkg(Path0+p_phybio+c,bbox=df)[['geom']] + ],ignore_index=True) + tmp = gpd.sjoin( + df, + data.loc[ + # ~data.N_INS___NO.str.contains('ABA|HS'), + :, + ['geom']], + predicate = 'intersects', + how = 'left') + lst_site = tmp[~tmp.index_right.isna()].id_site + df['ssind_perimcaptage'] = 0 + df.loc[df.id_site.isin(lst_site),'ssind_perimcaptage'] = 1 + return df + + + # def zse_zsnea(df): + # print('INIT : Zones de sauvegardes actuelles et futures (ZSE / ZSNEA) ...') + # df = df.copy() + # data1 = open_gpkg(Path0+p_phybio+c_zse,bbox=df) + # data2 = open_gpkg(Path0+p_phybio+c_zsea,bbox=df) + # data1 = _union_polygons_geometry(data1) + # data2 = _union_polygons_geometry(data2) + # if not data1.is_valid: + # data1 = data1.buffer(0) + # if not data2.is_valid: + # data2 = data2.buffer(0) + # df['zse'] = df.intersects(data1).astype(int).replace(1,2) + # df['zsnea'] = df.intersects(data2).astype(int) + # df['ssind_zse_zsnea'] = df[['zse', 'zsnea']].max(axis=1) + # df.drop(columns=['zse','zsnea'], inplace=True) + # return df + + + def fct_hydrobio(df): + ''' + Attribution d'un poids à chaque zone humide + en fonction du nombre de rôles hydro-biologiques à caractères + physiques et biochimiques qu'elle remplie : + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. 
+ + Return + ---------- + 'ssind_fcthydrobio' : + 0 fonction = 0 + 1 fonction = 1 + + fonctions = 2 + ''' + print('INIT : Fonctions hydro-biologiques à caractères physiques et biochimiques ...') + df = df.copy() + data = zh.get_fct(id_site=df.id_site.tolist()) + data = data[(data.type == 'fct_hydro') & (data.nom_fct != 'non documenté')] + + lst_terme = ["fonctions d'épuration","rôle naturel de protection contre l'érosion"] + d = data.loc[data.nom_fct.isin(lst_terme),['id_site','nom_fct']] + d['nb_fct'] = 1 + d = d.groupby(['id_site']).sum().reset_index() + + df['ssind_fcthydrobio'] = 0 + lst_data1 = d.loc[d.nb_fct == 1, 'id_site'] + lst_dataSup = d.loc[d.nb_fct > 1,'id_site'] + df.loc[df.id_site.isin(lst_data1), 'ssind_fcthydrobio'] = 1 + df.loc[df.id_site.isin(lst_dataSup), 'ssind_fcthydrobio'] = 2 + return df + + + def occup_sol(df): + ''' + Pour chaque zone humide, identification de la nature + d'occupation du sol et de sa surface de recouvrement. + Déduction de la surface d'espace naturel concernée par les zonnages. + Type d'intérêt : + 'Forêts' / 'Milieux à végétation arbustive et/ou herbacée' / 'Prairies' + Attribution d'un poids en fonction de la surface de recouverte : + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_occupsol' : + if surf_recouverte < 25% : + ssind = 0 + elif surf_recouverte in [ 25% ; 50% [ : + ssind = 0.5 + elif surf_recouverte in [ 50% ; 75% [ : + ssind = 1.5 + elif surf_recouverte in [ 75% ; 100% ] : + ssind = 2 + ''' + + init = dt.now() + print('INIT : Occupation du sol ...') + df = df.copy() + ddf = df.copy() + ddf['area_init'] = ddf.area + # data = open_gpkg(Path0+p_phybio+c_occupsol, bbox=df) + print('IMPORT DATA ...') + artif = open_gpkg(Path0+p_press+c_artif, bbox=df) + artif = artif[['ID','geom']] + + artif1 = artif.iloc[0,:] + artif1 = (gpd.GeoDataFrame(artif1).T + .set_geometry('geom') + .set_crs(crs=artif.crs.srs)) + # artif1 = gpd.GeoDataFrame(artif.iloc[0].copy(),geometry=artif.iloc[0].geom,crs=artif.crs.srs) + artif1 = (gpd.overlay(artif1,ddf, how='intersection') + .rename_geometry('geom')) + artif2 = (gpd.GeoDataFrame(artif.iloc[1:,:]) + .set_geometry('geom') + .set_crs(crs=artif.crs.srs)) + + # RPG + from pycen.ref import territoire as ter + # rpg = open_gpkg(Path0+p_phybio+c_rpg, bbox=df) + rpg = ter.rpg2021_dep_parc(bbox=df.unary_union) + lst_code = ['BOP','SPH','SPL','PPH','PRL','J6P','J6S','BTA','ROS','SBO'] + rpg = rpg[~rpg.code_cultu.isin(lst_code)] + + + print((dt.now() - init).total_seconds()) + print('CORRECTION GEOMETRY ...') + if not artif1.geom.is_valid.all() : + artif1.loc[~artif1.geom.is_valid,'geom'] = artif1.loc[~artif1.geom.is_valid,'geom'].buffer(0) + if not artif2.geom.is_valid.all() : + artif2.loc[~artif2.geom.is_valid,'geom'] = artif2.loc[~artif2.geom.is_valid,'geom'].buffer(0) + if not rpg.geom.is_valid.all() : + rpg.loc[~rpg.geom.is_valid,'geom'] = rpg.loc[~rpg.geom.is_valid,'geom'].buffer(0) + + + print((dt.now() - init).total_seconds()) + print('DATA READY ...') + print('INIT OVERLAY ...') + ddf = gpd.overlay(ddf,artif1, how='difference') + if 'GeometryCollection' in ddf.geom_type.unique(): + ddf.geom = ddf.geom.buffer(0) + ddf = gpd.overlay(ddf,rpg, how='difference') + if 'GeometryCollection' in ddf.geom_type.unique(): + ddf.geom = ddf.geom.buffer(0) + ddf = gpd.overlay(ddf,artif2, how='difference') + if 'GeometryCollection' in ddf.geom_type.unique(): + ddf.geom = ddf.geom.buffer(0) + print('END OVERLAY ...') + print((dt.now() - 
init).total_seconds())
+
+        ddf['area_end'] = ddf.area
+        ddf['perc_rcvmt'] = 100 - (100*ddf.area_end/ddf.area_init)
+        df = df.merge(ddf[['id_site','perc_rcvmt']], on='id_site', how='left')
+        df.perc_rcvmt.fillna(0,inplace=True)
+        df['perc_surfNat'] = 100 - df['perc_rcvmt']
+
+
+        # lst_terme = ['Forêts','Milieux à végétation arbustive et/ou herbacée','Prairies']
+        # d = data.loc[data.libelle_02.isin(lst_terme),]
+        # print(
+        #     ('INIT : Calcul du recouvrement de l\'occupation des sols sur les zones humides :'),
+        #     ('"Forêts","Milieux à végétation arbustive et/ou herbacée","Prairies"'))
+        # print(('ATTENTION : Les géometries de l\'occupation des sols étant complexes,'),
+        #     ('le calcul peut prendre un certain temps ...'))
+        # df = _calc_recouvrmt(df,d)
+        # print('END : Calcul du recouvrement de l\'occupation des sols sur les zones humides')
+        if 'ssind_fcthydrobio' not in df.columns:
+            df = fct_phy_bio.fct_hydrobio(df)
+
+        # Intervalles semi-ouverts [25;50[, [50;75[ et [75;100]
+        df['ssind_occupsol'] = 0
+        df.loc[(df.ssind_fcthydrobio == 0) & (df.perc_surfNat.between(25,50,inclusive='left')),'ssind_occupsol'] = 0.5
+        df.loc[(df.ssind_fcthydrobio == 0) & (df.perc_surfNat.between(50,75,inclusive='left')),'ssind_occupsol'] = 1.5
+        df.loc[(df.ssind_fcthydrobio == 0) & (df.perc_surfNat >= 75),'ssind_occupsol'] = 2
+        df.drop(columns=['perc_rcvmt','perc_surfNat'], inplace=True)
+        print('END ssind_occupsol ...')
+
+        return df
+
+
+    def bilan(df):
+        '''
+        Somme des sous-indices des fonctions physiques
+        et biochimiques des zones humides :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+            de l'étude.
+
+        Return
+        ----------
+        'ind_phybio' :
+            sum(
+                ssind_perimcaptage +
+                ssind_fcthydrobio +
+                ssind_occupsol + ssind_pente
+            )
+        '''
+        df = fct_phy_bio.captage_aep(df)
+        # df = fct_phy_bio.zse_zsnea(df)
+        df = fct_phy_bio.fct_hydrobio(df)
+        df = fct_phy_bio.occup_sol(df)
+        df = crit_multi_fct.pente2(df)
+        df['ind_phybio'] = df[
+            ['ssind_perimcaptage',
+            # 'ssind_zse_zsnea',
+            'ssind_fcthydrobio','ssind_occupsol',
+            'ssind_pente'
+            ]
+        ].sum(axis=1)
+        df.name = 'Fct_phy_bio'
+        return df
+
+
+
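+# Usage sketch for the function classes above (illustrative only: `sites` is
+# assumed to be a GeoDataFrame of the study wetlands with an 'id_site' column
+# and a 'geom' geometry column, e.g. loaded through pycen):
+#
+#     sites = fct_bio_eco.bilan(sites)   # adds ssind_* columns + 'ind_bioeco'
+#     sites = fct_hyd.bilan(sites)       # adds ssind_* columns + 'ind_hydro'
+#     sites = fct_phy_bio.bilan(sites)   # adds ssind_* columns + 'ind_phybio'
+#
+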
+class crit_multi_fct:
+    '''
+    Calcule l'indice et les sous-indices des critères
+    « multi-fonctions » des zones humides :
+
+    Liste des fonctions
+    ----------
+    surface : Calcul de la surface totale des zones humides.
+        Attribution d'un poids en fonction du résultat.
+    pente : Calcul de la pente moyenne des zones humides
+        via le MNT. Attribution d'un poids en fonction du résultat.
+    dir_exp : Ajout d'un champ dir_exp dans le tableau de sortie
+        qui sera à remplir manuellement par l'expert.
+    bilan : Rassemblement des sous-indices des critères
+        « multi-fonctions » dans un même tableau. L'indice pourra
+        être calculé lorsque le champ dir_exp sera rempli.
+    '''
+    def surface(df):
+        '''
+        Calcul de la surface totale des zones humides.
+        Attribution d'un poids en fonction du résultat :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+            de l'étude.
+
+        Return
+        ----------
+        'ssind_surf' :
+            if ZH < 1ha :
+                ssind = 0
+            elif ZH in [ 1ha ; 20ha [ :
+                ssind = 0.5
+            elif ZH in [ 20ha ; 100ha [ :
+                ssind = 1
+            elif ZH >= 100ha :
+                ssind = 1.5
+        '''
+        print('INIT : Calcul de la surface ...')
+        df = df.copy()
+        # Intervalles semi-ouverts [1;20[ et [20;100[ (surface en ha)
+        df['ssind_surf'] = 0
+        df.loc[(df.area/10000).between(1,20,inclusive='left'),'ssind_surf'] = 0.5
+        df.loc[(df.area/10000).between(20,100,inclusive='left'),'ssind_surf'] = 1
+        df.loc[df.area/10000 >= 100,'ssind_surf'] = 1.5
+        return df
+
+
+    def pente(df,seuil:int=5):
+        '''
+        Calcul de la pente moyenne des zones humides
+        via le MNT. Attribution d'un poids en fonction du résultat :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+            de l'étude.
+
+        Return
+        ----------
+        'ssind_pente' :
+            if moy(pente) < 5% :
+                ssind = 1
+            else :
+                ssind = 0
+        '''
+        print('INIT : Calcul de la pente moyenne ...')
+        dfP = df.copy()
+
+        from os import listdir, chdir, system
+        from pathlib import Path as Pathlib
+        from zipfile import ZipFile
+        from rasterstats import zonal_stats
+        home = str(Pathlib.home())
+        chdir(Path_alti+p_mltifct)
+        Dir = listdir()
+        Dir = [x for x in Dir if '.zip' in x]
+        slope = 'temp_slop.tif'
+        for i, d in enumerate(Dir):
+            names = ZipFile(d).namelist()
+            z = [z for z in names if 'MNT' in z][0]
+            system("gdaldem slope '/vsizip/{zip}/{mnt}' '{slope}' -of GTiff -b 1 -s 1.0 -p".format(zip=d,mnt=z,slope=slope))
+
+            stats = zonal_stats(dfP.geom,slope)
+            stats = gpd.pd.DataFrame(stats)
+            if i == 0 :
+                dfP[stats.columns] = stats
+            else:
+                # Agrégation dalle par dalle : moyenne pondérée par le nombre de mailles
+                tmp = gpd.pd.DataFrame({'dfP':dfP['mean']*dfP['count'], 'stats':stats['mean']*stats['count']})
+                dfP['mean'] = tmp.sum(axis=1)/(dfP['count']+stats['count'])
+                tmp = gpd.pd.DataFrame({'dfP':dfP['count'], 'stats':stats['count']})
+                dfP['count'] = tmp.sum(axis=1)
+                tmp = gpd.pd.DataFrame({'dfP':dfP['min'], 'stats':stats['min']})
+                dfP['min'] = tmp.min(axis=1)
+                tmp = gpd.pd.DataFrame({'dfP':dfP['max'], 'stats':stats['max']})
+                dfP['max'] = tmp.max(axis=1)
+
+        system('rm {slope}'.format(slope=slope))
+        chdir(home)
+        dfP['ssind_pente'] = 0
+        dfP.loc[dfP['mean'] < seuil, 'ssind_pente'] = 1
+
+        df = df.merge(dfP[['id_site','ssind_pente']], on=['id_site'], how='left')
+
+        return df
+
+    def pente2(df,seuil:int=5):
+        sql = """
+        SELECT
+            site_code,
+            geom,
+            (slope).count slope_count,
+            (slope).sum slope_sum,
+            (slope).mean
slope_mean, + (slope).stddev slope_stddev, + (slope).min slope_min, + (slope).max slope_max + FROM ( + SELECT + v.site_code, + v.geom, + ST_SummaryStats(ST_Slope(ST_Union(mm.rast),1,'32BF','DEGREES')) slope + --,ST_SummaryStats(ST_Union(mm.rast)) sum2 + --,(ST_Intersection(mm.rast,1,v.geom)) rast_geom + FROM ref_territoire.mnt_5m mm + CROSS JOIN + (SELECT + site_code, geom + FROM zones_humides.v_zoneshumides vz + WHERE site_code in {list_zh} + ) v + WHERE ST_Intersects(mm.rast,1,v.geom) + GROUP BY 1,2 + ) t; + """.format(list_zh=tuple(df.id_site)) + data = (gpd.read_postgis(sql,pycen.con). + rename(columns={'site_code':'id_site'})) + data['ssind_pente'] = 0 + data.loc[data['slope_mean'] < seuil, 'ssind_pente'] = 1 + + return df.merge(data[['id_site','ssind_pente']], on=['id_site'], how='left') + + + def dir_exp(df): + ''' + Ajout d'un champ dir_exp dans le tableau de sortie + qui sera à remplir manuellement par celui-ci : + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_direxp' : None + ''' + df = df.copy() + data = gpd.pd.read_csv(P_expert + c_expert,sep=';') + data = data[['site_code','Note_DE_fonction']] + data.columns = ['site_code','ssind_direxp'] + data.ssind_direxp = data.ssind_direxp.astype(float) + df = df.merge(data,right_on='site_code',left_on='id_site') + del df['site_code'] + # df['ssind_direxp'] = None + return df + + + def bilan(df): + ''' + Rassemblement des sous-indices des criètes + « multi-fonctions » dans un même tableau. L'indice pourra + être calculé lorque le champs dir_exp sera remplie : + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ind_multifct' : + sum( + ssind_surf + + ssind_pente + 'ssind_direxp' + ) + ''' + # df = crit_multi_fct.surface(df) + df = crit_multi_fct.pente2(df) + # df['ssind_total'] = df[ + # ['ssind_surf','ssind_pente'] + # ].sum(axis=1) + df = crit_multi_fct.dir_exp(df) + df['ind_multifct'] = df[ + [#'ssind_surf', + 'ssind_pente','ssind_direxp'] + ].sum(axis=1) + df.name = 'Crit_multi_fct' + return df + + + +class pression: + ''' + Calcule l'indice et les sous-indices des pressions + exercées sur les zones humides : + + Liste des fonctions + ---------- + artificialisation : Récupération des résultats des pressions directes + d'artificialisation Rhoméo I12. Application de la discrimination + de Jenks pour catégoriser les résultats en 3 classes [0, 0.5, 1]. + artif_indir : Récupération des résultats des pressions indirectes + d'artificialisation Rhoméo I12. Application de la discrimination + de Jenks pour catégoriser les résultats en 3 classes [0, 0.5, 1]. + urbanisation : Récupération des résultats des pressions directes + d'urbanisation Rhoméo I12. Application de la discrimination + de Jenks pour catégoriser les résultats en 4 classes [0, 0.5, 1, 1.5]. + urbani_indir : Récupération des résultats des pressions indirectes + d'urbanisation Rhoméo I12. Application de la discrimination + de Jenks pour catégoriser les résultats en 4 classes [0, 0.5, 1, 1.5]. + pressAgricole : Récupération des résultats des pressions directes + agricoles Rhoméo I13. Application de la discrimination + de Jenks pour catégoriser les résultats en 3 classes [0, 0.5, 1]. + pressAgri_indir : Récupération des résultats des pressions indirectes + agricoles Rhoméo I13. Application de la discrimination + de Jenks pour catégoriser les résultats en 3 classes [0, 0.5, 1]. 
+    projet_plu_U : Intersections des zones relevant du projet d'Urbanisme (PLU)
+        avec les polygones de l'étude. Considération du champ
+        Typezone == 'U'. Attribution des points en cas d'intersections.
+    projet_scot : En cas d'absence de PLU, recherche d'espaces de
+        développements potentiels aux alentours des sites (SCOT).
+        Attribution des points en cas d'intersections.
+    alpage : Intersections des zones d'alpage du RPG
+        avec les polygones de l'étude.
+    conflit_redi : Intersections des zones de conflits redi
+        (Points, Lignes) avec les polygones de l'étude. Utilisation
+        d'un buffer de 100m.
+    prelev_eau : Identification de la proximité des zones humides
+        avec des sources de captages. Application d'un buffer de 50m.
+        Identification par intersection.
+    icpe : Identification de la proximité des zones humides
+        avec des installations classées. Application d'un buffer de 500m.
+        Identification par intersection.
+    ouvrage : Identification de la présence d'ouvrages et de dépôts
+        au sein des zones humides. Identification par intersection.
+    vulnerabilite : Identification de la proximité des zones humides
+        avec des espèces exotiques envahissantes. Application d'un buffer
+        de 100m. Identification par intersection.
+    press_urba : Calcul de l'indice de pression d'urbanisation
+        (ssind_artif, ssind_urban, ssind_plu).
+    press_agri : Calcul de l'indice de pression agricole
+        (ssind_agrico, ssind_alpage).
+    bilan : Rassemblement des sous-indices des pressions directes
+        et indirectes dans un même tableau.
+    '''
+    # indesirable : Identification de la présence d'espèces indésirables
+    # au sein des zones humides. Identification par intersection
+    # à partir d'une couche fournie par le commanditaire
+    def artificialisation(df,pres_type:str='both'):
+        '''
+        Récupération des résultats des pressions d'artificialisation
+        Rhoméo I12 (directes, indirectes ou les deux selon `pres_type`).
+        Application de la discrimination de Jenks pour catégoriser
+        les résultats en 3 classes [0, 0.5, 1] :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+            de l'étude.
+        pres_type : str. Type de pression à calculer : `direct`,
+            `indirect` ou `both`. Default : `both`
+
+        Return
+        ----------
+        'ssind_artif' = 0|0.5|1
+        '''
+        ind_col = ['site_code']
+        if pres_type == 'direct':
+            ind_col += ['presdirect_artif']
+        elif pres_type == 'indirect':
+            ind_col += ['presindir_artif']
+        elif pres_type == 'both':
+            ind_col += ['presdirect_artif','presindir_artif']
+        else:
+            raise ValueError('Argument `pres_type` invalid !')
+
+        print('INIT : Artificialisation ...')
+
+        df = df.copy()
+        # data = gpd.pd.read_csv(path0 + c_rhomeo)
+        # data = open_gpkg(Path0+p_press+c_artif, bbox=df)
+        # data.set_index('site_code', inplace=True)
+        # data.presdirect_artif = data.presdirect_artif.round()
+        data = (get_rhomeo_indicateur(c_rhomeo,ind_col)
+            .set_index('site_code')
+            .round()
+            .sum(axis=1)
+            .to_frame('pres_artif'))
+
+        tmp = jenks(
+            data=data[['pres_artif']],
+            col='pres_artif',
+            labels=[0, 0.5, 1])
+        df = df.merge(tmp,how='left',left_on='id_site',right_index=True)
+        df.rename(columns={'pres_artif':'ssind_artif'}, inplace=True)
+
+        return df
+
+    def urbanisation(df,pres_type:str='both'):
+        '''
+        Récupération des résultats des pressions d'urbanisation
+        Rhoméo I12 (directes, indirectes ou les deux selon `pres_type`).
+        Application de la discrimination de Jenks pour catégoriser
+        les résultats en 4 classes [0, 0.5, 1, 2] :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+            de l'étude.
+        pres_type : str. Type de pression à calculer : `direct`,
+            `indirect` ou `both`. Default : `both`
+
+        Return
+        ----------
+        'ssind_urban' = 0|0.5|1|2
+        '''
+        ind_col = ['site_code']
+        if pres_type == 'direct':
+            ind_col += ['presdirect_urba']
+        elif pres_type == 'indirect':
+            ind_col += ['presindir_urba']
+        elif pres_type == 'both':
+            ind_col += ['presdirect_urba','presindir_urba']
+        else:
+            raise ValueError('Argument `pres_type` invalid !')
+
+        print('INIT : Urbanisation ...')
+
+        df = df.copy()
+        # data = gpd.pd.read_csv(path0 + c_rhomeo)
+        data = (get_rhomeo_indicateur(c_rhomeo,ind_col)
+            .set_index('site_code')
+            .round()
+            .sum(axis=1)
+            .to_frame('pres_urba'))
+
+        # data.presdirect_urba = data.presdirect_urba.round()
+        tmp = jenks(
+            data=data[['pres_urba']],
+            col='pres_urba',
+            labels=[0, 0.5, 1, 2])
+        df = df.merge(tmp,how='left',left_on='id_site',right_index=True)
+        df.rename(columns={'pres_urba':'ssind_urban'}, inplace=True)
+        # df['ssind_urbani'] = None
+        return df
+
+    def pressAgricole(df,pres_type:str='both'):
+        '''
+        Récupération des résultats des pressions agricoles
+        Rhoméo I13 (directes, indirectes ou les deux selon `pres_type`).
+        Application de la discrimination de Jenks pour catégoriser
+        les résultats en 3 classes [0, 0.5, 1] :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+            de l'étude.
+        pres_type : str. Type de pression à calculer : `direct`,
+            `indirect` ou `both`. Default : `both`
+
+        Return
+        ----------
+        'ssind_agrico' = 0|0.5|1
+        '''
+        ind_col = ['site_code']
+        if pres_type == 'direct':
+            ind_col += ['presdirect_agri']
+        elif pres_type == 'indirect':
+            ind_col += ['presindir_agri']
+        elif pres_type == 'both':
+            ind_col += ['presdirect_agri','presindir_agri']
+        else:
+            raise ValueError('Argument `pres_type` invalid !')
+
+        print('INIT : Pressions agricoles ...')
+        df = df.copy()
+        # data = open_gpkg(Path0+p_press+c_agric, bbox=df)
+        # tmp = _calc_recouvrmt(df,data)
+
+        # tmp['ssind_agri'] = 0
+        # tmp.loc[tmp.perc_rcvmt > 5,'ssind_agri'] = 0.5
+        # tmp.loc[tmp.perc_rcvmt > 10,'ssind_agri'] = 1
+        # df = df.merge(tmp[['id_site','ssind_agri']], on=['id_site'],how='left')
+        # data = gpd.pd.read_csv(path0 + c_rhomeo)
+        # data.set_index('site_code', inplace=True)
+        # data.presdirect_agri = data.presdirect_agri.round()
+
+        data = (get_rhomeo_indicateur(c_rhomeo,ind_col)
+            .set_index('site_code')
+            .round()
+            .sum(axis=1)
+            .to_frame('pres_agri'))
+
+        tmp = jenks(
+            data=data[['pres_agri']],
+            col='pres_agri',
+            labels=[0, 0.5, 1])
+        df = df.merge(tmp,how='left',left_on='id_site',right_index=True)
+        df.rename(columns={'pres_agri':'ssind_agrico'}, inplace=True)
+
+        return df
+
+
+    def projet_plu(df):
+        '''
+        Intersections des zones relevant du projet d'Urbanisme (PLU)
+        avec les polygones de l'étude. Considération du champ
+        Typezone dans ('U', 'AU'). Attribution des points en cas
+        d'intersections :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+            de l'étude.
+ + Return + ---------- + + 'ssind_plu' = 0|1 + ''' + Typezone = ['U','AU'] + df = df.copy() + if isinstance(c_urba_plu,str): + data = (open_gpkg(Path0+p_press+c_urba_plu,bbox=df) + .rename(columns={'typezone': 'Typezone'})) + else : + data = gpd.GeoDataFrame() + for c in c_urba_plu: + data = gpd.pd.concat([ + data, + (open_gpkg(Path0+p_press+c,bbox=df)[['geom']] + .rename(columns={'typezone': 'Typezone'})) + ],ignore_index=True) + data.columns = data.columns.str.lower() + d = data.typezone.str.contains('|'.join(Typezone)) + tmp = gpd.sjoin( + df, + data.loc[d,['geom']], + predicate='intersects', how='left') + lst = tmp.loc[~tmp.index_right.isna(),'id_site'].tolist() + df['ssind_plu'] = 0 + df.loc[df.id_site.isin(lst),'ssind_plu'] = 1 + + return df + + + def projet_scot(df): + ''' + En cas d'absence de PLU, recherche d'espaces de + développements potentiels au alentours des sites + (SCOT). Attribution des points en cas d'intersections: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + + 'ssind_scot' = 0|1 + ''' + df = df.copy() + # Ouverture des couches PLU + d = open_gpkg(Path0+p_press+c_urba_plu, bbox=df) + d.rename(columns={'typezone': 'Typezone'}, inplace=True) + dataPLU = d[['Typezone','geom']] + tmpPLU = gpd.sjoin(df,dataPLU[['geom']],predicate='intersects', how='left') + lstPLU = tmpPLU.loc[~tmpPLU.index_right.isna(),'id_site'].tolist() + + # if 'ssind_pluAU' not in df.columns: + # df = pression.projet_plu_AU(df) + # Si pas de PLU concerner par les sites, intersection des SCOT + dataSCOT = open_gpkg(Path0+p_press+c_urba_scot, bbox=df) + tmp = gpd.sjoin(df,dataSCOT[['geom']],predicate='intersects', how='left') + lstSCOT = tmp.loc[~tmp.index_right.isna(),'id_site'].tolist() + df['ssind_scot'] = 0 + df.loc[(~df.id_site.isin(lstPLU))&(df.id_site.isin(lstSCOT)),'ssind_scot'] = 1 + + return df + + def alpage(df): + ''' + Intersections des zones d'alpage au RPG: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + + 'ssind_alpage' = 0 + if ZH.intersects(RPG) : + 'ssind_alpage' = 2 + ''' + lst_code = ['BOP','SPH','SPL'] + return (intersects_rpg(df,lst_code) + .rename(columns={'rpg':'ssind_alpage'})) + + def conflit_redi(df): + ''' + Intersections des zones de conflits redi (Points, Lignes) + avec les polygones de l'étude. Utilistaion d'un buffer + de 100m: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + + 'ssind_confli' = 0 + if conflit : + 'ssind_confli' = 0.5 + ''' + df = df.copy() + lign = open_gpkg(Path0+p_press+c_lign_confliredi, bbox=df) + poin = open_gpkg(Path0+p_press+c_poin_confliredi, bbox=df) + lign.columns = lign.columns.str.lower() + poin.columns = poin.columns.str.lower() + lign.geom = lign.geom.buffer(50) + poin.geom = poin.geom.buffer(50) + data = gpd.pd.concat([ + lign[['id','geom']], + poin[['id','geom']] + ]) + data = gpd.GeoDataFrame(data,geometry='geom',crs=lign.crs.srs) + geom = _union_polygons_geometry(data) + if not geom.is_valid: + geom = geom.buffer(0) + df['ssind_confli'] = df.intersects(geom).astype(int).replace(1,0.5) + return df + + + def prelev_eau(df): + ''' + Identification da la proximité des zones humides + avec des sources de captages. + Application d'un buffer de 50m. + Identification par intersection : + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. 
+
+        Return
+        ----------
+        'ssind_prlvmteau' :
+            ZH.geom = ZH.buffer(50)
+            if ZH.intersects(prelev_eau):
+                ssind = 1
+            else :
+                ssind = 0
+        '''
+        print("INIT : Prélèvement d'eau ...")
+        df_buf = df.copy()
+        df_buf.geom = df.buffer(50)
+        data = open_gpkg(Path0+p_press+c_captag,bbox=df)
+        if data.geometry.name != 'geom':
+            data.rename_geometry('geom', inplace=True)
+        data.reset_index(drop=True, inplace=True)
+        mp = MultiPoint(data.geom)
+        df_buf['ssind_prlvmteau'] = df_buf.intersects(mp).astype(int)
+        df = df.merge(df_buf[['id_site','ssind_prlvmteau']], on=['id_site'])
+
+        return df
+
+
+    def icpe(df):
+        '''
+        Identification de la proximité des zones humides
+        avec des installations classées.
+        Application d'un buffer de 500m.
+        Identification par intersection :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+        de l'étude.
+
+        Return
+        ----------
+        'ssind_icpe' :
+            ZH.geom = ZH.buffer(500)
+            if ZH.intersects(install_classee):
+                ssind = 0.5
+            else :
+                ssind = 0
+        '''
+        print('INIT : ICPE ...')
+        tmp = df.copy()
+        tmp.geom = tmp.buffer(500)
+        data = open_gpkg(Path0+p_press+c_iClass)
+        data = MultiPoint(data.geom)
+        tmp['ssind_icpe'] = tmp.intersects(data).astype(int).replace(1,0.5)
+        df = df.merge(tmp[['id_site','ssind_icpe']], on=['id_site'],how='left')
+
+        return df
+
+    def barrage(df):
+        '''
+        Identification de la présence de barrages, seuils (ROE)
+        au sein des zones humides. Identification par intersection :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+        de l'étude.
+
+        Return
+        ----------
+        'ssind_barage' :
+            if ZH.intersects(ouvrages):
+                ssind = 0.5
+            else :
+                ssind = 0
+        '''
+        print('INIT : Barrage ...')
+        df = df.copy()
+        data = open_gpkg(Path0+p_press+c_barage, bbox=df)
+        data = data.unary_union
+        # data = MultiPoint(data.geom)
+        df['ssind_barage'] = df.intersects(data).astype(int).replace(1,0.5)
+        return df
+
+    def ouvrage(df):
+        '''
+        Identification de la présence d'ouvrages et de dépôts
+        au sein des zones humides. Identification par intersection :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+        de l'étude.
+
+        Return
+        ----------
+        'ssind_ouvrag' :
+            if ZH.intersects(ouvrages):
+                ssind = 0.5
+            else :
+                ssind = 0
+        '''
+        print('INIT : Ouvrage ...')
+        df = df.copy()
+        data = open_gpkg(Path0+p_press+c_ouvrag, bbox=df)
+        data = data.unary_union
+        # data = MultiPoint(data.geom)
+        df['ssind_ouvrag'] = df.intersects(data).astype(int).replace(1,0.5)
+
+        return df
+
+
+    def vulnerabilite(df):
+        '''
+        Identification de la proximité des zones humides
+        avec des espèces exotiques envahissantes.
+        Application d'un buffer de 100m.
+        Identification par intersection :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+        de l'étude.
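+
+        Exemple
+        ----------
+        Esquisse minimale (données jouets) de la concaténation
+        de plusieurs couches EEE puis du test d'intersection
+        sur les sites bufferisés :
+
+        >>> import geopandas as gpd
+        >>> from shapely.geometry import Point, box
+        >>> sites = gpd.GeoDataFrame({'id_site': ['A']},
+        ...     geometry=[box(0, 0, 10, 10)], crs=2154)
+        >>> eee1 = gpd.GeoDataFrame(geometry=[Point(50, 5)], crs=2154)
+        >>> eee2 = gpd.GeoDataFrame(geometry=[Point(300, 300)], crs=2154)
+        >>> eee = gpd.pd.concat([eee1, eee2], ignore_index=True)
+        >>> buf = sites.copy()
+        >>> buf.geometry = buf.buffer(100)
+        >>> buf.intersects(eee.unary_union).astype(int).tolist()
+        [1]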
+
+        Return
+        ----------
+        'ssind_vulnerab' :
+            ZH.geom = ZH.buffer(100)
+            if ZH.intersects(esp_exo_envahi):
+                ssind = 1
+            else :
+                ssind = 0
+        '''
+        print('INIT : Vulnérabilité ...')
+        df_buf = df.copy()
+        df_buf.geom = df_buf.buffer(100)
+        lst_term = ['Buddleia','Renouee','Solidage']
+        data1 = open_gpkg(Path0+p_press+c_invas, bbox=df_buf)
+        data1 = data1[data1.Espece.isin(lst_term)]
+        data2 = open_gpkg(Path0+p_press+c_fallo, bbox=df_buf)
+        data3 = open_gpkg(Path0+p_press+c_cd38_eee, bbox=df_buf, layer='Renouée')
+        data4 = open_gpkg(Path0+p_press+c_cd38_eee, bbox=df_buf, layer='Ambroisie')
+        data = gpd.pd.concat(
+            [data1[['geom']],data2[['geom']],data3[['geom']],data4[['geom']]],
+            ignore_index=True)
+        data.reset_index(inplace=True)
+        # Jointure spatiale sur les sites bufferisés (100m),
+        # conformément au descriptif ci-dessus.
+        tmp = gpd.sjoin(df_buf,data[['geom']],predicate='intersects', how='left')
+        lst = tmp.loc[~tmp.index_right.isna(),'id_site'].tolist()
+        df_buf['ssind_vulnerab'] = 0
+        df_buf.loc[df_buf.id_site.isin(lst),'ssind_vulnerab'] = 1
+
+        # get_sicen2 : observations SICEN dans la bbox de df_buf
+        # dont le nom latin contient un terme de lst_term
+        lst_term = [
+            'Solidago gigantea','Reynoutria','Buddleja davidii',
+            'Impatiens glandulifera','Ambrosia artemisiifolia']
+        from shapely.geometry import box
+        from geopandas import read_postgis
+        from pycen.params import sicen_con as con
+        bbox = box(*df_buf.total_bounds)
+        geom_col = 'geom'
+        schema_sicen = 'saisie'
+        table_sicen = 'vm_synthese_observations'
+        sql = """SELECT id_obs, date_obs, regne, classe, ordre, nom_latin, nom_vern, geom
+        FROM {sch}.{tab}
+        WHERE ST_Intersects ({geom_col}, 'SRID={epsg};{poly}')
+        AND nom_latin LIKE ANY (array{array});""".format(
+            sch=schema_sicen,
+            tab=table_sicen,
+            array = ['%%{}%%'.format(t) for t in lst_term],
+            geom_col=geom_col,
+            epsg=df_buf.crs.srs.split(':')[1],
+            poly=bbox
+        )
+        sicen = read_postgis(
+            sql = sql,
+            con = con)
+        sicen_union = sicen.unary_union
+        df_buf.loc[df_buf.ssind_vulnerab == 0, 'ssind_vulnerab'] = \
+            df_buf[df_buf.ssind_vulnerab == 0].intersects(sicen_union).astype(int)
+
+        df = df.merge(df_buf[['id_site','ssind_vulnerab']], on=['id_site'],how='left')
+
+        return df
+
+
+    def press_urba(df):
+        '''
+        Calcul de l'indice de pression liée à l'urbanisation:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+        de l'étude.
+
+        Return
+        ----------
+        'ind_pressURBAN' :
+            sum(
+                ssind_artif + ssind_urban + ssind_plu
+            )
+        '''
+        df = pression.artificialisation(df,pres_type='both')
+        df = pression.urbanisation(df,pres_type='both')
+        df = pression.projet_plu(df)
+        # df = pression.conflit_redi(df)
+        # df = pression.icpe(df)
+        # df = pression.prelev_eau(df)
+        # df = pression.ouvrage(df)
+        # df = pression.vulnerabilite(df)
+
+        df['ind_pressURBAN'] = df[
+            ['ssind_artif','ssind_urban',
+             'ssind_plu']].sum(axis=1)
+        df.name = 'Pression_urbanisation'
+
+        return df
+
+
+    def press_agri(df):
+        '''
+        Calcul de l'indice de pression agricole:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+        de l'étude.
+
+        Return
+        ----------
+        'ind_pressAGRI' :
+            sum(
+                ssind_agrico + ssind_alpage
+            )
+        '''
+        df = pression.pressAgricole(df,pres_type='both')
+        df = pression.alpage(df)
+
+        # df = pression.dir_exp(df)
+        df['ind_pressAGRI'] = df[
+            ['ssind_agrico','ssind_alpage']].sum(axis=1)
+        df.name = 'Pression_agricole'
+
+        return df
+
+
+
+    def press_autre(df):
+        '''
+        Calcul de l'indice des autres pressions:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+        de l'étude.
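+
+        Exemple
+        ----------
+        Esquisse minimale du motif « ne sommer que les sous-indices
+        effectivement présents » utilisé ci-dessous (colonnes jouets) :
+
+        >>> import pandas as pd
+        >>> d = pd.DataFrame({'ssind_confli': [0.5, 0],
+        ...                   'ssind_icpe':   [0,   0.5]})
+        >>> cols = [c for c in
+        ...     ('ssind_confli','ssind_barage','ssind_ouvrag',
+        ...      'ssind_prlvmteau','ssind_icpe','ssind_direxp')
+        ...     if c in d.columns]
+        >>> d[cols].sum(axis=1).tolist()
+        [0.5, 0.5]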
+
+        Return
+        ----------
+        'ind_pressAutr' :
+            sum(
+                ssind_confli + ssind_barage +
+                ssind_ouvrag + ssind_prlvmteau +
+                ssind_icpe + ssind_direxp
+            )
+        '''
+        df = pression.conflit_redi(df)
+        df = pression.barrage(df)
+        df = pression.ouvrage(df)
+        df = pression.prelev_eau(df)
+        df = pression.icpe(df)
+        try:
+            df = pression.dir_exp(df)
+        except Exception:
+            print('No dir_exp file')
+
+        # Ne somme que les sous-indices réellement calculés
+        calc_ind = [c for c in
+            ('ssind_confli','ssind_barage','ssind_ouvrag',
+             'ssind_prlvmteau','ssind_icpe','ssind_direxp')
+            if c in df.columns]
+        # df = pression.dir_exp(df)
+        df['ind_pressAutr'] = df[calc_ind].sum(axis=1)
+        df.name = 'Pression_autre'
+
+        return df
+
+    def dir_exp(df):
+        '''
+        Ajout d'un champ dir_exp dans le tableau de sortie,
+        à remplir manuellement à dire d'expert :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+        de l'étude.
+
+        Return
+        ----------
+        'ssind_direxp' : None
+        '''
+        df = df.copy()
+        data = gpd.pd.read_csv(P_expert + c_expert,sep=';')
+        data = data[['site_code','Note_DE_pression']]
+        data.columns = ['site_code','ssind_direxp']
+        data.ssind_direxp = data.ssind_direxp.astype(float)
+        df = df.merge(data,right_on='site_code',left_on='id_site')
+        del df['site_code']
+        # df['ssind_direxp'] = None
+        return df
+
+
+    def bilan(df):
+        '''
+        Rassemblement des sous-indices des critères
+        « pression » dans un même tableau. L'indice pourra
+        être calculé lorsque le champ dir_exp sera rempli :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+        de l'étude.
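+
+        Exemple
+        ----------
+        Esquisse minimale de la composition finale des indices
+        (valeurs jouets) :
+
+        >>> import pandas as pd
+        >>> d = pd.DataFrame({'ind_pressURBAN': [2.0],
+        ...                   'ind_pressAGRI':  [1.0],
+        ...                   'ind_pressAutr':  [0.5]})
+        >>> d[['ind_pressURBAN','ind_pressAGRI','ind_pressAutr']].sum(axis=1).tolist()
+        [3.5]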
+
+        Return
+        ----------
+        'ind_pression' :
+            sum(
+                ind_pressURBAN + ind_pressAGRI + ind_pressAutr
+            )
+        '''
+        df = pression.press_urba(df)
+        df = pression.press_agri(df)
+        df = pression.press_autre(df)
+        # df['ssind_total'] = df[
+        #     ['ssind_artif','ssind_urban','ssind_plu',
+        #      'ssind_agrico','ssind_alpage',
+        #      'ssind_confli','ssind_barage','ssind_ouvrag',
+        #      'ssind_prlvmteau','ssind_icpe','ssind_direxp']
+        # ].sum(axis=1)
+        df['ind_pression'] = df[
+            ['ind_pressURBAN','ind_pressAGRI','ind_pressAutr']
+        ].sum(axis=1)
+        df.name = 'Pression'
+        return df
+
+def priorisation(data,titre,fct,pss):
+    '''
+    Croisement de la classe de fonction (`fct`) et de la classe
+    de pression (`pss`) pour attribuer un niveau de priorité
+    P1 à P5 (P5 par défaut) :
+    '''
+    data = data.copy()
+    data[titre] = None
+    # Grille alternative (désactivée) :
+    # data.loc[(data[fct]=='fort')&(data[pss]=='fort'),titre] = 'P1'
+    # data.loc[(data[fct]=='fort')&(data[pss]=='moyen'),titre] = 'P2'
+    # data.loc[(data[fct]=='moyen')&(data[pss]=='fort'),titre] = 'P2'
+    # data.loc[(data[fct]=='moyen')&(data[pss]=='moyen'),titre] = 'P2'
+    # data.loc[(data[fct]=='faible')&(data[pss]=='faible'),titre] = 'P4'
+    data.loc[(data[fct]=='faible')&(data[pss]=='fort'),titre] = 'P1'
+    data.loc[(data[fct]=='moyen')&(data[pss]=='fort'),titre] = 'P1'
+    data.loc[(data[fct]=='fort')&(data[pss]=='fort'),titre] = 'P3'
+    data.loc[(data[fct]=='faible')&(data[pss]=='moyen'),titre] = 'P2'
+    data.loc[(data[fct]=='moyen')&(data[pss]=='moyen'),titre] = 'P2'
+    data.loc[(data[fct]=='fort')&(data[pss]=='moyen'),titre] = 'P4'
+    data.loc[(data[fct]=='moyen')&(data[pss]=='faible'),titre] = 'P4'
+    data.loc[data[titre].isna(),titre] = 'P5'
+    return data[titre]
+
+
+if __name__ == '__main__':
+
+    from datetime import datetime as dt
+    from pandas import read_table
+    from pycen import zh
+    from os import listdir
+    zh = zh()
+
+    # Récupération de la liste des zones concernées
+    init = dt.now()
+    zone = gpd.read_file(Path0+Path_tmp+"../Zone d'étude/Perimetre etude/PGSZH_bell_zonage.shp")
+    v_zh = zh.v_zoneshumides()
+    is_etude = v_zh.intersects(zone.unary_union)
+    lst_idsite = v_zh[is_etude].site_code.tolist()
+    # lst_idsite = zone.site_code.tolist()
+    sit = zh.get_sitesGeom(id_site=lst_idsite, last_update=True)
+    # sit = zh.get_sitesGeom(last_update=True)
+
+    if not sit.is_valid.all():
+        sit.loc[~sit.is_valid, 'geom'] = sit.loc[~sit.is_valid].buffer(0)
+    df = sit[['id_site', 'geom']].copy()
+
+    # Définition des pressions et fonctions de l'étude
+    df_bio = fct_bio_eco.bilan(df)
+    print((dt.now() - init).total_seconds())
+    df_hyd = fct_hyd.bilan(df)
+    print((dt.now() - init).total_seconds())
+    df_phy = fct_phy_bio.bilan(df)
+    print((dt.now() - init).total_seconds())
+    # df_mlt = crit_multi_fct.bilan(df)
+    # print((dt.now() - init).total_seconds())
+    df_pre = pression.bilan(df)
+    print((dt.now() - init).total_seconds())
+    lst_df = [df_bio,df_hyd,df_phy,df_pre]
+    bilan = sit[['id_site']].copy()
+    for d in lst_df:
+        ind_col = d.columns[d.columns.str.startswith('ind')]
+        bilan = bilan.merge(d[['id_site', *ind_col]], on=['id_site'])
+
+    cols_ind = bilan.columns[bilan.columns.str.startswith('ind')]
+    ind_pres = df_pre.columns[df_pre.columns.str.startswith('ind')]
+    bilan['ind_fct'] = bilan[cols_ind.drop(ind_pres)].sum(axis=1)
+    bilan['indice'] = bilan[['ind_fct','ind_pression']].sum(axis=1)
+    bilan.name = 'Bilan'
+    print((dt.now() - init).total_seconds())
+
+    # jenks(data, col, labels)
+    # Normalisation des notes via la méthode de classification de Jenks
+    enjeux = bilan[['id_site']].copy()
+    enjeux['clss_bioeco'] = jenks(bilan,'ind_bioeco',['faible','moyen','fort'])
+    enjeux['clss_hydro'] = jenks(bilan,'ind_hydro',['faible','moyen','fort'])
+    enjeux['clss_phybio'] = 
jenks(bilan,'ind_phybio',['faible','moyen','fort']) + # enjeux['clss_multifct'] = jenks(bilan,'ind_multifct',['faible','moyen','fort']) + # enjeux['clss_pressURBAN'] = jenks(bilan,'ind_pressURBAN',['faible','moyen','fort']) + # enjeux['clss_pressAGRI'] = jenks(bilan,'ind_pressAGRI',['faible','moyen','fort']) + # enjeux['clss_pressAutr'] = jenks(bilan,'ind_pressAutr',['faible','moyen','fort']) + enjeux['clss_fct'] = jenks(bilan,'ind_fct',['faible','moyen','fort']) + enjeux['clss_pression'] = jenks(bilan,'ind_pression',['faible','moyen','fort']) + + # Priorisation des enjeux + enjeux['enjeu_bioeco'] = priorisation(data=enjeux,titre='enjeu_bioeco',fct='clss_bioeco',pss='clss_pression') + enjeux['enjeu_hydro'] = priorisation(data=enjeux,titre='enjeu_hydro',fct='clss_hydro',pss='clss_pression') + enjeux['enjeu_phybio'] = priorisation(data=enjeux,titre='enjeu_phybio',fct='clss_phybio',pss='clss_pression') + enjeux['enjeu_bilan'] = priorisation(data=enjeux,titre='enjeu_bilan',fct='clss_fct',pss='clss_pression') + + enjeux.name = 'Enjeux' + + # Récupération des bornes de chaques classes + cols_enjeu = enjeux.columns + cols_enjeu = cols_enjeu[cols_enjeu.str.contains('enjeu')] + borne = gpd.pd.DataFrame( + columns=enjeux.columns.drop(['id_site',*cols_enjeu]), # ,'enjeu_bilan' + index=['faible','moyen','fort']) + for col in borne.columns: + sfx = col.split('_')[1] + tmp = gpd.pd.merge( + bilan[['id_site','ind_'+sfx]], + enjeux[['id_site','clss_'+sfx]], + on = 'id_site' + ) + for idx in borne.index: + borne.loc[borne.index==idx,col] = str([ + tmp[tmp[col]==idx]['ind_'+sfx].min(), + tmp[tmp[col]==idx]['ind_'+sfx].max() + ]) + borne.index.name = 'classe' + borne.reset_index(inplace=True, drop=False) + borne.name = 'Borne des classes enjeux' + + # Constitution d'un dictionnaire de tableau + dict_dfs = {} + for ddf in [borne, enjeux, bilan, *lst_df]: + dict_dfs[ddf.name] = ddf + + # Ecriture du multi-tableau des résultats + print((dt.now() - init).total_seconds()) + pycen.write_bilan(dict_dfs, PATH_OUT) + + # Jointure des géometries sur Bilan et Enjeux + bilan = bilan.merge(df,how='left',on='id_site') + bilan = bilan.set_geometry('geom') + bilan.rename(columns={'id_site':'site_code'}, inplace=True) + bilan.name = 'Bilan' + enjeux = enjeux.merge(df,how='left',on='id_site') + enjeux = enjeux.set_geometry('geom') + enjeux.rename(columns={'id_site':'site_code'}, inplace=True) + enjeux.name = 'Enjeux' + + # Ecriture du géopackage + df_bio.to_file(PATH_OUT[:-4]+'gpkg', layer=df_bio.name,driver='GPKG') + df_hyd.to_file(PATH_OUT[:-4]+'gpkg', layer=df_hyd.name,driver='GPKG') + df_phy.to_file(PATH_OUT[:-4]+'gpkg', layer=df_phy.name,driver='GPKG') + # df_mlt.to_file(PATH_OUT[:-4]+'gpkg', layer=df_mlt.name,driver='GPKG') + df_pre.to_file(PATH_OUT[:-4]+'gpkg', layer=df_pre.name,driver='GPKG') + enjeux.to_file(PATH_OUT[:-4]+'gpkg', layer=enjeux.name,driver='GPKG') + bilan.to_file( PATH_OUT[:-4]+'gpkg', layer=bilan.name, driver='GPKG') + + + from sys import exit + print('') + print((dt.now() - init).total_seconds()) + exit('END PGZH') \ No newline at end of file diff --git a/tmp_save/pgszh_SudGres.py b/tmp_save/pgszh_SudGres.py new file mode 100644 index 0000000..d66027e --- /dev/null +++ b/tmp_save/pgszh_SudGres.py @@ -0,0 +1,2374 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +# from binascii import Incomplete +from lib2to3.pgen2 import driver +from warnings import filterwarnings +import geopandas as gpd +# from shapely import geometry +from shapely.geometry import MultiPolygon, MultiLineString, MultiPoint, 
Polygon, LineString #, collection +from shapely import ops +import pycen +filterwarnings("ignore",category=RuntimeWarning) +zh = pycen.zh() + + +path0 = '/home/colas/Documents/9_PROJETS/3_PGZH/' +PATH_OUT = path0 + 'RESULTATS/Résultats_etude_PGSZH_SudGresivaudan.xlsx' + +Path = path0 + 'SIG/' +Path_tmp = Path + 'tmp/' +p_bio_eco = 'biologie_ecologie/' +p_hydro = 'hydraulique_hydrologique/' +p_phybio = 'physique_biochimique/' +p_mltifct = 'multi_fonctions/IGN - BD Alti 25M/' +p_press = 'pressions/' +P_expert = path0 + 'DIRE_EXPERT/' + +# Couche des fonctions biologiques et écologiques +c_znieff = 'PGZSH_znieff1.gpkg' +c_zico = 'PGZSH_zico.gpkg' +c_redi = 'PGZSH_axefauneredi.gpkg' + +# Couche des fonctions hydrauliques et hydrologiques +c_reso_hydro = 'PGSZH_SG_cours eau_DREAL.gpkg' +c_alea_inond = 'alea_inondation/utsg_gpu.gpkg' +c_ebf_crseau = 'EBF_cours_eau.shp' +c_connex_molasse = 'ZH_CONNECT_ESO.shp' +c_idpr = 'BRGM_IDPR/IDPR_2154_CLIP.tif' +c_idpr2 = 'BRGM_IDPR/IDPR_2154_CLIP.gpkg' +c_piezo = 'Piezo_SAGE BDPV/carte_piézo_HE_2021.shp' +c_piezo_interp = 'Piezo_SAGE BDPV/piezo_interpoler.tif' + +# Couche des fonctions physiques et biochimiques +c_artif = 'AERMC-Grésivaudan/pressions_vecteur/PRESSIONS_ARTIFICIALISATION_2020.shp' +c_smvic_PPR1 = 'Captage/smvic_PPR1_SMVI.shp' +c_smvic_PPR2 = 'Captage/smvic_PPR2_SMVI.shp' +c_smvic_PPi = 'Captage/smvic_PPi_SMVI.shp' +c_smvic_PPe = 'Captage/smvic_PPe_SMVI.shp' +c_rpg = 'RPG/RPG_2017.shp' +c_zse = 'ZSE.shp' +c_zsea = 'ZSNEA.shp' +c_occupsol = 'PGSZH_oscom.gpkg' + +# Couche des critères « multi-fonctions » +c_alti = 'BDALTIV2_25M_FXX_' +c_mnt = 'BDALTIV2_25M.tif' + +# Couche des pressions +c_rhomeo = 'RESULTATS/RHOMEO/sig_indicateurs_2021_simby_202110081733.csv' +# c_artif = 'AERMC-Grésivaudan/pressions_vecteur/PRESSIONS_ARTIFICIALISATION_2020.shp' +c_captag = 'AERMC-Grésivaudan/Captage_tout_usage_Sud_Gresivaudan_2019_AERMC.csv' +c_agric = 'AERMC-Grésivaudan/pressions_vecteur/PRESSIONS_AGRICOLES_2019.shp' +c_urba_plu1 = 'SYMBHI/PLU/Zonage urba Cras/38137_ZONE_URBA_20210930.shp' +c_urba_plu2 = 'SYMBHI/PLU/Zonage urba NDO/38278_ZONE_URBA_20210930.shp' +c_urba_plu3 = 'SYMBHI/PLU/Zonage urba SMVIC/sirap_std_cnig_plu.v_geo_zone_urba.shp' +c_urba_scot = 'SYMBHI/SCOT/PGSZH_ref_scot_esp_pot_dev.shp' +c_iClass = 'InstallationsClassees_France.shp' +c_Purb = 'tache_urbaine.shp' +c_lign_confliredi = 'PGSZH_lignes_conflits_REDI.gpkg' +c_poin_confliredi = 'PGSZH_points_conflits_REDI.gpkg' +c_ouvrag = 'SYMBHI/ouvrage.TAB' +c_depot = 'SYMBHI/PlagesdeDepot.TAB' +c_invas = 'SYMBHI/TerrainInvasives2020.tab' +c_fallo = 'SYMBHI/fallopia.TAB' +c_cd38_eee = 'CD38/PGSZH_EEE_CDIsere.gpkg' +# c_vulner = 'SAGE/VULNERABILITE.shp' + +# Couche des dire d'expert +c_expert = 'PGSZH_dire_expert_compilation_17032022.csv' + + +class check_dtypeInList(list): + def __contains__(self, typ): + return any(isinstance(val, typ) for val in self) + +# get_flux +def open_gpkg(Path, layer=None,bbox=None): + ''' + Ouverture des couches Shapefile et Geopackages et + mise au formt du script: + + Parameters + ---------- + Path : str. Chemain/fichier. + layer : str. Si Geopackage, nom de la couche dans le + cas où il y en a plusieurs. 
+    '''
+    df = gpd.read_file(Path,layer=layer, bbox=bbox)
+    if df.geometry.name != 'geom':
+        df.rename_geometry('geom',inplace=True)
+    # Reprojection en Lambert 93 si nécessaire (les deux anciens
+    # blocs conditionnels faisaient la même chose ; un seul suffit)
+    if df.crs.srs.upper() != 'EPSG:2154':
+        print('Projection : %s - %s'%(df.crs.name,df.crs.srs) )
+        print('Modification de la projection ...')
+        df.to_crs(epsg=2154,inplace=True)
+
+    return df
+
+
+def to_geoms(geometries):
+    for geometry in geometries:
+        if isinstance(geometry, (Polygon,LineString)):
+            yield geometry
+        else:
+            yield from geometry
+
+
+def _union_polygons_geometry(df):
+    '''
+    Transforme un GeoDataFrame de Polygons
+    et/ou MultiPolygons en un MultiPolygon unique:
+
+    Parameters
+    ----------
+    df : GeoDataFrame.
+    '''
+    df = df.copy()
+    name_geom = df.geometry.name
+
+    # poly = df[df.geom_type=='Polygon'][name_geom]
+    poly = df.loc[df.geom_type=='Polygon',name_geom]
+    multipoly = df.loc[df.geom_type=='MultiPolygon',name_geom]
+    poly = [*poly]
+    multipoly = [*multipoly]
+
+    if poly:
+        mp2 = MultiPolygon(poly)
+    if poly and multipoly:
+        res = MultiPolygon(to_geoms([*mp2, *multipoly]))
+    elif not poly and multipoly:
+        res = MultiPolygon(to_geoms(multipoly))
+    elif not multipoly and poly:
+        res = MultiPolygon(poly)
+    else:
+        # Aucune géométrie surfacique : MultiPolygon vide
+        # (évite un UnboundLocalError sur `res`)
+        res = MultiPolygon()
+
+    return res
+
+
+def _union_lines_geometry(df):
+    '''
+    Transforme un GeoDataFrame de LineStrings
+    et/ou MultiLineStrings en une MultiLineString unique:
+
+    Parameters
+    ----------
+    df : GeoDataFrame.
+    '''
+    name_geom = df.geometry.name
+
+    line = df.loc[df.geom_type=='LineString',name_geom].tolist()
+    multiline = df.loc[df.geom_type=='MultiLineString',name_geom].tolist()
+
+    if line:
+        mp2 = MultiLineString(line)
+    if line and multiline:
+        res = MultiLineString(to_geoms([*mp2, *multiline]))
+    elif not line and multiline:
+        res = MultiLineString(to_geoms([*multiline]))
+    elif not multiline and line:
+        res = MultiLineString(line)
+    else:
+        # Aucune géométrie linéaire : MultiLineString vide
+        res = MultiLineString()
+
+    return res
+
+
+def _calc_recouvrmt(df1,df2):
+    '''
+    Calcule le recouvrement de df2 sur df1
+    pour chaque géométrie de df1:
+
+    Parameters
+    ----------
+    df1 : GeoDataFrame.
+    df2 : GeoDataFrame.
+    '''
+ ''' + tmp = gpd.sjoin( + df1, + df2[['geom']], + op = 'intersects', + how = 'left') + tmp.dropna(subset=['index_right'],inplace=True) + tmp.index_right = tmp.index_right.astype(int) + tmp.reset_index(inplace=True) + tmp = tmp.join( + df2[['geom']].rename(columns={'geom': 'right_geom'}), + on=['index_right'], how='left') + tmp2 = tmp[['index_right','right_geom']].copy() \ + .rename(columns={'right_geom': 'geom'}) \ + .set_geometry('geom') + tmp1 = tmp[['id_site','geom']].copy() \ + .set_geometry('geom') + + if not tmp1.geom.values.is_valid.all(): + tmp1.loc[~tmp1.geom.values.is_valid,'geom'] = tmp1.loc[~tmp1.geom.values.is_valid,'geom'].buffer(0) + if not tmp2.geom.values.is_valid.all(): + tmp2.loc[~tmp2.geom.values.is_valid,'geom'] = tmp2.loc[~tmp2.geom.values.is_valid,'geom'].buffer(0) + + tmp['perc_rcvmt'] = (tmp1.intersection(tmp2).area/tmp1.area)*100 + tmp = tmp.groupby(['id_site']).sum().reset_index() + df1 = df1.merge(tmp[['id_site','perc_rcvmt']], on=['id_site'], how='left') + df1.perc_rcvmt.fillna(0, inplace=True) + df1.perc_rcvmt = df1.perc_rcvmt.round(2) + + return df1 + + +def jenks(data,col,labels): + import jenkspy + data = data.copy() + c = col + tmp = data[c].unique() + tmp = gpd.pd.DataFrame({'val':tmp}) + ddf = gpd.pd.DataFrame() + # Si str in labels + labs_copy = None + if str in check_dtypeInList(labels): + labs_copy = labels.copy() + labels = range(len(labels)) + labels = list(labels) + + tmp['jenks'] = gpd.pd.cut(tmp['val'], + bins=jenkspy.jenks_breaks(tmp['val'], nb_class=len(labels)), + labels=labels, + include_lowest=True) + ddf[c] = data[c].copy() + ddf[c] = ddf[c].replace([*tmp.val],[*tmp.jenks]) + ddf[c+'1'] = data[c] + # ddf[c+'1'] = ddf[c+'1'].replace([*tmp.val],[*tmp.jenks]) + # ddf[c] = ddf[c].astype(float) + + if labs_copy: + ddf[c] = ddf[c].replace([*labels],[*labs_copy]) + # ddf[c+'1'] = ddf[c+'1'].replace([*labels],[*labs_copy]) + + + return ddf[c] + + + + +class fct_bio_eco: + ''' + Calcule l'indice et les sous-indices des fonctions biologiques + et écologiques des zones humides: + + Liste des fonctions + ---------- + znieff_1 : Calcul la présence/absence + des zones humides sur des ZNIEFF1 par intersection. + redi : Calcul la capacité de déplacement de la faune + du réseau écologique départemental de l'Isère (REDI) + par intersection. Utilisation d'une couche SIG REDI + (PolyLignes) avec un buffer de 100m. + fct_bio : Dans le cas où un zone humide n'intersecte pas + une ZNIEFF1, attribution d'un poids à chaque zone humide + pour ses fonctions Biologiques et Ecologiques. + Calcul dépendant du nombre de fonctions recensées en BDD. + fct_hab : Attribution d'un poids à chaque zone humide + en fonction des types d'habitat présents sur site. + fct_esppatri : Attribution d'un poids à chaque zone humide + en fonction des espèces protégées présentes sur site. + bilan : Somme des sous-indices des fonctions biologiques + et écologiques des zones humides. + ''' + def znieff_1(df): + ''' + Calcul la présence/absence des zones + humides sur des ZNIEFF 1 par intersection: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. 
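+
+        Exemple
+        ----------
+        Esquisse minimale (données jouets) du marquage
+        présence/absence codé 0|2 :
+
+        >>> import geopandas as gpd
+        >>> from shapely.geometry import box
+        >>> sites = gpd.GeoDataFrame({'id_site': ['A', 'B']},
+        ...     geometry=[box(0, 0, 1, 1), box(5, 5, 6, 6)], crs=2154)
+        >>> znieff = box(0.5, 0.5, 3, 3)
+        >>> sites.intersects(znieff).astype(int).replace(1, 2).tolist()
+        [2, 0]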
+ + Return + ---------- + 'ssind_znief' : Présence = 2 / Absence = 0 + ''' + print('INIT : Localisation de la zone en territoire ZNIEFF 1 ...') + df = df.copy() + data = open_gpkg(Path+p_bio_eco+c_znieff,bbox=df) + geom = _union_polygons_geometry(data) + df['ssind_znief'] = df.geom.intersects(geom) \ + .astype(int) \ + .replace(1,2) + + return df + + + def redi(df): + ''' + Calcul la capacité de déplacement de la faune + dans le réseau écologique départemental de l'Isère (REDI) + par intersection. Utilisation d'une couche SIG REDI + (PolyLignes) avec un buffer de 100m: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_redi' : Présence = 2 / Absence = 0 + ''' + print('INIT : Axe de déplacement de la faune du REDI ...') + df = df.copy() + data = open_gpkg(Path+p_bio_eco+c_redi,bbox=df) + data.geometry = data.geometry.map(ops.linemerge) + geom = _union_lines_geometry(data).buffer(100) + df['ssind_redi'] = df.geom.intersects(geom) \ + .astype(int) \ + .replace(1,2) + + return df + + + def fct_bio(df): + ''' + Dans le cas où un zone humide n'intersecte pas + une ZNIEFF 1 (ssind_znief = 0), attribution d'un poids + à chaque zone humide pour ses fonctions Biologiques et + Ecologiques. Calcul dépendant du nombre de fonctions + recensées en BDD: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_znief' : Présence = 2 / Absence = 0 + 'ssind_fctbio' : si (ssind_znief = 2) = 0 / + 1 fonction = 1 / multiple fonctions = 2 / + ''' + print('INIT : biologiques et écologiques ...') + df = df.copy() + data = zh.get_fct(id_site=df.id_site.tolist()) + data = data[(data.type == 'fct_bio') & (data.nom_fct != 'non documenté')] + df['ssind_fctbio'] = df.apply( + lambda x: data[data.id_site == x['id_site']].shape[0], + axis=1) + if 'ssind_znief' not in df.columns: + df = fct_bio_eco.znieff_1(df) + df.loc[df.ssind_znief==2, 'ssind_fctbio'] = 0 + # df.loc[(df.ssind_znief==0) & (df.ssind_fctbio==1), 'ssind_fctbio'] = 1 + df.loc[(df.ssind_znief==0) & (df.ssind_fctbio > 1), 'ssind_fctbio'] = 2 + + return df + + + def fct_hab(df): + ''' + Attribution d'un poids à chaque zone humide + en fonction des types d'habitat et des espèces protégées + présents sur site: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_hab' : + Habitat 'prioritaire|très rare' = 2 + ''' + print('INIT : Habitats (prioritaire|très rare) ...') + df = df.copy() + data = zh.get_fct(id_site=df.id_site.tolist()) + data = data[(data.type == 'int_patri') & (data.nom_fct != 'non documenté')] + + # 2 pt si habitat prioritaire + lst_termep = 'prioritaire|communautaire|DH|très rare' + lst_termeC = 'communautaire' + lst_termeP = 'prioritaire|très rare' + datap = data[data.nom_fct == 'habitats'] + lst_siteh = datap.loc[datap.description.str.contains(lst_termep), 'id_site'] + lst_sitehC = datap.loc[datap.description.str.contains(lst_termeC), 'id_site'] + lst_sitehP = datap.loc[datap.description.str.contains(lst_termeP), 'id_site'] + df['ssind_hab'] = 0 + df.loc[df.id_site.isin(lst_sitehP),'ssind_hab'] = 2 + + return df + + def fct_esppatri(df): + ''' + Attribution d'un poids à chaque zone humide + en fonction des espèces protégées présentes + sur site: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. 
+ + Return + ---------- + 'ssind_esppatri' : + Faune/Flore nb(1 à 2) 'DH|PR|DO|LR|PN|LRR|LRN|LRM|LRF|LRD|LRE' = 0.5 + Faune/Flore nb(< 3) 'DH|PR|DO|LR|PN|LRR|LRN|LRM|LRF|LRD|LRE' = 1 + ''' + print('INIT : Faune - Flore (PN – PR – P38) ...') + df = df.copy() + data = zh.get_fct(id_site=df.id_site.tolist()) + data = data[(data.type == 'int_patri') & (data.nom_fct != 'non documenté')] + + # 1 pt si liste terme + lst_terme = 'DH|PR|DO|LR|PN|LRR|LRN|LRM|LRF|LRD|LRE' + datat = data[data.nom_fct != 'habitats'].copy() + datat.quantite = datat.quantite.astype(float) + lst_sitet = datat.loc[datat.description.str.contains(lst_terme), 'id_site'] + lst_site1 = datat.loc[(datat.id_site.isin(lst_sitet))&(datat.quantite < 3), 'id_site'] + lst_site2 = datat.loc[(datat.id_site.isin(lst_sitet))&(datat.quantite >= 3),'id_site'] + lst_site3 = datat.loc[ + (datat.id_site.isin(lst_sitet)) & + (datat.quantite == 0) & + (datat.description.str.contains('nombreuses|plusieurs')),'id_site'] + + df['ssind_esppatri'] = 0 + df.loc[df.id_site.isin(lst_site1),'ssind_esppatri'] = 0.5 + df.loc[df.id_site.isin(lst_site2),'ssind_esppatri'] = 1 + df.loc[df.id_site.isin(lst_site3),'ssind_esppatri'] = 1 + + return df + + + def bilan(df): + ''' + Somme des sous-indices des fonctions biologiques + et écologiques des zones humides: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ind_bioeco' : + sum( + ssind_znief + ssind_redi + + ssind_fctbio + ssind_hab + ssind_esppatri + ) + ''' + df = fct_bio_eco.znieff_1(df) + df = fct_bio_eco.redi(df) + df = fct_bio_eco.fct_bio(df) + df = fct_bio_eco.fct_hab(df) + df = fct_bio_eco.fct_esppatri(df) + ssind = df.columns[df.columns.str.contains('ssind_')] + df['ind_bioeco'] = df[ + ['ssind_znief','ssind_redi','ssind_fctbio','ssind_hab','ssind_esppatri'] + # ssind + ].sum(axis=1) + print(df['ind_bioeco'].dtype) + df.name = 'Fct_bio_eco' + + return df + + + +class fct_hyd: + ''' + Calcule l'indice et les sous-indices des fonctions hydrauliques + et hydrologiques des zones humides: + + Liste des fonctions + ---------- + zone_inond : Calcul la présence/absence des zones humides + sur des zones inondables par intersection. + eabf : Calcul de l'espace alluvial de bon fonctionnement (EABF) + ou de fond de vallée par intersection. + dist_reso_hydro : Si la zone humide ne possède pas d'espace + alluviale de bon fonctionnement d'après la fonction "eabf()", + calcul la distance au réseau hydrographique linéaire + (le plus proche). Attribution d'un poids en fonction de + la distance. Si la zone ne possède pas d'eabf et ne semble + pas à proximité d'un réseau hydrique, recherche de la + présence d'un cours d'eau dans la base de données zones humes. + reghydro_out: Pour chaque zone humide, en cas de distance + au réseau hydrographique linéaire > 50 et d'absence + d'espace alluviale de bon fonctionnement, recherche + dans la base de données des zones humides si une sortie + d'eau "Cours d'eau"est définie. Attribution d'un poids + en fonction. + connex_molasse : Attribution d'un poids à chaque zone humide + en fonction de sa connexion avérée à la molasse ou non. + idpr : Calcul de l'Indice de Développement et + de Persistance des Réseaux. Calcul réalisé dans le cas où + connex_molasse = 0 . + fct_hydro : Attribution d'un poids à chaque zone humide + en fonction du nombre de rôles hydro-biologiques à caractères + hydrauliques et hydrologiques qu'elle remplie. 
+ zse_zsnea : Attribution d'un poids à chaque zone humide + en fonction de sont appartenance à une zone de sauvegarde + exploitée actuelle (zse) ou future (zsnea). + bilan : Somme des sous-indices des fonctions hydrauliques + et hydrologiques des zones humides. + ''' + def zone_inond(df): + ''' + Calcul la présence/absence des zones humides + sur des zones inondables par intersection: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_zoneinond' : None + ''' + print('INIT : Zone inondable ...') + df = df.copy() + data = open_gpkg( + Path+p_hydro+c_alea_inond,bbox=df, + layer='prescription_surf') + data = data[data['PGSZH_alea_inondation']==1] + tmp = gpd.sjoin(df,data[['geom']],op='intersects', how='left') + lst = tmp.loc[~tmp.index_right.isna(),'id_site'].tolist() + df['ssind_zoneinond'] = None + df.loc[df.id_site.isin(lst),'ssind_zoneinond'] = 1 + return df + + + def eabf(df): + ''' + Si la zone humide n'est pas en zone inondable d'après + la fonction "zone_inond()", calcul de l'espace alluvial de bon + fonctionnement (EABF) ou de fond de vallée par intersection: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_eabf' : None + ''' + print('INIT : Espace alluvial de bon fonctionnement (EABF) ou fond de vallée ...') + df = df.copy() + data = open_gpkg(Path+p_hydro+c_ebf_crseau,bbox=df) + tmp = gpd.sjoin(df,data[['geom']],op='intersects', how='left') + lst = tmp.loc[~tmp.index_right.isna(),'id_site'].tolist() + df['ssind_eabf'] = 0 + df.loc[df.id_site.isin(lst),'ssind_eabf'] = 1 + return df + + + def dist_reso_hydro(df): + ''' + Si la zone humide ne possède pas d'espace alluviale + de bon fonctionnement d'après la fonction "eabf()", + calcul de la distance au réseau hydrographique + linéaire (le plus proche).Attribution d'un poids + en fonction de la distance. + Si la zone ne possède pas d'eabf et ne semble pas + à proximité d'un réseau hydrique, recherche de la + présence d'un cours d'eau dans la base de données + zones humes: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. 
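+
+        Exemple
+        ----------
+        Esquisse minimale (données jouets) du classement par
+        distance via deux buffers (10 m et 50 m) :
+
+        >>> import geopandas as gpd
+        >>> from shapely.geometry import LineString, box
+        >>> sites = gpd.GeoDataFrame({'id_site': ['A']},
+        ...     geometry=[box(0, 0, 100, 100)], crs=2154)
+        >>> cours_eau = LineString([(130, 0), (130, 200)])
+        >>> b10 = sites.buffer(10).intersects(cours_eau)
+        >>> b50 = sites.buffer(50).intersects(cours_eau)
+        >>> sites['ssind_distHydro'] = 0
+        >>> sites.loc[b50, 'ssind_distHydro'] = 0.5
+        >>> sites.loc[b10, 'ssind_distHydro'] = 1
+        >>> sites.ssind_distHydro.tolist()
+        [0.5]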
+ + Return + ---------- + 'ssind_distHydro' : + Si > 50m = 0 + Si ]10 m – 50 m] = 0.5 + Si <= 10m = 1 + ''' + print('INIT : Distance au réseau hydrographique linéaire (le plus proche) ...') + df = df.copy() + # data = open_gpkg(Path+p_hydro+c_reso_hydro,bbox=df) + data = pycen.ref_hydro().get_troncon() + if 'ssind_eabf' not in df.columns: + df = fct_hyd.eabf(df) + # if 'MultiLineString' in data.geom_type: + # data.loc[data.geom_type=='MultiLineString'] = data.loc[data.geom_type=='MultiLineString'].geometry.map(ops.linemerge) + # if True in data.has_z.unique(): + # import shapely.wkb + # data.loc[data.has_z,'geom'] + # data.geom = [shapely.wkb.loads(shapely.wkb.dumps(g, output_dimension=2)) for g in data.geom] + df10 = df[['id_site','geom']].copy() + df50 = df[['id_site','geom']].copy() + df10.geom = df10.buffer(10) + df50.geom = df50.buffer(50) + df10 = gpd.sjoin(df10,data[['geom']],op='intersects', how='left') + df50 = gpd.sjoin(df50,data[['geom']],op='intersects', how='left') + lst10 = df10.loc[~df10.index_right.isna(),'id_site'].tolist() + lst50 = df50.loc[~df50.index_right.isna(),'id_site'].tolist() + df['ssind_distHydro'] = 0 + df.loc[(df.ssind_eabf==0)&(df.id_site.isin(lst50)),'ssind_distHydro'] = 0.5 + df.loc[(df.ssind_eabf==0)&(df.id_site.isin(lst10)),'ssind_distHydro'] = 1 + + # df.loc[df.dist_min <= 10, 'ssind_distHydro'] = 2 + # Si 0, check entree/sortie regime hydro. + # Si cours d'eau ou eaux de crues ==> 1 + # union = data.geometry.unary_union + # df['buff10'] = df.buffer(10).intersects(union).astype(int) + # df['buff50'] = df.buffer(50).intersects(union).astype(int) + # df['ssind_distHydro'] = None + # df.loc[df.buff50 == 0, 'ssind_distHydro'] = 0 + # df.loc[df.buff50 == 1, 'ssind_distHydro'] = 1 + # df.loc[df.buff10 == 1, 'ssind_distHydro'] = 2 + # df.drop(columns=['buff10', 'buff50'], inplace=True) + + return df + + + def reghydro_out(df): + ''' + Pour chaque zone humide, en cas de distance + au réseau hydrographique linéaire > 50 et d'absence + d'espace alluviale de bon fonctionnement, recherche + dans la base de données des zones humides si une sortie + d'eau "Cours d'eau"est définie. Attribution d'un poids + en fonction: + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_hydrout' : + Si ssind_distHydro = 0 + & ssind_eabf = 0 + & regime_hydri_out = "Cours d'eau" : + 'ssind_distHydro' = 1 + ''' + df = df.copy() + if 'ssind_distHydro' not in df.columns: + df = fct_hyd.dist_reso_hydro(df) + + df['ssind_hydrout'] = 0 + if not df.loc[df.ssind_distHydro == 0].empty : + lst_zh = df.loc[df.ssind_distHydro == 0].id_site.tolist() + tmp = zh.get_regHydro(id_site=lst_zh) + # tmp = tmp.loc[tmp.regime_hydri.isin(["Cours d'eau", "Eaux de crues"])] + tmp = tmp.loc[(tmp.in_out=='sortie')&(tmp.regime_hydri=="Cours d'eau")] + # in_out ??????????? + # permanance ??????? + lsttmp = tmp.id_site + df.loc[(df.ssind_eabf==0)&(df.id_site.isin(lsttmp)),'ssind_hydrout'] = 1 + return df + + + def connex_molasse(df): + ''' + Attribution d'un poids à chaque zone humide + en fonction de sa connexion avérée + à la molasse ou non : + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. 
+ + Return + ---------- + 'ssind_molasse' : + (intersects == False) = 0 + (intersects == True) = 1 + ''' + print('INIT : Connexion à la molasse ...') + df = df.copy() + data = open_gpkg(Path+p_hydro+c_connex_molasse,bbox=df) + tmp = gpd.sjoin(df,data[['geom']],op='intersects', how='left') + tmp = tmp[~tmp.index_right.isna()] + df['ssind_molasse'] = 0 + df.loc[df.id_site.isin(tmp.id_site),'ssind_molasse'] = 1 + return df + + def piezo(df): + import rasterio as rio + from os import system + print('INIT : Cross piézométrie ...') + df = df.copy() + # Polygoniser (raster vers vecteur) + # """gdal_polygonize.py + # "/home/colas/Documents/9_PROJETS/3_PGZH/SIG/multi_fonctions/IGN - BD Alti 25M/BDALTIV2_25M.tif" + # -b 1 -f "GPKG" + # /tmp/processing_sSlfcG/96533f9ad23e4c10992caa807da01bf1/OUTPUT.gpkg + # OUTPUT alti + # """ + + mnt_in = '' + piezo_in = 'my_interpolate_piezo.tif' + piezo_out = 'piezo.tif' + gpkg_out = 'out.gpkg' + mnt_out = 'mnt.tif' + piezoVSmnt = 'piezoVSmnt_out.tif' + poly_connect_nape = 'poly_connect_nape.gpkg' + # Découper un raster selon une couche de masque + # Découpage du MNT par les polygones d'étude + op = ''' + gdalwarp -overwrite -s_srs EPSG:2154 -t_srs EPSG:2154 -co FORMAT=GPKG -of GTiff -tr 25.0 -25.0 -tap -cutline \ + "PG:dbname='azalee' host=91.134.194.221 port=5432 sslmode=disable user='cgeier' password='adm1n*bdCen'" \ + -csql "SELECT site_code, geom FROM zones_humides.v_zoneshumides WHERE site_code in ('{liste_site}')" \ + "{mnt}" {out} + '''.format(vector = Path_tmp+gpkg_out, mnt=Path+p_mltifct+c_mnt, out=Path_tmp+mnt_out, liste_site="','".join(df.id_site.tolist())) + system(op) + mnt = rio.open(Path_tmp+mnt_out) + xmin, ymin, xmax, ymax = mnt.bounds + + # Découpage du PIEZO rasterizé interpolé par les polygones d'étude + op = ''' + gdalwarp -overwrite -s_srs EPSG:2154 -t_srs EPSG:2154 -co FORMAT=GPKG -of GTiff -tr 25.0 -25.0 \ + -te {xmin} {ymin} {xmax} {ymax} \ + -tap -cutline "PG:dbname='azalee' host=91.134.194.221 port=5432 sslmode=disable user='cgeier' password='adm1n*bdCen'" \ + -csql "SELECT site_code, geom FROM zones_humides.v_zoneshumides WHERE site_code in ('{liste_site}')" \ + "{mnt}" {out} + '''.format( + xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax, + mnt=Path_tmp+piezo_in, + out=Path_tmp+piezo_out, + liste_site="','".join(df.id_site.tolist()) + ) + system(op) + + # Soustraction de la valeur des mailles + op = ''' + gdal_calc.py --overwrite -A {mnt} -B {piezo} --outfile={out} --calc="(A-B)<=0" + '''.format(mnt=Path_tmp+mnt_out, piezo=Path_tmp+piezo_out, out=Path_tmp+piezoVSmnt) + system(op) + # polygonisation du raster + op = ''' + gdal_polygonize.py {mnt} -b 1 -f "GPKG" {out} + '''.format(mnt=Path_tmp+piezoVSmnt, out=Path_tmp+poly_connect_nape) + system(op) + + # data = rio.open(Path_tmp+piezoVSmnt) + data = open_gpkg(Path_tmp+poly_connect_nape) + data.rename(columns={'DN':'connect_nappe'}, inplace=True) + data = data[data.connect_nappe > 0].copy() + # IDEM : + # gpd.sjoin(df,data).sort_values('id_site').id_site.unique() == \ + # df[df.intersects(data.unary_union)].sort_values('id_site').id_site.tolist() + + tmp = gpd.sjoin(df,data,how='left') + del tmp['index_right'] + tmp.drop_duplicates(inplace=True) + df = tmp.copy() + + df.to_file(Path_tmp+'zh_connect_nappe.gpkg',driver='GPKG') + + # Import des courbe des niveau rasteriser par interpolation + # by QGIS : Outils de traitements > Interpolation > Interpolation TIN + # piezo = rio.open(Path+p_hydro+c_piezo_interp) + + + # from geocube.api.core import make_geocube + # piézo = 
open_gpkg(Path+p_hydro+c_piezo) + # piézo = piézo[~piézo.geom.isna()] + # piézo.rename_geometry('geometry', inplace=True) + # out_grid = make_geocube( + # vector_data=piézo, + # measurements=["id"], + # resolution=(-25, 25) + # ) + # out_grid["id"].rio.to_raster(Path_tmp+"my_rasterized_column.tif") + # import xarray + # xds = xarray.open_dataarray(Path_tmp+"my_rasterized_column.tif") + # filled = xds.rio.interpolate_na(method='linear') + # filled.rio.to_raster(Path_tmp+"my_interpolate_raster.tif") + + + return df + + def idpr(df): + ''' + Calcul réalisé dans le cas où connex_molasse = 0. + Calcul de l'Indice de Développement et + de Persistance des Réseaux : + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_idpr' : + if [ %_recouvrement(idpr < 1000) > 25% ] : + ssind_idpr = 1 + else : + ssind_idpr = 0 + ''' + print('INIT : IDPR ...') + df = df.copy() + if 'ssind_molasse' not in df.columns: + df = fct_hyd.connex_molasse(df) + + import rasterio + from rasterio.features import shapes + mask = None + with rasterio.Env(): + with rasterio.open(Path+p_hydro+c_idpr2) as src: + image = src.read(1) # first band + image[(image < 1000) & (image > -1)] = 1 + image[image >= 1000] = 0 + data = gpd.GeoDataFrame.from_features( + features = [{'properties': {'raster_val': v}, 'geometry': s} + for i, (s, v) + in enumerate( + shapes(image, mask=mask, transform=src.transform)) if v >= 0], + crs = 'EPSG:2154') + data.rename_geometry('geom', inplace=True) + + lst_data = [] + if not df[df.ssind_molasse == 0].empty: + perc = _calc_recouvrmt( + df[df.ssind_molasse == 0], + data[data.raster_val == 1] ) + lst_data = perc.loc[perc.perc_rcvmt.round(2) > 25,'id_site'] + + df['ssind_idpr'] = 0 + df.loc[df.id_site.isin(lst_data), 'ssind_idpr'] = 1 + + return df + + + def fct_hydro(df): + ''' + Attribution d'un poids à chaque zone humide + en fonction du nombre de rôles hydro-biologiques à caractères + hydrauliques et hydrologiques qu'elle remplie : + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_fcthydro' : + 0 fonction = 0 + 1 fonction = 0.5 + + fonctions = 1 + ''' + print('INIT : Fonctions hydro-biologiques à caractères hydrauliques et hydrologiques ...') + df = df.copy() + data = zh.get_fct(id_site=df.id_site.tolist()) + data = data[(data.type == 'fct_hydro') & (data.nom_fct != 'non documenté')] + + lst_terme = ["soutien naturel d'étiage",'ralentissement du ruissellement','expansion naturelle des crues'] + d = data.loc[data.nom_fct.isin(lst_terme),['id_site','nom_fct']] + d['nb_fct'] = 1 + d = d.groupby(['id_site']).sum().reset_index() + + df['ssind_fcthydro'] = 0 + lst_data1 = d.loc[d.nb_fct == 1, 'id_site'] + lst_dataSup = d.loc[d.nb_fct > 1,'id_site'] + df.loc[df.id_site.isin(lst_data1), 'ssind_fcthydro'] = 0.5 + df.loc[df.id_site.isin(lst_dataSup), 'ssind_fcthydro'] = 1 + + return df + + + def zse_zsnea(df): + ''' + Attribution d'un poids à chaque zone humide + en fonction de sont appartenance à une zone de sauvegarde + exploitée actuelle (zse) ou future (zsnea) : + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. 
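+
+        Exemple
+        ----------
+        Esquisse minimale (données jouets) de la combinaison
+        de deux tests d'intersection par un max :
+
+        >>> import geopandas as gpd
+        >>> from shapely.geometry import box
+        >>> sites = gpd.GeoDataFrame({'id_site': ['A']},
+        ...     geometry=[box(0, 0, 10, 10)], crs=2154)
+        >>> zse, zsnea = box(5, 5, 20, 20), box(50, 50, 60, 60)
+        >>> sites['zse'] = sites.intersects(zse).astype(int)
+        >>> sites['zsnea'] = sites.intersects(zsnea).astype(int)
+        >>> sites[['zse', 'zsnea']].max(axis=1).tolist()
+        [1]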
+
+        Return
+        ----------
+        'ssind_zse_zsnea' :
+            if ZH.intersects(ZSNEA|ZSE):
+                'ssind_zse_zsnea' = 1
+            else :
+                'ssind_zse_zsnea' = 0
+        '''
+        print('INIT : Zones de sauvegardes actuelles et futures (ZSE / ZSNEA) ...')
+        df = df.copy()
+        data1 = open_gpkg(Path+p_phybio+c_zse,bbox=df)
+        data2 = open_gpkg(Path+p_phybio+c_zsea,bbox=df)
+        data1 = _union_polygons_geometry(data1)
+        data2 = _union_polygons_geometry(data2)
+        if not data1.is_valid:
+            data1 = data1.buffer(0)
+        if not data2.is_valid:
+            data2 = data2.buffer(0)
+        df['zse'] = df.intersects(data1).astype(int) #.replace(1,2)
+        df['zsnea'] = df.intersects(data2).astype(int)
+        df['ssind_zse_zsnea'] = df[['zse', 'zsnea']].max(axis=1)
+        df.drop(columns=['zse','zsnea'], inplace=True)
+        return df
+
+
+    def bilan(df):
+        '''
+        Somme des sous-indices des fonctions hydrauliques
+        et hydrologiques des zones humides :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+        de l'étude.
+
+        Return
+        ----------
+        'ind_hydro' :
+            sum(
+                ssind_zoneinond + ssind_eabf + ssind_distHydro +
+                ssind_hydrout + ssind_fcthydro + ssind_zse_zsnea
+            )
+        '''
+        df = fct_hyd.zone_inond(df)
+        df = fct_hyd.eabf(df)
+        df = fct_hyd.dist_reso_hydro(df)
+        df = fct_hyd.reghydro_out(df)
+        # df = fct_hyd.connex_molasse(df)
+        # df = fct_hyd.idpr(df)
+        df = fct_hyd.fct_hydro(df)
+        df = fct_hyd.zse_zsnea(df)
+        df['ind_hydro'] = df[
+            ['ssind_zoneinond','ssind_eabf','ssind_distHydro',
+             'ssind_hydrout',#'ssind_molasse','ssind_idpr',
+             'ssind_fcthydro','ssind_zse_zsnea'
+            ]
+        ].sum(axis=1)
+        df.name = 'Fct_hyd'
+        return df
+
+
+
+class fct_phy_bio:
+    '''
+    Calcule l'indice et les sous-indices des fonctions physiques
+    et biochimiques des zones humides :
+
+    Liste des fonctions
+    ----------
+    perim_captage : Identification de la présence/absence
+        de zones de captages à proximité des zones humides
+        par intersection.
+    fct_hydrobio : Attribution d'un poids à chaque zone humide
+        en fonction du nombre de rôles hydro-biologiques à caractères
+        physiques et biochimiques qu'elle remplit.
+    occup_sol : Pour chaque zone humide, identification de la nature
+        d'occupation du sol et de sa surface de recouvrement. Déduction
+        de la surface d'espace naturel concernée par les zonages.
+        Attribution d'un poids en fonction de la surface recouverte.
+    bilan : Somme des sous-indices des fonctions physiques
+        et biochimiques des zones humides.
+    '''
+
+    def perim_captage(df):
+        '''
+        Identification de la présence/absence
+        de zones de captages à proximité des zones humides
+        par intersection. Ne considère pas les captages 'ABA|HS' :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+        de l'étude.
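+
+        Exemple
+        ----------
+        Esquisse minimale (données jouets) du filtrage attributaire
+        puis du marquage par intersection :
+
+        >>> import geopandas as gpd
+        >>> from shapely.geometry import box
+        >>> sites = gpd.GeoDataFrame({'id_site': ['A']},
+        ...     geometry=[box(0, 0, 10, 10)], crs=2154)
+        >>> capt = gpd.GeoDataFrame({'N_INS___NO': ['OK-1', 'ABA-2']},
+        ...     geometry=[box(5, 5, 20, 20), box(50, 50, 60, 60)], crs=2154)
+        >>> keep = capt[~capt.N_INS___NO.str.contains('ABA|HS')]
+        >>> sites.intersects(keep.unary_union).astype(int).replace(1, 2).tolist()
+        [2]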
+ + Return + ---------- + 'ssind_perimcaptage' : + if ZH.intersects(captage): + 'ssind_perimcaptage' = 2 + else : + 'ssind_perimcaptage' = 0 + ''' + print('INIT : Périmètre de protection des captages AEP ...') + from pandas import concat + df = df.copy() + data1 = open_gpkg(Path+p_phybio+c_smvic_PPR1,bbox=df) + data2 = open_gpkg(Path+p_phybio+c_smvic_PPR2,bbox=df) + data3 = open_gpkg(Path+p_phybio+c_smvic_PPi,bbox=df) + data4 = open_gpkg(Path+p_phybio+c_smvic_PPe,bbox=df) + data = concat([data1,data2,data3,data4]) + tmp = gpd.sjoin( + df, + data.loc[ + ~data.N_INS___NO.str.contains('ABA|HS'), + ['geom']], + op = 'intersects', + how = 'left') + lst_site = tmp[~tmp.index_right.isna()].id_site + df['ssind_perimcaptage'] = 0 + df.loc[df.id_site.isin(lst_site),'ssind_perimcaptage'] = 2 + return df + + + # def zse_zsnea(df): + # print('INIT : Zones de sauvegardes actuelles et futures (ZSE / ZSNEA) ...') + # df = df.copy() + # data1 = open_gpkg(Path+p_phybio+c_zse,bbox=df) + # data2 = open_gpkg(Path+p_phybio+c_zsea,bbox=df) + # data1 = _union_polygons_geometry(data1) + # data2 = _union_polygons_geometry(data2) + # if not data1.is_valid: + # data1 = data1.buffer(0) + # if not data2.is_valid: + # data2 = data2.buffer(0) + # df['zse'] = df.intersects(data1).astype(int).replace(1,2) + # df['zsnea'] = df.intersects(data2).astype(int) + # df['ssind_zse_zsnea'] = df[['zse', 'zsnea']].max(axis=1) + # df.drop(columns=['zse','zsnea'], inplace=True) + # return df + + + def fct_hydrobio(df): + ''' + Attribution d'un poids à chaque zone humide + en fonction du nombre de rôles hydro-biologiques à caractères + physiques et biochimiques qu'elle remplie : + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. + + Return + ---------- + 'ssind_fcthydrobio' : + 0 fonction = 0 + 1 fonction = 1 + + fonctions = 2 + ''' + print('INIT : Fonctions hydro-biologiques à caractères physiques et biochimiques ...') + df = df.copy() + data = zh.get_fct(id_site=df.id_site.tolist()) + data = data[(data.type == 'fct_hydro') & (data.nom_fct != 'non documenté')] + + lst_terme = ["fonctions d'épuration","rôle naturel de protection contre l'érosion"] + d = data.loc[data.nom_fct.isin(lst_terme),['id_site','nom_fct']] + d['nb_fct'] = 1 + d = d.groupby(['id_site']).sum().reset_index() + + df['ssind_fcthydrobio'] = 0 + lst_data1 = d.loc[d.nb_fct == 1, 'id_site'] + lst_dataSup = d.loc[d.nb_fct > 1,'id_site'] + df.loc[df.id_site.isin(lst_data1), 'ssind_fcthydrobio'] = 1 + df.loc[df.id_site.isin(lst_dataSup), 'ssind_fcthydrobio'] = 2 + return df + + + def occup_sol(df): + ''' + Pour chaque zone humide, identification de la nature + d'occupation du sol et de sa surface de recouvrement. + Déduction de la surface d'espace naturel concernée par les zonnages. + Type d'intérêt : + 'Forêts' / 'Milieux à végétation arbustive et/ou herbacée' / 'Prairies' + Attribution d'un poids en fonction de la surface de recouverte : + + Parameters + ---------- + df : GeoDataFrame. GeoDataFrame des zones humides + de l'étude. 
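+
+        Exemple
+        ----------
+        Esquisse minimale (données jouets) du calcul du pourcentage
+        de surface « naturelle » restante après soustraction
+        (overlay difference) des emprises artificialisées :
+
+        >>> import geopandas as gpd
+        >>> from shapely.geometry import box
+        >>> sites = gpd.GeoDataFrame({'id_site': ['A']},
+        ...     geometry=[box(0, 0, 10, 10)], crs=2154)
+        >>> artif = gpd.GeoDataFrame({'ID': [1]},
+        ...     geometry=[box(0, 0, 5, 10)], crs=2154)
+        >>> sites['area_init'] = sites.area
+        >>> rest = gpd.overlay(sites, artif, how='difference')
+        >>> (100 * rest.area / rest.area_init).round().tolist()
+        [50.0]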
+ + Return + ---------- + 'ssind_occupsol' : + if surf_recouverte < 25% : + ssind = 0 + elif surf_recouverte in [ 25% ; 50% [ : + ssind = 1 + elif surf_recouverte in [ 50% ; 75% [ : + ssind = 1.5 + elif surf_recouverte in [ 75% ; 100% ] : + ssind = 2 + ''' + + init = dt.now() + print('INIT : Occupation du sol ...') + df = df.copy() + ddf = df.copy() + ddf['area_init'] = ddf.area + # data = open_gpkg(Path+p_phybio+c_occupsol, bbox=df) + print('IMPORT DATA ...') + artif = open_gpkg(Path+p_press+c_artif, bbox=df) + artif = artif[['ID','geom']] + + artif1 = artif.iloc[0,:] + artif1 = gpd.GeoDataFrame(artif1).T + artif1.set_geometry('geom', inplace=True) + artif1.set_crs(crs=artif.crs.srs, inplace=True) + # artif1 = gpd.GeoDataFrame(artif.iloc[0].copy(),geometry=artif.iloc[0].geom,crs=artif.crs.srs) + artif1 = gpd.overlay(artif1,ddf, how='intersection') + artif1.rename_geometry('geom', inplace=True) + artif2 = gpd.GeoDataFrame(artif.iloc[1:,:]) + artif2.set_geometry('geom', inplace=True) + artif2.set_crs(crs=artif.crs.srs, inplace=True) + + rpg = open_gpkg(Path+p_phybio+c_rpg, bbox=df) + lst_code = ['BOP','SPH','SPL','PPH','PRL','J6P','J6S','BTA','ROS','SBO'] + rpg = rpg[~rpg.code_cultu.isin(lst_code)] + + + print((dt.now() - init).total_seconds()) + print('CORRECTION GEOMETRY ...') + if not artif1.geom.is_valid.all() : + artif1.loc[~artif1.geom.is_valid,'geom'] = artif1.loc[~artif1.geom.is_valid,'geom'].buffer(0) + if not artif2.geom.is_valid.all() : + artif2.loc[~artif2.geom.is_valid,'geom'] = artif2.loc[~artif2.geom.is_valid,'geom'].buffer(0) + if not rpg.geom.is_valid.all() : + rpg.loc[~rpg.geom.is_valid,'geom'] = rpg.loc[~rpg.geom.is_valid,'geom'].buffer(0) + + + print((dt.now() - init).total_seconds()) + print('DATA READY ...') + print('INIT OVERLAY ...') + ddf = gpd.overlay(ddf,artif1, how='difference') + if 'GeometryCollection' in ddf.geom_type.unique(): + ddf.geom = ddf.geom.buffer(0) + ddf = gpd.overlay(ddf,rpg, how='difference') + if 'GeometryCollection' in ddf.geom_type.unique(): + ddf.geom = ddf.geom.buffer(0) + ddf = gpd.overlay(ddf,artif2, how='difference') + if 'GeometryCollection' in ddf.geom_type.unique(): + ddf.geom = ddf.geom.buffer(0) + print('END OVERLAY ...') + print((dt.now() - init).total_seconds()) + + ddf['area_end'] = ddf.area + ddf['perc_rcvmt'] = 100 - (100*ddf.area_end/ddf.area_init) + df = df.merge(ddf[['id_site','perc_rcvmt']], on='id_site', how='left') + df.perc_rcvmt.fillna(0,inplace=True) + df['perc_surfNat'] = 100 - df['perc_rcvmt'] + + + # lst_terme = ['Forêts','Milieux à végétation arbustive et/ou herbacée','Prairies'] + # d = data.loc[data.libelle_02.isin(lst_terme),] + # print( + # ('INIT : Calcul du recouvrement de l\'occupation des sols sur les zones humides :'), + # ('"Forêts","Milieux à végétation arbustive et/ou herbacée","Prairies"')) + # print(('ATTENTION : Les géometries de l\'occupation des sols étant complexes,'), + # ('le calcul peut prendre un certain temps ...')) + # df = _calc_recouvrmt(df,d) + # print('END : Calcul du recouvrement de l\'occupation des sols sur les zones humides') + if 'ssind_fcthydrobio' not in df.columns: + df = fct_phy_bio.fct_hydrobio(df) + + df['ssind_occupsol'] = 0 + df.loc[(df.ssind_fcthydrobio == 0) & (df.perc_surfNat.between(25,50,inclusive=True)),'ssind_occupsol'] = 1 + df.loc[(df.ssind_fcthydrobio == 0) & (df.perc_surfNat.between(50,75,inclusive=True)),'ssind_occupsol'] = 1.5 + df.loc[(df.ssind_fcthydrobio == 0) & (df.perc_surfNat >= 75),'ssind_occupsol'] = 2 + df.drop(columns=['perc_rcvmt','perc_surfNat'], 
inplace=True)
+        print('END ssind_occupsol ...')
+
+        return df
+
+
+    def bilan(df):
+        '''
+        Somme des sous-indices des fonctions physiques
+        et biochimiques des zones humides :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+        de l'étude.
+
+        Return
+        ----------
+        'ind_phybio' :
+            sum(
+                ssind_perimcaptage +
+                ssind_fcthydrobio +
+                ssind_occupsol
+            )
+        '''
+        df = fct_phy_bio.perim_captage(df)
+        # df = fct_phy_bio.zse_zsnea(df)
+        df = fct_phy_bio.fct_hydrobio(df)
+        df = fct_phy_bio.occup_sol(df)
+        df['ind_phybio'] = df[
+            ['ssind_perimcaptage',
+             # 'ssind_zse_zsnea',
+             'ssind_fcthydrobio','ssind_occupsol']
+        ].sum(axis=1)
+        df.name = 'Fct_phy_bio'
+        return df
+
+
+
+class crit_multi_fct:
+    '''
+    Calcule l'indice et les sous-indices des critères
+    « multi-fonctions » des zones humides :
+
+    Liste des fonctions
+    ----------
+    surface : Calcul de la surface totale des zones humides.
+        Attribution d'un poids en fonction du résultat.
+    pente : Calcul de la pente moyenne des zones humides
+        via le MNT. Attribution d'un poids en fonction du résultat.
+    dir_exp : Ajout d'un champ dir_exp dans le tableau de sortie,
+        à remplir manuellement à dire d'expert.
+    bilan : Rassemblement des sous-indices des critères
+        « multi-fonctions » dans un même tableau. L'indice pourra
+        être calculé lorsque le champ dir_exp sera rempli.
+    '''
+    def surface(df):
+        '''
+        Calcul de la surface totale des zones humides.
+        Attribution d'un poids en fonction du résultat :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+        de l'étude.
+
+        Return
+        ----------
+        'ssind_surf' :
+            if ZH < 1ha :
+                ssind = 0
+            elif ZH in [ 1ha ; 20ha [ :
+                ssind = 0.5
+            elif ZH in [ 20ha ; 100ha [ :
+                ssind = 1
+            elif ZH >= 100ha :
+                ssind = 1.5
+        '''
+        print('INIT : Calcul de la surface ...')
+        df = df.copy()
+        df['ssind_surf'] = 0
+        df.loc[(df.area/10000).between(1,20,inclusive=True),'ssind_surf'] = 0.5
+        df.loc[(df.area/10000).between(20,100,inclusive=True),'ssind_surf'] = 1
+        df.loc[df.area/10000 >= 100,'ssind_surf'] = 1.5
+        return df
+
+
+    def pente(df):
+        '''
+        Calcul de la pente moyenne des zones humides
+        via le MNT. Attribution d'un poids en fonction du résultat :
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame des zones humides
+        de l'étude.
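+
+        Exemple
+        ----------
+        Esquisse hypothétique (non exécutée telle quelle) du calcul
+        de pente via GDAL puis de statistiques zonales ; les chemins
+        'mnt.tif' et 'slope_percent.tif' sont fictifs et supposent
+        gdaldem et rasterstats disponibles :
+
+        >>> from os import system
+        >>> from rasterstats import zonal_stats
+        >>> system("gdaldem slope mnt.tif slope_percent.tif -p")  # doctest: +SKIP
+        >>> stats = zonal_stats(df.geom, 'slope_percent.tif')  # doctest: +SKIP
+        >>> # pente moyenne < 5 % => 1, sinon 0
+        >>> [0 if s['mean'] is None or s['mean'] >= 5 else 1
+        ...     for s in stats]  # doctest: +SKIP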
+ + Return + ---------- + 'ssind_surf' : + if ZH > 5% : + ssind = 0 + else: + ssind = 1 + ''' + print('INIT : Calcul de la pente moyenne ...') + dfP = df.copy() + # df.geom = df.buffer(0) + # poly=df.geom[0] + # from shapely.geometry import Point + # tutu = gpd.pd.DataFrame( + # [{'index':g,'coords':list(x.exterior.coords)} for g,geom in enumerate(df.geom) for x in geom.geoms] + # ) + # tutu = tutu.explode('coords') + # tutu['geom'] = [Point([x]) for x in tutu.coords] + # tutu = tutu.set_geometry('geom') + # tutu.set_crs(epsg=2154, inplace=True) + # chdir(Path+p_mltifct) + # Dir = listdir() + # Dir = [x for x in Dir if '.zip' in x] + # for i, d in enumerate(Dir): + # zip = zipfile.ZipFile(d).namelist() + # z = [z for z in zip if 'MNT' in z][0] + # stats = rasterstats.zonal_stats(tutu.geometry,'zip:{0}/{1}'.format(d,z)) + # stats = pd.DataFrame(stats) + # if i == 0 : + # tutu[stats.columns] = stats + # else: + # tmp = pd.DataFrame({'tutu':tutu['mean']*tutu['count'], 'stats':stats['mean']*stats['count']}) + # tutu['mean'] = tmp.sum(axis=1)/(tutu['count']+stats['count']) + # tmp = pd.DataFrame({'tutu':tutu['count'], 'stats':stats['count']}) + # tutu['count'] = tmp.sum(axis=1) + # tmp = pd.DataFrame({'tutu':tutu['min'], 'stats':stats['min']}) + # tutu['min'] = tmp.min(axis=1) + # tmp = pd.DataFrame({'tutu':tutu['max'], 'stats':stats['max']}) + # tutu['max'] = tmp.max(axis=1) + + # transformation altitude to pente . + # gdaldem slope + # "/home/colas/Documents/9_PROJETS/3_PGZH/SIG/multi_fonctions/IGN - BD Alti 25M/BDALTIV2_25M_FXX_0900_6475/BDALTIV2_25M_FXX_0900_6475_MNT_LAMB93_IGN69.asc" + # /tmp/processing_nooAPj/4392eb6b07804db4b4350433cc6db54a/OUTPUT.tif -of GTiff -b 1 -s 1.0 -p + from os import listdir, chdir, system + from pathlib import Path as Pathlib + from zipfile import ZipFile + from rasterstats import zonal_stats + home = str(Pathlib.home()) + chdir(Path+p_mltifct) + Dir = listdir() + Dir = [x for x in Dir if '.zip' in x] + slope = 'temp_slop.tif' + for i, d in enumerate(Dir): + zip = ZipFile(d).namelist() + z = [z for z in zip if 'MNT' in z][0] + system("gdaldem slope '/vsizip/{zip}/{mnt}' '{slope}' -of GTiff -b 1 -s 1.0 -p".format(zip=d,mnt=z,slope=slope)) + + stats = zonal_stats(dfP.geom,slope) + stats = gpd.pd.DataFrame(stats) + if i == 0 : + dfP[stats.columns] = stats + else: + tmp = gpd.pd.DataFrame({'dfP':dfP['mean']*dfP['count'], 'stats':stats['mean']*stats['count']}) + dfP['mean'] = tmp.sum(axis=1)/(dfP['count']+stats['count']) + tmp = gpd.pd.DataFrame({'dfP':dfP['count'], 'stats':stats['count']}) + dfP['count'] = tmp.sum(axis=1) + tmp = gpd.pd.DataFrame({'dfP':dfP['min'], 'stats':stats['min']}) + dfP['min'] = tmp.min(axis=1) + tmp = gpd.pd.DataFrame({'dfP':dfP['max'], 'stats':stats['max']}) + dfP['max'] = tmp.max(axis=1) + + system('rm {slope}'.format(slope=slope)) + chdir(home) + dfP['ssind_pente'] = 0 + dfP.loc[dfP['mean'] < 5, 'ssind_pente'] = 1 + + df = df.merge(dfP[['id_site','ssind_pente']], on=['id_site'], how='left') + + + # dst = [z for z in zip if 'DST' in z][0] + # src = [z for z in zip if 'SRC' in z][0] + # dst = rasterio.open('zip:{0}/{1}'.format(d,dst)) + # src = rasterio.open('zip:{0}/{1}'.format(d,src)) + # mnt = rasterio.open( + # 'zip:{0}/{1}'.format(d,z),'w+', + # width=dst.width, + # height=dst.height, + # count=1, + # crs=dst.crs.data, + # transform=dst.transform, + # dtype=dst.dtypes[0]) + + + # stats = rasterstats.zonal_stats(df.geom,'zip:{0}/{1}'.format(d,z)) + # stats = pd.DataFrame(stats) + # if i == 0 : + # df[stats.columns] = stats + # else: + 
+        return df
+
+
+    def dir_exp(df):
+        '''
+        Adds a 'ssind_direxp' field filled from the expert-opinion
+        ("dire d'expert") CSV file:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ssind_direxp' : expert score read from Note_DE_fonction
+        '''
+        df = df.copy()
+        data = gpd.pd.read_csv(P_expert + c_expert, sep=';')
+        data = data[['site_code','Note_DE_fonction']]
+        data.columns = ['site_code','ssind_direxp']
+        data.ssind_direxp = data.ssind_direxp.astype(float)
+        df = df.merge(data, right_on='site_code', left_on='id_site')
+        del df['site_code']
+        return df
+
+
+    def bilan(df):
+        '''
+        Gathers the sub-indices of the "multi-functions" criteria in a
+        single table. The index can only be computed once the dir_exp
+        field has been filled in:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ind_multifct' :
+            sum(
+                ssind_pente
+                + ssind_direxp
+            )
+        '''
+        # df = crit_multi_fct.surface(df)   # 'ssind_surf' currently disabled
+        df = crit_multi_fct.pente(df)
+        df = crit_multi_fct.dir_exp(df)
+        df['ind_multifct'] = df[
+            ['ssind_pente','ssind_direxp']
+            ].sum(axis=1)
+        df.name = 'Crit_multi_fct'
+        return df
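+
+# For reference: the `jenks` helper used throughout is defined earlier in
+# this script. A minimal sketch of the assumed behaviour (hypothetical,
+# based on the jenkspy package; older jenkspy releases spell the keyword
+# `nb_class` instead of `n_classes`) would be:
+def _jenks_sketch(data, col, labels):
+    '''Classify data[col] into len(labels) Jenks natural-breaks classes
+    (illustration only; the real helper is defined above).'''
+    import jenkspy
+    breaks = jenkspy.jenks_breaks(data[col].dropna(), n_classes=len(labels))
+    return gpd.pd.cut(data[col], bins=breaks, labels=labels, include_lowest=True)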
+
+
+class pression:
+    '''
+    Computes the index and sub-indices of the pressures exerted on the
+    wetlands:
+
+    List of functions
+    ----------
+    artif_directe : Retrieves the Rhoméo I12 direct artificialisation
+        pressure results. Applies Jenks natural-breaks classification to
+        split the results into 3 classes [0, 0.5, 1].
+    artif_indir : Retrieves the Rhoméo I12 indirect artificialisation
+        pressure results. Applies Jenks natural-breaks classification to
+        split the results into 3 classes [0, 0.5, 1].
+    urbani_directe : Retrieves the Rhoméo I12 direct urbanisation
+        pressure results. Applies Jenks natural-breaks classification to
+        split the results into 4 classes [0, 0.5, 1, 1.5].
+    urbani_indir : Retrieves the Rhoméo I12 indirect urbanisation
+        pressure results. Applies Jenks natural-breaks classification to
+        split the results into 4 classes [0, 0.5, 1, 1.5].
+    pressAgri_directe : Retrieves the Rhoméo I13 direct agricultural
+        pressure results. Applies Jenks natural-breaks classification to
+        split the results into 3 classes [0, 0.5, 1].
+    pressAgri_indir : Retrieves the Rhoméo I13 indirect agricultural
+        pressure results. Applies Jenks natural-breaks classification to
+        split the results into 3 classes [0, 0.5, 1].
+    projet_plu_U : Intersects the local urbanism plan (PLU) zones with
+        the study polygons, keeping the field Typezone == 'U'. Points are
+        assigned where intersections occur.
+    projet_plu_AU : Intersects the local urbanism plan (PLU) zones with
+        the study polygons, keeping the field Typezone == 'AU'. Points
+        are assigned where intersections occur.
+    projet_scot : Where no PLU applies, looks for potential development
+        areas around the sites (SCOT). Points are assigned where
+        intersections occur.
+    conflit_redi : Intersects the REDI conflict zones (points, lines)
+        with the study polygons, using a 50 m buffer.
+    prelev_eau : Identifies wetlands close to drinking-water catchments,
+        using a 50 m buffer and an intersection test.
+    icpe : Identifies wetlands close to classified facilities (ICPE),
+        using a 500 m buffer and an intersection test.
+    ouvrage : Identifies structures and deposits located within the
+        wetlands, by intersection.
+    vulnerabilite : Identifies wetlands close to invasive alien species,
+        using a 100 m buffer and an intersection test.
+    press_dir : Computes the direct-pressure index (artif_directe,
+        urbani_directe, pressAgri_directe, projet_plu_U, conflit_redi,
+        icpe, prelev_eau, ouvrage, vulnerabilite).
+    press_ind : Computes the indirect-pressure index (artif_indir,
+        urbani_indir, pressAgri_indir, projet_plu_AU, projet_scot).
+    bilan : Gathers the direct- and indirect-pressure sub-indices in a
+        single table.
+    '''
+    # indesirable : identifies undesirable species present within the
+    # wetlands, by intersection with a layer provided by the client
+    # (currently disabled, see the commented function further below)
+    def artif_directe(df):
+        '''
+        Retrieves the Rhoméo I12 direct artificialisation pressure
+        results. Applies Jenks natural-breaks classification to split
+        the results into 3 classes [0, 0.5, 1]:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ssind_artifDir' = 0|0.5|1
+        '''
+        print('INIT : Direct artificialisation ...')
+
+        df = df.copy()
+        # (An earlier, commented-out version computed the coverage of
+        # artificialised surfaces and roads over the wetlands directly,
+        # via _calc_recouvrmt; it was replaced by the pre-computed Rhoméo
+        # results read below.)
+        data = gpd.pd.read_csv(path0 + c_rhomeo)
+        data.set_index('site_code', inplace=True)
+        data.presdirect_artif = data.presdirect_artif.round()
+        tmp = jenks(
+            data=data[['presdirect_artif']],
+            col='presdirect_artif',
+            labels=[0, 0.5, 1])
+        df = df.merge(tmp, how='left', left_on='id_site', right_index=True)
+        df.rename(columns={'presdirect_artif':'ssind_artifDir'}, inplace=True)
+
+        return df
+
+
+    def artif_indir(df):
+        '''
+        Retrieves the Rhoméo I12 indirect artificialisation pressure
+        results. Applies Jenks natural-breaks classification to split
+        the results into 3 classes [0, 0.5, 1]:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ssind_artifIndir' = 0|0.5|1
+        '''
+        print('INIT : Indirect artificialisation ...')
+
+        df = df.copy()
+        data = gpd.pd.read_csv(path0 + c_rhomeo)
+        data.set_index('site_code', inplace=True)
+        data.presindir_artif = data.presindir_artif.round()
+        tmp = jenks(
+            data=data[['presindir_artif']],
+            col='presindir_artif',
+            labels=[0, 0.5, 1])
+        df = df.merge(tmp, how='left', left_on='id_site', right_index=True)
+        df.rename(columns={'presindir_artif':'ssind_artifIndir'}, inplace=True)
+
+        return df
+
+
+    def urbani_directe(df):
+        '''
+        Retrieves the Rhoméo I12 direct urbanisation pressure results.
+        Applies Jenks natural-breaks classification to split the results
+        into 4 classes [0, 0.5, 1, 1.5]:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ssind_urbaDir' = 0|0.5|1|1.5
+        '''
+        print('INIT : Direct urbanisation ...')
+        df = df.copy()
+        data = gpd.pd.read_csv(path0 + c_rhomeo)
+        data.set_index('site_code', inplace=True)
+        data.presdirect_urba = data.presdirect_urba.round()
+        tmp = jenks(
+            data=data[['presdirect_urba']],
+            col='presdirect_urba',
+            labels=[0, 0.5, 1, 1.5])
+        df = df.merge(tmp, how='left', left_on='id_site', right_index=True)
+        df.rename(columns={'presdirect_urba':'ssind_urbaDir'}, inplace=True)
+        return df
+
+
+    def urbani_indir(df):
+        '''
+        Retrieves the Rhoméo I12 indirect urbanisation pressure results.
+        Applies Jenks natural-breaks classification to split the results
+        into 4 classes [0, 0.5, 1, 1.5]:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ssind_urbaIndir' = 0|0.5|1|1.5
+        '''
+        print('INIT : Indirect urbanisation ...')
+        df = df.copy()
+        data = gpd.pd.read_csv(path0 + c_rhomeo)
+        data.set_index('site_code', inplace=True)
+        data.presindir_urba = data.presindir_urba.round()
+        tmp = jenks(
+            data=data[['presindir_urba']],
+            col='presindir_urba',
+            labels=[0, 0.5, 1, 1.5])
+        df = df.merge(tmp, how='left', left_on='id_site', right_index=True)
+        df.rename(columns={'presindir_urba':'ssind_urbaIndir'}, inplace=True)
+        return df
+
+
+    def pressAgri_directe(df):
+        '''
+        Retrieves the Rhoméo I13 direct agricultural pressure results.
+        Applies Jenks natural-breaks classification to split the results
+        into 3 classes [0, 0.5, 1]:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ssind_agriDir' = 0|0.5|1
+        '''
+        print('INIT : Direct agricultural pressures ...')
+        df = df.copy()
+        # (An earlier, commented-out version scored the agricultural
+        # coverage computed with _calc_recouvrmt; it was replaced by the
+        # pre-computed Rhoméo results read below.)
+        data = gpd.pd.read_csv(path0 + c_rhomeo)
+        data.set_index('site_code', inplace=True)
+        data.presdirect_agri = data.presdirect_agri.round()
+        tmp = jenks(
+            data=data[['presdirect_agri']],
+            col='presdirect_agri',
+            labels=[0, 0.5, 1])
+        df = df.merge(tmp, how='left', left_on='id_site', right_index=True)
+        df.rename(columns={'presdirect_agri':'ssind_agriDir'}, inplace=True)
+
+        return df
+
+
+    def pressAgri_indir(df):
+        '''
+        Retrieves the Rhoméo I13 indirect agricultural pressure results.
+        Applies Jenks natural-breaks classification to split the results
+        into 3 classes [0, 0.5, 1]:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ssind_agriIndir' = 0|0.5|1
+        '''
+        print('INIT : Indirect agricultural pressures ...')
+        df = df.copy()
+        data = gpd.pd.read_csv(path0 + c_rhomeo)
+        data.set_index('site_code', inplace=True)
+        data.presindir_agri = data.presindir_agri.round()
+        tmp = jenks(
+            data=data[['presindir_agri']],
+            col='presindir_agri',
+            labels=[0, 0.5, 1])
+        df = df.merge(tmp, how='left', left_on='id_site', right_index=True)
+        df.rename(columns={'presindir_agri':'ssind_agriIndir'}, inplace=True)
+
+        return df
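+
+
+    # The six Rhoméo readers above share a single pattern; a sketch of a
+    # common helper (hypothetical refactoring, not called anywhere) could
+    # look like this:
+    def _rhomeo_ssind(df, rhomeo_col, out_col, labels):
+        '''Read one Rhoméo indicator column, classify it with Jenks and
+        merge it onto df under out_col (illustration only).'''
+        df = df.copy()
+        data = gpd.pd.read_csv(path0 + c_rhomeo)
+        data.set_index('site_code', inplace=True)
+        data[rhomeo_col] = data[rhomeo_col].round()
+        tmp = jenks(data=data[[rhomeo_col]], col=rhomeo_col, labels=labels)
+        df = df.merge(tmp, how='left', left_on='id_site', right_index=True)
+        return df.rename(columns={rhomeo_col: out_col})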
+
+
+    def projet_plu_U(df):
+        '''
+        Intersects the local urbanism plan (PLU) zones with the study
+        polygons, keeping the field Typezone == 'U'. Points are assigned
+        where intersections occur:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ssind_pluU' = 0|1
+        '''
+        df = df.copy()
+        d1 = open_gpkg(Path+p_press+c_urba_plu1, bbox=df)
+        d2 = open_gpkg(Path+p_press+c_urba_plu2, bbox=df)
+        d3 = open_gpkg(Path+p_press+c_urba_plu3, bbox=df)
+        d3.rename(columns={'typezone': 'Typezone'}, inplace=True)
+        d1 = d1[d1.Typezone =='U']
+        d2 = d2[d2.Typezone =='U']
+        d3 = d3[d3.Typezone =='U']
+        data = gpd.pd.concat(
+            [d1[['Typezone','geom']],d2[['Typezone','geom']],d3[['Typezone','geom']]],
+            ignore_index=True)
+        tmp = gpd.sjoin(df,data[['geom']],op='intersects', how='left')
+        lst = tmp.loc[~tmp.index_right.isna(),'id_site'].tolist()
+        df['ssind_pluU'] = 0
+        df.loc[df.id_site.isin(lst),'ssind_pluU'] = 1
+
+        return df
+
+
+    def projet_plu_AU(df):
+        '''
+        Intersects the local urbanism plan (PLU) zones with the study
+        polygons, keeping the field Typezone == 'AU'. Points are assigned
+        where intersections occur:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ssind_pluAU' = 0|1
+        '''
+        df = df.copy()
+        d1 = open_gpkg(Path+p_press+c_urba_plu1, bbox=df)
+        d2 = open_gpkg(Path+p_press+c_urba_plu2, bbox=df)
+        d3 = open_gpkg(Path+p_press+c_urba_plu3, bbox=df)
+        d3.rename(columns={'typezone': 'Typezone'}, inplace=True)
+        # na=False guards against missing Typezone values in all three layers
+        d1 = d1[d1.Typezone.str.contains('AU',na=False)]
+        d2 = d2[d2.Typezone.str.contains('AU',na=False)]
+        d3 = d3[d3.Typezone.str.contains('AU',na=False)]
+        data = gpd.pd.concat(
+            [d1[['Typezone','geom']],d2[['Typezone','geom']],d3[['Typezone','geom']]],
+            ignore_index=True)
+        tmp = gpd.sjoin(df,data[['geom']],op='intersects', how='left')
+        lst = tmp.loc[~tmp.index_right.isna(),'id_site'].tolist()
+        df['ssind_pluAU'] = 0
+        df.loc[df.id_site.isin(lst),'ssind_pluAU'] = 1
+
+        return df
+
+
+    def projet_scot(df):
+        '''
+        Where no PLU applies, looks for potential development areas
+        around the sites (SCOT). Points are assigned where intersections
+        occur:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ssind_scot' = 0|1
+        '''
+        df = df.copy()
+        # Open the PLU layers
+        d1 = open_gpkg(Path+p_press+c_urba_plu1, bbox=df)
+        d2 = open_gpkg(Path+p_press+c_urba_plu2, bbox=df)
+        d3 = open_gpkg(Path+p_press+c_urba_plu3, bbox=df)
+        d3.rename(columns={'typezone': 'Typezone'}, inplace=True)
+        dataPLU = gpd.pd.concat(
+            [d1[['Typezone','geom']],d2[['Typezone','geom']],d3[['Typezone','geom']]],
+            ignore_index=True)
+        tmpPLU = gpd.sjoin(df,dataPLU[['geom']],op='intersects', how='left')
+        lstPLU = tmpPLU.loc[~tmpPLU.index_right.isna(),'id_site'].tolist()
+
+        # For sites covered by no PLU, intersect with the SCOT layer
+        dataSCOT = open_gpkg(Path+p_press+c_urba_scot, bbox=df)
+        tmp = gpd.sjoin(df,dataSCOT[['geom']],op='intersects', how='left')
+        lstSCOT = tmp.loc[~tmp.index_right.isna(),'id_site'].tolist()
+        df['ssind_scot'] = 0
+        df.loc[(~df.id_site.isin(lstPLU))&(df.id_site.isin(lstSCOT)),'ssind_scot'] = 1
+
+        return df
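+
+
+    # For reference: geopandas deprecated sjoin's `op=` keyword in favour
+    # of `predicate=` (geopandas >= 0.10), so on recent versions the joins
+    # above would read, e.g.:
+    #   tmp = gpd.sjoin(df, data[['geom']], predicate='intersects', how='left')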
+
+
+    def conflit_redi(df):
+        '''
+        Intersects the REDI conflict zones (points, lines) with the study
+        polygons, using a 50 m buffer:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ssind_confli' = 0
+        if conflict :
+            'ssind_confli' = 2
+        '''
+        df = df.copy()
+        lign = open_gpkg(Path+p_press+c_lign_confliredi, bbox=df)
+        poin = open_gpkg(Path+p_press+c_poin_confliredi, bbox=df)
+        lign.columns = lign.columns.str.lower()
+        poin.columns = poin.columns.str.lower()
+        lign.geom = lign.geom.buffer(50)
+        poin.geom = poin.geom.buffer(50)
+        data = gpd.pd.concat([
+            lign[['id','geom']],
+            poin[['id','geom']]
+            ])
+        data = gpd.GeoDataFrame(data, geometry='geom', crs=lign.crs.srs)
+        geom = _union_polygons_geometry(data)
+        if not geom.is_valid:
+            # buffer(0) is the usual trick to repair an invalid polygon
+            geom = geom.buffer(0)
+        df['ssind_confli'] = df.intersects(geom).astype(int).replace(1,2)
+        return df
+
+
+    def prelev_eau(df):
+        '''
+        Identifies wetlands close to drinking-water catchments, using a
+        50 m buffer and an intersection test:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ssind_prlvmteau' :
+            ZH.geom = ZH.buffer(50)
+            if ZH.intersects(prelev_eau):
+                ssind = 1
+            else :
+                ssind = 0
+        '''
+        print('INIT : Water abstraction ...')
+        df_buf = df.copy()
+        df_buf.geom = df.buffer(50)
+        # column names below are kept exactly as they appear in the
+        # source CSV (accents stripped)
+        data = gpd.pd.read_csv(Path+p_press+c_captag, sep=';')
+        data = data.loc[data['Qualit de localisation']==1]
+        data = gpd.GeoDataFrame(
+            data,
+            crs=df_buf.crs.srs,
+            geometry=gpd.points_from_xy(data['Coordonnes lambert X'],data['Coordonnes lambert Y'])
+        )
+        data.rename_geometry('geom', inplace=True)
+        data.reset_index(drop=True, inplace=True)
+        mp = MultiPoint(data.geom)
+        df_buf['ssind_prlvmteau'] = df_buf.intersects(mp).astype(int)
+        df = df.merge(df_buf[['id_site','ssind_prlvmteau']], on=['id_site'])
+
+        return df
+
+
+    def icpe(df):
+        '''
+        Identifies wetlands close to classified facilities (ICPE), using
+        a 500 m buffer and an intersection test:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ssind_icpe' :
+            ZH.geom = ZH.buffer(500)
+            if ZH.intersects(install_classee):
+                ssind = 1
+            else :
+                ssind = 0
+        '''
+        print('INIT : ICPE ...')
+        tmp = df.copy()
+        tmp.geom = tmp.buffer(500)
+        data = open_gpkg(Path+p_press+c_iClass)
+        data = MultiPoint(data.geom)
+        tmp['ssind_icpe'] = tmp.intersects(data).astype(int)
+        df = df.merge(tmp[['id_site','ssind_icpe']], on=['id_site'], how='left')
+
+        return df
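+
+
+    # For reference: building a shapely MultiPoint directly from a
+    # GeoSeries (as in prelev_eau and icpe) assumes the series iterates
+    # as points; a more general spelling is to take the union of the
+    # layer and intersect with that, e.g.:
+    #   geom = data.geom.unary_union
+    #   tmp['ssind_icpe'] = tmp.intersects(geom).astype(int)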
+
+
+    # def indesirable(df):
+    #     '''
+    #     Identifies undesirable species present within the wetlands, by
+    #     intersection with a layer provided by the client:
+    #
+    #     Parameters
+    #     ----------
+    #     df : GeoDataFrame. GeoDataFrame of the wetlands
+    #         of the study.
+    #
+    #     Return
+    #     ----------
+    #     'ssind_indesi' :
+    #         if ZH.intersects(esp_indesir):
+    #             ssind_indesir = 1
+    #         else :
+    #             ssind_indesir = 0
+    #     '''
+    #     print('INIT : Undesirable species ...')
+    #     df = df.copy()
+    #     d1 = open_gpkg(Path+p_press+c_indesi, bbox=df, layer='Ambroisie')
+    #     d2 = open_gpkg(Path+p_press+c_indesi, bbox=df, layer='Renouée')
+    #     d1.rename(columns={'ID_SIR_GL_AMBROISIE':'ID'}, inplace=True)
+    #     d2.rename(columns={'ID_SIR_GL_RENOUEE':'ID'}, inplace=True)
+    #     dftmp = df.copy()
+    #     dftmp.geom = dftmp.buffer(100)
+    #     data = gpd.pd.concat([d1[['ID','geom']],d2[['ID','geom']]])
+    #     tmp = gpd.sjoin(dftmp,data[['geom']],op='intersects', how='left')
+    #     lst = tmp.loc[~tmp.index_right.isna(),'id_site'].tolist()
+    #     df['ssind_indesir'] = 0
+    #     df.loc[df.id_site.isin(lst),'ssind_indesir'] = 1
+    #     return df
+
+    # (A further shelved experiment followed here: it parsed the textual
+    # surface_m2 ranges ('<x', '>x', 'x-y') into min/max bounds, found
+    # the nearest occurrence for each wetland with shapely's
+    # nearest_points over the unary union, estimated an occupancy radius
+    # from the reported surface with sqrt(max_surf * pi), and flagged
+    # ssind_indesi where the distance to the occurrence fell within that
+    # radius.)
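+    # For reference: for a disc of area A the radius is r = sqrt(A / pi),
+    # not sqrt(A * pi) as in the shelved experiment above; e.g. A = 100 m2
+    # gives r = sqrt(100 / 3.1416) ≈ 5.64 m, whereas
+    # sqrt(100 * 3.1416) ≈ 17.72 m would overshoot.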
+
+
+    def ouvrage(df):
+        '''
+        Identifies structures and deposits located within the wetlands,
+        by intersection:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ssind_ouvrag' :
+            if ZH.intersects(ouvrages):
+                ssind = 1
+            else :
+                ssind = 0
+        '''
+        print('INIT : Structures ...')
+        df = df.copy()
+        data = open_gpkg(Path+p_press+c_ouvrag, bbox=df)
+        data = data.unary_union
+        df['ssind_ouvrag'] = df.intersects(data).astype(int)
+
+        # deposits only matter for sites not already flagged
+        data2 = open_gpkg(Path+p_press+c_depot, bbox=df)
+        data2 = data2.unary_union
+        df.loc[df.ssind_ouvrag == 0, 'ssind_ouvrag'] = df[df.ssind_ouvrag == 0].intersects(data2).astype(int)
+
+        return df
+
+
+    def vulnerabilite(df):
+        '''
+        Identifies wetlands close to invasive alien species, using a
+        100 m buffer and an intersection test:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ssind_vulnerab' :
+            ZH.geom = ZH.buffer(100)
+            if ZH.intersects(esp_exo_envahi):
+                ssind = 1
+            else :
+                ssind = 0
+        '''
+        print('INIT : Vulnerability ...')
+        df_buf = df.copy()
+        df_buf.geom = df_buf.buffer(100)
+        lst_term = ['Buddleia','Renouee','Solidage']
+        data1 = open_gpkg(Path+p_press+c_invas, bbox=df_buf)
+        data1 = data1[data1.Espece.isin(lst_term)]
+        data2 = open_gpkg(Path+p_press+c_fallo, bbox=df_buf)
+        data3 = open_gpkg(Path+p_press+c_cd38_eee, bbox=df_buf, layer='Renouée')
+        data4 = open_gpkg(Path+p_press+c_cd38_eee, bbox=df_buf, layer='Ambroisie')
+        data = gpd.pd.concat([data1[['geom']],data2[['geom']],data3[['geom']],data4[['geom']]], ignore_index=True)
+        data.reset_index(inplace=True)
+        # join the buffered wetlands against the invasive-species layers
+        tmp = gpd.sjoin(df_buf,data[['geom']],op='intersects', how='left')
+        lst = tmp.loc[~tmp.index_right.isna(),'id_site'].tolist()
+        df_buf['ssind_vulnerab'] = 0
+        df_buf.loc[df_buf.id_site.isin(lst),'ssind_vulnerab'] = 1
+
+        # fetch SICEN observations inside the bbox of df_buf whose
+        # nom_latin matches one of lst_term
+        lst_term = [
+            'Solidago gigantea','Reynoutria','Buddleja davidii',
+            'Impatiens glandulifera','Ambrosia artemisiifolia']
+        from shapely.geometry import box
+        from geopandas import read_postgis
+        from pycen.params import sicen_con as con
+        bbox = box(*df_buf.total_bounds)
+        geom_col = 'geom'
+        schema_sicen = 'saisie'
+        table_sicen = 'vm_synthese_observations'
+        sql = """SELECT id_obs, date_obs, regne, classe, ordre, nom_latin, nom_vern, geom FROM {sch}.{tab}
+        WHERE ST_Intersects ({geom_col}, 'SRID={epsg};{poly}')
+        AND nom_latin LIKE ANY (array{array});""".format(
+            sch=schema_sicen,
+            tab=table_sicen,
+            array = ['%%{}%%'.format(t) for t in lst_term],
+            geom_col=geom_col,
+            epsg=df_buf.crs.srs.split(':')[1],
+            poly=bbox
+        )
+        sicen = read_postgis(sql=sql, con=con)
+        data3 = sicen.unary_union
+        df_buf.loc[df_buf.ssind_vulnerab == 0, 'ssind_vulnerab'] = df_buf[df_buf.ssind_vulnerab == 0].intersects(data3).astype(int)
+
+        df = df.merge(df_buf[['id_site','ssind_vulnerab']], on=['id_site'], how='left')
+
+        return df
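+
+
+    # For illustration, the query assembled above renders roughly as
+    # (geometry shortened):
+    #   SELECT id_obs, date_obs, regne, classe, ordre, nom_latin, nom_vern, geom
+    #   FROM saisie.vm_synthese_observations
+    #   WHERE ST_Intersects (geom, 'SRID=2154;POLYGON((...))')
+    #   AND nom_latin LIKE ANY (array['%%Solidago gigantea%%', '%%Reynoutria%%', ...]);
+    # in a LIKE pattern two adjacent % wildcards match the same strings
+    # as a single %, so the doubled %% (needed to survive pyformat-style
+    # drivers) is harmless either way.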
+
+
+    def press_dir(df):
+        '''
+        Computes the direct-pressure index:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ind_pressDir' :
+            sum(
+                ssind_artifDir + ssind_urbaDir
+                + ssind_agriDir + ssind_pluU
+                + ssind_confli + ssind_prlvmteau
+                + ssind_icpe + ssind_ouvrag
+                + ssind_vulnerab
+            )
+        '''
+        df = pression.artif_directe(df)
+        df = pression.urbani_directe(df)
+        df = pression.pressAgri_directe(df)
+        df = pression.projet_plu_U(df)
+        df = pression.conflit_redi(df)
+        df = pression.icpe(df)
+        df = pression.prelev_eau(df)
+        df = pression.ouvrage(df)
+        df = pression.vulnerabilite(df)
+
+        df['ind_pressDir'] = df[
+            ['ssind_artifDir','ssind_urbaDir','ssind_agriDir',
+             'ssind_pluU','ssind_confli','ssind_prlvmteau','ssind_icpe',
+             'ssind_ouvrag','ssind_vulnerab']].sum(axis=1)
+        df.name = 'Pression_direct'
+
+        return df
+
+
+    def press_ind(df):
+        '''
+        Computes the indirect-pressure index:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ind_pressInd' :
+            sum(
+                ssind_artifIndir + ssind_urbaIndir
+                + ssind_agriIndir + ssind_pluAU
+                + ssind_scot
+            )
+        '''
+        df = pression.artif_indir(df)
+        df = pression.urbani_indir(df)
+        df = pression.pressAgri_indir(df)
+        df = pression.projet_plu_AU(df)
+        df = pression.projet_scot(df)
+
+        df['ind_pressInd'] = df[
+            ['ssind_artifIndir','ssind_urbaIndir','ssind_agriIndir',
+             'ssind_pluAU', 'ssind_scot']].sum(axis=1)
+        df.name = 'Pression_indir'
+
+        return df
+
+
+    def dir_exp(df):
+        '''
+        Adds a 'ssind_direxp' field filled from the expert-opinion
+        ("dire d'expert") CSV file:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ssind_direxp' : expert score read from Note_DE_pression
+        '''
+        df = df.copy()
+        data = gpd.pd.read_csv(P_expert + c_expert, sep=';')
+        data = data[['site_code','Note_DE_pression']]
+        data.columns = ['site_code','ssind_direxp']
+        data.ssind_direxp = data.ssind_direxp.astype(float)
+        df = df.merge(data, right_on='site_code', left_on='id_site')
+        del df['site_code']
+        return df
+
+
+    def bilan(df):
+        '''
+        Gathers the "pressure" sub-indices in a single table. The index
+        can only be computed once the dir_exp field has been filled in:
+
+        Parameters
+        ----------
+        df : GeoDataFrame. GeoDataFrame of the wetlands
+            of the study.
+
+        Return
+        ----------
+        'ind_pression' :
+            sum(
+                ind_pressDir
+                + ind_pressInd
+                + ssind_direxp
+            )
+        '''
+        df = pression.press_dir(df)
+        df = pression.press_ind(df)
+        df = pression.dir_exp(df)
+        # df = pression.indesirable(df)   # disabled, see above
+        df['ind_pression'] = df[
+            ['ind_pressDir','ind_pressInd','ssind_direxp']
+            ].sum(axis=1)
+        df.name = 'Pression'
+        return df
+
+
+def priorisation(data, titre, col1, col2):
+    '''Crosses two 'faible'/'moyen'/'fort' classes into a P1-P4 priority
+    level; P3 is the default for every combination not set explicitly.'''
+    data = data.copy()
+    data[titre] = None
+    data.loc[(data[col1]=='fort')&(data[col2]=='fort'),titre] = 'P1'
+    data.loc[(data[col1]=='fort')&(data[col2]=='moyen'),titre] = 'P2'
+    data.loc[(data[col1]=='moyen')&(data[col2]=='fort'),titre] = 'P2'
+    data.loc[(data[col1]=='moyen')&(data[col2]=='moyen'),titre] = 'P2'
+    data.loc[(data[col1]=='faible')&(data[col2]=='faible'),titre] = 'P4'
+    data.loc[data[titre].isna(),titre] = 'P3'
+    return data[titre]
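+
+# The priority grid implemented above, with col1 (stake) down the rows
+# and col2 (pressure) across the columns:
+#
+#            fort    moyen   faible
+#   fort     P1      P2      P3
+#   moyen    P2      P2      P3
+#   faible   P3      P3      P4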
+
+
+if __name__ == '__main__':
+
+    from datetime import datetime as dt
+    from pandas import read_table
+
+    # Retrieve the list of wetlands in scope
+    init = dt.now()
+    liste_zh = read_table(path0+'PGSZH_liste_ZH.csv', sep=';')
+    lst_idsite = liste_zh.site_code.tolist()
+    sit = zh.get_sitesGeom(id_site=lst_idsite, last_update=True)
+    # sit = zh.get_sitesGeom(last_update=True)
+
+    if not sit.is_valid.all():
+        # repair invalid geometries with the buffer(0) trick
+        sit.loc[~sit.is_valid, 'geom'] = sit.loc[~sit.is_valid].buffer(0)
+    df = sit[['id_site', 'geom']].copy()
+
+    # Compute the pressures and functions of the study
+    bilan = sit[['id_site']].copy()
+    df_bio = fct_bio_eco.bilan(df)
+    print((dt.now() - init).total_seconds())
+    df_hyd = fct_hyd.bilan(df)
+    print((dt.now() - init).total_seconds())
+    df_phy = fct_phy_bio.bilan(df)
+    print((dt.now() - init).total_seconds())
+    df_mlt = crit_multi_fct.bilan(df)
+    print((dt.now() - init).total_seconds())
+    df_pre = pression.bilan(df)
+    print((dt.now() - init).total_seconds())
+    lst_df = [df_bio,df_hyd,df_phy,df_mlt,df_pre]
+    for d in lst_df:
+        ind_col = d.columns[d.columns.str.startswith('ind')]
+        bilan = bilan.merge(d[['id_site', *ind_col]], on=['id_site'])
+
+    cols_ind = bilan.columns[bilan.columns.str.startswith('ind')]
+    ind_pres = df_pre.columns[df_pre.columns.str.startswith('ind')]
+    bilan['ind_fct'] = bilan[cols_ind.drop(ind_pres)].sum(axis=1)
+    bilan['indice'] = bilan[['ind_fct','ind_pression']].sum(axis=1)
+    bilan.name = 'Bilan'
+    print((dt.now() - init).total_seconds())
+
+    # Normalise the scores with the Jenks classification method:
+    # jenks(data, col, labels)
+    enjeux = bilan[['id_site']].copy()
+    enjeux['clss_bioeco'] = jenks(bilan,'ind_bioeco',['faible','moyen','fort'])
+    enjeux['clss_hydro'] = jenks(bilan,'ind_hydro',['faible','moyen','fort'])
+    enjeux['clss_phybio'] = jenks(bilan,'ind_phybio',['faible','moyen','fort'])
+    enjeux['clss_multifct'] = jenks(bilan,'ind_multifct',['faible','moyen','fort'])
+    enjeux['clss_pressDir'] = jenks(bilan,'ind_pressDir',['faible','moyen','fort'])
+    enjeux['clss_pressInd'] = jenks(bilan,'ind_pressInd',['faible','moyen','fort'])
+    enjeux['clss_fct'] = jenks(bilan,'ind_fct',['faible','moyen','fort'])
+    enjeux['clss_pression'] = jenks(bilan,'ind_pression',['faible','moyen','fort'])
+
+    # Prioritise the stakes
+    enjeux['enjeu_bioeco'] = priorisation(data=enjeux,titre='enjeu_bioeco',col1='clss_bioeco',col2='clss_pression')
+    enjeux['enjeu_hydro'] = priorisation(data=enjeux,titre='enjeu_hydro',col1='clss_hydro',col2='clss_pression')
+    enjeux['enjeu_phybio'] = priorisation(data=enjeux,titre='enjeu_phybio',col1='clss_phybio',col2='clss_pression')
+    enjeux['enjeu_bilan'] = priorisation(data=enjeux,titre='enjeu_bilan',col1='clss_fct',col2='clss_pression')
+
+    enjeux.name = 'Enjeux'
+
+    # Retrieve the bounds of each class
+    cols_enjeu = enjeux.columns
+    cols_enjeu = cols_enjeu[cols_enjeu.str.contains('enjeu')]
+    borne = gpd.pd.DataFrame(
+        columns=enjeux.columns.drop(['id_site',*cols_enjeu]),
+        index=['faible','moyen','fort'])
+    for col in borne.columns:
+        sfx = col.split('_')[1]
+        tmp = gpd.pd.merge(
+            bilan[['id_site','ind_'+sfx]],
+            enjeux[['id_site','clss_'+sfx]],
+            on = 'id_site'
+            )
+        for idx in borne.index:
+            borne.loc[borne.index==idx,col] = str([
+                tmp[tmp[col]==idx]['ind_'+sfx].min(),
+                tmp[tmp[col]==idx]['ind_'+sfx].max()
+                ])
+    borne.index.name = 'classe'
+    borne.reset_index(inplace=True, drop=False)
+    borne.name = 'Borne des classes enjeux'
+
+    # Build a dictionary of result tables
+    dict_dfs = {}
+    for ddf in [borne, enjeux, bilan, *lst_df]:
+        dict_dfs[ddf.name] = ddf
+
+    # Write the multi-table results file
+    print((dt.now() - init).total_seconds())
+    pycen.write_bilan(dict_dfs, PATH_OUT)
+
+    # Join the geometries back onto Bilan and Enjeux
+    bilan = bilan.merge(df,how='left',on='id_site')
+    bilan = bilan.set_geometry('geom')
+    bilan.rename(columns={'id_site':'site_code'}, inplace=True)
+    bilan.name = 'Bilan'
+    enjeux = enjeux.merge(df,how='left',on='id_site')
+    enjeux = enjeux.set_geometry('geom')
+    enjeux.rename(columns={'id_site':'site_code'}, inplace=True)
+    enjeux.name = 'Enjeux'
+
+    # Write the geopackage
+    df_bio.to_file(PATH_OUT[:-4]+'gpkg', layer=df_bio.name, driver='GPKG')
+    df_hyd.to_file(PATH_OUT[:-4]+'gpkg', layer=df_hyd.name, driver='GPKG')
+    df_phy.to_file(PATH_OUT[:-4]+'gpkg', layer=df_phy.name, driver='GPKG')
+    df_mlt.to_file(PATH_OUT[:-4]+'gpkg', layer=df_mlt.name, driver='GPKG')
+    df_pre.to_file(PATH_OUT[:-4]+'gpkg', layer=df_pre.name, driver='GPKG')
+    enjeux.to_file(PATH_OUT[:-4]+'gpkg', layer=enjeux.name, driver='GPKG')
+    bilan.to_file( PATH_OUT[:-4]+'gpkg', layer=bilan.name, driver='GPKG')
+
+    from sys import exit
+    print('')
+    print((dt.now() - init).total_seconds())
+    exit('END PGZH')
\ No newline at end of file
diff --git a/tutu.py b/tutu.py
new file mode 100644
index 0000000..abc62a2
--- /dev/null
+++ b/tutu.py
@@ -0,0 +1,8 @@
+c1 = df.regne.unique()
+c2 = df.phylum.unique()
+c3 = df.classe.unique()
+c4 = df.ordre.unique()
+c5 = df.famille.unique()
+c6 = df.sous_famille.unique()
+
+length = len(c1) + len(c2) + len(c3) + len(c4) + len(c5) + len(c6)
\ No newline at end of file
diff --git a/update_to_sql.py b/update_to_sql.py
new file mode 100644
index 0000000..78ccd8d
--- /dev/null
+++ b/update_to_sql.py
@@ -0,0 +1,76 @@
+import geopandas as gpd
+from sqlalchemy import inspect
+
+def __get_pkey__(engine, table_name, schema):
+    # use the SQLAlchemy inspection API to reflect the primary key
+    return inspect(engine).get_pk_constraint(table_name, schema=schema)
+
+def __get_dtype__(engine, table_name, schema):
+    cols = inspect(engine).get_columns(table_name, schema=schema)
+    type_cols = {i['name']: i['type'] for i in cols}
+    return type_cols
+
+def update_to_sql(df, con, table_name, schema_name, key_name):
+    a = []
+    b = []
+    table = table_name
+    schema = schema_name
+    primary_key = key_name
+    pkey = __get_pkey__(con, table_name=table, schema=schema)
+    type_cols = __get_dtype__(con, table_name=table, schema=schema)
+
+    # every primary-key column must be present in the DataFrame,
+    # otherwise the WHERE clause cannot match rows
+    if not all(col in df.columns for col in pkey['constrained_columns']):
+        print('Primary-key column(s) "%s" missing from the DataFrame' % pkey['constrained_columns'])
+        Q = input('Continue the update anyway? (y/n) ')
+        if Q.lower() != 'y':
+            return print('Data not updated')
+
+    if isinstance(primary_key, str):
+        primary_key = [primary_key]
+
+    for col in df.columns:
+        if col in primary_key:
+            b.append("t.{col}=f.{col}".format(col=col))
+        else:
+            dtype = type_cols[col]
+            if hasattr(dtype, 'enums'):
+                # enum columns need an explicit cast to the enum type
+                dty = '.'.join([dtype.schema, dtype.name])
+                a.append("{col}=t.{col}::{typ}".format(col=col, typ=dty))
+            else:
+                a.append("{col}=t.{col}".format(col=col))
+
+    if isinstance(df, gpd.GeoDataFrame):
+        # to_postgis writes the active geometry column of the frame
+        df.to_postgis(
+            name = 'temp_table',
+            con = con,
+            schema = schema,
+            if_exists = 'replace'
+        )
+    else:
+        df.to_sql(
+            name = 'temp_table',
+            con = con,
+            schema = schema,
+            if_exists = 'replace',
+            index = False,
+            method = 'multi'
+        )
+
+    set_clause = ", ".join(a)
+    where_clause = " AND ".join(b)
+    update_stmt = "UPDATE {sch}.{tab} f SET {set} FROM {sch}.temp_table t WHERE {where};".format(
+        sch=schema, tab=table, set=set_clause, where=where_clause)
+    drop_stmt = "DROP TABLE {sch}.temp_table ;".format(sch=schema)
+    with con.begin() as cnx:
+        cnx.execute(update_stmt)
+        cnx.execute(drop_stmt)
+    return print('END update')
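+
+# For illustration (table and columns as used by update_zonage_site.py),
+# the function builds and runs a statement of the form:
+#   UPDATE sites.c_sites_zonages f
+#   SET nom_site=t.nom_site, date_maj=t.date_maj, geom=t.geom
+#   FROM sites.temp_table t
+#   WHERE t.id=f.id;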
diff --git a/update_zonage_site.py b/update_zonage_site.py
new file mode 100644
index 0000000..1b73389
--- /dev/null
+++ b/update_zonage_site.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-.
+
+from sqlalchemy import create_engine #, text
+from sqlalchemy.engine import URL
+import geopandas as gpd
+import pandas as pd
+from pycen import update_to_sql
+from pycen.tools import Polygons_to_MultiPolygon
+from datetime import datetime as dt
+
+path = '/home/colas/Documents/9_PROJETS/0_FONCIER/MAJ_SITE/'
+site = 'ARSE'
+f_zi = 'extension_PL_V2.shp'
+f_zo = 'extension_PL_V2.shp'
+
+path_file = path+site+'/'+f_zi
+typ_zon = 'ZO'
+site_id = 117
+nom_site = "Tourbière de l'Arselle et le lac Achard"
+
+# Database connection parameters
+# (credentials are hard-coded here; reading them from the environment
+# would be safer)
+user = 'cgeier'
+pwd = 'adm1n*bdCen'
+adr = '192.168.0.189'
+port = '5432'
+base = 'bd-cen-38'
+
+url = URL.create('postgresql+psycopg2',
+    username=user,
+    password=pwd,
+    host=adr,
+    database=base,
+)
+con = create_engine(url)
+
+keep_col = [
+    'id','code_site','type_zonage','nom_site',
+    'date_maj','surface_ha','geom'
+    ]
+
+gdf = gpd.read_file(path_file)
+gdf.columns = gdf.columns.str.lower()
+gdf.rename_geometry('geom', inplace=True)
+if 'Polygon' in gdf.geom_type.unique():
+    # the target table stores MultiPolygons only
+    gdf = Polygons_to_MultiPolygon(gdf)
+# gdf['geom'] = gdf.unary_union
+gdf['date_maj'] = pd.to_datetime(dt.now().date())
+gdf['surface_ha'] = (gdf.area / 10000).round(2)
+gdf['type_zonage'] = typ_zon
+gdf['id'] = site_id
+gdf['nom_site'] = nom_site
+
+kp_col = gdf.columns[gdf.columns.isin(keep_col)]
+gdf = gdf[kp_col]
+# gdf = gdf.drop_duplicates()
+
+update_to_sql(
+    gdf,
+    con,
+    table_name='c_sites_zonages',
+    schema_name='sites',
+    key_name='id'
+)
\ No newline at end of file