Python_scripts/tmp/CVB_bourbre_MC_penible.py

#!/usr/bin/env python3
# -*- coding: UTF-8 -*-

import os
from pathlib import Path
import pandas as pd
import geopandas as gpd


path_fichsite = '/media/colas/SRV/FICHIERS/OPERATIONS (EX ETUDES)/ETUDES EN COURS/CORRIDORS/CVB_Bourbre/#42_Mesures compensatoires/BM/Evaluation des sites'
path_geom = '/media/colas/SRV/FICHIERS/OUTILS/CARTOGRAPHIE/ESPACE DE TRAVAIL/ETUDES/CVB_Bourbre/Cartographie CVB Bourbre 2021/Mesures compensatoires/0_FINAL'
c_geom = 'mc_isere_CVBbourbre.gpkg'
c_geom_det = 'mc_isere_CVBbourbre_détaillé.gpkg'
pathout = '/home/colas/Documents/tmp/CVB_Bourbre'

dic = {
        'Code cadastral' : 'N° cadast',
        "Date d'évaluation": 'date',
        "Date d'Evaluation": 'date',
        'RESULTAT' : 'Note',
        'APPARTENANCE A UN CORRIDOR SRCE' : 'corridor',
        'PATRIMOINE NATUREL' : 'patrim_nat',
        'CONNECTIVITE' : 'connectiv',
        'POTENTIEL DE RESTAURATION' : 'potentiel_restau',
        'Foncier ?' : 'foncier',
}

gdf = gpd.read_file(Path(path_geom) / c_geom)
gdf.loc[(gdf['N° cadast']=='0A0816'),'Note'] = 35.59
gdf.loc[gdf['N° cadast']=='AB0063','Note'] = 48.18
gdf.loc[gdf['N° cadast']=='AO0775','N° cadast'] = '0A0775'
gdf.loc[gdf['N° cadast']=='A0321','N° cadast'] = '0A0321'
gdf.loc[gdf['N° cadast']=='D0430','N° cadast'] = '0D0430'
gdf.loc[gdf['N° cadast']=='D0719','N° cadast'] = '0D0719'
gdf.loc[gdf['N° cadast']=='AB0006','N° cadast'] = 'AB0009'
gdf.loc[gdf['N° cadast']=='AM0154','N° cadast'] = 'CM0154'
gdf.loc[gdf['N° cadast']=='AM1032','N° cadast'] = 'AM0132'
gdf.loc[gdf['N° cadast']=='AK0098','N° cadast'] = 'AI0098'
gdf.loc[gdf['N° cadast']=='AI00109','N° cadast'] = 'AI0109'
gdf.loc[gdf['N° cadast']=='00645','N° cadast'] = '0F0645'
gdf.loc[gdf['N° cadast']=='DS0252','N° cadast'] = '0C0252'
gdf.loc[gdf['Commune']=='Trep','Commune'] = 'Trept'
gdf.loc[gdf['Commune']=='Villefoantaine','Commune'] = 'Villefontaine'
gdf.loc[gdf['Commune']=='Villefonatine','Commune'] = 'Villefontaine'
gdf.loc[gdf['Commune']=='La Verpillère','Commune'] = 'La Verpillière'
gdf.loc[gdf['Commune']=='La Veprillière','Commune'] = 'La Verpillière'
gdf.loc[gdf['Commune']=='Vaulx_Milieu','Commune'] = 'Vaulx-Milieu'
gdf.loc[gdf['Commune']=='Valx-Milieu','Commune'] = 'Vaulx-Milieu'
gdf.loc[gdf['Commune']=='Saint-Qentin-Fallavier','Commune'] = 'Saint-Quentin-Fallavier'

def recup_details_note_parcelle(path):
    DF = pd.DataFrame()
    for roots, dirs, files in os.walk(path):
        print(dirs)
        for f in files:
            if not f.endswith('.xlsx'):
                continue
            if f in ['fichier de saisie.xlsx', 'MC-Evaluation des sites.xlsx']:
                continue
            if f.startswith('.~'):
                continue
            if f.__contains__('origine'):
                continue

            print(f)
            xl = pd.ExcelFile(Path(roots)/f)
            xl.sheet_names
            for sht in xl.sheet_names:
                if sht.__contains__('AM0132'):
                    continue

                df = pd.read_excel(Path(roots)/f, usecols="C:D",nrows=50, sheet_name=sht)
                df.drop_duplicates(inplace=True)
                if df.empty:
                    continue

                index = df.columns[0]
                df.set_index(index, inplace=True)
                lst_index = [*dic.keys() ]
                df = df[df.index.isin(lst_index)]
                if df.empty:
                    continue
                df = df.T
                df.reset_index(inplace=True, drop=True)
                df.rename(columns=dic, inplace=True)
                df.columns.name = None
                df['sites'] = f
                df['feuillet'] = sht
                if df.columns.duplicated(keep=False).any():
                    merge_col = df.columns[df.columns.duplicated(keep=False)]
                if not DF.empty and not df['N° cadast'].isin(DF['N° cadast']).all() :
                    DF = pd.concat([DF,df],ignore_index=True)
                else:
                    DF = pd.concat([DF,df],ignore_index=True)
    return DF

def normalize_colcadast(col):
    col = col.replace([', ',' \+ ',' \(','\)'],['/','/','',''],regex=True)
    col = col.replace(['\+'],['/'],regex=True)
    return col

def split_cadast(df,col):
    tmp = df[col].str.split('/',expand=True).stack().reset_index(-1,drop=True)
    tmp.name = 'split'
    df = pd.merge(df,tmp,right_index=True,left_index=True)
    df.drop_duplicates(inplace=True)
    df.drop_duplicates(subset=['Note','split'],inplace=True)
    del df[col]
    df.rename(columns={'split':col},inplace=True)
    df.reset_index(inplace=True,drop=True)
    return df


df21 = recup_details_note_parcelle(Path(path_fichsite)/'2021')
df20 = recup_details_note_parcelle(Path(path_fichsite)/'2020')
df19 = recup_details_note_parcelle(Path(path_fichsite)/'2019')
df21['N° cadast'] = normalize_colcadast(df21['N° cadast'])
df20['N° cadast'] = normalize_colcadast(df20['N° cadast'])
df19['N° cadast'] = normalize_colcadast(df19['N° cadast'])
df21 = split_cadast(df21,'N° cadast')
df20 = split_cadast(df20,'N° cadast')
df19 = split_cadast(df19,'N° cadast')

# Notes réalisé en 2021 mais publié avec son année d'origine
N19 = ['AB0008']
N20 = ['0C0366']
DF21 = df21[~df21['N° cadast'].isin([*N19,*N20])]

# Notes non présentes en 2021
tmp20 = df20[
    (~df20['N° cadast'].isin(df21['N° cadast']))
    &(df20['N° cadast'].isin(gdf['N° cadast']))
]
tmp19 = df19[
    (~df19['N° cadast'].isin(df21['N° cadast']))
    &(~df19['N° cadast'].isin(df20['N° cadast']))
    &(df19['N° cadast'].isin(gdf['N° cadast']))
]

df = pd.concat([df21,df20,df19])
df.reset_index(inplace=True,drop=True)
df.loc[df['N° cadast']=='0C1003','N° cadast'] = '0C1003.4.5.6'
lsttmp = df[df['N° cadast'].isin(['0C1004','0C1005','0C1006'])].index
df.drop(lsttmp,inplace=True)
df.reset_index(inplace=True,drop=True)
cols_note = ['Note','corridor','patim_nat','connectiv','potentiel_restau','foncier']
df[cols_note] = df[cols_note].astype(float).round(2)
df[cols_note] = df[cols_note].fillna(0)
# df.drop(columns=['sites','feuillet'], inplace=True)
df['N_withFon'] = round(df['corridor'] + df['patim_nat'] + df['connectiv'] + df['potentiel_restau'] + df['foncier'], 2)
df['N_withoutFon'] = round(df['corridor'] + df['patim_nat'] + df['connectiv'] + df['potentiel_restau'], 2)
c_df = df.copy()

GDF = pd.merge(gdf,c_df,on=['N° cadast','Note'],how='left')


df['T_withFon'] = df['N_withFon'].eq(df['Note'])
df['T_withoutFon'] = df['N_withoutFon'].eq(df['Note'])
GDF['T_withFon'] = GDF['N_withFon'].eq(GDF['Note'])
GDF['T_withoutFon'] = GDF['N_withoutFon'].eq(GDF['Note'])


TT = GDF[~(GDF['T_withFon'] | GDF['T_withoutFon'])]
tt = df[~(df['T_withFon'] | df['T_withoutFon'])]

TT[(TT.sites.isna())&(TT.annee==2021)]
TT[(TT.sites.isna())&(TT.annee==2020)]
TT[(TT.sites.isna())&(TT.annee==2019)]
TT[(~TT.sites.isna())]

#####################
#####################
#####################
gdf.loc[gdf['N° cadast']=='AO0775','N° cadast'] = '0A0775'
gdf.loc[gdf['N° cadast']=='A0321','N° cadast']  = '0A0321'
gdf.loc[gdf['N° cadast']=='D0430','N° cadast']  = '0D0430'
gdf.loc[gdf['N° cadast']=='D0719','N° cadast']  = '0D0719'
gdf.loc[gdf['N° cadast']=='AB0006','N° cadast'] = 'AB0009'
gdf.loc[gdf['N° cadast']=='AM0154','N° cadast'] = 'CM0154'
gdf.loc[gdf['N° cadast']=='AM1032','N° cadast'] = 'AM0132'
gdf.loc[gdf['N° cadast']=='AK0098','N° cadast'] = 'AI0098'
gdf.loc[gdf['N° cadast']=='AI00109','N° cadast'] = 'AI0109'
gdf.loc[gdf['N° cadast']=='00645','N° cadast']  = '0F0645'
gdf.loc[gdf['N° cadast']=='DS0252','N° cadast'] = '0C0252'
gdf.loc[gdf['Commune']=='Trep','Commune'] = 'Trept'
gdf.loc[gdf['Commune']=='Villefoantaine','Commune'] = 'Villefontaine'

gdf.to_file(pathout+'/'+ c_geom, driver='GPKG')


#####################
#####################
#####################
path = '/media/colas/SRV/FICHIERS/OUTILS/CARTOGRAPHIE/ESPACE DE TRAVAIL/ETUDES/CVB_Bourbre/Cartographie CVB Bourbre 2021/Mesures compensatoires'
l1 = 'LOT 1.shp'
l2 = 'LOT 2.shp'
l3 = 'LOT 3.shp'
l4 = 'LOT 4 VALIDE PARTENAIRE.shp'
gdf1 = gpd.read_file(Path(path)/l1)
gdf2 = gpd.read_file(Path(path)/l2)
gdf3 = gpd.read_file(Path(path)/l3)
gdf4 = gpd.read_file(Path(path)/'SIG lot 4 SAFER'/l4)

print(gdf1[gdf1.Parcelle.str.contains('AI0098|AK0068|DS0252')])
print(gdf2[gdf2.Parcelle.str.contains('AI0098|AK0068|DS0252')])
print(gdf3[gdf3.Parcelle.str.contains('AI0098|AK0068|DS0252')])
print(gdf4[gdf4.Parcelle.str.contains('AI0098|AK0068|DS0252',na=False)])


#####################
#####################
#####################
#####################

dic = {
    'corridor' : 'N_corridor',
    'patim_nat' : 'N_patrim_nat',
    'connectiv' : 'N_connectiv',
    'potentiel_restau' : 'N_pot_rest',
    'foncier' : 'N_foncier',
    'T_withFon' : 'N_withFon',
}
GDF.rename(columns=dic,inplace=True)

tmp = DF['N° cadast'].str.split('/',expand=True).stack().reset_index(-1,drop=True)
tmp.name = 'split'
df2019 = pd.merge(DF,tmp,right_index=True,left_index=True)


df2020 = pd.merge(DF,tmp,right_index=True,left_index=True)
df2020.drop_duplicates(inplace=True)
df2020.drop_duplicates(subset=['Note','split'],inplace=True)


tmp = df2020[(~df2020.split.isin(df21.split))&(df2020.split.isin(gdf['N° cadast']))]

tmp = df2019[(~df2019.split.isin(df2021.split))&(~df2019.split.isin(df2020.split))&(df2019.split.isin(gdf['N° cadast']))]


df2021 = pd.merge(DF,tmp,right_index=True,left_index=True)
df2021.drop_duplicates(inplace=True)
df2021.drop_duplicates(subset=['Note','split'],inplace=True)

DF_FINAL = pd.concat([df2021,tmp])

DF_FINAL = pd.concat([DF_FINAL,tmp])


df2021[~df2021['split'].isin(gdf['N° cadast'])].shape
df2021[df2021['split'].isin(gdf['N° cadast'])].shape