#!/usr/bin/env python3
|
|
# -*- coding: UTF-8 -*-
|
|
#Nom : : foncier_insert_table.py
|
|
#Description : Insertion des données cadastrales à la base <foncier> après de sa création.
|
|
#Copyright : 2021, CEN38
|
|
#Auteur : Colas Geier
|
|
#Version : 1.0
|
|
|
|
|
|
import pandas as pd
|
|
# import numpy as np
|
|
from sqlalchemy import create_engine, text
|
|
from geoalchemy2 import Geometry
|
|
import gc
|
|
import sys
|
|
# import time
|
|
import datetime as dt
|
|
# from pycen import bdd
|
|
# from shapely.geometry.multipolygon import MultiPolygon
|
|
|
|
|
|
# Parametrage geopandas
|
|
import geopandas as gpd
|
|
import warnings; warnings.filterwarnings('ignore', 'GeoSeries.isna', UserWarning)
|
|
# import shapely
|
|
# shapely.speedups.disable()
|
|
# gpd.options.use_pygeos = True
|
|
|
|
|
|
# start_time = dt.datetime.today()
|
|
# tmp = dt.datetime.today() - start_time
|
|
|
|
|
|
# When True, run the interactive near-duplicate-spelling correction pass first.
check_duplicates = False

# Source database parameters (CADASTRE, in).
# Output data of the QGIS "Cadastre" plugin.
# NOTE(review): credentials are hard-coded in source — consider environment
# variables or a config file kept out of version control.
user_cad = 'postgres'
pwd_cad = 'foncier_test1'
adr_cad = '172.17.0.2'
port_cad = '5432'
base_cad = 'postgres'
schema_cad = '202007'

# Target database parameters (FONCIER, out).
user_fon = 'postgres'
pwd_fon = 'tutu'
adr_fon = '192.168.60.9'
port_fon = '5432'
base_fon = 'bd_cen'
schema_fon = 'cadastre'

# Cross-table settings.
crs = 'EPSG:2154'            # projection of all geometries (Lambert-93)
dpt_nom_tab = '_73'          # suffix appended to every output table name
chunk = 100000               # chunk size for SQL reads/writes
list_dep = ['07', '26', '42', '38']   # départements to process (one source schema each)

# Columns checked for near-duplicate spellings of the same owner id (dnuper)
# by the interactive correction pass below.
FIND_DOUBLON = [{
    'tab_in': 'proprietaire',
    'on_col': ['ddenom', 'dprnlp', 'dldnss','jdatnss','ccogrm','dsglpm','dnatpr'] }
]
|
|
|
|
# Mapping that drives the whole transfer.  Each top-level entry reads one
# source table ('table_in', indexed by its pkey 'index_tab', restricted to
# 'columns_in') and derives one or more output tables ('table_out').
#
# Per output table:
#   name         target table name (suffixed with dpt_nom_tab)
#   geom         geometry source {'table_geom_in', 'index_geom'} or None
#   drop_escape  strip surrounding whitespace inside string values before use
#   columns_in   source columns to keep for this output
#   columns_add  composite columns to build: {new_col: [component cols]};
#                a None value creates the column filled with None
#   unique       de-duplication spec {'cols': [...], 'keep': ...} or False
#   dict         column-rename mapping {'old': 'new', ...} or None
#   join         list of join specs consumed by join_data(), or False
#                (each spec: bdd 'in'/'out', table, on, type
#                 'concat'/'merge'/'isin'/'notin', select_cols, optional dict)
#
# NOTE(review): several alternative/legacy mappings that were commented out in
# the original file have been removed for readability.
DICT_TAB = [{
    'table_in' : 'proprietaire', # source table produced by the QGIS "cadastre" plugin
    'index_tab': 'proprietaire', # pkey of the source table
    'columns_in': ['ccodep', 'ccocom', 'dnupro',
        'dnuper', 'ccoqua', 'ddenom', 'jdatnss', 'dldnss', 'dsglpm', 'dlign3', 'dlign4', 'dlign5', 'dlign6', 'dnatpr', 'gtoper', 'ccogrm'],
    'table_out': [{
        'name': 'cptprop{}'.format(dpt_nom_tab),
        'geom': None,
        'drop_escape': False, # keep inner whitespace of the character strings
        'columns_in': ['ccodep', 'ccocom', 'dnupro'], # columns picked from the source table
        'columns_add': {'dnupro': ['ccodep', 'ccocom', 'dnupro']}, # composite fields to add
        'unique': {'cols': ['dnupro'], 'keep': 'first'}, # fields kept unique in the output table
        'dict': None, # rename mapping {'old_name1': 'new_name1', ...}
        'join': [{
            'bdd': 'in', 'table': 'suf', 'on': ['ccodep', 'ccocom', 'dnupro'], 'type': 'concat',
            'select_cols' : ['ccodep', 'ccocom', 'dnupro']},{
            'bdd': 'in', 'table': 'lots', 'on': ['ccodep', 'ccocom', 'dnupro'], 'type': 'concat',
            'select_cols' : ['ccodep', 'ccocom', 'dnuprol'],'dict': {'dnuprol': 'dnupro'}},{
            'bdd': 'in', 'table': 'parcelle', 'on': ['ccodep', 'ccocom', 'dnupro'], 'type': 'concat',
            'select_cols' : ['ccodep', 'ccocom', 'dnupro']},]
    },{
        'name': 'proprios{}'.format(dpt_nom_tab),
        'geom': None,
        'drop_escape': True,
        'columns_in': ['ccodep', 'dnuper', 'ccoqua', 'ddenom', 'jdatnss', 'dldnss', 'dsglpm', 'dlign3', 'dlign4', 'dlign5', 'dlign6', 'dnatpr', 'gtoper', 'ccogrm'],
        'columns_add': {'dnuper': ['ccodep', 'dnuper']},
        'unique': {'cols': ['dnuper'], 'keep': 'first'},
        'dict': None,
        'join': False
    },{
        'name': 'r_prop_cptprop{}'.format(dpt_nom_tab),
        'geom': None,
        'drop_escape': True,
        'columns_in': ['ccodep', 'dnuper', 'ccocom', 'dnupro', 'dnomlp', 'dprnlp', 'epxnee', 'dnomcp', 'dprncp', 'ccodro', 'ccodem'],
        'columns_add': {
            'dnuper': ['ccodep', 'dnuper'],
            'dnupro': ['ccodep', 'ccocom', 'dnupro']},
        'unique': {'cols': ['dnupro', 'dnuper'], 'keep': 'first'},
        'dict': None,
        'join': False
    },]
},{
    'table_in' : 'parcelle',
    'index_tab': 'parcelle',
    'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'ccovoi', 'dparpi', 'dcntpa', 'ccocomm', 'ccoprem', 'ccosecm', 'dnuplam', 'dvoilib', 'type_filiation', 'dnupro'],
    'table_out': [{
        'name': 'vl{}'.format(dpt_nom_tab),
        'geom': None,
        'drop_escape': False,
        'columns_in' : ['ccodep', 'ccocom', 'ccovoi', 'dvoilib'],
        'columns_add': {
            'vl_id': ['ccodep', 'ccocom', 'ccovoi'],
            'geom': None},
        'unique': {'cols': ['vl_id'], 'keep': 'first'},
        'dict': {'dvoilib': 'libelle'},
        'join': [{
            'bdd': 'in', 'table': 'voie', 'on': ['ccodep', 'ccocom', 'ccovoi'], 'type': 'concat',
            'select_cols' : ['ccodep', 'ccocom', 'codvoi', 'libvoi'],
            'dict': {'libvoi': 'libelle', 'codvoi': 'ccovoi'},
        }]
    },{
        'name': 'parcelles{}'.format(dpt_nom_tab),
        'geom': {
            'table_geom_in': 'geo_parcelle',
            'index_geom': 'geo_parcelle'
        },
        'drop_escape': True,
        'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'ccovoi', 'dparpi', 'dcntpa', 'ccocomm', 'ccoprem', 'ccosecm', 'dnuplam', 'type_filiation'],
        'columns_add': {
            'par_id': ['ccodep', 'ccocom', 'ccopre','ccosec', 'dnupla'],
            'codcom': ['ccodep', 'ccocom'],
            'vl_id': ['ccodep', 'ccocom', 'ccovoi'],
            'typprop_id': None },
        'unique': False,
        'dict': {'type_filiation': 'type'},
        'join': False
    },{
        # KNOWN ISSUE: does not find parcels without lots (e.g. 38357000AE0526)
        'name': 'lots{}'.format(dpt_nom_tab),
        'geom': None,
        'drop_escape': True,
        'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dcntpa'],
        'columns_add': {
            'lot_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla'],
            'par_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla'],
            'dnulot': None, },
        'unique': False,
        'dict': {'dcntpa': 'dcntlo'},
        'join': [{'bdd': 'out', 'table': 'parcelles{}'.format(dpt_nom_tab), 'on': ['par_id'], 'type': 'isin',
            'select_cols' :['par_id'] }]
    },]
},{
    'table_in' : 'lots',
    'index_tab': 'lots',
    'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot', 'dnupdl', 'dcntlo', 'dnuprol'],
    'table_out': [{
        # KNOWN ISSUE: parcels with lots may have a par_id NOT IN parcelles_73
        'name': 'lots{}'.format(dpt_nom_tab),
        'geom': None,
        'drop_escape': True,
        'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot', 'dnupdl', 'dcntlo'],
        'columns_add': {
            'lot_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot'],
            'par_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla'],},
        'unique': {'cols': ['lot_id'], 'keep': 'first'},
        'dict': None,
        'join': [{'bdd': 'out', 'table': 'parcelles{}'.format(dpt_nom_tab), 'on': ['par_id'], 'type': 'isin',
            'select_cols' :['par_id'] }]
    },{
        'name': 'lots_natcult{}'.format(dpt_nom_tab),
        'geom': None,
        'drop_escape': True,
        'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot'],
        'columns_add': {
            'lot_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot'],},
        'unique': {'cols': ['lot_id'], 'keep': 'first'},
        'dict': None,
        'join': [{ # WARNING: 2 dclssf rows can exist for a single lot_id
            'bdd': 'in', 'table': 'suf', 'on': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot'], 'type': 'merge',
            'select_cols' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot','dsgrpf','cnatsp','dclssf','ccosub','dcntsf'],
            },{
            'bdd': 'out', 'table': 'lots{}'.format(dpt_nom_tab), 'on': ['lot_id'], 'type': 'isin',
            'select_cols' :['lot_id'] }]
    },{
        'name': 'cadastre{}'.format(dpt_nom_tab),
        'geom': None,
        'drop_escape': True,
        'columns_in' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot', 'dnuprol'],
        'columns_add': {
            'lot_id': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot'],
            'dnupro': ['ccodep', 'ccocom', 'dnuprol'],},
        'unique': {'cols': ['lot_id', 'dnupro'], 'keep': 'first'},
        'dict': None,
        'join': [{
            'bdd': 'in', 'table': 'suf', 'on': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot', 'dnuprol'], 'type': 'concat',
            'select_cols' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnulot', 'dnupro'], 'dict': {'dnupro': 'dnuprol'}
            },{
            'bdd': 'in', 'table': 'parcelle', 'on': ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnuprol'], 'type': 'concat',
            'select_cols' : ['ccodep', 'ccocom', 'ccopre', 'ccosec', 'dnupla', 'dnupro'], 'dict': {'dnupro': 'dnuprol'}
            },{
            'bdd': 'out', 'table': 'lots{}'.format(dpt_nom_tab), 'on': ['lot_id'], 'type': 'isin',
            'select_cols' :['lot_id'] },{
            'bdd': 'out', 'table': 'cptprop{}'.format(dpt_nom_tab), 'on': ['dnupro'], 'type': 'isin',
            'select_cols' :['dnupro'] },]
    },]
}]
|
|
|
|
|
|
|
|
# # Connexion bdd
|
|
# bd_cad = bdd.CEN(
|
|
# user = user_cad,
|
|
# pwd = pwd_cad,
|
|
# adr = adr_cad,
|
|
# base = base_cad
|
|
# # schema = schema
|
|
# )
|
|
|
|
################################
|
|
########## Fonctions ##########
|
|
################################
|
|
# Reference timestamp used by every progress message in this script.
start_time = dt.datetime.today()

def time_exec(init_time):
    """Return the elapsed time since *init_time*, formatted as a string."""
    return str(dt.datetime.today() - init_time)
|
|
|
|
|
|
def replace_escape_by_0(df):
    """Replace every space character with '0' in the known code columns.

    Cadastral identifiers (prefix, section, plan number, ...) are fixed-width
    and space-padded in the source tables; zero-padding them keeps the
    composite ids built later (par_id, lot_id, vl_id, ...) consistent.

    Only the listed columns actually present in *df* are touched. *df* is
    modified in place and also returned for chaining.
    """
    cols = ['ccopre', 'ccosec', 'dnupla', 'dparpi', 'dnuplam', 'dclssf', 'ccovoi']
    for col in cols:
        if col in df.columns:
            # Direct assignment instead of `df[col].replace(..., inplace=True)`:
            # inplace replace on a column selection is chained assignment, which
            # pandas deprecates and which silently no-ops under copy-on-write.
            df[col] = df[col].replace(' ', '0', regex=True)
    return df
|
|
|
|
|
|
def join_data (df, join, schema_in):
    """Combine *df* with another database table according to the spec *join*.

    join is a dict with keys:
      bdd          'in' (source cadastre DB) or 'out' (target foncier DB)
      table        table to read from that database
      on           columns the combination is based on
      type         'isin'   -> keep rows of df whose `on` tuple exists in table
                   'notin'  -> keep rows of df whose `on` tuple does NOT exist
                   'merge'  -> left-join the table's columns onto df
                   'concat' -> append the table's rows to df (de-duplicated)
      select_cols  columns read from the table (optional)
      where        {col: value} equality filter on the read table (optional)
      dict         rename mapping applied to the read table (optional)

    Relies on the module-level engine_cad / engine_fon / schema_fon globals.
    Returns the combined DataFrame.

    NOTE(review): if join['bdd'] is neither 'in' nor 'out', `con`/`sch` are
    never bound and the read below raises NameError.
    NOTE(review): the 'isin'/'notin' path adds a temporary 'on' column to the
    caller's *df* object (only the filtered copy gets it dropped).
    """
    table = join['table']
    bdd = join['bdd']
    typ = join['type']
    on = join['on']
    # Pick the connection/schema matching the requested side.
    if bdd == 'out':
        con = engine_fon
        sch = schema_fon
    if bdd == 'in':
        con = engine_cad
        sch = schema_in
    # Read only the columns needed for the combination and the where-filter.
    select_col = []
    if 'select_cols' in join.keys():
        select_col.extend(join['select_cols'])
    if 'where' in join.keys():
        select_col.extend(join['where'].keys())

    tmp = pd.read_sql_table(
        table_name = table,
        con = con,
        schema = sch,
        columns = select_col
    )
    # Zero-pad the fixed-width code columns so keys match df's.
    tmp = replace_escape_by_0(tmp)
    if 'dict' in join.keys():
        tmp.rename(columns=join['dict'], inplace=True)
    # Optional equality filter on the freshly read table.
    if 'where' in join.keys():
        where = join['where']
        for key in where.keys():
            tmp = tmp[tmp[key] == where[key] ]

    if typ in ['isin', 'notin']:
        # Build a single concatenated key column on both frames, then filter
        # df by (non-)membership of that key in tmp.
        for d in [df, tmp]:
            d['on'] = ''
            for col in on:
                d['on'] += d[col].astype(str)
        if typ == 'isin':
            df = df[df['on'].isin(tmp['on'])]
        if typ == 'notin':
            df = df[~df['on'].isin(tmp['on'])]
        df.drop(columns='on', inplace=True)
    if typ == 'merge':
        df = df.merge(tmp, on = on, how='left')
    if typ == 'concat':
        df = pd.concat([df,tmp], ignore_index=True).drop_duplicates()

    return df
|
|
|
|
def get_geom_parcelle(df, get_geo, schema):
    """Attach parcel geometries from the source DB to *df*, then export it.

    Reads the most recent geometry per parcel from get_geo['table_geom_in']
    (pkey get_geo['index_geom']) in the given source *schema*, merges it onto
    *df* with a right join (every row of *df* is kept), and hands the result
    to export_data().

    Relies on module-level globals: engine_cad, crs, chunk, ind_in,
    start_time, and the export_data()/time_exec() helpers.
    """
    print('INIT import geodata ........... %s sec'%( time_exec(start_time) ))

    # Geometry source table and its primary key.
    ind_geo = get_geo['index_geom']
    tab_geo = get_geo['table_geom_in']

    # Keep only the latest version of each geometry (max creat/update dates).
    # supf is fetched as dcntpa because some geometries are not referenced in
    # the "parcelles" table (see the inline SQL comment).
    sql = """select distinct on (t2.{0})
    t2.{0},
    t1.geom,
    t1.supf::integer as dcntpa -- récupération de la contenance cadastrale associée car présence de géometrie non référencées dans la table "parcelles"
    FROM "{1}".{2} t1
    INNER JOIN (select distinct on ({0}) {0}, max(creat_date) creat_date, max(update_dat) update_dat FROM "{1}".{2} GROUP BY ({0})) t2
    USING ({0}, creat_date, update_dat)""".format(ind_geo, schema, tab_geo)
    tmp = gpd.read_postgis(
        sql = sql,
        con = engine_cad,
        geom_col = 'geom',
        crs = crs,
        chunksize = chunk,
    )

    # read_postgis yields an iterator of frames when chunksize is set.
    if chunk:
        gdf = gpd.GeoDataFrame(pd.concat(tmp, ignore_index=True))
    else:
        gdf = tmp.copy()
    del tmp
    # Re-key the geometries on the same index name as the attribute frame.
    gdf.set_index(ind_geo, inplace=True)
    gdf.index.name = ind_in
    print('END import geodata ........... %s sec'%( time_exec(start_time) ))

    print('INIT merge data - geodata ........... %s sec'%( time_exec(start_time) ))
    if not gdf[gdf.dcntpa.isna()].empty:
        gdf.dcntpa.fillna(0, inplace=True)
    # Align dtypes so dcntpa can be used as a merge key.
    gdf['dcntpa'] = gdf['dcntpa'].astype(df.dtypes['dcntpa'].type)
    tmp = gdf.merge(df, on = [ind_in, 'dcntpa'], how='right')
    tmp = tmp.set_geometry('geom', drop=True, crs=crs)
    tmp.rename(columns={'geometry': 'geom'}, inplace=True)

    # Back-fill geometries for rows the merge left without one.
    # BUGFIX: the original guard was `if tmp[tmp.geom.isna()].empty:`, which
    # ran the fill only when there was nothing to fill; inverted so it runs
    # exactly when missing geometries exist.
    if not tmp[tmp.geom.isna()].empty:
        lst_ind_df = tmp[tmp.geom.isna()].index.tolist()
        lst_ind_gdf = gdf.loc[gdf.index.isin(lst_ind_df)].index.tolist()
        # NOTE(review): this assumes tmp keeps the same index labels as gdf
        # after the merge — confirm, as merge() typically resets the index.
        tmp.loc[tmp.index.isin(lst_ind_gdf), 'geom'] = gdf.loc[gdf.index.isin(lst_ind_gdf), 'geom']

    del [gdf, df]
    gdf = tmp.copy()
    del tmp
    export_data(gdf)
|
|
|
|
|
|
def export_data( df):
    """Append *df* to the target table in batches of chunk*5 rows.

    Geometry-bearing frames go through GeoPandas' to_postgis(); plain frames
    through pandas' to_sql().  Relies on module-level globals: tab_out
    (target table name set by the main loop), engine_fon, schema_fon, chunk,
    crs, start_time.
    """
    print('INIT export data TO {0}, {1} ........... {2} sec'.format(tab_out, df.shape[0], time_exec(start_time) ))
    # Batch start offsets; each batch spans [j, next start) — the last one
    # runs to the end of the frame.  An empty df yields no batches at all.
    rang = [e for e in range(0, df.shape[0], chunk*5)]
    for i, j in enumerate(rang):
        if j == max(rang) :
            jj = df.shape[0]
        else:
            jj = rang[i+1]

        df_imp = df[j:jj].copy()

        print('INIT export data TO {0} ..... {1}/{2} ...... {3} sec'.format(tab_out, jj, df.shape[0], time_exec(start_time) ))
        # NOTE(review): the geometry test inspects the full *df*, not the
        # current batch df_imp — a batch whose geoms are all NaN still goes
        # through to_postgis when any row of df has a geometry.
        if 'geom' in df.columns and not df[~df['geom'].isna()].empty :
            df_imp = df_imp.set_geometry('geom', drop=True, crs=crs)
            df_imp.rename(columns={'geometry': 'geom'}, inplace=True)
            df_imp.to_postgis(
                name = tab_out,
                con = engine_fon,
                schema = schema_fon,
                index = False,
                if_exists = 'append',
                geom_col = 'geom',
                chunksize = chunk,
            )
        else:
            df_imp.to_sql(
                name = tab_out,
                con = engine_fon,
                schema = schema_fon,
                index = False,
                if_exists = 'append',
                chunksize = chunk,
                method = 'multi',
            )
    print('END export data TO {0} ........... {1} sec'.format(tab_out, time_exec(start_time) ))
|
|
|
|
def optimize_data_frame(df):
    """Downcast low-cardinality columns of *df* to the 'category' dtype.

    Columns whose number of distinct values (NaN included, matching the
    original ``len(unique())`` behaviour) is at most 80% of the row count are
    converted in place, which can sharply reduce memory usage on the large
    cadastral tables handled here.  Returns *df* for chaining.
    """
    for col in df.columns:
        # nunique() counts distinct values without materialising the
        # unique-value array (the original kept an unused `dtype` local and
        # built the full array via len(df[col].unique())).
        if df[col].nunique(dropna=False) <= df.shape[0] * 0.8:
            df[col] = df[col].astype('category')
    return df
|
|
|
|
|
|
# Database connections: engine_cad/con_cad read the cadastre (source) DB,
# engine_fon/con_fon write the foncier (target) DB.
engine_cad = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'.format(user_cad,pwd_cad,adr_cad,port_cad,base_cad), echo=False)
engine_fon = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'.format(user_fon,pwd_fon,adr_fon,port_fon,base_fon), echo=False)
con_cad = engine_cad.connect()
con_fon = engine_fon.connect()
|
|
|
|
|
|
################################
|
|
########## Main ##########
|
|
################################
|
|
if __name__ == "__main__":

    ################
    # STEP 1 — interactive clean-up of near-duplicate owner spellings.
    # For each column listed in FIND_DOUBLON, owners (dnuper) whose column
    # holds more than one distinct spelling are listed; the operator types
    # the value to keep and the source table is UPDATEd accordingly.
    if check_duplicates:
        for DOUBLON in FIND_DOUBLON:
            tab = DOUBLON['tab_in']
            on_col = DOUBLON['on_col']
            for col in on_col:
                for dep in list_dep:
                    # One source schema per département, e.g. "38_202007".
                    schema_in = dep + '_' + schema_cad
                    sql = '''
                    -- il existe des doublons en raison d'orthographes voisines :
                    -- recherche de ces doublons
                    SELECT DISTINCT '{0}' as insee_dep, dnuper, string_agg(DISTINCT {1},' / ') as orthographes_voisines
                    FROM "{2}".{3} GROUP BY dnuper HAVING count(DISTINCT {1}) > 1'''.format(dep, col, schema_in, tab)
                    df = pd.read_sql(
                        sql = sql,
                        con = engine_cad,
                    )
                    if df.empty:
                        print('No duplicate value dep {0} table {1} column {2} ====> next request'.format(dep, tab, col))
                        continue

                    for i, row in df.iterrows():
                        dnuper = row.dnuper
                        choix = row.orthographes_voisines.split(' / ')
                        # NOTE: `i` here shadows the iterrows() loop variable.
                        choix = [i.strip() for i in choix]
                        Question = input("""Des orthographes voisines existent pour l'identifiant : {0}
                        dans la colonne : {1}.
                        Les valeurs voisines sont : {2}
                        Ecrire la mise à jour du champs {1} à enregistrer (c cancel) :""".format(dnuper,col, choix))
                        if Question.lower() == 'c' or Question.lower() == 'cancel':
                            continue

                        # NOTE(review): this UPDATE is built by string interpolation
                        # from operator input — SQL-injection-prone; a parameterized
                        # query would be safer even for an internal tool.
                        update = '''UPDATE "{0}".{1}
                        SET {2} = '{3}'
                        WHERE {2} like '{4}%'
                        AND dnuper = '{5}';'''.format(schema_in, tab, col, Question, "%' OR {} like '".format(col).join(map(str,choix)), dnuper)
                        try:
                            con_cad.execute(text(update))
                            print('''
                            Update OK !''')
                        except Exception as exept:
                            # Abort the whole run on the first failed update.
                            print('ERROR : {0}'.format(update))
                            print(exept)
                            sys.exit()
|
|
|
|
|
|
|
|
    ################
    # STEP 2 — empty every output table before loading.  reversed() empties
    # dependent tables first; CASCADE handles remaining FK references.
    for i, DICT in enumerate(DICT_TAB):
        tab_in = DICT_TAB[i]['table_in']
        col_in = DICT_TAB[i]['columns_in']
        ind_in = DICT_TAB[i]['index_tab']
        tabs_out = DICT_TAB[i]['table_out']

        for tab_out in reversed(tabs_out):
            sql = "TRUNCATE TABLE {0}.{1} CASCADE".format(schema_fon, tab_out['name'])
            print(sql)
            con_fon.execute(sql)
|
|
|
|
|
|
|
|
    ################
    # STEP 3 — for every département schema, read each source table once and
    # derive all of its output tables as described by DICT_TAB.
    for dep in list_dep:
        schema_in = dep + '_' + schema_cad
        print('''

        INIT import data FROM {}

        '''.format(schema_in))
        for i, DICT in enumerate(DICT_TAB):
            tab_in = DICT_TAB[i]['table_in']
            col_in = DICT_TAB[i]['columns_in']
            ind_in = DICT_TAB[i]['index_tab']   # module-level: also read by get_geom_parcelle()
            tabs_out = DICT_TAB[i]['table_out']

            # Import the source table (pkey included for later indexing).
            print('''
            INIT import data FROM {0}........... {1} sec'''.format(tab_in, time_exec(start_time) ))
            tmp = pd.read_sql_table(
                table_name = tab_in,
                con = engine_cad,
                schema = schema_in,
                columns = col_in + [ind_in],
                chunksize = chunk,
            )

            # read_sql_table returns an iterator of frames when chunksize is set.
            if chunk:
                DF = pd.concat(tmp, ignore_index=True)
            else:
                DF = tmp.copy()

            DF.drop_duplicates(inplace=True)
            del tmp
            DF.set_index(ind_in, inplace=True)
            print('END import data ........... %s sec'%( time_exec(start_time) ))

            for tab in tabs_out:
                tab_out = tab['name']     # module-level: also read by export_data()
                dictio = tab['dict']
                col_df = tab['columns_in']
                col_ad = tab['columns_add']
                get_geo = tab['geom']
                drp_esc = tab['drop_escape']
                unique = tab['unique']
                join = tab['join']

                print('INIT TABLE {0} ........... {1} sec'.format(tab_out, time_exec(start_time) ))
                df = DF[DF.columns.intersection(col_df)].copy()

                # Zero-pad the fixed-width code columns.
                df = replace_escape_by_0(df)
                if drp_esc:
                    # Strip surrounding whitespace in every string column.
                    df_obj = df.select_dtypes(['object'])
                    df[df_obj.columns] = df_obj.apply(lambda x: x.str.strip())

                if dictio :
                    df.rename(columns=dictio, inplace=True)

                # Joins against the *source* database first.
                if join :
                    for j in join:
                        if j['bdd'] == 'in' :
                            df = join_data(df, j, schema_in)
                if df.empty:
                    print('df EMPTY ====> next table')
                    continue

                # Build the composite-id columns (par_id, lot_id, dnupro, ...).
                if col_ad :
                    print('INIT addition columns ........... %s sec'%( time_exec(start_time) ))
                    for key in col_ad.keys():
                        if key in df.columns:
                            # The new column would overwrite a source column of
                            # the same name: keep the source values in *_tmp and
                            # concatenate from there.
                            df[key + '_tmp'] = df[key].copy()
                            col_ad[key] = [x if x != key else key+'_tmp' for x in col_ad[key]]

                        aggreg = col_ad[key]
                        if aggreg :
                            # Concatenate the component columns into one id.
                            df[key] = ''
                            for col in aggreg:
                                df[key] += df[col].fillna('')
                        else:
                            # aggreg is None: create an empty (None) column.
                            df[key] = aggreg

                        print('ADD column {0} : {1} ........... {2} sec'.format(key,aggreg, time_exec(start_time) ))

                # Joins against tables already loaded into the *target* DB.
                if join :
                    for j in join:
                        if j['bdd'] == 'out' :
                            df = join_data(df, j, schema_in)
                if df.empty:
                    print('df EMPTY ====> next table')
                    continue

                if unique:
                    df.drop_duplicates(unique['cols'], keep=unique['keep'], inplace=True)

                # Keep only the columns that exist in the target table.
                # NOTE(review): dialect.get_columns is handed the engine itself
                # rather than a connection — works on SQLAlchemy 1.x only.
                name_col_out = engine_fon.dialect.get_columns(engine_fon, tab_out, schema=schema_fon)
                name_col_out = [ sub['name'] for sub in name_col_out ]
                if 'geom' in name_col_out and 'geom' not in df.columns:
                    name_col_out.remove('geom')
                df = df[df.columns.intersection(name_col_out)]

                ####################
                # Export: with geometries (get_geom_parcelle calls export_data
                # itself after merging the geodata) or plain.
                # NOTE(review): a long commented-out inline copy of the geodata
                # merge, superseded by get_geom_parcelle(), was removed here.
                if get_geo:
                    get_geom_parcelle(df=df, get_geo=get_geo, schema=schema_in)
                else:
                    export_data(df)
                del df
            del DF
        print('END transfert data FROM département {0} ........... {1} sec'.format(dep, time_exec(start_time) ))
    print('END SCRIPT')
    sys.exit()
    print('NOT EXIT')   # unreachable guard after sys.exit()
|
|
|