#!/usr/bin/env python3 # -*- coding: UTF-8 -*- from pycen import con_fon from sqlalchemy.sql import text from sqlalchemy.engine import URL from sqlalchemy import create_engine import pandas as pd import geopandas as gpd # user = 'cgeier' # pwd = 'adm1n*bdCen' # adr = '91.134.194.221' # base = 'cadastre' user = 'postgres' pwd = 'foncier_test1' adr = '172.17.0.2' base = 'postgres' url = URL.create('postgresql+psycopg2', username=user, password=pwd, host=adr, database=base, ) con_cad = create_engine(url) # from pycen import con_cad # sql = '''SELECT * FROM "38_202207".suf WHERE parcelle = '3800740000B0705';''' # df = pd.read_sql_query(sql,con_cad) # df.drop_duplicates(inplace=True) # df.annee = '2020' # df.to_sql('suf',con_cad,"38_202207",if_exists='append',index=False) # fadd = '/home/colas/Documents/tmp/FONCIER_FEDE/add_parc.gpkg' # add = gpd.read_file(fadd) def recup_cols_table(table,con,schema='38_202207'): lst_cols = con.dialect.get_columns(con,table,schema) return [x['name'] for x in lst_cols] def __get_pkey__(engine,table_name,schema): pk = engine.dialect.get_pk_constraint(engine,table_name=table_name,schema=schema) return pk def _where_parcelle(sql0,schema,list_parid): if list_parid is not None: chunk = None if isinstance(list_parid,str): list_parid = [list_parid] LIST_ID = str(tuple(list_parid)).replace(',)',')') sql1 = ''' WHERE p.parcelle IN {list_id} ;'''.format(sch=schema,list_id=LIST_ID) else : chunk = 200000 sql1 = ';' sql = sql0 + sql1 df = pd.read_sql_query(text(sql),con=con_cad,chunksize=chunk) # if chunk is not None: # for d in df: # print(d.shape[0]) # d.drop_duplicates(inplace=True) # print(d.drop_duplicates().shape[0]) # else : # df.drop_duplicates(inplace=True) return df def _get_chunk(df1,df2): # cptp1 = pd.DataFrame() list_DF1 = [] for d1 in df1: list_DF1.append(d1) DF1 = pd.concat(list_DF1) # cptp2 = pd.DataFrame() list_DF2 = [] for d2 in df2: list_DF2.append(d2) DF2 = pd.concat(list_DF2) return pd.concat([DF1,DF2]).drop_duplicates() def __get_parcelles__(sql0,list_parid): if list_parid is not None: chunk = None if isinstance(list_parid,str): list_parid = [list_parid] LIST_ID = str(tuple(list_parid)).replace(',)',')') sql1 = ''' WHERE t1.geo_parcelle IN {list_id} ;'''.format(list_id=LIST_ID) else : chunk = None sql1 = ';' sql = sql0 + sql1 # print(text(sql)) return gpd.read_postgis(sql=sql,con=con_cad,chunksize=chunk) def _get_parcelles1(schema='38_202207',list_parid=None): sql0 = '''SELECT DISTINCT ON (t1.geo_parcelle) t1.geo_parcelle, case when t1.geom is null then t2.geom else t1.geom end geom, substring(p.parcelle from 1 for 2)||substring(p.parcelle from 4 for 12) par_id, substring(p.parcelle from 1 for 2)||substring(p.parcelle from 4 for 3) codcom, substring(p.parcelle from 1 for 2) ccodep, substring(p.parcelle from 4 for 3) ccocom, substring(p.parcelle from 7 for 3) ccopre, substring(p.parcelle from 10 for 2) ccosec, substring(p.parcelle from 12 for 4) dnupla, p.annee annee_pci, t1.update_dat, p.dparpi, p.dcntpa, p.ccocomm, p.ccoprem, p.ccosecm, p.dnuplam, p.ccovoi, p.ccoriv, p.type_filiation "type", substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 3)||p.ccovoi vl_id, (SELECT STRING_AGG(DISTINCT gtoper::text,',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) gtoper, (SELECT STRING_AGG(DISTINCT ccogrm::text,',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) ccogrm, (SELECT STRING_AGG(DISTINCT CONCAT(gtoper::text||COALESCE('_'||ccogrm::text,'')),',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) ccogrm, (SELECT STRING_AGG(DISTINCT TRIM(ddenom)::text,',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) ddenom FROM "{sch}".{t1} p LEFT JOIN "{sch}".parcelle_info t2 ON t2.geo_parcelle = p.parcelle LEFT JOIN "{sch}"."geo_parcelle" t1 ON t1.geo_parcelle = p.parcelle LEFT JOIN "{sch}".proprietaire t3 USING (ccodep,ccocom,dnupro) '''.format( sch=schema, t1='parcelle') return __get_parcelles__(sql0,list_parid) def _get_parcelles2(schema='38_202207',list_parid=None): sql0 = '''SELECT DISTINCT ON (t1.geo_parcelle) t1.geo_parcelle, substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 12) par_id, substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 3) codcom, substring(t1.geo_parcelle from 1 for 2) ccodep, substring(t1.geo_parcelle from 4 for 3) ccocom, substring(t1.geo_parcelle from 7 for 3) ccopre, substring(t1.geo_parcelle from 10 for 2) ccosec, substring(t1.geo_parcelle from 12 for 4) dnupla, t1.annee annee_pci, t1.update_dat, p.dparpi, p.dcntpa, p.ccocomm, p.ccoprem, p.ccosecm, p.dnuplam, p.ccovoi, p.ccoriv, p.type_filiation "type", t1.geom, substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 3)||p.ccovoi vl_id, (SELECT STRING_AGG(DISTINCT gtoper::text,',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) gtoper, (SELECT STRING_AGG(DISTINCT ccogrm::text,',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) ccogrm, (SELECT STRING_AGG(DISTINCT CONCAT(gtoper::text||COALESCE('_'||ccogrm::text,'')),',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) ccogrm, (SELECT STRING_AGG(DISTINCT TRIM(ddenom)::text,',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) ddenom FROM "{sch}"."{t1}" t1 LEFT JOIN ("{sch}".parcelle_info t2 LEFT JOIN "{sch}".proprietaire t3 USING (comptecommunal)) USING (geo_parcelle) LEFT JOIN "{sch}".parcelle p ON t1.geo_parcelle = p.parcelle '''.format( sch=schema, t1='geo_parcelle') return __get_parcelles__(sql0,list_parid) def _get_parcelles(schema='38_202207',list_parid=None): p1 = _get_parcelles1(schema,list_parid) print('parcelles from parcelle .......... OK') p2 = _get_parcelles2(schema,list_parid) print('parcelles from geo_parcelle ...... OK') return _get_chunk(p1,p2) def _get_voie2(schema='38_202207',list_parid=None): sql0 = ''' SELECT t1.ccodep||t1.ccocom||p.ccovoi vl_id, t1.libvoi libelle FROM "{sch}"."{t1}" t1 JOIN "{sch}".parcelle p USING (voie) '''.format( sch=schema, t1='voie') return _where_parcelle(sql0,schema,list_parid) def _get_lots_natcult2(schema='38_202207',list_parid=None): sql0 = ''' SELECT CASE WHEN TRIM(dnulot) = '' THEN substring(parcelle from 1 for 2)||substring(parcelle from 4 for 12)||'0000000' ELSE substring(parcelle from 1 for 2)||substring(parcelle from 4 for 12)||TRIM(dnulot) END lot_id, parcelle, substring(parcelle from 1 for 2)||substring(parcelle from 4 for 12) par_id, dsgrpf, cnatsp, dclssf, ccosub, dcntsf FROM "{sch}"."{t1}" JOIN "{sch}".parcelle p USING(parcelle) '''.format( sch=schema, t1='suf') return _where_parcelle(sql0,schema,list_parid) def _get_lots2(schema='38_202207',list_parid=None): sql0 = ''' SELECT DISTINCT CASE WHEN TRIM(t.dnulot) = '' THEN substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||'0000000' ELSE substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||TRIM(t.dnulot) END lot_id, t.parcelle, substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12) par_id, CASE WHEN TRIM(t.dnulot) = '' OR TRIM(t.dnulot) IS NULL THEN TRIM(l.dnulot) ELSE TRIM(t.dnulot) END dnulot, CASE WHEN l.dcntlo IS NULL THEN 0 ELSE l.dcntlo END dcntlo, geo_sub.geom FROM "{sch}"."{t1}" t JOIN "{sch}".parcelle p USING (parcelle) LEFT JOIN "{sch}".lots l USING (parcelle) LEFT JOIN "{sch}".lotslocaux ll USING (lots) LEFT JOIN "{sch}".geo_subdfisc_parcelle geo_sub_p ON p.parcelle = geo_sub_p.geo_parcelle LEFT JOIN "{sch}".geo_subdfisc geo_sub USING (geo_subdfisc) '''.format( sch=schema, t1='suf') return _where_parcelle(sql0,schema,list_parid) def _get_cadastre2(schema='38_202207',list_parid=None): sql0 = ''' SELECT DISTINCT CASE WHEN TRIM(t.dnulot) = '' THEN substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||'0000000' ELSE substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||TRIM(t.dnulot) END lot_id, substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 3)||t.dnupro dnupro FROM "{sch}"."{t1}" t JOIN "{sch}".parcelle p LEFT JOIN "{sch}".lots l USING (parcelle) USING (parcelle) '''.format( sch=schema, t1='suf') return _where_parcelle(sql0,schema,list_parid) def _get_cptprop1(schema='38_202207',list_parid=None): sql0 = ''' WITH lot as ( SELECT ccodep,ccocom,dnuprol dnupro FROM "{sch}".lots ) SELECT DISTINCT t.ccodep||t.ccocom||t.dnupro dnupro, t."annee" annee_matrice FROM "{sch}"."{t1}" t --JOIN "{sch}".suf s USING (ccodep,ccocom,dnupro) --JOIN lot l USING (ccodep,ccocom,dnupro) JOIN "{sch}".parcelle p USING (ccodep,ccocom,dnupro) '''.format( sch=schema, t1='proprietaire') return _where_parcelle(sql0,schema,list_parid) def _get_cptprop2(schema='38_202207',list_parid=None): sql0 = ''' SELECT DISTINCT substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 3)||t.dnupro dnupro, t.annee annee_matrice FROM "{sch}"."{t1}" t JOIN "{sch}".parcelle p LEFT JOIN "{sch}".lots l USING (parcelle) USING (parcelle) '''.format( sch=schema, t1='suf') return _where_parcelle(sql0,schema,list_parid) def _get_cptprop(schema='38_202207',list_parid=None): cptprop1 = _get_cptprop1(schema=schema, list_parid=list_parid) print('cptprop from proprietaire ... OK') cptprop2 = _get_cptprop2(schema=schema, list_parid=list_parid) print('cptprop from suf ............ OK') return _get_chunk(cptprop1,cptprop2) def _get_r_prop_cptprop1(schema='38_202207',list_parid=None): sql0 = ''' SELECT DISTINCT substring(p.parcelle from 1 for 2)||substring(p.parcelle from 4 for 3)||t.dnupro dnupro, substring(p.parcelle from 1 for 2)||t.dnuper dnuper, ccodro, ccodem FROM "{sch}"."{t1}" t JOIN "{sch}".parcelle p USING (ccodep,ccocom, dnupro) '''.format( sch=schema, t1='proprietaire') return _where_parcelle(sql0,schema,list_parid) def _get_proprios1(schema='38_202207',list_parid=None): sql0 = ''' SELECT DISTINCT t.ccodep||t.dnuper dnuper, TRIM(ccoqua)::int ccoqua, TRIM(ddenom) ddenom, TRIM(dqualp) dqualp, TRIM(dnomlp) dnomlp, TRIM(dnomus) dnomus, TRIM(dprnus) dprnus, TRIM(dprnlp) dprnlp, TRIM(epxnee) epxnee, TRIM(dnomcp) dnomcp, TRIM(dprncp) dprncp, TRIM(jdatnss) jdatnss, TRIM(dldnss) dldnss, TRIM(dlign3) dlign3, TRIM(dlign4) dlign4, TRIM(dlign5) dlign5, TRIM(dlign6) dlign6, TRIM(gtoper)::int gtoper, TRIM(ccogrm)::int ccogrm, TRIM(dnatpr) dnatpr, TRIM(dsglpm) dsglpm, t."annee" annee_matrice FROM "{sch}"."{t1}" t JOIN "{sch}".parcelle p USING (ccodep,ccocom, dnupro) '''.format( sch=schema, t1='proprietaire') return _where_parcelle(sql0,schema,list_parid) def _to_cadaste_table(df,con,pkey,table,schema): # Si la clé-primaire de la table est > 1 colonne if len(pkey) > 1 : lstid = str( tuple( df[pkey]\ .drop_duplicates()\ .itertuples(index=False, name=None) ) ).replace(',)',')') pk = str(tuple(pkey)).replace("'",'"') # Si la clé-primaire de la table est 1 colonne else: pk = pkey[0] lstid = str(tuple(df[pk].drop_duplicates()))\ .replace(',)',')') # Récupération des données déjà en BDD sql = ''' SELECT * FROM {sch}.{tab} WHERE {id} IN {lst} ;'''.format( sch = schema, tab = table, id = pk, lst = lstid ) indb = pd.read_sql_query(sql,con,index_col=pkey) # Exclusion des données déjà en base df.set_index(pkey,inplace=True) todb = df[ ~df.index.isin(indb.index) ].copy() df.reset_index(drop=False, inplace=True) todb.reset_index(drop=False, inplace=True) indb.reset_index(drop=False, inplace=True) # Envoie du tableau dans la bdd si non vide if not todb.empty: # Adaptation des types des champs si nécessaire dtyp = {} if 'geom' in todb.columns: from geoalchemy2 import Geometry todb = todb.to_wkt() dtyp = {'geom':Geometry(geometry_type='MULTIPOLYGON',srid=2154)} cols = todb.columns[todb.columns.isin(indb.columns)] todb[cols]\ .to_sql( table, con, schema, if_exists='append', index=False, dtype = dtyp ) print('INSERT %s news data ! OK'%todb.shape[0]) else : print('NO news data to insert !') def to_vl(df,con): table = 'vl' schema = 'cadastre' pkey = __get_pkey__(con,table,schema)['constrained_columns'] _to_cadaste_table(df,con,pkey,table,schema) def to_parcelles(df,con): table = 'parcelles' schema = 'cadastre' pkey = __get_pkey__(con,table,schema)['constrained_columns'] _to_cadaste_table(df,con,pkey,table,schema) def to_lots(df,con): table = 'lots' schema = 'cadastre' pkey = __get_pkey__(con,table,schema)['constrained_columns'] _to_cadaste_table(df,con,pkey,table,schema) def to_lots_natcult(df,con): table = 'lots_natcult' schema = 'cadastre' # pkey = __get_pkey__(con,table,schema)['constrained_columns'] pkey = ['lot_id'] _to_cadaste_table(df,con,pkey,table,schema) def to_cadastre(df,con): table = 'cadastre' schema = 'cadastre' # pkey = __get_pkey__(con,table,schema)['constrained_columns'] pkey = ['lot_id','dnupro'] _to_cadaste_table(df,con,pkey,table,schema) def to_r_prop_cptprop(df,con): table = 'r_prop_cptprop' schema = 'cadastre' pkey = __get_pkey__(con,table,schema)['constrained_columns'] _to_cadaste_table(df,con,pkey,table,schema) def to_proprios(df,con): table = 'proprios' schema = 'cadastre' pkey = __get_pkey__(con,table,schema)['constrained_columns'] _to_cadaste_table(df,con,pkey,table,schema) def to_cptprop(df,con): table = 'cptprop' schema = 'cadastre' pkey = __get_pkey__(con,table,schema)['constrained_columns'] _to_cadaste_table(df,con,pkey,table,schema) if __name__ == "__main__": from pycen import update_to_sql # par = '3805050000E0523' par = None sch = '38_202007' # cptprop1 = _get_cptprop1(schema=sch, list_parid=par) # cptprop2 = _get_cptprop2(schema=sch, list_parid=par) cptprop = _get_cptprop(schema=sch, list_parid=par) proprios = _get_proprios1(schema=sch, list_parid=par) r_prop_cptprop = _get_r_prop_cptprop1(schema=sch, list_parid=par) voie = _get_voie2(schema=sch, list_parid=par) # parcelles = _get_parcelles2(schema=sch, list_parid=par) parcelles = _get_parcelles(schema=sch, list_parid=par) lots = _get_lots2(schema=sch, list_parid=par) lots_natcult = _get_lots_natcult2(schema=sch, list_parid=par) cadastre = _get_cadastre2(schema=sch, list_parid=par) to_cptprop(cptprop,con_fon) to_proprios(proprios,con_fon) to_r_prop_cptprop(r_prop_cptprop,con_fon) to_vl(voie,con_fon) to_parcelles(parcelles,con_fon) to_lots(lots,con_fon) to_lots_natcult(lots_natcult,con_fon) to_cadastre(cadastre,con_fon)