#!/usr/bin/env python3 # -*- coding: UTF-8 -*- from pycen import con_fon from sqlalchemy.sql import text from sqlalchemy.engine import URL from sqlalchemy import create_engine import pandas as pd import geopandas as gpd # user = 'cgeier' # pwd = 'adm1n*bdCen' # adr = '91.134.194.221' # base = 'cadastre' user = 'postgres' pwd = 'foncier_test1' adr = '172.17.0.2' base = 'postgres' url = URL.create('postgresql+psycopg2', username=user, password=pwd, host=adr, database=base, ) con_cad = create_engine(url) # from pycen import con_cad # sql = '''SELECT * FROM "38_202207".suf WHERE parcelle = '3800740000B0705';''' # df = pd.read_sql_query(sql,con_cad) # df.drop_duplicates(inplace=True) # df.annee = '2020' # df.to_sql('suf',con_cad,"38_202207",if_exists='append',index=False) # fadd = '/home/colas/Documents/tmp/FONCIER_FEDE/add_parc.gpkg' # add = gpd.read_file(fadd) def recup_cols_table(table,con,schema='38_202207'): lst_cols = con.dialect.get_columns(con,table,schema) return [x['name'] for x in lst_cols] def __get_pkey__(engine,table_name,schema): pk = engine.dialect.get_pk_constraint(engine,table_name=table_name,schema=schema) return pk def _where_parcelle(sql0,schema,list_parid): if list_parid is not None: chunk = None if isinstance(list_parid,str): list_parid = [list_parid] LIST_ID = str(tuple(list_parid)).replace(',)',')') sql1 = ''' WHERE p.parcelle IN {list_id} ;'''.format(sch=schema,list_id=LIST_ID) else : chunk = 200000 sql1 = ';' sql = sql0 + sql1 df = pd.read_sql_query(text(sql),con=con_cad,chunksize=chunk) # if chunk is not None: # for d in df: # print(d.shape[0]) # d.drop_duplicates(inplace=True) # print(d.drop_duplicates().shape[0]) # else : # df.drop_duplicates(inplace=True) return df def _get_chunk(df1,df2): # cptp1 = pd.DataFrame() list_DF1 = [] for d1 in df1: list_DF1.append(d1) DF1 = pd.concat(list_DF1) # cptp2 = pd.DataFrame() list_DF2 = [] for d2 in df2: list_DF2.append(d2) DF2 = pd.concat(list_DF2) return pd.concat([DF1,DF2]).drop_duplicates() def __get_parcelles__(sql0,list_parid): if list_parid is not None: chunk = None if isinstance(list_parid,str): list_parid = [list_parid] LIST_ID = str(tuple(list_parid)).replace(',)',')') sql1 = ''' WHERE t1.geo_parcelle IN {list_id} ;'''.format(list_id=LIST_ID) else : chunk = None sql1 = ';' sql = sql0 + sql1 # print(text(sql)) return gpd.read_postgis(sql=sql,con=con_cad,chunksize=chunk) def _get_parcelles1(schema='38_202207',list_parid=None): sql0 = '''SELECT DISTINCT ON (t1.geo_parcelle) t1.geo_parcelle, case when t1.geom is null then t2.geom else t1.geom end geom, substring(p.parcelle from 1 for 2)||substring(p.parcelle from 4 for 12) par_id, substring(p.parcelle from 1 for 2)||substring(p.parcelle from 4 for 3) codcom, substring(p.parcelle from 1 for 2) ccodep, substring(p.parcelle from 4 for 3) ccocom, substring(p.parcelle from 7 for 3) ccopre, substring(p.parcelle from 10 for 2) ccosec, substring(p.parcelle from 12 for 4) dnupla, p.annee annee_pci, t1.update_dat, p.dparpi, p.dcntpa, p.ccocomm, p.ccoprem, p.ccosecm, p.dnuplam, p.ccovoi, p.ccoriv, p.type_filiation "type", substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 3)||p.ccovoi vl_id, (SELECT STRING_AGG(DISTINCT gtoper::text,',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) gtoper, (SELECT STRING_AGG(DISTINCT ccogrm::text,',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) ccogrm, (SELECT STRING_AGG(DISTINCT CONCAT(gtoper::text||COALESCE('_'||ccogrm::text,'')),',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) ccogrm, (SELECT STRING_AGG(DISTINCT TRIM(ddenom)::text,',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) ddenom FROM "{sch}".{t1} p LEFT JOIN "{sch}".parcelle_info t2 ON t2.geo_parcelle = p.parcelle LEFT JOIN "{sch}"."geo_parcelle" t1 ON t1.geo_parcelle = p.parcelle LEFT JOIN "{sch}".proprietaire t3 USING (ccodep,ccocom,dnupro) '''.format( sch=schema, t1='parcelle') return __get_parcelles__(sql0,list_parid) def _get_parcelles2(schema='38_202207',list_parid=None): sql0 = '''SELECT DISTINCT ON (t1.geo_parcelle) t1.geo_parcelle, substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 12) par_id, substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 3) codcom, substring(t1.geo_parcelle from 1 for 2) ccodep, substring(t1.geo_parcelle from 4 for 3) ccocom, substring(t1.geo_parcelle from 7 for 3) ccopre, substring(t1.geo_parcelle from 10 for 2) ccosec, substring(t1.geo_parcelle from 12 for 4) dnupla, t1.annee annee_pci, t1.update_dat, p.dparpi, p.dcntpa, p.ccocomm, p.ccoprem, p.ccosecm, p.dnuplam, p.ccovoi, p.ccoriv, p.type_filiation "type", t1.geom, substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 3)||p.ccovoi vl_id, (SELECT STRING_AGG(DISTINCT gtoper::text,',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) gtoper, (SELECT STRING_AGG(DISTINCT ccogrm::text,',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) ccogrm, (SELECT STRING_AGG(DISTINCT CONCAT(gtoper::text||COALESCE('_'||ccogrm::text,'')),',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) ccogrm, (SELECT STRING_AGG(DISTINCT TRIM(ddenom)::text,',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) ddenom FROM "{sch}"."{t1}" t1 LEFT JOIN ("{sch}".parcelle_info t2 LEFT JOIN "{sch}".proprietaire t3 USING (comptecommunal)) USING (geo_parcelle) LEFT JOIN "{sch}".parcelle p ON t1.geo_parcelle = p.parcelle '''.format( sch=schema, t1='geo_parcelle') return __get_parcelles__(sql0,list_parid) def _get_parcelles(schema='38_202207',list_parid=None): p1 = _get_parcelles1(schema,list_parid) print('parcelles from parcelle .......... OK') p2 = _get_parcelles2(schema,list_parid) print('parcelles from geo_parcelle ...... OK') return _get_chunk(p1,p2) def _get_voie2(schema='38_202207',list_parid=None): sql0 = ''' SELECT t1.ccodep||t1.ccocom||p.ccovoi vl_id, t1.libvoi libelle FROM "{sch}"."{t1}" t1 JOIN "{sch}".parcelle p USING (voie) '''.format( sch=schema, t1='voie') return _where_parcelle(sql0,schema,list_parid) def _get_lots_natcult1(schema='38_202207',list_parid=None): sql0 = ''' SELECT ccodep||ccocom||replace(ccopre, ' ', '0')||replace(ccosec, ' ', '0')||dnupla||btrim(dnulot) as lot_id, -- Identifiant du lot (c'est bon, quand il n'y a pas de lot, c'est la même façon de noter l'identifiant) btrim(dsgrpf) as dsgrpf , -- Sous-groupe de nature de culture btrim(cnatsp) as cnatsp , -- Code nature de culture spéciale CASE WHEN trim(dclssf) = '' OR dclssf = '00' THEN NULL::integer ELSE ltrim(dclssf,'0')::integer END AS dclssf, -- Classe dans le groupe et la série tarif btrim(ccosub) as ccosub , -- Lettres indicatives de la suf dcntsf -- Contenance de la suf FROM "{sch}"."{t1}" '''.format( sch=schema, t1='suf') return _where_parcelle(sql0,schema,list_parid) def _get_lots_natcult2(schema='38_202207',list_parid=None): sql0 = ''' SELECT CASE WHEN TRIM(dnulot) = '' THEN substring(parcelle from 1 for 2)||substring(parcelle from 4 for 12)||'0000000' ELSE substring(parcelle from 1 for 2)||substring(parcelle from 4 for 12)||TRIM(dnulot) END lot_id, parcelle, substring(parcelle from 1 for 2)||substring(parcelle from 4 for 12) par_id, dsgrpf, cnatsp, dclssf, ccosub, dcntsf FROM "{sch}"."{t1}" JOIN "{sch}".parcelle p USING(parcelle) '''.format( sch=schema, t1='suf') return _where_parcelle(sql0,schema,list_parid) def _get_lots01(schema='38_202207'): sql0 = ''' SELECT DISTINCT ccodep||ccocom||replace(ccopre, ' ', '0')||replace(ccosec, ' ', '0')||dnupla||dnulot as lot_id, -- Identifiant du lot character varying(21) ccodep||ccocom||replace(ccopre, ' ', '0')||replace(ccosec, ' ', '0')||dnupla as par_id, -- Identifiant de la parcelle dnulot, -- Numéro du lot max(dcntlo) as dcntlo -- Contenance cadastrale (m²) on prend contenance maxi quand même lot_id mais dcntlo différentes FROM "{sch}".{t1} JOIN "{sch}".geo_parcelle ON lots.parcelle = geo_parcelle.geo_parcelle -- on ne garde que les lots sur des parcelles dont on a la géométrie WHERE ccodep||ccocom||replace(ccopre, ' ', '0')||replace(ccosec, ' ', '0')||dnupla||dnulot IN ( -- on sélectionne uniquement les lots qui supportent une subdivision fiscale pour exclure les lots batis SELECT ccodep||ccocom||replace(ccopre, ' ', '0')||replace(ccosec, ' ', '0')||dnupla||dnulot FROM "{sch}".suf WHERE btrim(dnulot) <> '') GROUP BY ccodep, ccocom, ccopre, ccosec, dnupla, dnulot '''.format( sch=schema, t1='lots') return _where_parcelle(sql0,schema,None) def _get_lots02(schema='38_202207'): sql0 = ''' SELECT ccodep||ccocom||replace(ccopre, ' ', '0')||replace(ccosec, ' ', '0')||dnupla||dnulot as lot_id, -- Identifiant du lot character varying(21) ccodep||ccocom||replace(ccopre, ' ', '0')||replace(ccosec, ' ', '0')||dnupla as par_id, -- Identifiant de la parcelle dnulot, -- Numéro du lot sum(dcntsf) as dcntlo -- Contenance cadastrale (m²) = On additionne les contenances des différentes suf qui composent le lot FROM "{sch}".suf JOIN "{sch}".geo_parcelle ON suf.parcelle = geo_parcelle.geo_parcelle -- on ne garde que les sufs sur des parcelles dont on a la géométrie WHERE btrim(dnulot) <> '' AND ccodep||ccocom||replace(ccopre, ' ', '0')||replace(ccosec, ' ', '0')||dnupla||dnulot NOT IN ( SELECT DISTINCT ccodep||ccocom||replace(ccopre, ' ', '0')||replace(ccosec, ' ', '0')||dnupla||dnulot FROM "{sch}".lots WHERE btrim(dnulot)<>'') GROUP BY ccodep, ccocom, ccopre, ccosec, dnupla, dnulot '''.format( sch=schema, t1='lots') return _where_parcelle(sql0,schema,None) def _get_lots03(schema='38_202207'): sql0 = ''' -- on sélectionne les parcelles qui supportent une subdivision fiscale (unique ou multiple) même si divisées en lots car dans ce cas ce sont surement des copropriétés (lots batis/jardins...) SELECT DISTINCT ccodep||ccocom||replace(ccopre, ' ', '0')||replace(ccosec, ' ', '0')||dnupla as lot_id, -- Identifiant du lot fictif(=id parcelle) ccodep||ccocom||replace(ccopre, ' ', '0')||replace(ccosec, ' ', '0')||dnupla as par_id, -- Identifiant de la parcelle null as dnulot, -- Numéro du lot dcntpa as dcntlo-- Contenance cadastrale (m²) FROM "{sch}".parcelle JOIN "{sch}".geo_parcelle ON parcelle.parcelle = geo_parcelle.geo_parcelle -- on ne garde que les les parcelles dont on a la géométrie WHERE ccodep||ccocom||replace(ccopre, ' ', '0')||replace(ccosec, ' ', '0')||dnupla IN (SELECT ccodep||ccocom||replace(ccopre, ' ', '0')||replace(ccosec, ' ', '0')||dnupla||trim(dnulot) FROM "{sch}".suf); -- toutes les parcelles dont dnulot est NULL '''.format( sch=schema, t1='lots') return _where_parcelle(sql0,schema,None) def _get_lots1(schema='38_202207'): l1 = _get_lots01(schema) l2 = _get_lots02(schema) l3 = _get_lots03(schema) return pd.concat([*l1,*l2,*l3]) def _get_lots2(schema='38_202207',list_parid=None): sql0 = ''' SELECT DISTINCT CASE WHEN TRIM(t.dnulot) = '' OR TRIM(t.dnulot) IS NULL THEN substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||TRIM(t.ccosub) ELSE substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||TRIM(t.dnulot) END lot_id, t.parcelle, substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12) par_id, CASE WHEN TRIM(t.dnulot) = '' OR TRIM(t.dnulot) IS NULL THEN TRIM(t.ccosub) ELSE TRIM(t.dnulot) END dnulot, CASE WHEN t.dcntsf IS NULL THEN l.dcntlo ELSE t.dcntsf END dcntlo, geo_sub1.creat_date, CASE WHEN geo_sub1.geom IS NULL THEN geo_sub2.geom ELSE geo_sub1.geom END geom FROM "{sch}"."{t1}" t JOIN "{sch}".parcelle p USING (parcelle) LEFT JOIN "{sch}".geo_parcelle geo_p ON geo_p.geo_parcelle = p.parcelle LEFT JOIN "{sch}".lots l USING (parcelle) --LEFT JOIN "{sch}".lotslocaux ll USING (lots) LEFT JOIN "{sch}".geo_subdfisc_parcelle geo_sub_p ON p.parcelle = geo_sub_p.geo_parcelle --LEFT JOIN "{sch}".geo_subdfisc geo_sub USING (geo_subdfisc) LEFT JOIN "{sch}".geo_subdfisc geo_sub1 ON (geo_sub1.geo_subdfisc = geo_sub_p.geo_subdfisc AND LOWER(geo_sub1.tex) = LOWER(t.ccosub)) LEFT JOIN "{sch}".geo_subdfisc geo_sub2 ON (geo_sub2.geo_subdfisc = geo_sub_p.geo_subdfisc AND TRIM(LOWER(geo_sub2.tex)) = '') WHERE ST_INTERSECTS(geo_sub1.geom,geo_p.geom) OR ST_INTERSECTS(geo_sub2.geom,geo_p.geom) '''.format( sch=schema, t1='suf') return _where_parcelle(sql0,schema,list_parid) def _get_cadastre2(schema='38_202207',list_parid=None): sql0 = ''' SELECT DISTINCT CASE WHEN TRIM(t.dnulot) = '' THEN substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||'0000000' ELSE substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||TRIM(t.dnulot) END lot_id, substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 3)||t.dnupro dnupro FROM "{sch}"."{t1}" t JOIN "{sch}".parcelle p LEFT JOIN "{sch}".lots l USING (parcelle) USING (parcelle) '''.format( sch=schema, t1='suf') return _where_parcelle(sql0,schema,list_parid) def _get_cptprop1(schema='38_202207',list_parid=None): sql0 = ''' WITH lot as ( SELECT ccodep,ccocom,dnuprol dnupro FROM "{sch}".lots ) SELECT DISTINCT t.ccodep||t.ccocom||t.dnupro dnupro, t."annee" annee_matrice FROM "{sch}"."{t1}" t --JOIN "{sch}".suf s USING (ccodep,ccocom,dnupro) --JOIN lot l USING (ccodep,ccocom,dnupro) JOIN "{sch}".parcelle p USING (ccodep,ccocom,dnupro) '''.format( sch=schema, t1='proprietaire') return _where_parcelle(sql0,schema,list_parid) def _get_cptprop2(schema='38_202207',list_parid=None): sql0 = ''' SELECT DISTINCT substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 3)||t.dnupro dnupro, t.annee annee_matrice FROM "{sch}"."{t1}" t JOIN "{sch}".parcelle p LEFT JOIN "{sch}".lots l USING (parcelle) USING (parcelle) '''.format( sch=schema, t1='suf') return _where_parcelle(sql0,schema,list_parid) def _get_cptprop(schema='38_202207',list_parid=None): cptprop1 = _get_cptprop1(schema=schema, list_parid=list_parid) print('cptprop from proprietaire ... OK') cptprop2 = _get_cptprop2(schema=schema, list_parid=list_parid) print('cptprop from suf ............ OK') return _get_chunk(cptprop1,cptprop2) def _get_r_prop_cptprop1(schema='38_202207',list_parid=None): sql0 = ''' SELECT DISTINCT substring(p.parcelle from 1 for 2)||substring(p.parcelle from 4 for 3)||t.dnupro dnupro, substring(p.parcelle from 1 for 2)||t.dnuper dnuper, ccodro, ccodem FROM "{sch}"."{t1}" t JOIN "{sch}".parcelle p USING (ccodep,ccocom, dnupro) '''.format( sch=schema, t1='proprietaire') return _where_parcelle(sql0,schema,list_parid) def _get_proprios1(schema='38_202207',list_parid=None): sql0 = ''' SELECT DISTINCT t.ccodep||t.dnuper dnuper, TRIM(ccoqua)::int ccoqua, TRIM(ddenom) ddenom, TRIM(dqualp) dqualp, TRIM(dnomlp) dnomlp, TRIM(dnomus) dnomus, TRIM(dprnus) dprnus, TRIM(dprnlp) dprnlp, TRIM(epxnee) epxnee, TRIM(dnomcp) dnomcp, TRIM(dprncp) dprncp, TRIM(jdatnss) jdatnss, TRIM(dldnss) dldnss, TRIM(dlign3) dlign3, TRIM(dlign4) dlign4, TRIM(dlign5) dlign5, TRIM(dlign6) dlign6, TRIM(gtoper)::int gtoper, TRIM(ccogrm)::int ccogrm, TRIM(dnatpr) dnatpr, TRIM(dsglpm) dsglpm, t."annee" annee_matrice FROM "{sch}"."{t1}" t JOIN "{sch}".parcelle p USING (ccodep,ccocom, dnupro) '''.format( sch=schema, t1='proprietaire') return _where_parcelle(sql0,schema,list_parid) def _to_cadaste_table(df,con,pkey,table,schema): # Si la clé-primaire de la table est > 1 colonne if len(pkey) > 1 : lstid = str( tuple( df[pkey]\ .drop_duplicates()\ .itertuples(index=False, name=None) ) ).replace(',)',')') pk = str(tuple(pkey)).replace("'",'"') # Si la clé-primaire de la table est 1 colonne else: pk = pkey[0] lstid = str(tuple(df[pk].drop_duplicates()))\ .replace(',)',')') # Récupération des données déjà en BDD sql = ''' SELECT * FROM {sch}.{tab} WHERE {id} IN {lst} ;'''.format( sch = schema, tab = table, id = pk, lst = lstid ) indb = pd.read_sql_query(sql,con,index_col=pkey) # Exclusion des données déjà en base df.set_index(pkey,inplace=True) todb = df[ ~df.index.isin(indb.index) ].copy() df.reset_index(drop=False, inplace=True) todb.reset_index(drop=False, inplace=True) indb.reset_index(drop=False, inplace=True) # Envoie du tableau dans la bdd si non vide if not todb.empty: # Adaptation des types des champs si nécessaire dtyp = {} if 'geom' in todb.columns: from geoalchemy2 import Geometry todb = todb.to_wkt() dtyp = {'geom':Geometry(geometry_type='MULTIPOLYGON',srid=2154)} cols = todb.columns[todb.columns.isin(indb.columns)] todb[cols]\ .to_sql( table, con, schema, if_exists='append', index=False, dtype = dtyp ) print('INSERT %s news data ! OK'%todb.shape[0]) else : print('NO news data to insert !') def to_vl(df,con): table = 'vl' schema = 'cadastre' pkey = __get_pkey__(con,table,schema)['constrained_columns'] _to_cadaste_table(df,con,pkey,table,schema) def to_parcelles(df,con): table = 'parcelles' schema = 'cadastre' pkey = __get_pkey__(con,table,schema)['constrained_columns'] _to_cadaste_table(df,con,pkey,table,schema) def to_lots(df,con): table = 'lots' schema = 'cadastre' pkey = __get_pkey__(con,table,schema)['constrained_columns'] _to_cadaste_table(df,con,pkey,table,schema) def to_lots_natcult(df,con): table = 'lots_natcult' schema = 'cadastre' # pkey = __get_pkey__(con,table,schema)['constrained_columns'] pkey = ['lot_id'] _to_cadaste_table(df,con,pkey,table,schema) def to_cadastre(df,con): table = 'cadastre' schema = 'cadastre' # pkey = __get_pkey__(con,table,schema)['constrained_columns'] pkey = ['lot_id','dnupro'] _to_cadaste_table(df,con,pkey,table,schema) def to_r_prop_cptprop(df,con): table = 'r_prop_cptprop' schema = 'cadastre' pkey = __get_pkey__(con,table,schema)['constrained_columns'] _to_cadaste_table(df,con,pkey,table,schema) def to_proprios(df,con): table = 'proprios' schema = 'cadastre' pkey = __get_pkey__(con,table,schema)['constrained_columns'] _to_cadaste_table(df,con,pkey,table,schema) def to_cptprop(df,con): table = 'cptprop' schema = 'cadastre' pkey = __get_pkey__(con,table,schema)['constrained_columns'] _to_cadaste_table(df,con,pkey,table,schema) if __name__ == "__main__": from pycen import update_to_sql # par = '3805050000E0523' par = None sch = '38_202007' # cptprop1 = _get_cptprop1(schema=sch, list_parid=par) # cptprop2 = _get_cptprop2(schema=sch, list_parid=par) cptprop = _get_cptprop(schema=sch, list_parid=par) proprios = _get_proprios1(schema=sch, list_parid=par) r_prop_cptprop = _get_r_prop_cptprop1(schema=sch, list_parid=par) voie = _get_voie2(schema=sch, list_parid=par) # parcelles = _get_parcelles2(schema=sch, list_parid=par) parcelles = _get_parcelles(schema=sch, list_parid=par) lots = _get_lots2(schema=sch, list_parid=par) lots_natcult = _get_lots_natcult2(schema=sch, list_parid=par) cadastre = _get_cadastre2(schema=sch, list_parid=par) to_cptprop(cptprop,con_fon) to_proprios(proprios,con_fon) to_r_prop_cptprop(r_prop_cptprop,con_fon) to_vl(voie,con_fon) to_parcelles(parcelles,con_fon) to_lots(lots,con_fon) to_lots_natcult(lots_natcult,con_fon) to_cadastre(cadastre,con_fon)