Commit 50edee48 authored by Christine Plumejeaud's avatar Christine Plumejeaud
Browse files

First commit of code publishing ports like a gazetteer, with full description...

First commit of code publishing ports like a gazetteer, with full description using the linked place model.
parent 963488aa
# -*- coding: utf-8 -*-
'''
Created on 06 june 2020
@author: cplumejeaud
ANR PORTIC : used to build tables for ports : port_points
This requires :
1. to have loaded data (geo_general, port_points_old) into ports manually
CREATE TABLE ports.geo_general (
pointcall_name text NULL,
pointcall_uhgs_id text NULL,
latitude text NULL,
longitude text NULL,
shippingarea text NULL
); -- ok
2. You must restore the file portic_v4.ports.GIS-202006061653.sql in flatdata/GIS to get extra information
* GIS data that are required to build portic gazetteer
- code_levels : list of admiralties with their head-quarters (siège amirauté) (by name)
- obliques : liste of oblique harbors
- matching_ports : computed using geonames version : 2019 November (see below in computeMatching )
- etats : comes from listings of Silvia Marzagalli + extra computing using geonames (see below : SQL code to translate in Python in updateStateGeonameid )
- world_borders : shp downloaded http://www.mappinghacks.com/data/ , last update 30 July 2008
All is packaged like postgres 11.6 tables, using postgis extension. 2.5
3. If you have new points and you want to add admiralty, province, belonging_states or shiparea information, you must open the GIS files and QGIS to set them manually
Many of data can be fetched from old versions of port_points
Manual updates of etats table are to be done also for new points
'''
# Comprendre les imports en Python : http://sametmax.com/les-imports-en-python/
# print sys.path
# sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
## pour avoir le path d'un package
## print (psycopg2.__file__)
## C:\Users\cplume01\AppData\Local\Programs\Python\Python37-32\lib\site-packages\psycopg2\__init__.py
from __future__ import nested_scopes
import logging
import configparser
import xlsxwriter
import os
import psycopg2
#from os import sys, path
import sys, traceback
import json
import random
from xlrd import open_workbook, cellname, XL_CELL_TEXT, XL_CELL_DATE, XL_CELL_BLANK, XL_CELL_EMPTY, XL_CELL_NUMBER, \
xldate_as_tuple
class BuildPorts(object):
def __init__(self, config):
"""
Ouvre les fichiers de log et une connexion à la base de données (en fonction des paramètres de config)
"""
## Ouvrir le fichier de log
logging.basicConfig(filename=config.get('log', 'file'), level=int(config.get('log', 'level')), filemode='w')
self.logger = logging.getLogger('BuildPorts')
self.logger.debug('log file for DEBUG')
self.logger.info('log file for INFO')
self.logger.warning('log file for WARNINGS')
self.logger.error('log file for ERROR')
#self = LoadFilemaker(config)
## Open both a ssh connexion for copy/remove, and a tunnel for postgres connexion
self.postgresconn = self.open_connection(config)
def close_connection(self):
'''
Cleanly close DB connection
:param postgresconn:
:return:
'''
if self.postgresconn is not None:
self.postgresconn.close()
def open_connection(self, config):
'''
Open database connection with Postgres
:param config:
:return:
'''
# Acceder aux parametres de configuration
host = config.get('base', 'host')
port = config.get('base', 'port')
dbname = config.get('base', 'dbname')
user = config.get('base', 'user')
password = config.get('base', 'password')
# schema = config.get('base', 'schema')
driverPostgres = 'host=' + host + ' port=' + port + ' user=' + user + ' dbname=' + dbname + ' password=' + password
self.logger.debug(driverPostgres)
conn = None
try:
conn = psycopg2.connect(driverPostgres)
except Exception as e:
self.logger.error("I am unable to connect to the database. " + str(e))
# Test DB
if conn is not None:
cur = conn.cursor()
cur.execute('select count(*) from pg_namespace')
result = cur.fetchone()
if result is None:
print('open_connection Failed to get count / use of database failed')
else:
print('open_connection Got database connexion : ' + str(result[0]))
else:
print('open_connection Failed to get database connexion')
return conn
def setExtensionsFunctions(self, config):
"""
Installe les extensions nécessaires à l'import et l'utilisation des données de navigocorpus dans portic,
crée les 3 schémas navigo, navigocheck et navigoviz
et crée les 4 fonctions spécifiques à navigo pour importer les données
Ces fonctions programmées en PLPython 3 permettent de retirer les crochets et parenthèses des données de navigo
pour les conserver épurées dans navigocheck, avec également un code associé à la présence de ces signes
- 0 : pas de signe
- -1 : parenthèses autour ()
- -2 : crochets autour []
- -3 : donnée manquante
testé le 14 mai 2020
encore un bug sur la dernière function frequency_topo, qu'il faut installer à la main avec un fichier SQL
"""
#Prerequis : en tant que postgres,
#CREATE ROLE dba WITH SUPERUSER NOINHERIT;
#GRANT dba TO navigo;
self.execute_sql('SET ROLE dba')
self.execute_sql("create extension if not exists postgis")
self.execute_sql("create extension if not exists fuzzystrmatch")
self.execute_sql("create extension if not exists pg_trgm")
self.execute_sql("create extension if not exists postgis_topology")
self.execute_sql("create extension if not exists plpython3u")
self.execute_sql("create extension if not exists dblink")
self.execute_sql("create schema if not exists ports")
self.execute_sql("DROP FUNCTION if exists ports.test_double_type(tested_value VARCHAR)")
query = """CREATE OR REPLACE FUNCTION ports.test_double_type (tested_value VARCHAR) RETURNS boolean AS
$BODY$
begin
EXECUTE 'select '||quote_literal(tested_value)||'::float';
return true;
exception when others then
raise notice '% %', SQLERRM, SQLSTATE;
return false;
end;
$BODY$ LANGUAGE plpgsql VOLATILE;"""
self.execute_sql(query)
self.execute_sql("DROP FUNCTION if exists ports.test_int_type(tested_value VARCHAR)")
query = """CREATE OR REPLACE FUNCTION ports.test_int_type (tested_value VARCHAR) RETURNS boolean AS
$BODY$
begin
EXECUTE 'select '||quote_literal(tested_value)||'::int';
return true;
exception when others then
raise notice '% %', SQLERRM, SQLSTATE;
return false;
end;
$BODY$ LANGUAGE plpgsql VOLATILE;"""
self.execute_sql(query)
self.execute_sql("drop type qual_value cascade")
query = """CREATE TYPE qual_value AS (
value text,
code integer
)"""
self.execute_sql(query)
self.execute_sql("DROP FUNCTION if exists ports.rm_parentheses_crochets(tested_value text)")
query = """CREATE OR REPLACE FUNCTION ports.rm_parentheses_crochets (tested_value text) RETURNS qual_value AS
$$
global result
global code
if tested_value is not None :
result = tested_value.strip()
code = 0
if (tested_value.strip().find('(') == 0):
result = result.replace('(', '').replace(')', '')
code = -1
if (tested_value.strip().find('[') == 0) or tested_value.strip().find(']') > 0:
result = result.replace('[', '').replace(']', '')
code = -2
if (result.strip().find('(') == 0):
result = result.replace('(', '').replace(')', '')
if code > -1 :
code = -1
if (result.strip().find('[') == 0):
result = result.replace('[', '').replace(']', '')
code = -2
if(len(result.strip()) == 0):
code = -3
result = None
else :
result = None
code = -3
return [result, code]
$$ LANGUAGE plpython3u;"""
self.execute_sql(query)
# BUG à cause des ''' :+item.replace('\'', '\'\''')+"'"
query = """CREATE OR REPLACE FUNCTION ports.frequency_topo(uhgs_id text, toustopo text)
RETURNS json
LANGUAGE plpython3u
AS $function$
import json
from operator import itemgetter
global temp
global result
global topos
global query
if toustopo is not None :
result = []
temp = toustopo.replace('{', '').replace('}', '').strip(' ')
temp = temp.replace('"', '').strip(' ')
topos = temp.split(',')
for item in topos:
#plpy.notice(item)
query = "select count(*) as freq from ports.geo_general where pointcall_uhgs_id = '"+uhgs_id+"' and (ports.rm_parentheses_crochets(pointcall_name)).value='"+item.replace('\'', '\'\'')+"'"
#plpy.notice(query)
rv = plpy.execute(query)
#plpy.notice(rv[0]["freq"])
result.append(dict(topo=item, freq=rv[0]["freq"]))
result = sorted(result, key=itemgetter('freq'), reverse=True)
else :
result = None
#plpy.notice (result)
return json.dumps(result, ensure_ascii=False)
$function$
"""
self.execute_sql(query)
self.execute_sql("ALTER FUNCTION ports.frequency_topo(text, text) OWNER TO dba")
self.execute_sql('RESET ROLE')
def updatePlace(self, config):
'''
alter table ports.port_points add column place_state text;
alter table ports.port_points add column place_admiralty text;
alter table ports.port_points add column place_province text;
update port_points pp set place_state = k.appartenances
from (
select uhgs_id, json_build_object('relations', array_agg(arelation)) as appartenances
from
(select uhgs_id, etat, json_build_object('relationType', 'gvp:broaderPartitive', 'relationTo',
case when geonameid is not null then 'http://www.geonames.org/'||geonameid else 'http://vocab.getty.edu/tgn/'||tgnid end,
'label', etat, 'when', json_build_object('timespans', array_agg(intervalle))) as arelation
from (
select uhgs_id, etat, geonameid, tgnid, json_build_object('start', json_build_object('in',dfrom), 'end', json_build_object('in',dto)) ::text as intervalle
from ports.etats
where dfrom is not null and dto is not null
union
(
select uhgs_id, etat, geonameid, tgnid, json_build_object('start', json_build_object('in',dfrom), 'end', json_build_object('in','*')) ::text as intervalle
from ports.etats
where dfrom is not null and dto is null
)
union
(
select uhgs_id, etat, geonameid, tgnid, json_build_object('start', json_build_object('in','*'), 'end', json_build_object('in',dto))::text as intervalle
from ports.etats
where dfrom is null and dto is not null
)
union
(
select uhgs_id, etat, geonameid, tgnid, json_build_object('start', json_build_object('in','*'), 'end', json_build_object('in','*'))::text as intervalle
from ports.etats
where dfrom is null and dto is null
)
) as k
group by uhgs_id, etat, geonameid, tgnid
) as k
group by uhgs_id
order by uhgs_id
) as k
where pp.uhgs_id = k.uhgs_id
-- 641
select distinct state_labels from port_points where place_state is null
-- france, iceland, Greece
/*
* {France}
{Iceland}
{"Hellenic Republic"} 390903
A0339882 Largentiere Greece {Greece}
A0146289 Islande Iceland {Iceland} 2629691
*/
update port_points pp set place_state = k.appartenances
from (
select uhgs_id, json_build_object('relations', array_agg(arelation)) as appartenances
from
(select uhgs_id, etat, json_build_object('relationType', 'gvp:broaderPartitive', 'relationTo',
'http://www.geonames.org/'||geonameid ,
'label', etat, 'when', json_build_object('timespans', array_agg(intervalle))) as arelation
from (
select uhgs_id, 'France' as etat, 3017382 as geonameid, json_build_object('start', json_build_object('in','*'), 'end', json_build_object('in','*'))::text as intervalle
from ports.port_points
where place_state is null and country2019_name = 'France'
union
(
select 'A0339882' as uhgs_id, 'Hellenic Republic' as etat, 390903 as geonameid, json_build_object('start', json_build_object('in','*'), 'end', json_build_object('in','*'))::text as intervalle
)
union
(
select 'A0146289' as uhgs_id, 'Iceland' as etat, 2629691 as geonameid, json_build_object('start', json_build_object('in','*'), 'end', json_build_object('in','*'))::text as intervalle
)
) as k
group by uhgs_id, etat, geonameid
) as k
group by uhgs_id
order by uhgs_id
) as k
where pp.uhgs_id = k.uhgs_id
-- 319
'''
def buildPortTable(self, config):
"""
A partir de la table geo_general,
cette fonction fabrique une table port qui liste pour chaque pointcall_uhgs_id (identifiant du port)
- le toponym le plus fréquent
- les autres toponymes
- l'appartenance aux nomenclatures (amirautés, provinces, fermes, pays actuels)
- les coordonnées géographiques
de ce port.
Testée le 03 juin 2020
"""
self.execute_sql("drop table if exists ports.port_points cascade");
query = """create table if not exists ports.port_points as (
select pointcall_uhgs_id as uhgs_id, latitude, longitude, null as amiraute, null as province,
array_agg(distinct (ports.rm_parentheses_crochets(pointcall_name)).value) as toustopos, (array_agg(distinct shippingarea))[1] as shiparea
from ports.geo_general
group by pointcall_uhgs_id, latitude, longitude
)"""
self.execute_sql(query)
# table qui va servir pour cartographier les ports issus de geo_general (extraction limitée à 1787 le G5 et Marseille) ; une ligne par code UHGS_id, la clé primaire, et les informations attenantes avec
self.execute_sql("comment on table ports.port_points is ' table used for mapping ports extracted from geo_general (only 1787 year, and G5 and Marseille sources) ; one line per UHGS_id code, the primary key, with associated data'")
#Calcul du toponyme le plus fréquent
self.execute_sql("alter table ports.port_points add column topofreq json")
self.execute_sql("alter table ports.port_points add column toponyme text")
query = """update ports.port_points set topofreq = ports.frequency_topo(uhgs_id, array_to_string(toustopos, ',')) where array_length(toustopos, 1) > 0 and ports.test_double_type(longitude) is true"""
self.execute_sql(query)
query = """update ports.port_points set toponyme = (topofreq::json->>0)::json->>'topo'"""
self.execute_sql(query)
#Récupérer les amirautés et province et shiparea déjà calcules
query = """update ports.port_points p set amiraute = pp.amiraute , province = pp.province, shiparea=pp.shiparea
from ports.port_points_old pp where pp.uhgs_id = p.uhgs_id """
self.execute_sql(query)
#ajouter un point (geometry)
self.execute_sql("alter table ports.port_points add column geom geometry")
query = """
update ports.port_points set geom = st_setsrid(st_makepoint(regexp_replace(longitude, ',', '.')::float, regexp_replace(latitude, ',', '.')::float), 4326)
"""
self.execute_sql(query)
self.execute_sql("alter table ports.port_points add column point3857 geometry")
query = """
update ports.port_points set point3857 = st_setsrid(st_transform(geom, 3857) , 3857)
"""
self.execute_sql(query)
#Appartenance aux pays actuels
self.execute_sql("alter table ports.world_borders add column mpolygone3857 geometry");
query = """update ports.world_borders set mpolygone3857 = st_setsrid(st_transform(geom, 3857), 3857)
where region in (150, 2, 19, 142, 9, 0) and iso2 <>'AQ' """
'''
-- 150 : Europe
-- 2 : Afrique
-- 19 : Amériques
-- 142 : Orient / Asie (de la turquie à l'inde)
-- 9 : Océanie
'''
self.execute_sql(query)
self.execute_sql("alter table ports.port_points add column country2019_name text")
self.execute_sql("alter table ports.port_points add column country2019_iso2code text")
self.execute_sql("alter table ports.port_points add column country2019_region text")
query = """ update ports.port_points ports set country2019_name = k.name, country2019_iso2code=k.iso2, country2019_region=k.region
from (
select uhgs_id, toponyme, shiparea, iso2, name, region
from ports.port_points p , ports.world_borders w
where st_contains(w.geom, p.geom)
) as k
where ports.uhgs_id = k.uhgs_id """
self.execute_sql(query)
self.updateCountry(35000)
self.updateCountry(50000)
self.updateCountry(100000)
self.updateCountry(150000)
# Cas particulier de Lampeduse qui n'est pas dans le fichier world_borders
query = """update ports.port_points set country2019_name='Italy', country2019_iso2code ='IT', country2019_region ='150'
where uhgs_id='A0249937'"""
self.execute_sql(query)
query = """select count(*) from ports.port_points where country2019_iso2code is null and point3857 is not null"""
rows = self.select_sql(query);
for rowk in rows:
self.logger.error('Nombre de ports.port_points sans country: '+str(rowk[0]))
print ('Nombre de ports.port_points sans country: ', str(rowk[0]))
#Appartenance aux états historiques
# Mise à jour des états avec geoname (TODO)
# self.updateStateGeonameid(config)
self.execute_sql("alter table ports.port_points add column state_labels text")
self.execute_sql("alter table ports.port_points add column state_geonameids text")
self.execute_sql("alter table ports.port_points add column state_labels_en text")
"""
* 1. attribuer l'agrégat des etats, geonameids, name_ens à  state_labels, state_geonameids, state_labels_en
* 2. attribuer l'état de country2019_name à  state_labels, state_geonameids, state_labels_en si state_labels est vide
* 3. attribuer la suite de relations d'appartenance et leurs date à  state
"""
# 1. attribuer l'agrégat des etats, geonameids, name_ens à  state_labels, state_geonameids, state_labels_en
query = """update ports.port_points p set state_labels=k.state_labels, state_geonameids=k.state_geonameids, state_labels_en=k.state_labels_en
from (
select p.uhgs_id, p.toponyme, p.country2019_name, array_agg( e.dfrom ||'-'|| e.dto) as aperiod, array_agg( distinct e.etat) as state_labels, array_agg(distinct e.geonameid) as state_geonameids, array_agg(distinct e.name_en) as state_labels_en
from ports.port_points p, ports.etats e
where p.uhgs_id =e.uhgs_id
-- and e.etat = 'Etats pontificaux'
group by p.uhgs_id, p.toponyme, p.country2019_name
order by aperiod
) as k
where p.uhgs_id = k.uhgs_id"""
self.execute_sql(query)
# 2. attribuer l'état de country2019_name à  state_labels, state_geonameids, state_labels_en si state_labels est vide
# Cas majoritaire de la France
query = """update ports.port_points p set state_labels=k.state_labels, state_geonameids=k.state_geonameids, state_labels_en=k.state_labels_en
from (
select p.uhgs_id, p.toponyme, p.country2019_name, array_agg(country2019_name) as state_labels, array_agg(3017382) as state_geonameids, array_agg(country2019_name) as state_labels_en
from ports.port_points p
where state_labels is null and p.country2019_name = 'France'
group by p.uhgs_id, p.toponyme, p.country2019_name
) as k
where p.uhgs_id = k.uhgs_id"""
self.execute_sql(query)
#Iceland
query = """ update ports.port_points p set state_labels=k.state_labels, state_geonameids=k.state_geonameids, state_labels_en=k.state_labels_en
from (
select p.uhgs_id, p.toponyme, p.country2019_name, array_agg(country2019_name) as state_labels, array_agg(2629691) as state_geonameids, array_agg(country2019_name) as state_labels_en
from ports.port_points p
where state_labels is null and p.country2019_name = 'Iceland'
group by p.uhgs_id, p.toponyme, p.country2019_name
) as k
where p.uhgs_id = k.uhgs_id and k.uhgs_id = 'A0146289'"""
self.execute_sql(query)
# Greece
query = """ update ports.port_points p set state_labels=k.state_labels, state_geonameids=k.state_geonameids, state_labels_en=k.state_labels_en
from (
select p.uhgs_id, p.toponyme, p.country2019_name, array_agg('Hellenic Republic'::text) as state_labels, array_agg(390903::int) as state_geonameids, array_agg('Hellenic Republic'::text) as state_labels_en
from ports.port_points p
where state_labels is null and p.country2019_name = 'Greece'
group by p.uhgs_id, p.toponyme, p.country2019_name
) as k
where p.uhgs_id = k.uhgs_id and k.uhgs_id = 'A0339882' """
self.execute_sql(query)
# 3. attribuer la suite de relations d'appartenance et leurs date à  state
self.execute_sql("alter table ports.port_points add column belonging_states text")
query = """update ports.port_points pp set belonging_states = '['||k.appartenances ||']'
from (
select uhgs_id, STRING_AGG(belonging, ',') as appartenances
from (
select uhgs_id, etat, case when dfrom is null then 1749 else dfrom end as orderingdate,
json_build_object((case when dfrom is null then 1749 else dfrom end)||'-'||(case when dto is null then 1815 else dto end), etat) :: text as belonging
from ports.etats
order by uhgs_id, orderingdate
) as k
group by uhgs_id
) as k
where pp.uhgs_id = k.uhgs_id"""
self.execute_sql(query)
query = """update ports.port_points pp set belonging_states = '['||k.appartenances ||']'
from (
select uhgs_id, json_build_object(1749 ||'-'||1815, 'France')::text as appartenances
from ports.port_points
where belonging_states is null and country2019_name = 'France'
union
(select 'A0339882' as uhgs_id, json_build_object(1749 ||'-'||1815, 'Hellenic Republic')::text as appartenances)
union
(select 'A0146289' as uhgs_id, json_build_object(1749 ||'-'||1815, 'Iceland')::text as appartenances)
) as k
where pp.uhgs_id = k.uhgs_id"""
self.execute_sql(query)
# Rajouter un identifiant entier unique pour l'export des features
self.execute_sql("alter table ports.port_points add column ogc_fid serial")
self.execute_sql("alter table ports.port_points add primary key (ogc_fid)")
# Le port est oblique ou non
self.execute_sql("alter table ports.port_points add column oblique boolean")
self.execute_sql("CREATE INDEX if not Exists trgm_toponyme_idx ON ports.port_points USING GIST (toponyme gist_trgm_ops)")
self.execute_sql(" update ports.port_points pp set oblique = true from ports.obliques o where pp.uhgs_id =o.uhgs_id");
# Le status du port : siège d'amiraute / oblique / null
self.execute_sql("alter table ports.port_points add column status text default null")
self.execute_sql("update ports.port_points set status = 'oblique' where oblique is true")
query = """update ports.port_points n set status = 'siège amirauté' from
(SELECT o.amiraute, toponyme, pp.ogc_fid, similarity(toponyme, o.amiraute) AS sml
FROM ports.port_points pp, ports.codes_levels o
WHERE oblique is true and o.amiraute % toponyme
and similarity(o.amiraute, toponyme) > 0.8
order by o.amiraute_code
) as q
where q.ogc_fid = n.ogc_fid"""
self.execute_sql(query)
query = """update ports.port_points n set status = 'siège amirauté' where uhgs_id in
('A0152606', 'A0196771', 'A0212320', 'A0135548', 'A0187101', 'A0173748', 'A0189004', 'A0138383', 'A0170986')"""
self.execute_sql(query)
self.execute_sql("alter table ports.port_points add column geonameid int default null")
#Récupérer les geonameid déjà calculés
query = """update ports.port_points p set geonameid = pp.geonameid
from ports.port_points_old pp where pp.uhgs_id = p.uhgs_id """
self.execute_sql(query)
def useGeonames(self, config) :
self.execute_sql("drop view IF EXISTS ports.myremote_geonamesplaces")
query = """create materialized VIEW ports.myremote_geonamesplaces AS
SELECT *
FROM dblink('dbname=geonames user=postgres password=postgres options=-csearch_path=',
'select geonameid::int, name, (feature_class||''.''||feature_code) as feature_code, alternatenames, country_code, latitude::float, longitude::float , point3857
from geonames.geonames_nov2019.allcountries a
where feature_class||''.''||feature_code in (select code from geonames.geonames_nov2019.feature_code where keep = ''x'')')
AS t1(geonameid int, name text, feature_code text, alternatenames text , country_code text, latitude float, longitude float, point3857 geometry)"""
# --- 34 s, 5 012 322 lignes
self.execute_sql(query)
self.execute_sql("grant select on ports.myremote_geonamesplaces to api_user")
self.execute_sql("CREATE INDEX trgm_geoname_name_idx ON ports.myremote_geonamesplaces USING GIST (name gist_trgm_ops)");
#-- 1 min 22s
self.execute_sql("CREATE INDEX gist_geoname_point_idx ON ports.myremote_geonamesplaces USING GIST (point3857)");
#-- 55 s
def computeMatching(self, config) :
"""
Compute the geoname id of each geo_general point
not tested yet
"""
self.execute_sql("drop table IF EXISTS matching_port")
query = """ create table if not exists ports.matching_port (
source1 text,
source2 text,
id1 int,
id2 int,
uhgs_id text,
topo1 text,
topo2 text,
simtext float,
distgeo float,
best boolean default false,
certainity int default 1)"""
self.execute_sql(query)
query = """comment on table ports.matching_port is 'table de calcul des meilleurs appariements entre les ports de géogenéral - port_points, geonames,
et les ports saisis - saisie_13fev2020'"""
self.execute_sql(query)
query = """comment on column ports.matching_port.certainity is 'Certitude levels : 2 is less certain (multiple representation for instance) than 1'"""
self.execute_sql(query)
# Certitude levels : 2 is less certain (multiple representation for instance) than 1
# Tous les lieux de geonames à moins de 5 km
query = """insert into matching_port(source1, id1, uhgs_id, topo1, source2, id2, topo2, distgeo, simtext)
select 'geo_general' as source1, pp.ogc_fid as id1, pp.uhgs_id, pp.toponyme, 'geonames' as source2, s.geonameid, s.toponyme , st_distance(s.point3857, pp.point3857 ), similarity(s.toponyme, pp.toponyme)
from
(select ogc_fid, uhgs_id, unnest(toustopos) as toponyme, point3857 from ports.port_points where geonameid is null) as pp,
(select geonameid, name as toponyme, point3857 from myremote_geonamesplaces ) as s
where st_distance(s.point3857, pp.point3857 ) < 5000
"""