Commit ff1275b9 authored by Christine Plumejeaud's avatar Christine Plumejeaud
Browse files

a script to generate all descriptions, and save them into both the database...

a script to generate all descriptions, and save them into both the database (table gazetteer) and in a file to download and keep on GIT.
parent 1b55f2f0
##
## Auteur : Christine Plumejeaud, 28 octobre 2020
## FICHIER DE CONFIGURATION
## A utiliser avec : recordgaz.py
##
#################################################################################################################
##
## Coordonnées de la base postgres dans laquelle on importe les données
##
#################################################################################################################
[base]
host=localhost
#134.158.33.179
port=5432
#port=8004
dbname=portic_v5
#user=navigo
user=postgres
password=postgres
#################################################################################################################
##
## Fichiers de log
## https://docs.python.org/3/library/logging.html#logging-levels
## levels
## ERROR 40
## WARNING 30
## INFO 20
## DEBUG 10
##
#################################################################################################################
[log]
# Fichier de log pour garder une trace des opérations réalisées (il écrase le précédent s'il porte le même nom)
file=C:\\Travail\\Dev\\portic_humanum\\gazetteer\\log_28oct2020.txt
level=10
#################################################################################################################
##
## HTTP config
##
#################################################################################################################
[proxy]
## Set to True when a proxy is required (e.g. at ULR), False otherwise
need=True
## Adresse du proxy : IP:port
http_proxy=10.1.30.18:3128
#################################################################################################################
##
## Check outputs
##
#################################################################################################################
[outputs]
geojson_output=C:\\Travail\\Dev\\portic_humanum\\gazetteer\\gazetteer_portic.geojson
# -*- coding: utf-8 -*-
'''
Created on 06 June 2020
@author: cplumejeaud
ANR PORTIC : used to build tables for ports : port_points
'''
from __future__ import nested_scopes
import logging
import configparser
import xlsxwriter
import os
import psycopg2 as pg
#from os import sys, path
import sys, traceback
import json
#import saveGazetteer from porticgaz
class RecordGaz(object):
    """Generate gazetteer descriptions and record them.

    For every province, amiraute and port found in ``ports.port_points``,
    a description is obtained from ``porticgaz.describePlace`` and saved
    both in the ``ports.gazetteer`` table and in an output file.
    """

    # (kind of entity passed to porticgaz.describePlace, SQL query, column holding the identifiers)
    _ENTITY_SOURCES = (
        ('province',
         """select distinct province from ports.port_points where province is not null""",
         'province'),
        ('amiraute',
         """select distinct amiraute from ports.port_points where amiraute is not null""",
         'amiraute'),
        ('port',
         """select uhgs_id, toponyme from ports.port_points where uhgs_id is not null""",
         'uhgs_id'),
    )

    # Parametrized statement: the driver does the quoting, no manual escaping needed.
    _INSERT_SQL = """insert into ports.gazetteer (pkid, linked_place_desc) values (%s, %s::jsonb)"""

    # Keys read from the [base] section of the configuration file.
    _CONNECTION_KEYS = ('host', 'port', 'user', 'dbname', 'password')

    # Connection parameters remembered by open_connection(); None until it has run.
    _connection_params = None

    def __init__(self, config):
        """
        Open the log file and a database connection, both according to ``config``.

        :param config: a configparser object providing the [log] and [base] sections
        """
        # Open the log file (filemode 'w': an existing file with the same name is overwritten)
        logging.basicConfig(
            filename=config.get('log', 'file'),
            level=int(config.get('log', 'level')),
            filemode='w',
        )
        self.logger = logging.getLogger('BuildPorts')
        # One message per level, so the log file shows which levels are active
        self.logger.debug('log file for DEBUG')
        self.logger.info('log file for INFO')
        self.logger.warning('log file for WARNINGS')
        self.logger.error('log file for ERROR')

        self.postgresconn = self.open_connection(config)

    def retrieveDataFromPostgres(self, query):
        '''
        Internal method to select data using an SQL query.

        A dedicated connection is opened with the parameters read from the
        configuration (see open_connection) and closed once the data is read.

        :param query: the SQL SELECT statement to run
        :return: a pandas DataFrame holding the result set
        :raises RuntimeError: if open_connection() has not been called yet
        '''
        import pandas as pd

        if self._connection_params is None:
            raise RuntimeError('retrieveDataFromPostgres called before open_connection')

        connection = pg.connect(**self._connection_params)
        try:
            return pd.read_sql_query(query, con=connection)
        finally:
            # Close the connection even when the query fails
            connection.close()

    def close_connection(self):
        '''
        Cleanly close the DB connection, if one was opened.

        :return: None
        '''
        if self.postgresconn is not None:
            self.postgresconn.close()

    def open_connection(self, config):
        '''
        Open a database connection with Postgres and run a small test query.

        :param config: a configparser object providing the [base] section
                       (host, port, dbname, user, password)
        :return: the open connection, or None when connecting failed
        '''
        # Read the connection parameters from the configuration
        params = {key: config.get('base', key) for key in self._CONNECTION_KEYS}
        # Remembered so that retrieveDataFromPostgres uses the same database
        self._connection_params = params

        # Log the parameters for debugging, but never the password itself
        self.logger.debug(' '.join(
            '%s=%s' % (key, '***' if key == 'password' else params[key])
            for key in self._CONNECTION_KEYS
        ))

        conn = None
        try:
            # Keyword arguments avoid DSN quoting problems (e.g. spaces in the password)
            conn = pg.connect(**params)
        except Exception as e:
            # Best effort: the failure is logged and reported through the None return value
            self.logger.error("I am unable to connect to the database. " + str(e))

        if conn is None:
            print('open_connection Failed to get database connexion')
            return conn

        # Test the connection with a trivial query
        cur = conn.cursor()
        try:
            cur.execute('select count(*) from pg_namespace')
            result = cur.fetchone()
        finally:
            cur.close()

        if result is None:
            print('open_connection Failed to get count / use of database failed')
        else:
            print('open_connection Got database connexion : ' + str(result[0]))
        return conn

    def execute_sql(self, sql_query, params=None):
        '''
        Execute one SQL statement and commit it.

        Errors are not propagated: they are printed, the statement is logged
        and the transaction is rolled back.

        :param sql_query: the SQL statement, optionally with %s placeholders
        :param params: optional sequence of values bound to the placeholders
        :return: None
        '''
        cur = self.postgresconn.cursor()
        try:
            cur.execute(sql_query, params)
        except Exception as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            print(e)
            print(repr(traceback.format_exception(exc_type, exc_value, exc_traceback)))
            self.logger.error(sql_query)
            # Leave the connection usable for the next statement
            self.postgresconn.rollback()
        else:
            self.postgresconn.commit()
        finally:
            cur.close()

    def saveGazetteer(self, filename='gazetteer_portic.geojson'):
        '''
        Describe every province, amiraute and port, and save the descriptions.

        Each description is inserted into ports.gazetteer and appended to
        ``filename``, which ends up holding one JSON array of descriptions.

        :param filename: path of the output file (overwritten if it exists)
        :return: None
        '''
        import porticgaz

        is_first = True
        with open(filename, "w", encoding="utf-8") as output:
            output.write('[')
            for kind_of_entity, query, column in self._ENTITY_SOURCES:
                results = self.retrieveDataFromPostgres(query)
                for k in results[column].tolist():
                    print(k)
                    desc = porticgaz.describePlace(kind_of_entity, k)
                    self.execute_sql(self._INSERT_SQL, (str(k), desc))
                    # Separator goes between items only: a trailing comma
                    # before ']' would make the file invalid JSON
                    if not is_first:
                        output.write(',')
                    output.write(desc)
                    is_first = False
            output.write(']')
if __name__ == '__main__':
    # The configuration file name may be given as first command-line argument;
    # without argument, 'config.txt' in the current directory is used.
    configfile = sys.argv[1] if len(sys.argv) > 1 else 'config.txt'
    config = configparser.RawConfigParser()
    # read() returns the list of files it could parse: an empty list means the
    # file is missing or unreadable, which would otherwise only show up later
    # as a missing-section error.
    if not config.read(configfile):
        sys.exit("Configuration file not found or unreadable: " + configfile)
    print("Fichier de LOGS : " + config.get('log', 'file'))
    r = RecordGaz(config)
    try:
        r.saveGazetteer(config.get('outputs', 'geojson_output'))
    finally:
        # Release the database connection even if the export fails
        r.close_connection()
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment