Commit 75556d65 authored by Emmanuel Raviart's avatar Emmanuel Raviart

Initial commit

parents
Pipeline #117 failed with stages
in 21 seconds
{
"presets": [
"@babel/preset-env"
],
"plugins": [
"@babel/plugin-syntax-dynamic-import",
"@babel/plugin-syntax-import-meta",
"@babel/plugin-proposal-class-properties",
"@babel/plugin-proposal-json-strings",
[
"@babel/plugin-proposal-decorators",
{
"legacy": true
}
],
"@babel/plugin-proposal-function-sent",
"@babel/plugin-proposal-export-namespace-from",
"@babel/plugin-proposal-numeric-separator",
"@babel/plugin-proposal-throw-expressions",
"@babel/plugin-proposal-export-default-from",
"@babel/plugin-proposal-logical-assignment-operators",
"@babel/plugin-proposal-optional-chaining",
[
"@babel/plugin-proposal-pipeline-operator",
{
"proposal": "minimal"
}
],
"@babel/plugin-proposal-nullish-coalescing-operator",
"@babel/plugin-proposal-do-expressions",
"@babel/plugin-proposal-function-bind"
]
}
{
"env": {
"node": true
},
"globals": {
"Promise": true,
"Set": true
},
"parser": "babel-eslint",
"plugins": [
"html"
],
"extends": "eslint:recommended",
"rules": {
"comma-dangle": ["error", "always-multiline"],
"max-len": ["error", {"code": 120, "ignoreRegExpLiterals": true, "ignoreUrls": true, "tabWidth": 2}],
"no-unused-expressions": 2,
"no-use-before-define": ["error", "nofunc"],
"quotes": ["error", "double", "avoid-escape"],
"semi": ["error", "never"]
}
}
node_modules/
\ No newline at end of file
# DFIH-Wikibase
_Wikibase bots for the "Données financières historiques (DFIH)" project_
By: Emmanuel Raviart <emmanuel@raviart.com>
Copyright (C) 2018 École d’économie de Paris (PSE)
https://gitlab.huma-num.fr/eurhisfirm/dfih-wikibase
> DFIH-Wikibase is free software; you can redistribute it and/or modify
> it under the terms of the GNU Affero General Public License as
> published by the Free Software Foundation, either version 3 of the
> License, or (at your option) any later version.
>
> DFIH-Wikibase is distributed in the hope that it will be useful,
> but WITHOUT ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> GNU Affero General Public License for more details.
>
> You should have received a copy of the GNU Affero General Public License
> along with this program. If not, see <http://www.gnu.org/licenses/>.
# DFIH-Wikibase
_Wikibase bots for the "Données financières historiques (DFIH)" project_
## Install
```bash
git clone https://gitlab.huma-num.fr/eurhisfirm/dfih-wikibase.git
cd dfih-wikibase/
```
Edit `src/config.js` to set the database and Wikibase connection information. Then run:
```bash
npm install
```
## Update Wikibase with DFIH issuers
```bash
node_modules/.bin/babel-node src/scripts/generate_wikibase_issuers.js
```
This source diff could not be displayed because it is too large. You can view the blob instead.
{
"name": "dfih-wikibase",
"version": "0.0.1",
"description": "Wikibase bots for the \"Données financières historiques (DFIH)\" project",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"repository": {
"type": "git",
"url": "https://framagit.org/eurhisfirm/dfih-wikibase.git"
},
"keywords": [
"bot",
"dfih",
"wikibase",
"wikidata"
],
"author": "Emmanuel Raviart <emmanuel@raviart.com>",
"license": "AGPL-3.0-or-later",
"dependencies": {
"js-levenshtein": "^1.1.4",
"oracledb": "^3.0.0",
"request": "^2.88.0",
"request-promise-native": "^1.0.5",
"url-resolve": "^1.0.0"
},
"devDependencies": {
"@babel/cli": "^7.1.2",
"@babel/core": "^7.1.2",
"@babel/node": "^7.0.0",
"@babel/plugin-proposal-class-properties": "^7.1.0",
"@babel/plugin-proposal-decorators": "^7.1.2",
"@babel/plugin-proposal-do-expressions": "^7.0.0",
"@babel/plugin-proposal-export-default-from": "^7.0.0",
"@babel/plugin-proposal-export-namespace-from": "^7.0.0",
"@babel/plugin-proposal-function-bind": "^7.0.0",
"@babel/plugin-proposal-function-sent": "^7.1.0",
"@babel/plugin-proposal-json-strings": "^7.0.0",
"@babel/plugin-proposal-logical-assignment-operators": "^7.0.0",
"@babel/plugin-proposal-nullish-coalescing-operator": "^7.0.0",
"@babel/plugin-proposal-numeric-separator": "^7.0.0",
"@babel/plugin-proposal-optional-chaining": "^7.0.0",
"@babel/plugin-proposal-pipeline-operator": "^7.0.0",
"@babel/plugin-proposal-throw-expressions": "^7.0.0",
"@babel/plugin-syntax-dynamic-import": "^7.0.0",
"@babel/plugin-syntax-import-meta": "^7.0.0",
"@babel/preset-env": "^7.1.0",
"babel-eslint": "^10.0.1",
"eslint": "^5.8.0"
}
}
import { validateConfig } from "./validators/config"
// Connection settings for the DFIH Oracle database and for the target Wikibase.
// Edit these values to match your environment.
// NOTE(review): credentials are stored in plain text in a versioned file;
// consider keeping real secrets out of version control.
const config = {
  database: {
    user: "monitor",
    password: "monitor",
    connectString: "129.199.194.26/dbdfih",
  },
  wikibase: {
    url: "http://localhost/mediawiki/",
    user: "Eraviart@GenerateIssuersBot",
    password: "gn64jfuasr9blf82tp8ejtpjlnt9u3d2",
  },
}

// Validate the settings once, when the module is first imported, and abort the
// process with a readable report when they are invalid.
const [validConfig, error] = validateConfig(config)
if (error !== null) {
  console.error(
    `Error in configuration:\n${JSON.stringify(validConfig, null, 2)}\nError:\n${JSON.stringify(error, null, 2)}`
  )
  process.exit(-1)
}

// The validated configuration is exposed both as the default export and as the
// named export `config`: importers in this project use both forms
// (`import config from "./config"` and `import { config } from "../config"`),
// and a named import of a default-only module would evaluate to undefined.
export { validConfig as config }
export default validConfig
import oracledb from "oracledb"
import config from "./config"
const databaseConfig = config.database

// Shared connection pool; stays null until createPool() has succeeded.
export let pool = null

/**
 * Creates the Oracle connection pool from the `database` section of the
 * configuration, stores it in the exported `pool` binding and returns it.
 *
 * @returns {Promise<object>} The pool created by `oracledb.createPool`.
 * @throws Rethrows any error raised while creating the pool, after logging it.
 */
export async function createPool() {
  try {
    const { user, password, connectString } = databaseConfig
    const poolOptions = {
      user,
      password,
      connectString,
      // Options left at their driver defaults (per the original author's notes):
      // events, externalAuth, poolAlias, poolIncrement, poolPingInterval,
      // poolTimeout and stmtCacheSize.
      // Up to 10 connections instead of the default 4; increase
      // UV_THREADPOOL_SIZE when raising poolMax.
      poolMax: 10,
      // Keep at least 5 connections open (the default of 0 would let the pool
      // shrink completely).
      poolMin: 5,
      // 0 instead of the 60000 ms default; per the node-oracledb documentation
      // this lets queued getConnection() calls wait without timing out.
      queueTimeout: 0,
    }
    pool = await oracledb.createPool(poolOptions)
    return pool
  } catch (error) {
    console.log("An error occured while calling oracledb.createPool:\n", error.stack || error)
    throw error
  }
}
/**
 * Normalizes a line of text: trims it, removes one pair of enclosing double
 * quotes (trimming again afterwards) and collapses every run of whitespace
 * into a single space.
 *
 * @param {string} text - The raw text.
 * @returns {string} The normalized text.
 */
export function cleanUpLine(text) {
  let line = text.trim()
  const isQuoted = line.startsWith('"') && line.endsWith('"')
  if (isQuoted) {
    line = line.substring(1, line.length - 1).trim()
  }
  return line.replace(/\s+/g, " ")
}
/**
 * Builds a plain object from an array of `[key, value]` couples.
 * When a key appears several times, the last value wins.
 *
 * @param {Array<Array>} couples - The `[key, value]` couples.
 * @returns {object} An object mapping each key to its value.
 */
export function objectFromCouples(couples) {
  const object = {}
  couples.forEach(([key, value]) => {
    object[key] = value
  })
  return object
}
/**
 * Converts a SQL result made of column descriptions (`metaData`) and
 * positional `rows` into an array of objects, one per row, keyed by the
 * lower-cased column names.
 *
 * @param {object} result - The SQL result.
 * @param {Array<{name: string}>} result.metaData - One description per column.
 * @param {Array<Array>} result.rows - The rows, as arrays of column values.
 * @returns {Array<object>} One object per row.
 */
export function objectsFromSqlResult({ metaData, rows }) {
  return rows.map(row => {
    const record = {}
    metaData.forEach((column, position) => {
      record[column.name.toLowerCase()] = row[position]
    })
    return record
  })
}
import assert from "assert"
import levenshtein from "js-levenshtein"
// import oracledb from "oracledb"
import rpn from "request-promise-native"
import resolveUrl from "url-resolve"
import { config } from "../config"
import { createPool } from "../database"
import { cleanUpLine, objectsFromSqlResult } from "../helpers"
// Connection pool used by generateIssuers(); set by the startup code below.
let pool = null

/**
 * Adds the cleaned-up value of `entry[nameKey]` of every SQL row to the set of
 * names of the issuer identified by `entry.corporation`, creating the issuer
 * (and its set of names) in `issuerById` when needed.
 *
 * @param {object} issuerById - Issuers indexed by corporation ID (mutated).
 * @param {Array<object>} entries - Rows with a `corporation` key and a `nameKey` key.
 * @param {string} nameKey - Key of the row property holding the name.
 */
function addIssuerNames(issuerById, entries, nameKey) {
  for (const entry of entries) {
    let issuer = issuerById[entry.corporation]
    if (issuer === undefined) {
      issuer = issuerById[entry.corporation] = { id: entry.corporation }
    }
    if (issuer.names === undefined) {
      issuer.names = new Set()
    }
    issuer.names.add(cleanUpLine(entry[nameKey]))
  }
}

/**
 * Returns the name whose summed Levenshtein distance to all the names is the
 * smallest; on a tie, the shortest of the tied names found so far wins.
 *
 * @param {Array<string>} names - The candidate names.
 * @returns {string|null} The best name, or null when `names` is empty.
 */
function chooseBestName(names) {
  let bestName = null
  let minDistance = Number.MAX_SAFE_INTEGER
  for (const name of names) {
    let distance = 0
    for (const otherName of names) {
      distance += levenshtein(name, otherName)
    }
    if (distance < minDistance || (distance === minDistance && name.length < bestName.length)) {
      bestName = name
      minDistance = distance
    }
  }
  return bestName
}

/**
 * Reads the issuers names from the DFIH database and mirrors them in Wikibase:
 * logs in, retrieves or creates the "DFIH ID" property, then retrieves or
 * creates one item per issuer (linked to the `dfih` site as `issuers/<id>`)
 * and sets its French label, description and aliases.
 *
 * Uses the module-level `pool`, which must have been set before the call.
 *
 * @returns {Promise<void>}
 * @throws When a database query, an API request or an assertion on an API
 *   response fails.
 */
async function generateIssuers() {
  const connection = await pool.getConnection()
  try {
    // Use cookies, so that the session opened by the login is kept.
    const request = rpn.defaults({ jar: true })
    const apiUrl = resolveUrl(config.wikibase.url, "api.php")
    // Every API call is a form-encoded POST returning JSON.
    const postToApi = form => request.post(apiUrl, { form, json: true })
    const issuerById = {}
    let result = null

    console.log("Login...")
    result = await request.post(
      resolveUrl(config.wikibase.url, "api.php?action=query&format=json&meta=tokens&type=login"),
      {
        form: {
          action: "query",
          format: "json",
          meta: "tokens",
          type: "login",
        },
        json: true,
      },
    )
    const loginToken = result.query.tokens.logintoken
    result = await postToApi({
      action: "login",
      format: "json",
      lgname: config.wikibase.user,
      lgpassword: config.wikibase.password,
      lgtoken: loginToken,
    })
    // Fail early instead of going on with the token of an anonymous session.
    assert(
      result.login && result.login.result === "Success",
      `Wikibase login failed: ${JSON.stringify(result)}`,
    )
    result = await postToApi({
      action: "query",
      format: "json",
      meta: "tokens",
    })
    const csrfToken = result.query.tokens.csrftoken

    // Retrieve or create property "identifiant DFIH".
    let dfihIdPropertyId = null
    result = await postToApi({
      action: "wbsearchentities",
      format: "json",
      language: "en",
      limit: 2,
      search: "DFIH ID",
      type: "property",
    })
    assert(result.search.length <= 1, 'Too much properties named "DFIH ID"')
    if (result.search.length > 0) {
      dfihIdPropertyId = result.search[0].id
    } else {
      result = await postToApi({
        action: "wbeditentity",
        data: JSON.stringify({
          datatype: "external-id",
          descriptions: [
            {
              language: "en",
              value: "identifier issued by DFIH.fr web site",
            },
            {
              language: "fr",
              value: "identifiant émis par le web site DFIH.fr",
            },
          ],
          labels: [
            {
              language: "en",
              value: "DFIH ID",
            },
            {
              language: "fr",
              value: "identifiant DFIH",
            },
          ],
        }),
        format: "json",
        new: "property",
        token: csrfToken,
      })
      console.log(JSON.stringify(result, null, 2))
      dfihIdPropertyId = result.entity.id
    }

    console.log("Loading names...")
    // Add corporation_name.name to issuers names.
    addIssuerNames(
      issuerById,
      objectsFromSqlResult(
        await connection.execute(
          `
            select
              cn.corporation,
              cn.name
            from corporation_name cn
            where cn.corporation in (
              select distinct corporation
              from stock_corporation sc
              join stock_name sn on sc.stock = sn.stock
              where sn.name <> 'STOCK_FICTIF'
            )
          `
        )
      ),
      "name",
    )
    // Add corporation_true_name.truename to issuers names.
    addIssuerNames(
      issuerById,
      objectsFromSqlResult(
        await connection.execute(
          `
            select
              ctn.corporation,
              ctn.truename
            from corporation_true_name ctn
            where ctn.corporation in (
              select distinct corporation
              from stock_corporation sc
              join stock_name sn on sc.stock = sn.stock
              where sn.name <> 'STOCK_FICTIF'
            )
          `
        )
      ),
      "truename",
    )

    // Convert issuers names from set to sorted array and compute best names.
    for (const issuer of Object.values(issuerById)) {
      if (issuer.names === undefined) {
        continue
      }
      issuer.names = [...issuer.names].sort()
      const bestName = chooseBestName(issuer.names)
      if (bestName !== null) {
        issuer.name = bestName
      }
    }

    // Add issuer names to Wikibase.
    console.log("Updating or creating issuers items...")
    for (const issuer of Object.values(issuerById)) {
      result = await postToApi({
        action: "wbgetentities",
        format: "json",
        sites: "dfih",
        titles: `issuers/${issuer.id}`,
      })
      assert(result.entities)
      assert(Object.keys(result.entities).length === 1)
      let item = Object.values(result.entities)[0]
      if (item.missing !== undefined) {
        // Create missing Wikibase item for issuer.
        result = await postToApi({
          action: "wbeditentity",
          data: JSON.stringify({
            sitelinks: [{
              site: "dfih",
              title: `issuers/${issuer.id}`,
            }],
          }, null, 2),
          format: "json",
          new: "item",
          token: csrfToken,
        })
        item = result.entity
      }
      // The best name becomes the French label; the other names become aliases.
      result = await postToApi({
        action: "wbeditentity",
        data: JSON.stringify({
          aliases: issuer.names
            .filter(name => name !== issuer.name)
            .map(name => {
              return {
                add: "",
                language: "fr",
                value: name,
              }
            }),
          descriptions: [{
            language: "fr",
            value: "Émetteur",
          }],
          labels: [{
            language: "fr",
            value: issuer.name,
          }],
        }, null, 2),
        format: "json",
        id: item.id,
        token: csrfToken,
      })
      console.log(issuer.id, issuer.name, item.id)
    }
  } finally {
    // Wait for the connection to be released. A failure to close is logged
    // rather than thrown, so that it neither becomes an unhandled rejection
    // nor hides an error raised by the body above.
    try {
      await connection.close()
    } catch (closeError) {
      console.log("An error occurred while closing the database connection:\n", closeError.stack || closeError)
    }
  }
}
/**
 * Script entry point: creates the database connection pool, stores it in the
 * module-level `pool`, then generates the issuers.
 */
async function main() {
  pool = await createPool()
  await generateIssuers()
}

// Any failure, from the pool creation or from the generation, is logged and
// ends the process with exit code 1.
main().catch(error => {
  console.log(error.stack || error)
  process.exit(1)
})
import fs from "fs"
import path from "path"
import { validateNonEmptyTrimmedString } from "./core"
/**
 * Validates the whole configuration.
 *
 * The configuration must be an object holding exactly the sections `database`
 * and `wikibase`; each section is checked by its own validator.
 *
 * @param {*} config - The configuration to validate.
 * @returns {Array} A couple `[value, error]`: `value` is a shallow copy of the
 *   configuration whose sections are replaced by their validated values (or
 *   the input itself when it is not an object); `error` is null when
 *   everything is valid, otherwise a message or an object of errors by key.
 */
export function validateConfig(config) {
  if (config == null) {
    return [config, "Missing config"]
  }
  const actualType = typeof config
  if (actualType !== "object") {
    return [config, `Expected an object got "${actualType}"`]
  }

  // Section validators, in the order in which their errors are reported.
  const sectionValidators = {
    database: section => validateDatabase(section),
    wikibase: section => validateWikibase(section),
  }
  const validated = { ...config }
  const errors = {}
  for (const [key, validateSection] of Object.entries(sectionValidators)) {
    if (!Object.prototype.hasOwnProperty.call(validated, key)) {
      errors[key] = "Missing item"
      continue
    }
    const [value, error] = validateSection(validated[key])
    validated[key] = value
    if (error !== null) {
      errors[key] = error
    }
  }
  // Every other key is rejected.
  Object.keys(validated)
    .filter(key => !Object.prototype.hasOwnProperty.call(sectionValidators, key))
    .forEach(key => {
      errors[key] = "Unexpected item"
    })

  const hasErrors = Object.keys(errors).length > 0
  return [validated, hasErrors ? errors : null]
}
/**
 * Validates the `database` section of the configuration.
 *
 * The section must be an object holding exactly the keys `connectString`,
 * `password` and `user`, each one checked by `validateNonEmptyTrimmedString`.
 *
 * @param {*} database - The section to validate.
 * @returns {Array} A couple `[value, error]`: `value` is a shallow copy of the
 *   section with its validated items (or the input itself when it is not an
 *   object); `error` is null when everything is valid, otherwise a message or
 *   an object of errors by key.
 */
function validateDatabase(database) {
  if (database == null) {
    return [database, "Missing value"]
  }
  const actualType = typeof database
  if (actualType !== "object") {
    return [database, `Expected an object got "${actualType}"`]
  }

  const expectedKeys = ["connectString", "password", "user"]
  const validated = { ...database }
  const errors = {}
  expectedKeys.forEach(key => {
    if (!Object.prototype.hasOwnProperty.call(validated, key)) {
      errors[key] = "Missing item"
      return
    }
    const [value, error] = validateNonEmptyTrimmedString(validated[key])
    validated[key] = value
    if (error !== null) {
      errors[key] = error
    }
  })
  // Every other key is rejected.
  Object.keys(validated)
    .filter(key => !expectedKeys.includes(key))
    .forEach(key => {
      errors[key] = "Unexpected item"
    })

  const hasErrors = Object.keys(errors).length > 0
  return [validated, hasErrors ? errors : null]
}
/**
 * Validates the `wikibase` section of the configuration.
 *
 * The section must be an object holding exactly the keys `password`, `url`
 * and `user`, each one checked by `validateNonEmptyTrimmedString`.
 *
 * @param {*} wikibase - The section to validate.
 * @returns {Array} A couple `[value, error]`: `value` is a shallow copy of the
 *   section with its validated items (or the input itself when it is not an
 *   object); `error` is null when everything is valid, otherwise a message or
 *   an object of errors by key.
 */
function validateWikibase(wikibase) {
  if (wikibase == null) {
    return [wikibase, "Missing value"]
  }
  const actualType = typeof wikibase
  if (actualType !== "object") {
    return [wikibase, `Expected an object got "${actualType}"`]
  }

  const expectedKeys = ["password", "url", "user"]
  const validated = { ...wikibase }
  const errors = {}
  expectedKeys.forEach(key => {
    if (!Object.prototype.hasOwnProperty.call(validated, key)) {
      errors[key] = "Missing item"
      return
    }
    const [value, error] = validateNonEmptyTrimmedString(validated[key])
    validated[key] = value
    if (error !== null) {
      errors[key] = error
    }
  })
  // Every other key is rejected.
  Object.keys(validated)
    .filter(key => !expectedKeys.includes(key))
    .forEach(key => {
      errors[key] = "Unexpected item"
    })

  const hasErrors = Object.keys(errors).length > 0
  return [validated, hasErrors ? errors : null]
}
/**
 * Validates an optional string and trims it.
 *
 * @param {*} value - The value to validate.
 * @returns {Array} A couple `[value, error]`:
 *   `[null, null]` when the value is null, undefined or blank;
 *   `[trimmedValue, null]` for any other string;
 *   `[value, message]` when the value is not a string.
 */
export function validateMayBeTrimmedString(value) {
  if (value == null) {
    return [null, null]
  }
  const actualType = typeof value
  if (actualType !== "string") {
    return [value, `Expected a string got "${actualType}"`]
  }
  const trimmed = value.trim()
  return [trimmed === "" ? null : trimmed, null]
}