Calling Talend Cloud Rest API from Apache Drill via sql
2022-02-04
Abstract
I’ll show how to connect to Talend Cloud API via sql using Apache Drill.
Download
Download Apache Drill from https://drill.apache.org/download/
Configure
Create or edit the file conf/storage-plugins-override.conf
"storage": {
"talendcloud" : {
"type" : "http",
"cacheResults" : true,
"connections" : {
"get" : {
"url" : "https://api.eu.cloud.talend.com",
"method" : "GET",
"headers" : {
"Authorization" : "Bearer MYSECRETKEY"
},
"authType" : "none",
"userName" : null,
"password" : null,
"postBody" : null,
"params" : ["query"],
"dataPath" : null,
"requireTail" : true,
"inputType" : "json",
"xmlDataLevel" : 1
}
},
"proxyType" : "direct",
"enabled" : true
}
}
Start Drill
Run drill with
./bin/drill-embedded
Now the system is started and you have a command line sql interface. There is also a web ui at http://localhost:8047/ where you can run queries or add/change storages/connections instead of editing the previous file (http://localhost:8047/storage)
Queries
Clusters and remote engines
alter session set `store.json.all_text_mode` = true;
select
engine.*, clusters.*
from
talendcloud.get.`/tmc/v2.6/runtimes/remote-engines` as engine
inner join
talendcloud.get.`/tmc/v2.6/runtimes/remote-engine-clusters` as clusters
on engine.clusterId = clusters.id
where
engine.query='workspace.environment.name==PRD;status==PAIRED;workspace.name==DATA' limit 1;
The output is
createDate 2021-06-10T09:40:37.676Z
updateDate 2021-06-10T10:09:47.780Z
id 646464
name AWS_PRD_02
workspace {"id":"8886","name":"DATA","owner":"matteo","type":"custom","environment":{"id":"8787","name":"PRD","default":"false"}}
runtimeId 4434
availability AVAILABLE
status PAIRED
runProfiles ["TALEND_RUNTIME","MICROSERVICE","JOB_SERVER"]
debug {}
clusterId 6565
esbCompatibilityVersion 7.3.1
description null
createDate0 2021-06-30T15:16:43.538Z
updateDate0 2021-06-30T15:16:43.538Z
id0 5454
name0 AWS_CLUSTER
description0 Remote Engine Cluster - Produzione AWS
workspace0 {"id":"32323","name":"DATA","owner":"matteo","type":"custom","environment":{"id":"5454","name":"PRD","default":"false"}}
runtimeId0 1112
availability0 AVAILABLE
Users and groups
From the TMC it id not possibile to extract a csv with users and groups. The following queries can help.
select
g.name,
u.email
from
(select name, flatten(userIds) as uid
from talendcloud.get.`v1/management/groups`) g
inner join
talendcloud.get.`v1/management/users` u
on
g.uid = u.id
order by
g.name,
u.mail;
with users as
(select email, flatten(roles) role from talendcloud.get.`v1/management/users`)
select
users.email as email,
users.role.name as role
from users
where
users.role.name like 'COMPANY1%' or users.role.name like 'COMPANY2%';