Calling Talend Cloud Rest API from Apache Drill via sql

2022-02-04

Abstract

I’ll show how to connect to Talend Cloud API via sql using Apache Drill.

drill

Download

Download Apache Drill from https://drill.apache.org/download/

Configure

Create or edit the file conf/storage-plugins-override.conf

"storage": {
  "talendcloud" : {
	"type" : "http",
	"cacheResults" : true,
	"connections" : {
	  "get" : {
		"url" : "https://api.eu.cloud.talend.com",
		"method" : "GET",
		"headers" : {
		  "Authorization" : "Bearer MYSECRETKEY"
		},
		"authType" : "none",
		"userName" : null,
		"password" : null,
		"postBody" : null,
		"params" : ["query"],
		"dataPath" : null,
		"requireTail" : true,
		"inputType" : "json",
		"xmlDataLevel" : 1
	  }
	},
	"proxyType" : "direct",
	"enabled" : true
  }
}

Start Drill

Run drill with

./bin/drill-embedded

Now the system is started and you have a command line sql interface. There is also a web ui at http://localhost:8047/ where you can run queries or add/change storages/connections instead of editing the previous file (http://localhost:8047/storage)

Queries

Clusters and remote engines

alter session set `store.json.all_text_mode` = true;

select
	engine.*, clusters.*
from
	talendcloud.get.`/tmc/v2.6/runtimes/remote-engines` as engine
inner join
	talendcloud.get.`/tmc/v2.6/runtimes/remote-engine-clusters` as clusters
on engine.clusterId = clusters.id
where
	engine.query='workspace.environment.name==PRD;status==PAIRED;workspace.name==DATA' limit 1;

The output is

createDate               2021-06-10T09:40:37.676Z
updateDate               2021-06-10T10:09:47.780Z
id                       646464
name                     AWS_PRD_02
workspace                {"id":"8886","name":"DATA","owner":"matteo","type":"custom","environment":{"id":"8787","name":"PRD","default":"false"}}
runtimeId                4434
availability             AVAILABLE
status                   PAIRED
runProfiles              ["TALEND_RUNTIME","MICROSERVICE","JOB_SERVER"]
debug                    {}
clusterId                6565
esbCompatibilityVersion  7.3.1
description              null
createDate0              2021-06-30T15:16:43.538Z
updateDate0              2021-06-30T15:16:43.538Z
id0                      5454
name0                    AWS_CLUSTER
description0             Remote Engine Cluster - Produzione AWS
workspace0               {"id":"32323","name":"DATA","owner":"matteo","type":"custom","environment":{"id":"5454","name":"PRD","default":"false"}}
runtimeId0               1112
availability0            AVAILABLE

Users and groups

From the TMC it id not possibile to extract a csv with users and groups. The following queries can help.

select
	g.name,
	u.email
from
	(select name, flatten(userIds) as uid
	from talendcloud.get.`v1/management/groups`) g
inner join
	  talendcloud.get.`v1/management/users` u
on
	g.uid = u.id
order by
	g.name,
	u.mail;
with users as
	 (select email, flatten(roles) role from talendcloud.get.`v1/management/users`)
select
	users.email as email,
	users.role.name as role
from users
where
	users.role.name like 'COMPANY1%' or users.role.name like 'COMPANY2%';

Enter your instance's address


More posts like this