umami/src/lib/clickhouse.ts

237 lines
5.8 KiB
TypeScript
Raw Normal View History

2023-09-29 18:00:06 +00:00
import { ClickHouseClient, createClient } from '@clickhouse/client';
2024-08-17 02:44:16 +00:00
import { formatInTimeZone } from 'date-fns-tz';
2022-08-28 04:38:35 +00:00
import debug from 'debug';
import { CLICKHOUSE } from 'lib/db';
2024-08-02 05:57:54 +00:00
import { DEFAULT_PAGE_SIZE, OPERATORS } from './constants';
2023-08-04 20:18:30 +00:00
import { maxDate } from './date';
2024-08-02 05:57:54 +00:00
import { fetchWebsite } from './load';
2024-04-03 00:06:06 +00:00
import { filtersToArray } from './params';
2024-08-02 05:57:54 +00:00
import { PageParams, QueryFilters, QueryOptions } from './types';
2022-08-28 04:38:35 +00:00
/**
 * Format patterns keyed by time unit, passed as the second argument of
 * ClickHouse's `formatDateTime` by `getDateStringSQL`.
 *
 * Every pattern except `utc` and `second` hard-codes the fields below its own
 * resolution (`:00`, `-01`, ...), so the formatted value is the start of the
 * enclosing minute/hour/day/month/year.
 */
export const CLICKHOUSE_DATE_FORMATS = {
  utc: '%Y-%m-%dT%H:%i:%SZ',
  second: '%Y-%m-%d %H:%i:%S',
  minute: '%Y-%m-%d %H:%i:00',
  hour: '%Y-%m-%d %H:00:00',
  day: '%Y-%m-%d',
  month: '%Y-%m-01',
  year: '%Y-01-01',
};
2022-08-29 03:20:54 +00:00
// Debug logger for this module, namespaced as `umami:clickhouse`.
const log = debug('umami:clickhouse');

// Module-level client handle. It starts unset and is only assigned by connect().
let clickhouse: ClickHouseClient;

// ClickHouse support is considered enabled whenever CLICKHOUSE_URL is non-empty.
const enabled = Boolean(process.env.CLICKHOUSE_URL);
2022-08-26 05:04:32 +00:00
/**
 * Creates a ClickHouse client from the `CLICKHOUSE_URL` environment variable.
 *
 * The URL's protocol and host form the server address, its path (minus the
 * leading slash) is the database name, and its userinfo supplies the
 * credentials. Outside production the client is also stored on
 * `global[CLICKHOUSE]`, which `connect()` checks before creating a new one
 * (presumably so development reloads reuse a single client).
 *
 * @returns The newly created client.
 * @throws TypeError if `CLICKHOUSE_URL` is missing or not a valid URL.
 */
function getClient() {
  const { host, pathname, protocol, username, password } = new URL(process.env.CLICKHOUSE_URL);

  const client = createClient({
    // `host` already includes `:port` when one is given, so a URL without an
    // explicit port does not produce a dangling `:`.
    url: `${protocol}//${host}`,
    database: pathname.replace('/', ''),
    // URL#username is '' (never undefined) when the URL has no userinfo, so a
    // destructuring default would never apply; fall back explicitly instead.
    username: username || 'default',
    password,
  });

  if (process.env.NODE_ENV !== 'production') {
    global[CLICKHOUSE] = client;
  }

  log('Clickhouse initialized');

  return client;
}
2022-08-26 05:04:32 +00:00
2024-08-17 02:44:16 +00:00
/**
 * Renders a date as a `yyyy-MM-dd HH:mm:ss` string in the UTC time zone.
 *
 * @param date - Date to render; when omitted (or falsy) the current time is used.
 * @returns The formatted timestamp.
 */
function getUTCString(date?: Date) {
  const target = date ? date : new Date();

  return formatInTimeZone(target, 'UTC', 'yyyy-MM-dd HH:mm:ss');
}
2024-08-16 07:35:08 +00:00
/**
 * Builds a `formatDateTime(...)` SQL expression.
 *
 * All arguments are interpolated into the SQL text verbatim (not bound as
 * query parameters), so callers must only pass trusted values.
 *
 * @param data - SQL expression (column name or sub-expression) to format.
 * @param unit - Key of `CLICKHOUSE_DATE_FORMATS` selecting the pattern; defaults to `'utc'`.
 * @param timezone - Optional time zone name, emitted as a third argument when truthy.
 * @returns The SQL fragment.
 */
function getDateStringSQL(data: any, unit: string = 'utc', timezone?: string) {
  const format = CLICKHOUSE_DATE_FORMATS[unit];
  const timezoneArg = timezone ? `, '${timezone}'` : '';

  return `formatDateTime(${data}, '${format}'${timezoneArg})`;
}
/**
 * Builds a `date_trunc(...)` SQL expression.
 *
 * All arguments are interpolated into the SQL text verbatim (not bound as
 * query parameters), so callers must only pass trusted values.
 *
 * @param field - SQL expression (typically a column name) to truncate.
 * @param unit - Truncation unit, emitted as a quoted string literal.
 * @param timezone - Optional time zone name, emitted as a third argument when truthy.
 * @returns The SQL fragment.
 */
function getDateSQL(field: string, unit: string, timezone?: string) {
  const timezoneArg = timezone ? `, '${timezone}'` : '';

  return `date_trunc('${unit}', ${field}${timezoneArg})`;
}
2024-03-27 00:31:16 +00:00
/**
 * Translates one filter into a SQL condition that compares a column against a
 * named query-parameter placeholder of the form `{name:type}`.
 *
 * @param column - Column (or SQL expression) the filter applies to; interpolated verbatim.
 * @param operator - One of the `OPERATORS` values.
 * @param name - Name of the bound query parameter holding the filter value.
 * @param type - Type written into the placeholder; defaults to `'String'`.
 * @returns The condition, or `''` when the operator is not one of the handled cases.
 */
function mapFilter(column: string, operator: string, name: string, type: string = 'String') {
  const value = `{${name}:${type}}`;

  switch (operator) {
    case OPERATORS.equals:
      return `${column} = ${value}`;
    case OPERATORS.notEquals:
      return `${column} != ${value}`;
    case OPERATORS.contains:
      // positionCaseInsensitive yields 0 when the substring is absent.
      return `positionCaseInsensitive(${column}, ${value}) > 0`;
    case OPERATORS.doesNotContain:
      return `positionCaseInsensitive(${column}, ${value}) = 0`;
    default:
      return '';
  }
}
/**
 * Builds the `and ...` filter clauses for a query, one per line.
 *
 * Filters are obtained from `filtersToArray(filters, options)`; entries
 * without a `column` are ignored. A filter named `referrer` additionally
 * appends a clause comparing `referrer_domain` against the `websiteDomain`
 * query parameter.
 *
 * @param filters - Filter values keyed by filter name.
 * @param options - Options forwarded to `filtersToArray`.
 * @returns The clauses joined with newlines, or `''` when there are none.
 */
function getFilterQuery(filters: QueryFilters = {}, options: QueryOptions = {}) {
  const clauses: string[] = [];

  for (const { name, column, operator } of filtersToArray(filters, options)) {
    if (!column) {
      continue;
    }

    const condition = mapFilter(column, operator, name);

    // mapFilter returns '' for operators it does not handle; emitting a bare
    // `and ` in that case would produce invalid SQL, so skip the clause.
    if (condition) {
      clauses.push(`and ${condition}`);
    }

    if (name === 'referrer') {
      clauses.push('and referrer_domain != {websiteDomain:String}');
    }
  }

  return clauses.join('\n');
}
/**
 * Builds the `created_at` range clause for a query.
 *
 * The clause refers to the `startDate`, `endDate` and `timezone` query
 * parameters by name; it does not embed their values.
 *
 * @param filters - Only `startDate`, `endDate` and `timezone` are inspected.
 * @returns `''` without a start date; a `>=` clause with only a start date; a
 *   `between` clause with both. When a time zone is given each bound is
 *   wrapped in `toTimezone(...)`.
 */
function getDateQuery(filters: QueryFilters = {}) {
  const { startDate, endDate, timezone } = filters;

  // No lower bound means no date restriction at all, even if endDate is set.
  if (!startDate) {
    return '';
  }

  if (endDate) {
    return timezone
      ? `and created_at between toTimezone({startDate:DateTime64},{timezone:String}) and toTimezone({endDate:DateTime64},{timezone:String})`
      : `and created_at between {startDate:DateTime64} and {endDate:DateTime64}`;
  }

  return timezone
    ? `and created_at >= toTimezone({startDate:DateTime64},{timezone:String})`
    : `and created_at >= {startDate:DateTime64}`;
}
2024-04-03 00:06:06 +00:00
/**
 * Collects the bound query-parameter values for a set of filters.
 *
 * @param filters - Filter values; expanded via `filtersToArray`.
 * @returns An object mapping each filter name to its value. Entries with an
 *   empty name or an `undefined` value are left out (`null` is kept).
 */
function getFilterParams(filters: QueryFilters = {}) {
  const params: Record<string, unknown> = {};

  for (const { name, value } of filtersToArray(filters)) {
    if (name && value !== undefined) {
      params[name] = value;
    }
  }

  return params;
}
2023-08-11 16:05:56 +00:00
/**
 * Resolves everything a website-scoped query needs: the filter clauses, the
 * date clause and the bound query parameters.
 *
 * @param websiteId - Website whose record is loaded via `fetchWebsite`.
 * @param filters - Filters supplied by the caller.
 * @param options - Options forwarded to `getFilterQuery`.
 * @returns `{ filterQuery, dateQuery, params }`. `params` starts from the
 *   filter values and then sets `websiteId`, `startDate` and `websiteDomain`,
 *   so those three always override same-named filter values.
 */
async function parseFilters(websiteId: string, filters: QueryFilters = {}, options?: QueryOptions) {
  const website = await fetchWebsite(websiteId);

  return {
    filterQuery: getFilterQuery(filters, options),
    dateQuery: getDateQuery(filters),
    params: {
      ...getFilterParams(filters),
      websiteId,
      // Combined with the website's reset date through maxDate (presumably so
      // data from before a reset is never queried - confirm against maxDate).
      startDate: maxDate(filters.startDate, new Date(website?.resetAt)),
      // Null-safe like `resetAt` above: a missing website must not crash here.
      websiteDomain: website?.domain,
    },
  };
}
/**
 * Runs a query one page at a time and also reports the total row count.
 *
 * Two statements are executed: a `count(*)` over the unpaged query, then the
 * query itself with `order by` / `limit ... offset ...` appended.
 *
 * NOTE(review): `orderBy` is interpolated into the SQL text verbatim. Callers
 * must pass a trusted column expression, never raw user input.
 *
 * @param query - Base SQL query, without ordering or paging clauses.
 * @param queryParams - Bound query parameters, used for both statements.
 * @param pageParams - `page` (1-based, default 1), `pageSize` (falls back to
 *   `DEFAULT_PAGE_SIZE` when missing, zero or non-numeric; a negative value
 *   disables the `limit` clause), `orderBy` and `sortDescending`.
 * @returns `{ data, count, page, pageSize, orderBy }`.
 */
async function pagedQuery(
  query: string,
  queryParams: { [key: string]: any },
  pageParams: PageParams = {},
) {
  const { page = 1, pageSize, orderBy, sortDescending = false } = pageParams;
  const size = +pageSize || DEFAULT_PAGE_SIZE;
  // `page` may arrive as a string, zero, negative or NaN; clamp it to a
  // 1-based integer so the computed offset is never negative or NaN.
  const pageNumber = Math.max(1, Math.floor(+page) || 1);
  const offset = size * (pageNumber - 1);
  const direction = sortDescending ? 'desc' : 'asc';

  const statements = [
    orderBy && `order by ${orderBy} ${direction}`,
    size > 0 && `limit ${size} offset ${offset}`,
  ]
    .filter(Boolean)
    .join('\n');

  const countRows = await rawQuery<{ num: unknown }[]>(
    `select count(*) as num from (${query}) t`,
    queryParams,
  );
  const count = countRows[0].num;

  // Separate with a newline so a query that does not end in whitespace is not
  // glued to the `order by` / `limit` keywords.
  const data = await rawQuery(`${query}\n${statements}`, queryParams);

  return { data, count, page: pageNumber, pageSize: size, orderBy };
}
2024-06-20 04:41:45 +00:00
/**
 * Executes a SQL statement and returns its rows parsed from `JSONEachRow`.
 *
 * When the `LOG_QUERY` environment variable is set, the SQL text and the
 * parameters are written to the module's debug logger first.
 *
 * @param query - SQL text; may contain `{name:Type}` placeholders.
 * @param params - Values for the placeholders.
 * @returns The parsed result, cast to `T` without runtime validation.
 * @throws Error if ClickHouse is not configured (`CLICKHOUSE_URL` unset).
 */
async function rawQuery<T = unknown>(
  query: string,
  params: Record<string, unknown> = {},
): Promise<T> {
  if (process.env.LOG_QUERY) {
    log('QUERY:\n', query);
    log('PARAMETERS:\n', params);
  }

  const client = await connect();

  // connect() leaves the client unset when ClickHouse is disabled; fail with a
  // clear message instead of a TypeError on `undefined.query`.
  if (!client) {
    throw new Error('ClickHouse is not configured: CLICKHOUSE_URL is not set.');
  }

  const resultSet = await client.query({
    query,
    query_params: params,
    format: 'JSONEachRow',
  });

  return (await resultSet.json()) as T;
}
/**
 * Inserts rows into a table using the `JSONEachRow` format.
 *
 * @param table - Target table name.
 * @param values - Rows to insert, one object per row.
 * @returns Whatever the client's `insert` call resolves to.
 * @throws Error if ClickHouse is not configured (`CLICKHOUSE_URL` unset).
 */
async function insert(table: string, values: any[]) {
  const client = await connect();

  // connect() leaves the client unset when ClickHouse is disabled; fail with a
  // clear message instead of a TypeError on `undefined.insert`.
  if (!client) {
    throw new Error('ClickHouse is not configured: CLICKHOUSE_URL is not set.');
  }

  return client.insert({ table, values, format: 'JSONEachRow' });
}
2024-01-14 10:21:39 +00:00
/**
 * Returns the single row of a result set.
 *
 * @param data - Rows returned by a query.
 * @returns The only row, or `null` when `data` is empty.
 * @throws Error when more than one row is present.
 */
async function findUnique(data: any[]) {
  if (data.length > 1) {
    // Throw a real Error (not a bare string) so callers get a stack trace and
    // `instanceof Error` checks work.
    throw new Error(`${data.length} records found when expecting 1.`);
  }

  return findFirst(data);
}
2022-08-26 05:20:30 +00:00
2024-01-14 10:21:39 +00:00
/**
 * Returns the first row of a result set.
 *
 * @param data - Rows returned by a query.
 * @returns The first element, or `null` when it is missing, `null` or `undefined`.
 */
async function findFirst(data: any[]) {
  return data[0] ?? null;
}
2022-08-26 05:43:22 +00:00
2022-10-06 22:00:16 +00:00
/**
 * Ensures the module-level ClickHouse client exists and returns it.
 *
 * The client is created at most once: on the first call made while ClickHouse
 * is enabled, it is taken from `global[CLICKHOUSE]` if present, otherwise
 * built by `getClient()`.
 *
 * @returns The client, or `undefined` when ClickHouse is not enabled.
 */
async function connect() {
  // `enabled` already reflects whether CLICKHOUSE_URL is set, so no second
  // environment check is needed here.
  if (enabled && !clickhouse) {
    clickhouse = global[CLICKHOUSE] || getClient();
  }

  return clickhouse;
}
2022-08-28 04:38:35 +00:00
2022-08-26 05:43:22 +00:00
/**
 * Public surface of the ClickHouse helper module.
 */
export default {
  enabled,
  // The client is created lazily by connect(), so a plain `client: clickhouse`
  // property would capture `undefined` at module load and never update. A
  // getter always reflects the current client.
  get client() {
    return clickhouse;
  },
  log,
  connect,
  getDateStringSQL,
  getDateSQL,
  getFilterQuery,
  getUTCString,
  parseFilters,
  pagedQuery,
  findUnique,
  findFirst,
  rawQuery,
  insert,
};