// umami/src/queries/analytics/reports/getJourney.ts
// (TypeScript, 220 lines, 5.2 KiB)

import clickhouse from 'lib/clickhouse';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import prisma from 'lib/prisma';
2024-06-01 18:45:06 +00:00
/**
 * Raw row shape read from the journey SQL queries in this file.
 *
 * Each `eN` column holds the N-th event of a sequence and `count` is the
 * number of times that exact sequence occurred. `parseResult` destructures
 * these columns (`e1`..`e7` and `count`) from every row.
 *
 * NOTE(review): the SQL uses `ELSE NULL` for missing steps and may select
 * fewer than seven `eN` columns, so values can be null/absent at runtime even
 * though the fields are typed as `string` — confirm before relying on them.
 */
interface JourneyResult {
  e1: string;
  e2: string;
  e3: string;
  e4: string;
  e5: string;
  e6: string;
  e7: string;
  // Aggregated with `count(*)` in SQL; typed as a string here.
  count: string;
}
2024-05-17 08:42:36 +00:00
/**
 * Entry point for the journey report.
 *
 * Hands `runQuery` one callback per supported backend key (`PRISMA` and
 * `CLICKHOUSE`); each callback forwards the received arguments unchanged to
 * the matching implementation in this file.
 * NOTE(review): `runQuery` presumably invokes only the callback for the
 * configured database — confirm in `lib/db`.
 *
 * @param args - Tuple of `websiteId` and the report `filters` (date range,
 *   number of steps, optional start/end step values).
 * @returns Whatever `runQuery` resolves to for the selected backend.
 */
export async function getJourney(
  ...args: [
    websiteId: string,
    filters: {
      startDate: Date;
      endDate: Date;
      steps: number;
      startStep?: string;
      endStep?: string;
    },
  ]
) {
  return runQuery({
    [PRISMA]: () => relationalQuery(...args),
    [CLICKHOUSE]: () => clickhouseQuery(...args),
  });
}
/**
 * Relational (Prisma) implementation of the journey report.
 *
 * Numbers each session's events by `created_at` (skipping rows whose
 * `referrer_path` equals `url_path`), pivots the first `steps` events of every
 * session into columns `e1..eN`, and returns the 100 most frequent sequences,
 * optionally restricted by first and/or last step.
 *
 * @param websiteId - Website whose events are analysed (bound as a uuid parameter).
 * @param filters - Date range plus optional `steps` (defaults to 5, the number
 *   of columns this query historically produced), `startStep` and `endStep`.
 * @returns The query rows after being passed through `parseResult`.
 * @throws Error when `filters.steps` is not a positive integer.
 */
async function relationalQuery(
  websiteId: string,
  filters: {
    startDate: Date;
    endDate: Date;
    steps?: number;
    startStep?: string;
    endStep?: string;
  },
): Promise<JourneyResult[]> {
  const { startDate, endDate, steps = 5, startStep, endStep } = filters;
  const { rawQuery } = prisma;

  /**
   * Builds the dynamic SQL fragments (pivot columns and step filters) together
   * with the extra bound parameters they reference.
   */
  function getJourneyQuery(
    steps: number,
    startStep?: string,
    endStep?: string,
  ): {
    selectQuery: string;
    maxQuery: string;
    startStepQuery: string;
    endStepQuery: string;
    params: { [key: string]: string };
  } {
    // Only validated integer indices are ever interpolated into the SQL text;
    // the step values themselves are passed as bound parameters.
    const stepCount = Number(steps);

    if (!Number.isInteger(stepCount) || stepCount < 1) {
      throw new Error(`Invalid journey steps: ${String(steps)}`);
    }

    const params: { [key: string]: string } = {};
    const stepNumbers = Array.from({ length: stepCount }, (_, index) => index + 1);

    const selectQuery = stepNumbers.map(i => `s.e${i}`).join(',\n        ');
    const maxQuery = stepNumbers
      .map(i => `MAX(CASE WHEN event_number = ${i} THEN event ELSE NULL END) AS e${i}`)
      .join(',\n            ');

    let startStepQuery = '';
    let endStepQuery = '';

    if (startStep) {
      startStepQuery = `and e1 = {{startStep}}`;
      params['startStep'] = startStep;
    }

    if (endStep) {
      // A sequence ends with `endStep` when that value sits in the last
      // populated column: either the next column is null or it is column N.
      const conditions = stepNumbers.map(i =>
        i < stepCount
          ? `(e${i} = {{endStep}} and e${i + 1} is null)`
          : `(e${i} = {{endStep}})`,
      );

      endStepQuery = `and (${conditions.join('\nor ')})`;
      params['endStep'] = endStep;
    }

    return { selectQuery, maxQuery, startStepQuery, endStepQuery, params };
  }

  const { selectQuery, maxQuery, startStepQuery, endStepQuery, params } = getJourneyQuery(
    steps,
    startStep,
    endStep,
  );

  const rows = await rawQuery(
    `
    WITH events AS (
      select distinct
          session_id,
          referrer_path,
          COALESCE(event_name, url_path) event,
          ROW_NUMBER() OVER (PARTITION BY session_id ORDER BY created_at) AS event_number
      from website_event
      where website_id = {{websiteId::uuid}}
        and created_at between {{startDate}} and {{endDate}}
        and referrer_path != url_path),
    sequences as (
      select ${selectQuery},
        count(*) count
      FROM (
        select session_id,
            ${maxQuery}
        FROM events
        group by session_id) s
      group by ${selectQuery})
    select *
    from sequences
    where 1 = 1
    ${startStepQuery}
    ${endStepQuery}
    order by count desc
    limit 100
    `,
    {
      websiteId,
      startDate,
      endDate,
      ...params,
    },
  );

  return parseResult(rows);
}
/**
 * ClickHouse implementation of the journey report.
 *
 * Numbers each visit's events by `created_at`, pivots the first `steps` events
 * of every visit into columns `e1..eN`, and returns the 100 most frequent
 * sequences, optionally restricted by first and/or last step.
 *
 * @param websiteId - Website whose events are analysed (bound as a UUID parameter).
 * @param filters - Date range, number of steps per sequence, and optional
 *   `startStep` / `endStep` values.
 * @returns The query rows after being passed through `parseResult`.
 * @throws Error when `filters.steps` is not a positive integer.
 */
async function clickhouseQuery(
  websiteId: string,
  filters: {
    startDate: Date;
    endDate: Date;
    steps: number;
    startStep?: string;
    endStep?: string;
  },
): Promise<JourneyResult[]> {
  const { startDate, endDate, steps, startStep, endStep } = filters;
  const { rawQuery } = clickhouse;

  /**
   * Builds the `sequences` CTE and the optional start/end step filters,
   * together with the extra bound parameters those filters reference.
   */
  function getJourneyQuery(
    steps: number,
    startStep?: string,
    endStep?: string,
  ): {
    sequenceQuery: string;
    startStepQuery: string;
    endStepQuery: string;
    params: { [key: string]: string };
  } {
    // Only validated integer indices are ever interpolated into the SQL text;
    // the step values themselves are passed as bound parameters.
    const stepCount = Number(steps);

    if (!Number.isInteger(stepCount) || stepCount < 1) {
      throw new Error(`Invalid journey steps: ${String(steps)}`);
    }

    const params: { [key: string]: string } = {};
    const stepNumbers = Array.from({ length: stepCount }, (_, index) => index + 1);

    // create sequence query
    const selectQuery = stepNumbers.map(i => `s.e${i},`).join('');
    const maxQuery = stepNumbers
      .map(i => `\nmax(CASE WHEN event_number = ${i} THEN event ELSE NULL END) AS e${i}`)
      .join(',');
    const groupByQuery = stepNumbers.map(i => `s.e${i}`).join(', ');

    const sequenceQuery = `\nsequences as (
      select ${selectQuery}
      count(*) count
    FROM (
      select visit_id,
          ${maxQuery}
      FROM events
      group by visit_id) s
    group by ${groupByQuery})
    `;

    let startStepQuery = '';
    let endStepQuery = '';

    // create start Step params query
    if (startStep) {
      startStepQuery = `and e1 = {startStep:String}`;
      params['startStep'] = startStep;
    }

    // create end Step params query
    if (endStep) {
      // A sequence ends with `endStep` when that value sits in the last
      // populated column: either the next column is null or it is column N.
      // Joining the conditions keeps the parentheses balanced for any N >= 1.
      const conditions = stepNumbers.map(i =>
        i < stepCount
          ? `(e${i} = {endStep:String} and e${i + 1} is null)`
          : `(e${i} = {endStep:String})`,
      );

      endStepQuery = `and (${conditions.join('\nor ')})`;
      params['endStep'] = endStep;
    }

    return {
      sequenceQuery,
      startStepQuery,
      endStepQuery,
      params,
    };
  }

  const { sequenceQuery, startStepQuery, endStepQuery, params } = getJourneyQuery(
    steps,
    startStep,
    endStep,
  );

  const rows = await rawQuery(
    `
    WITH events AS (
      select distinct
          visit_id,
          referrer_path,
          coalesce(nullIf(event_name, ''), url_path) event,
          row_number() OVER (PARTITION BY visit_id ORDER BY created_at) AS event_number
      from umami.website_event
      where website_id = {websiteId:UUID}
        and created_at between {startDate:DateTime64} and {endDate:DateTime64}),
    ${sequenceQuery}
    select *
    from sequences
    where 1 = 1
    ${startStepQuery}
    ${endStepQuery}
    order by count desc
    limit 100
    `,
    {
      websiteId,
      startDate,
      endDate,
      ...params,
    },
  );

  return parseResult(rows);
}
2024-06-04 06:40:38 +00:00
/**
 * Collapses runs of adjacent, strictly-equal entries into a single entry.
 *
 * Non-adjacent repeats are kept (`a, b, a` stays as is). An empty input is
 * returned as-is; otherwise a new array is built and the input is left
 * untouched.
 */
function combineSequentialDuplicates(array: any) {
  if (array.length === 0) return array;

  let previous = array[0];
  const collapsed = [previous];

  for (let index = 1; index < array.length; index += 1) {
    const current = array[index];

    // Compare against the immediately preceding entry only.
    if (current !== previous) {
      collapsed.push(current);
    }

    previous = current;
  }

  return collapsed;
}
2024-06-01 18:45:06 +00:00
/**
 * Converts raw journey rows into `{ items, count }` objects.
 *
 * For every row the columns `e1`..`e7` are gathered in order and passed
 * through `combineSequentialDuplicates`, so adjacent identical entries
 * (including adjacent nulls) collapse into one. Columns missing from a row are
 * read as `undefined`. `count` is passed through unchanged.
 *
 * @param data - Rows returned by the journey query; must support `.map`.
 * @returns One `{ items, count }` object per input row.
 */
function parseResult(data: any) {
  return data.map(({ e1, e2, e3, e4, e5, e6, e7, count }) => ({
    items: combineSequentialDuplicates([e1, e2, e3, e4, e5, e6, e7]),
    count,
  }));
}