
Commit 87a92ba

fix(web): batch findMany queries fixes #360

1 parent fb2ef05 commit 87a92ba

11 files changed: +182 -36 lines changed

docs/docs/configuration/environment-variables.mdx

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@ The following environment variables allow you to configure your Sourcebot deploy
 | `DATA_DIR` | `/data` | <p>The directory within the container to store all persistent data. Typically, this directory will be volume mapped such that data is persisted across container restarts (e.g., `docker run -v $(pwd):/data`)</p> |
 | `DATABASE_DATA_DIR` | `$DATA_CACHE_DIR/db` | <p>The data directory for the default Postgres database.</p> |
 | `DATABASE_URL` | `postgresql://postgres@localhost:5432/sourcebot` | <p>Connection string of your Postgres database. By default, a Postgres database is automatically provisioned at startup within the container.</p><p>If you'd like to use a non-default schema, you can provide it as a parameter in the database url.</p> |
+| `DB_QUERY_BATCH_SIZE` | `500` | <p>The batch size for database queries to prevent memory issues with large datasets. This is a workaround for Prisma issue [#13864](https://github.com/prisma/prisma/issues/13864). Can also be configured via the `dbQueryBatchSize` setting in the configuration file. Valid range: 100-10000.</p> |
 | `EMAIL_FROM_ADDRESS` | `-` | <p>The email address that transactional emails will be sent from. See [this doc](/docs/configuration/transactional-emails) for more info.</p> |
 | `REDIS_DATA_DIR` | `$DATA_CACHE_DIR/redis` | <p>The data directory for the default Redis instance.</p> |
 | `REDIS_URL` | `redis://localhost:6379` | <p>Connection string of your Redis instance. By default, a Redis database is automatically provisioned at startup within the container.</p> |
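
To illustrate the documented 100-10000 range, here is a minimal sketch of how a caller might validate a raw `DB_QUERY_BATCH_SIZE` value before use; the `resolveBatchSize` helper is hypothetical and not part of this commit, which only documents the range.

// Hypothetical helper (not in this commit): clamp a raw env value to the
// documented 100-10000 range, falling back to the 500 default when unset.
function resolveBatchSize(raw: string | undefined): number {
    const parsed = raw === undefined ? NaN : Number.parseInt(raw, 10);
    if (Number.isNaN(parsed)) {
        return 500;
    }
    return Math.min(10000, Math.max(100, parsed));
}

// resolveBatchSize(undefined) === 500
// resolveBatchSize("50") === 100
// resolveBatchSize("2000") === 2000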

docs/snippets/schemas/v3/index.schema.mdx

Lines changed: 14 additions & 0 deletions
@@ -68,6 +68,13 @@
       "type": "boolean",
       "description": "[Sourcebot EE] When enabled, allows unauthenticated users to access Sourcebot. Requires an enterprise license with an unlimited number of seats.",
       "default": false
+    },
+    "dbQueryBatchSize": {
+      "type": "number",
+      "description": "The batch size for database queries to prevent memory issues with large datasets. This is a workaround for Prisma issue #13864. Defaults to 500.",
+      "minimum": 100,
+      "maximum": 10000,
+      "default": 500
     }
   },
   "additionalProperties": false
@@ -182,6 +189,13 @@
       "type": "boolean",
       "description": "[Sourcebot EE] When enabled, allows unauthenticated users to access Sourcebot. Requires an enterprise license with an unlimited number of seats.",
       "default": false
+    },
+    "dbQueryBatchSize": {
+      "type": "number",
+      "description": "The batch size for database queries to prevent memory issues with large datasets. This is a workaround for Prisma issue #13864. Defaults to 500.",
+      "minimum": 100,
+      "maximum": 10000,
+      "default": 500
     }
   },
   "additionalProperties": false

packages/backend/src/constants.ts

Lines changed: 1 addition & 0 deletions
@@ -16,4 +16,5 @@ export const DEFAULT_SETTINGS: Settings = {
     repoGarbageCollectionGracePeriodMs: 10 * 1000, // 10 seconds
     repoIndexTimeoutMs: 1000 * 60 * 60 * 2, // 2 hours
     enablePublicAccess: false,
+    dbQueryBatchSize: 500, // Default batch size for database queries
 }
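
The default above presumably gets layered under any `dbQueryBatchSize` value supplied via the configuration file. A minimal sketch of that kind of merge, assuming a simple spread over `DEFAULT_SETTINGS` (the actual settings-loading code and the import paths shown here are not part of this diff):

// Assumption: config-file settings arrive as a partial object merged over the defaults.
import { DEFAULT_SETTINGS } from "./constants";
import type { Settings } from "@sourcebot/schemas/v3/index.type"; // illustrative path

const resolveSettings = (fromConfig: Partial<Settings>): Settings => ({
    ...DEFAULT_SETTINGS,
    ...fromConfig,
});

// resolveSettings({}).dbQueryBatchSize === 500
// resolveSettings({ dbQueryBatchSize: 1000 }).dbQueryBatchSize === 1000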

packages/schemas/src/v3/index.schema.ts

Lines changed: 14 additions & 0 deletions
@@ -67,6 +67,13 @@ const schema = {
       "type": "boolean",
       "description": "[Sourcebot EE] When enabled, allows unauthenticated users to access Sourcebot. Requires an enterprise license with an unlimited number of seats.",
       "default": false
+    },
+    "dbQueryBatchSize": {
+      "type": "number",
+      "description": "The batch size for database queries to prevent memory issues with large datasets. This is a workaround for Prisma issue #13864. Defaults to 500.",
+      "minimum": 100,
+      "maximum": 10000,
+      "default": 500
     }
   },
   "additionalProperties": false
@@ -181,6 +188,13 @@ const schema = {
       "type": "boolean",
       "description": "[Sourcebot EE] When enabled, allows unauthenticated users to access Sourcebot. Requires an enterprise license with an unlimited number of seats.",
       "default": false
+    },
+    "dbQueryBatchSize": {
+      "type": "number",
+      "description": "The batch size for database queries to prevent memory issues with large datasets. This is a workaround for Prisma issue #13864. Defaults to 500.",
+      "minimum": 100,
+      "maximum": 10000,
+      "default": 500
     }
   },
   "additionalProperties": false

packages/schemas/src/v3/index.type.ts

Lines changed: 4 additions & 0 deletions
@@ -83,6 +83,10 @@ export interface Settings {
    * [Sourcebot EE] When enabled, allows unauthenticated users to access Sourcebot. Requires an enterprise license with an unlimited number of seats.
    */
   enablePublicAccess?: boolean;
+  /**
+   * The batch size for database queries to prevent memory issues with large datasets. This is a workaround for Prisma issue #13864. Defaults to 500.
+   */
+  dbQueryBatchSize?: number;
 }
 /**
  * Search context

packages/web/src/actions.ts

Lines changed: 37 additions & 19 deletions
@@ -662,28 +662,46 @@ export const getConnectionInfo = async (connectionId: number, domain: string) =>
 export const getRepos = async (domain: string, filter: { status?: RepoIndexingStatus[], connectionId?: number } = {}) => sew(() =>
     withAuth((userId) =>
         withOrgMembership(userId, domain, async ({ org }) => {
-            const repos = await prisma.repo.findMany({
-                where: {
-                    orgId: org.id,
-                    ...(filter.status ? {
-                        repoIndexingStatus: { in: filter.status }
-                    } : {}),
-                    ...(filter.connectionId ? {
-                        connections: {
-                            some: {
-                                connectionId: filter.connectionId
-                            }
-                        }
-                    } : {}),
-                },
-                include: {
+            // Use batched query to prevent memory issues with large datasets.
+            // The batch size is configurable via DB_QUERY_BATCH_SIZE environment variable.
+            const whereClause = {
+                orgId: org.id,
+                ...(filter.status ? {
+                    repoIndexingStatus: { in: filter.status }
+                } : {}),
+                ...(filter.connectionId ? {
                     connections: {
-                        include: {
-                            connection: true,
+                        some: {
+                            connectionId: filter.connectionId
                         }
                     }
-                }
-            });
+                } : {}),
+            };
+
+            // First get the total count
+            const totalCount = await prisma.repo.count({ where: whereClause });
+
+            const repos = [];
+            const batchSize = env.DB_QUERY_BATCH_SIZE;
+            const totalBatches = Math.ceil(totalCount / batchSize);
+
+            // Execute queries in batches
+            for (let i = 0; i < totalBatches; i++) {
+                const skip = i * batchSize;
+                const batchResults = await prisma.repo.findMany({
+                    where: whereClause,
+                    include: {
+                        connections: {
+                            include: {
+                                connection: true,
+                            }
+                        }
+                    },
+                    skip,
+                    take: batchSize,
+                });
+                repos.push(...batchResults);
+            }
 
             return repos.map((repo) => repositoryQuerySchema.parse({
                 codeHostType: repo.external_codeHostType,
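
The batching above uses a count() followed by skip/take offset pagination. As a sketch of an alternative (not what this commit does), Prisma also supports cursor pagination, which avoids the separate count and stays stable if rows are inserted or deleted between batches; the function name below is hypothetical and it assumes Repo has a numeric `id` primary key.

import { prisma } from "@/prisma";
import { env } from "@/env.mjs";

// Alternative sketch: cursor-based batching over the Repo `id` key.
export async function getReposByCursor(orgId: number) {
    const batchSize = env.DB_QUERY_BATCH_SIZE;
    const repos = [];
    let cursor: number | undefined = undefined;

    while (true) {
        const batch = await prisma.repo.findMany({
            where: { orgId },
            orderBy: { id: "asc" },
            take: batchSize,
            // Resume just after the last row of the previous batch.
            ...(cursor !== undefined ? { cursor: { id: cursor }, skip: 1 } : {}),
        });
        repos.push(...batch);
        if (batch.length < batchSize) {
            break;
        }
        cursor = batch[batch.length - 1].id;
    }

    return repos;
}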

packages/web/src/auth.ts

Lines changed: 1 addition & 1 deletion
@@ -141,7 +141,7 @@ export const { handlers, signIn, signOut, auth } = NextAuth({
     trustHost: true,
     events: {
         createUser: onCreateUser,
-        signIn: async ({ user, account }) => {
+        signIn: async ({ user, account: _account }) => {
             if (user.id) {
                 await auditService.createAudit({
                     action: "user.signed_in",

packages/web/src/env.mjs

Lines changed: 3 additions & 0 deletions
@@ -19,6 +19,9 @@ export const env = createEnv({
         TOTAL_MAX_MATCH_COUNT: numberSchema.default(100000),
         ZOEKT_MAX_WALL_TIME_MS: numberSchema.default(10000),
 
+        // Database Query Performance
+        DB_QUERY_BATCH_SIZE: numberSchema.default(500),
+
         // Auth
         AUTH_SECRET: z.string(),
         AUTH_URL: z.string().url(),
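
The `numberSchema` referenced above is not defined in this diff. A common zod pattern for this kind of env schema (an assumption, not necessarily the project's actual definition) coerces the string environment value into a number:

import { z } from "zod";

// Assumption: `numberSchema` coerces string env values to numbers.
const numberSchema = z.coerce.number();

// Mirrors the added line: a missing value falls back to 500, "1000" parses to 1000.
const batchSizeSchema = numberSchema.default(500);
console.log(batchSizeSchema.parse(undefined)); // 500
console.log(batchSizeSchema.parse("1000"));    // 1000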

packages/web/src/features/search/searchApi.ts

Lines changed: 13 additions & 16 deletions
@@ -12,6 +12,7 @@ import * as Sentry from "@sentry/nextjs";
 import { sew, withAuth, withOrgMembership } from "@/actions";
 import { base64Decode } from "@sourcebot/shared";
 import { getAuditService } from "@/ee/features/audit/factory";
+import { batchedFindReposByIds, batchedFindReposByNames } from "@/lib/repoBatchQueries";
 
 const auditService = getAuditService();
 
@@ -198,23 +199,19 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ
     const repoIdentifiers = new Set(Result.Files?.map((file) => file.RepositoryID ?? file.Repository) ?? []);
     const repos = new Map<string | number, Repo>();
 
-    (await prisma.repo.findMany({
-        where: {
-            id: {
-                in: Array.from(repoIdentifiers).filter((id) => typeof id === "number"),
-            },
-            orgId: org.id,
-        }
-    })).forEach(repo => repos.set(repo.id, repo));
+    // Batch query repos by ID to prevent memory issues with large datasets
+    const numericIds = Array.from(repoIdentifiers).filter((id) => typeof id === "number") as number[];
+    if (numericIds.length > 0) {
+        const reposByIds = await batchedFindReposByIds(numericIds, org.id);
+        reposByIds.forEach((repo) => repos.set(repo.id, repo));
+    }
 
-    (await prisma.repo.findMany({
-        where: {
-            name: {
-                in: Array.from(repoIdentifiers).filter((id) => typeof id === "string"),
-            },
-            orgId: org.id,
-        }
-    })).forEach(repo => repos.set(repo.name, repo));
+    // Batch query repos by name to prevent memory issues with large datasets
+    const stringNames = Array.from(repoIdentifiers).filter((id) => typeof id === "string") as string[];
+    if (stringNames.length > 0) {
+        const reposByNames = await batchedFindReposByNames(stringNames, org.id);
+        reposByNames.forEach((repo) => repos.set(repo.name, repo));
+    }
 
     const files = Result.Files?.map((file) => {
         const fileNameChunks = file.ChunkMatches.filter((chunk) => chunk.FileName);
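
The ID lookup and the name lookup above are independent of each other, so a possible refinement (a sketch only, not part of this commit) is to run them concurrently with Promise.all while keeping each lookup batched internally:

// Possible refinement: run the two batched lookups concurrently.
const [reposByIds, reposByNames] = await Promise.all([
    numericIds.length > 0 ? batchedFindReposByIds(numericIds, org.id) : Promise.resolve([]),
    stringNames.length > 0 ? batchedFindReposByNames(stringNames, org.id) : Promise.resolve([]),
]);
reposByIds.forEach((repo) => repos.set(repo.id, repo));
reposByNames.forEach((repo) => repos.set(repo.name, repo));
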
packages/web/src/lib/repoBatchQueries.ts

Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@
+/**
+ * Utility functions for batched Repo queries to handle large datasets efficiently
+ * and prevent memory issues like "Failed to convert rust String into napi string"
+ *
+ * This is a workaround for the Prisma issue: https://github.com/prisma/prisma/issues/13864
+ *
+ * The batch size can be configured via the DB_QUERY_BATCH_SIZE environment variable
+ * or the dbQueryBatchSize setting in the configuration file.
+ */
+
+import { Repo } from "@sourcebot/db";
+import { prisma } from "@/prisma";
+import { env } from "@/env.mjs";
+
+const DEFAULT_BATCH_SIZE = env.DB_QUERY_BATCH_SIZE;
+
+/**
+ * Fetches repos by IDs in batches to prevent memory issues
+ * @param ids - Array of repo IDs to fetch
+ * @param orgId - Organization ID to filter by
+ * @param batchSize - Size of each batch (default: 500)
+ * @returns Array of repos
+ */
+export async function batchedFindReposByIds(
+    ids: number[],
+    orgId: number,
+    batchSize: number = DEFAULT_BATCH_SIZE
+): Promise<Repo[]> {
+    if (ids.length === 0) {
+        return [];
+    }
+
+    const results: Repo[] = [];
+    const totalBatches = Math.ceil(ids.length / batchSize);
+
+    for (let i = 0; i < totalBatches; i++) {
+        const startIndex = i * batchSize;
+        const endIndex = Math.min(startIndex + batchSize, ids.length);
+        const batchIds = ids.slice(startIndex, endIndex);
+
+        const batchResults = await prisma.repo.findMany({
+            where: {
+                id: { in: batchIds },
+                orgId,
+            }
+        });
+        results.push(...batchResults);
+    }
+
+    return results;
+}
+
+/**
+ * Fetches repos by names in batches to prevent memory issues
+ * @param names - Array of repo names to fetch
+ * @param orgId - Organization ID to filter by
+ * @param batchSize - Size of each batch (default: 500)
+ * @returns Array of repos
+ */
+export async function batchedFindReposByNames(
+    names: string[],
+    orgId: number,
+    batchSize: number = DEFAULT_BATCH_SIZE
+): Promise<Repo[]> {
+    if (names.length === 0) {
+        return [];
+    }
+
+    const results: Repo[] = [];
+    const totalBatches = Math.ceil(names.length / batchSize);
+
+    for (let i = 0; i < totalBatches; i++) {
+        const startIndex = i * batchSize;
+        const endIndex = Math.min(startIndex + batchSize, names.length);
+        const batchNames = names.slice(startIndex, endIndex);
+
+        const batchResults = await prisma.repo.findMany({
+            where: {
+                name: { in: batchNames },
+                orgId,
+            }
+        });
+        results.push(...batchResults);
+    }
+
+    return results;
+}
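
The two exported helpers differ only in the field they filter on. As a sketch of how the duplication could be factored out (not part of this commit; `findInBatches` is a hypothetical helper reusing the file's DEFAULT_BATCH_SIZE), a generic chunked fetcher could take a per-batch query callback:

// Sketch: generic chunked fetcher that both exported functions could delegate to.
async function findInBatches<T, R>(
    values: T[],
    fetchBatch: (batch: T[]) => Promise<R[]>,
    batchSize: number = DEFAULT_BATCH_SIZE
): Promise<R[]> {
    const results: R[] = [];
    for (let i = 0; i < values.length; i += batchSize) {
        const batch = values.slice(i, i + batchSize);
        results.push(...await fetchBatch(batch));
    }
    return results;
}

// Usage, mirroring batchedFindReposByIds:
// const repos = await findInBatches(ids, (batch) =>
//     prisma.repo.findMany({ where: { id: { in: batch }, orgId } }));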
