Skip to content

Commit 5988089

Browse files
authored
Merge pull request #8743 from Turbo87/dump-db-entry-order
dump_db: Sort non-data paths before appending to the tarball
2 parents 33c563f + e2032d9 commit 5988089

File tree

2 files changed

+86
-5
lines changed

2 files changed

+86
-5
lines changed

src/tests/dump_db.rs

Lines changed: 77 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,88 @@
1-
use crates_io::worker::jobs::dump_db;
1+
use crate::builders::CrateBuilder;
2+
use crate::util::TestApp;
3+
use bytes::Buf;
4+
use crates_io::worker::jobs::{dump_db, DumpDb};
25
use crates_io_test_db::TestDatabase;
3-
use insta::assert_snapshot;
6+
use crates_io_worker::BackgroundJob;
7+
use flate2::read::GzDecoder;
8+
use insta::{assert_debug_snapshot, assert_snapshot};
49
use once_cell::sync::Lazy;
10+
use regex::Regex;
11+
use secrecy::ExposeSecret;
12+
use std::io::Read;
513
use std::sync::Mutex;
14+
use tar::Archive;
615

716
/// Mutex to ensure that only one test is dumping the database at a time, since
817
/// the dump directory is shared between all invocations of the background job.
918
static DUMP_DIR_MUTEX: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));
1019

20+
static PATH_DATE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d{4}-\d{2}-\d{2}-\d{6}").unwrap());
21+
22+
#[tokio::test(flavor = "multi_thread")]
23+
async fn test_dump_db_job() {
24+
let _guard = DUMP_DIR_MUTEX.lock();
25+
26+
let (app, _, _, token) = TestApp::full().with_token();
27+
28+
app.db(|conn| {
29+
CrateBuilder::new("test-crate", token.as_model().user_id).expect_build(conn);
30+
31+
let database_url = app.as_inner().config.db.primary.url.expose_secret();
32+
DumpDb::new(database_url).enqueue(conn).unwrap();
33+
});
34+
35+
app.run_pending_background_jobs().await;
36+
37+
let stored_files = app.stored_files().await;
38+
assert_eq!(stored_files.len(), 1);
39+
assert_eq!(stored_files[0], "db-dump.tar.gz");
40+
41+
let path = object_store::path::Path::parse("db-dump.tar.gz").unwrap();
42+
let result = app.as_inner().storage.as_inner().get(&path).await.unwrap();
43+
let bytes = result.bytes().await.unwrap();
44+
45+
let gz = GzDecoder::new(bytes.reader());
46+
let mut tar = Archive::new(gz);
47+
48+
let paths = tar_paths(&mut tar);
49+
assert_debug_snapshot!(paths, @r###"
50+
[
51+
"YYYY-MM-DD-HHMMSS",
52+
"YYYY-MM-DD-HHMMSS/README.md",
53+
"YYYY-MM-DD-HHMMSS/export.sql",
54+
"YYYY-MM-DD-HHMMSS/import.sql",
55+
"YYYY-MM-DD-HHMMSS/metadata.json",
56+
"YYYY-MM-DD-HHMMSS/schema.sql",
57+
"YYYY-MM-DD-HHMMSS/data",
58+
"YYYY-MM-DD-HHMMSS/data/categories.csv",
59+
"YYYY-MM-DD-HHMMSS/data/crate_downloads.csv",
60+
"YYYY-MM-DD-HHMMSS/data/crates.csv",
61+
"YYYY-MM-DD-HHMMSS/data/keywords.csv",
62+
"YYYY-MM-DD-HHMMSS/data/metadata.csv",
63+
"YYYY-MM-DD-HHMMSS/data/reserved_crate_names.csv",
64+
"YYYY-MM-DD-HHMMSS/data/teams.csv",
65+
"YYYY-MM-DD-HHMMSS/data/users.csv",
66+
"YYYY-MM-DD-HHMMSS/data/crates_categories.csv",
67+
"YYYY-MM-DD-HHMMSS/data/crates_keywords.csv",
68+
"YYYY-MM-DD-HHMMSS/data/crate_owners.csv",
69+
"YYYY-MM-DD-HHMMSS/data/versions.csv",
70+
"YYYY-MM-DD-HHMMSS/data/default_versions.csv",
71+
"YYYY-MM-DD-HHMMSS/data/dependencies.csv",
72+
"YYYY-MM-DD-HHMMSS/data/version_downloads.csv",
73+
]
74+
"###);
75+
}
76+
77+
fn tar_paths<R: Read>(archive: &mut Archive<R>) -> Vec<String> {
78+
archive
79+
.entries()
80+
.unwrap()
81+
.map(|entry| entry.unwrap().path().unwrap().display().to_string())
82+
.map(|path| PATH_DATE_RE.replace(&path, "YYYY-MM-DD-HHMMSS").to_string())
83+
.collect()
84+
}
85+
1186
#[test]
1287
fn dump_db_and_reimport_dump() {
1388
let _guard = DUMP_DIR_MUTEX.lock();

src/worker/jobs/dump_db.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -237,15 +237,21 @@ impl DumpTarball {
237237
archive.append_dir(&tar_top_dir, export_dir)?;
238238

239239
// Append readme, metadata, schemas.
240+
let mut paths = Vec::new();
240241
for entry in fs::read_dir(export_dir)? {
241242
let entry = entry?;
242243
let file_type = entry.file_type()?;
243244
if file_type.is_file() {
244-
let name_in_tar = tar_top_dir.join(entry.file_name());
245-
debug!(name = ?name_in_tar, "Appending file to tarball");
246-
archive.append_path_with_name(entry.path(), name_in_tar)?;
245+
paths.push((entry.path(), entry.file_name()));
247246
}
248247
}
248+
// Sort paths to make the tarball deterministic.
249+
paths.sort();
250+
for (path, file_name) in paths {
251+
let name_in_tar = tar_top_dir.join(file_name);
252+
debug!(name = ?name_in_tar, "Appending file to tarball");
253+
archive.append_path_with_name(path, name_in_tar)?;
254+
}
249255

250256
// Append topologically sorted tables to make it possible to pipeline
251257
// importing with gz extraction.

0 commit comments

Comments (0)