diff --git a/javascript/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md b/javascript/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md new file mode 100644 index 000000000000..22f06a998b71 --- /dev/null +++ b/javascript/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The regular expressions in `SensitiveDataHeuristics.qll` have been extended to find more instances of sensitive data such as secrets used in authentication, finance and health information, and device data. The heuristics have also been refined to find fewer false positive matches. This will improve results for queries related to sensitive information. diff --git a/python/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md b/python/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md new file mode 100644 index 000000000000..22f06a998b71 --- /dev/null +++ b/python/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The regular expressions in `SensitiveDataHeuristics.qll` have been extended to find more instances of sensitive data such as secrets used in authentication, finance and health information, and device data. The heuristics have also been refined to find fewer false positive matches. This will improve results for queries related to sensitive information. diff --git a/ruby/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md b/ruby/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md new file mode 100644 index 000000000000..22f06a998b71 --- /dev/null +++ b/ruby/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The regular expressions in `SensitiveDataHeuristics.qll` have been extended to find more instances of sensitive data such as secrets used in authentication, finance and health information, and device data. The heuristics have also been refined to find fewer false positive matches. This will improve results for queries related to sensitive information. diff --git a/rust/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md b/rust/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md new file mode 100644 index 000000000000..22f06a998b71 --- /dev/null +++ b/rust/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The regular expressions in `SensitiveDataHeuristics.qll` have been extended to find more instances of sensitive data such as secrets used in authentication, finance and health information, and device data. The heuristics have also been refined to find fewer false positive matches. This will improve results for queries related to sensitive information. diff --git a/rust/ql/test/library-tests/sensitivedata/CONSISTENCY/PathResolutionConsistency.expected b/rust/ql/test/library-tests/sensitivedata/CONSISTENCY/PathResolutionConsistency.expected index 5222ecb5ad29..231252364953 100644 --- a/rust/ql/test/library-tests/sensitivedata/CONSISTENCY/PathResolutionConsistency.expected +++ b/rust/ql/test/library-tests/sensitivedata/CONSISTENCY/PathResolutionConsistency.expected @@ -1,29 +1,29 @@ multipleCallTargets -| test.rs:55:7:55:26 | ... .as_str() | -| test.rs:56:7:56:21 | ... .as_str() | -| test.rs:72:7:72:26 | ... .as_str() | -| test.rs:73:7:73:36 | ... .as_str() | -| test.rs:74:7:74:34 | ... .as_str() | -| test.rs:75:7:75:27 | ... .as_str() | -| test.rs:258:7:258:36 | ... .as_str() | -| test.rs:260:7:260:33 | ... .as_str() | -| test.rs:261:7:261:36 | ... .as_str() | -| test.rs:262:7:262:26 | ... .as_str() | -| test.rs:266:7:266:28 | ... .as_str() | -| test.rs:267:7:267:37 | ... .as_str() | -| test.rs:268:7:268:36 | ... .as_str() | -| test.rs:271:7:271:32 | ... .as_str() | -| test.rs:281:7:281:34 | ... .as_str() | -| test.rs:284:7:284:36 | ... .as_str() | -| test.rs:288:7:288:39 | ... .as_str() | -| test.rs:295:7:295:53 | ... .as_str() | -| test.rs:296:7:296:45 | ... .as_str() | -| test.rs:298:7:298:39 | ... .as_str() | -| test.rs:299:7:299:34 | ... .as_str() | -| test.rs:300:7:300:42 | ... .as_str() | -| test.rs:302:7:302:48 | ... .as_str() | -| test.rs:303:7:303:35 | ... .as_str() | -| test.rs:304:7:304:35 | ... .as_str() | -| test.rs:313:8:313:19 | num.as_str() | -| test.rs:324:8:324:19 | num.as_str() | -| test.rs:343:7:343:39 | ... .as_str() | +| test.rs:56:7:56:26 | ... .as_str() | +| test.rs:57:7:57:21 | ... .as_str() | +| test.rs:73:7:73:26 | ... .as_str() | +| test.rs:74:7:74:36 | ... .as_str() | +| test.rs:75:7:75:34 | ... .as_str() | +| test.rs:76:7:76:27 | ... .as_str() | +| test.rs:262:7:262:36 | ... .as_str() | +| test.rs:264:7:264:33 | ... .as_str() | +| test.rs:265:7:265:36 | ... .as_str() | +| test.rs:266:7:266:26 | ... .as_str() | +| test.rs:270:7:270:28 | ... .as_str() | +| test.rs:271:7:271:37 | ... .as_str() | +| test.rs:272:7:272:36 | ... .as_str() | +| test.rs:275:7:275:32 | ... .as_str() | +| test.rs:285:7:285:34 | ... .as_str() | +| test.rs:288:7:288:36 | ... .as_str() | +| test.rs:292:7:292:39 | ... .as_str() | +| test.rs:299:7:299:53 | ... .as_str() | +| test.rs:300:7:300:45 | ... .as_str() | +| test.rs:302:7:302:39 | ... .as_str() | +| test.rs:303:7:303:34 | ... .as_str() | +| test.rs:304:7:304:42 | ... .as_str() | +| test.rs:306:7:306:48 | ... .as_str() | +| test.rs:307:7:307:35 | ... .as_str() | +| test.rs:308:7:308:35 | ... .as_str() | +| test.rs:317:8:317:19 | num.as_str() | +| test.rs:328:8:328:19 | num.as_str() | +| test.rs:347:7:347:39 | ... .as_str() | diff --git a/rust/ql/test/library-tests/sensitivedata/test.rs b/rust/ql/test/library-tests/sensitivedata/test.rs index ad13fee59f15..0f4965ce2856 100644 --- a/rust/ql/test/library-tests/sensitivedata/test.rs +++ b/rust/ql/test/library-tests/sensitivedata/test.rs @@ -23,7 +23,7 @@ impl MyStruct { fn get_password() -> String { get_string() } fn test_passwords( - password: &str, pass_word: &str, passwd: &str, my_password: &str, password_str: &str, + password: &str, pass_word: &str, passwd: &str, my_password: &str, password_str: &str, password_confirmation: &str, pass_phrase: &str, passphrase: &str, passPhrase: &str, backup_code: &str, auth_key: &str, authkey: &str, authKey: &str, authentication_key: &str, authenticationkey: &str, authenticationKey: &str, oauth: &str, one_time_code: &str, @@ -37,6 +37,7 @@ fn test_passwords( sink(passwd); // $ sensitive=password sink(my_password); // $ sensitive=password sink(password_str); // $ sensitive=password + sink(password_confirmation); // $ sensitive=password sink(pass_phrase); // $ sensitive=password sink(passphrase); // $ sensitive=password sink(passPhrase); // $ sensitive=password @@ -48,12 +49,12 @@ fn test_passwords( sink(authentication_key); // $ sensitive=password sink(authenticationkey); // $ sensitive=password sink(authenticationKey); // $ sensitive=password - sink(oauth); // $ MISSING: sensitive=password + sink(oauth); // $ sensitive=password sink(one_time_code); // $ MISSING: sensitive=password sink(ms); // $ MISSING: sensitive=password sink(ms.password.as_str()); // $ sensitive=password - sink(ms.mfa.as_str()); // $ MISSING: sensitive=password + sink(ms.mfa.as_str()); // $ sensitive=password sink(get_password()); // $ sensitive=password let password2 = get_string(); @@ -67,10 +68,10 @@ fn test_passwords( sink(harmless); sink(encrypted_password); sink(password_hash); - sink(passwordFile); // $ SPURIOUS: sensitive=password + sink(passwordFile); sink(ms.harmless.as_str()); - sink(ms.password_file_path.as_str()); // $ SPURIOUS: sensitive=password + sink(ms.password_file_path.as_str()); sink(ms.password_enabled.as_str()); // $ SPURIOUS: sensitive=password sink(ms.numfailed.as_str()); @@ -127,11 +128,11 @@ fn test_credentials( sink(hashkey); sink(hash_key); - sink(sessionkeypath); // $ SPURIOUS: sensitive=id - sink(account_key_path); // $ SPURIOUS: sensitive=id + sink(sessionkeypath); + sink(account_key_path); - sink(ms.get_certificate_url()); // $ SPURIOUS: sensitive=certificate - sink(ms.get_certificate_file()); // $ SPURIOUS: sensitive=certificate + sink(ms.get_certificate_url()); + sink(ms.get_certificate_file()); sink(get_public_key()); sink(get_next_token()); @@ -160,16 +161,19 @@ impl DeviceInfo { fn test_device_info(&self, other: &DeviceInfo) { // private device info - sink(&self.api_key); // $ MISSING: sensitive=id - sink(&other.api_key); // $ MISSING: sensitive=id - sink(&self.deviceApiToken); // $ MISSING: sensitive=id - sink(&self.finger_print); // $ MISSING: sensitive=id - sink(&self.ip_address); // $ MISSING: sensitive=id - sink(self.macaddr12); // $ MISSING: sensitive=id - sink(&self.mac_addr); // $ MISSING: sensitive=id - sink(self.mac_addr.values); // $ MISSING: sensitive=id - sink(self.mac_addr.values[0]); // $ MISSING: sensitive=id - sink(&self.networkMacAddress); // $ MISSING: sensitive=id + sink(&self.api_key); // $ sensitive=password + sink(&other.api_key); // $ sensitive=password + sink(&self.deviceApiToken); // $ sensitive=password + sink(self.macaddr12); // $ sensitive=private + sink(&self.mac_addr); // $ sensitive=private + sink(self.mac_addr.values); // $ sensitive=private + sink(self.mac_addr.values[0]); // $ sensitive=private + sink(&self.networkMacAddress); // $ sensitive=private + + // dubious (may or may not be private device info, depending on context) + + sink(&self.finger_print); + sink(&self.ip_address); // not private device info @@ -267,26 +271,26 @@ fn test_private_info( sink(info.emergency_contact.as_str()); // $ sensitive=private sink(info.name_of_employer.as_str()); // $ sensitive=private - sink(&info.gender); // $ MISSING: sensitive=private - sink(info.genderString.as_str()); // $ MISSING: sensitive=private + sink(&info.gender); // $ sensitive=private + sink(info.genderString.as_str()); // $ sensitive=private let sex = "Male"; let gender = Gender::Female; let a = Gender::Female; - sink(sex); // $ MISSING: sensitive=private - sink(gender); // $ MISSING: sensitive=private + sink(sex); // $ sensitive=private + sink(gender); // $ sensitive=private sink(a); // $ MISSING: sensitive=private - sink(info.patient_id); // $ MISSING: sensitive=private - sink(info.linkedPatientId); // $ MISSING: sensitive=private - sink(info.patient_record.as_str()); // $ MISSING: sensitive=private - sink(info.patient_record.trim()); // $ MISSING: sensitive=private + sink(info.patient_id); // $ sensitive=private + sink(info.linkedPatientId); // $ sensitive=private + sink(info.patient_record.as_str()); // $ sensitive=private + sink(info.patient_record.trim()); // $ sensitive=private sink(&info.medical_notes); // $ sensitive=private sink(info.medical_notes[0].as_str()); // $ sensitive=private for n in info.medical_notes.iter() { sink(n.as_str()); // $ MISSING: sensitive=private } - sink(info.confidentialMessage.as_str()); // $ MISSING: sensitive=private - sink(info.confidentialMessage.to_lowercase()); // $ MISSING: sensitive=private + sink(info.confidentialMessage.as_str()); // $ sensitive=secret + sink(info.confidentialMessage.to_lowercase()); // $ sensitive=secret sink(info.latitude); // $ sensitive=private let x = info.longitude.unwrap(); @@ -296,12 +300,12 @@ fn test_private_info( sink(info.financials.credit_card_no.as_str()); // $ sensitive=private sink(info.financials.credit_rating); // $ sensitive=private sink(info.financials.user_ccn.as_str()); // $ sensitive=private - sink(info.financials.cvv.as_str()); // $ MISSING: sensitive=private - sink(info.financials.beneficiary.as_str()); // $ MISSING: sensitive=private - sink(info.financials.routing_number); // $ MISSING: sensitive=private - sink(info.financials.routingNumberText.as_str()); // $ MISSING: sensitive=private - sink(info.financials.iban.as_str()); // $ MISSING: sensitive=private - sink(info.financials.iBAN.as_str()); // $ MISSING: sensitive=private + sink(info.financials.cvv.as_str()); // $ sensitive=private + sink(info.financials.beneficiary.as_str()); // $ sensitive=private + sink(info.financials.routing_number); // $ sensitive=private + sink(info.financials.routingNumberText.as_str()); // $ sensitive=private + sink(info.financials.iban.as_str()); // $ sensitive=private + sink(info.financials.iBAN.as_str()); // $ sensitive=private sink(ContactDetails::HomePhoneNumber("123".to_string())); // $ sensitive=private sink(ContactDetails::MobileNumber("123".to_string())); // $ sensitive=private @@ -343,8 +347,8 @@ fn test_private_info( sink(info.financials.harmless.as_str()); sink(info.financials.num_accounts); // $ SPURIOUS: sensitive=id sink(info.financials.total_accounts); // $ SPURIOUS: sensitive=id - sink(info.financials.accounting); // $ SPURIOUS: sensitive=id - sink(info.financials.unaccounted); // $ SPURIOUS: sensitive=id + sink(info.financials.accounting); + sink(info.financials.unaccounted); sink(info.financials.multiband); sink(ContactDetails::FavouriteColor("blue".to_string())); @@ -362,5 +366,5 @@ impl MyArray { fn test_iterator() { let iter = std::iter::repeat(1).take(10); - sink(MyArray::from_trusted_iterator(iter)); // $ SPURIOUS: sensitive=secret + sink(MyArray::from_trusted_iterator(iter)); } diff --git a/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll b/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll index c50d1341c778..4271784577f0 100644 --- a/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll +++ b/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll @@ -56,15 +56,16 @@ module HeuristicNames { * Gets a regular expression that identifies strings that may indicate the presence of secret * or trusted data. */ - string maybeSecret() { result = "(?is).*((?