From 748da1aa5fd670495fbc5a6c656b7f505691113d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Wed, 4 Jun 2025 18:00:17 +0200 Subject: [PATCH 1/3] Remove lifetime from `CommandParseError` It would not be compatible with Markdown parsing --- src/bors/command/parser.rs | 98 ++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 42 deletions(-) diff --git a/src/bors/command/parser.rs b/src/bors/command/parser.rs index 6d7788a7..9004c50e 100644 --- a/src/bors/command/parser.rs +++ b/src/bors/command/parser.rs @@ -10,12 +10,12 @@ use crate::github::CommitSha; use super::{Priority, RollupMode}; #[derive(Debug, PartialEq)] -pub enum CommandParseError<'a> { +pub enum CommandParseError { MissingCommand, - UnknownCommand(&'a str), - MissingArgValue { arg: &'a str }, - UnknownArg(&'a str), - DuplicateArg(&'a str), + UnknownCommand(String), + MissingArgValue { arg: String }, + UnknownArg(String), + DuplicateArg(String), ValidationError(String), } @@ -64,7 +64,7 @@ impl CommandParser { type ParseResult<'a, T = BorsCommand> = Option>>; // The order of the parsers in the vector is important -const PARSERS: &[for<'b> fn(&CommandPart<'b>, &[CommandPart<'b>]) -> ParseResult<'b>] = &[ +const PARSERS: &[fn(&CommandPart<'_>, &[CommandPart<'_>]) -> ParseResult] = &[ parser_approval, parser_unapprove, parser_rollup, @@ -93,7 +93,7 @@ fn parse_command(input: &str) -> ParseResult { CommandPart::Bare(c) => c, CommandPart::KeyValue { key, .. } => key, }; - Some(Err(CommandParseError::UnknownCommand(unknown))) + Some(Err(CommandParseError::UnknownCommand(unknown.to_string()))) } }, Err(error) => Some(Err(error)), @@ -113,10 +113,12 @@ fn parse_parts(input: &str) -> Result, CommandParseError> { match item.split_once('=') { Some((key, value)) => { if value.is_empty() { - return Err(CommandParseError::MissingArgValue { arg: key }); + return Err(CommandParseError::MissingArgValue { + arg: key.to_string(), + }); } if seen_keys.contains(key) { - return Err(CommandParseError::DuplicateArg(key)); + return Err(CommandParseError::DuplicateArg(key.to_string())); } seen_keys.insert(key); parts.push(CommandPart::KeyValue { key, value }); @@ -130,12 +132,14 @@ fn parse_parts(input: &str) -> Result, CommandParseError> { /// Parses: /// - "@bors r+ [p=] [rollup=]" /// - "@bors r= [p=] [rollup=]" -fn parser_approval<'a>(command: &CommandPart<'a>, parts: &[CommandPart<'a>]) -> ParseResult<'a> { +fn parser_approval(command: &CommandPart<'_>, parts: &[CommandPart<'_>]) -> ParseResult { let approver = match command { CommandPart::Bare("r+") => Approver::Myself, CommandPart::KeyValue { key: "r", value } => { if value.is_empty() { - return Some(Err(CommandParseError::MissingArgValue { arg: "r" })); + return Some(Err(CommandParseError::MissingArgValue { + arg: "r".to_string(), + })); } Approver::Specified(value.to_string()) } @@ -160,7 +164,7 @@ fn parser_approval<'a>(command: &CommandPart<'a>, parts: &[CommandPart<'a>]) -> } /// Parses "@bors r-" -fn parser_unapprove<'a>(command: &CommandPart<'a>, _parts: &[CommandPart<'a>]) -> ParseResult<'a> { +fn parser_unapprove(command: &CommandPart<'_>, _parts: &[CommandPart<'_>]) -> ParseResult { if let CommandPart::Bare("r-") = command { Some(Ok(BorsCommand::Unapprove)) } else { @@ -169,7 +173,7 @@ fn parser_unapprove<'a>(command: &CommandPart<'a>, _parts: &[CommandPart<'a>]) - } /// Parses "@bors help". -fn parser_help<'a>(command: &CommandPart<'a>, _parts: &[CommandPart<'a>]) -> ParseResult<'a> { +fn parser_help(command: &CommandPart<'_>, _parts: &[CommandPart<'_>]) -> ParseResult { if let CommandPart::Bare("help") = command { Some(Ok(BorsCommand::Help)) } else { @@ -178,7 +182,7 @@ fn parser_help<'a>(command: &CommandPart<'a>, _parts: &[CommandPart<'a>]) -> Par } /// Parses "@bors ping". -fn parser_ping<'a>(command: &CommandPart<'a>, _parts: &[CommandPart<'a>]) -> ParseResult<'a> { +fn parser_ping(command: &CommandPart<'_>, _parts: &[CommandPart<'_>]) -> ParseResult { if let CommandPart::Bare("ping") = command { Some(Ok(BorsCommand::Ping)) } else { @@ -194,7 +198,7 @@ fn parse_sha(input: &str) -> Result { } /// Parses "@bors try ". -fn parser_try<'a>(command: &CommandPart<'a>, parts: &[CommandPart<'a>]) -> ParseResult<'a> { +fn parser_try(command: &CommandPart<'_>, parts: &[CommandPart<'_>]) -> ParseResult { if *command != CommandPart::Bare("try") { return None; } @@ -205,7 +209,7 @@ fn parser_try<'a>(command: &CommandPart<'a>, parts: &[CommandPart<'a>]) -> Parse for part in parts { match part { CommandPart::Bare(key) => { - return Some(Err(CommandParseError::UnknownArg(key))); + return Some(Err(CommandParseError::UnknownArg(key.to_string()))); } CommandPart::KeyValue { key, value } => match (*key, *value) { ("parent", "last") => parent = Some(Parent::Last), @@ -234,7 +238,7 @@ fn parser_try<'a>(command: &CommandPart<'a>, parts: &[CommandPart<'a>]) -> Parse jobs = raw_jobs; } _ => { - return Some(Err(CommandParseError::UnknownArg(key))); + return Some(Err(CommandParseError::UnknownArg(key.to_string()))); } }, } @@ -243,7 +247,7 @@ fn parser_try<'a>(command: &CommandPart<'a>, parts: &[CommandPart<'a>]) -> Parse } /// Parses "@bors try cancel". -fn parser_try_cancel<'a>(command: &CommandPart<'a>, parts: &[CommandPart<'a>]) -> ParseResult<'a> { +fn parser_try_cancel(command: &CommandPart<'_>, parts: &[CommandPart<'_>]) -> ParseResult { match (command, parts) { (CommandPart::Bare("try"), [CommandPart::Bare("cancel"), ..]) => { Some(Ok(BorsCommand::TryCancel)) @@ -253,7 +257,7 @@ fn parser_try_cancel<'a>(command: &CommandPart<'a>, parts: &[CommandPart<'a>]) - } /// Parses `@bors delegate=` or `@bors delegate+`. -fn parser_delegate<'a>(command: &CommandPart<'a>, _parts: &[CommandPart<'a>]) -> ParseResult<'a> { +fn parser_delegate(command: &CommandPart<'_>, _parts: &[CommandPart<'_>]) -> ParseResult { match command { CommandPart::Bare("delegate+") => { Some(Ok(BorsCommand::SetDelegate(DelegatedPermission::Review))) @@ -270,7 +274,7 @@ fn parser_delegate<'a>(command: &CommandPart<'a>, _parts: &[CommandPart<'a>]) -> } /// Parses "@bors delegate-" -fn parser_undelegate<'a>(command: &CommandPart<'a>, _parts: &[CommandPart<'a>]) -> ParseResult<'a> { +fn parser_undelegate(command: &CommandPart<'_>, _parts: &[CommandPart<'_>]) -> ParseResult { if let CommandPart::Bare("delegate-") = command { Some(Ok(BorsCommand::Undelegate)) } else { @@ -288,7 +292,7 @@ fn parse_priority_value(value: &str) -> Result { } /// Parses the first occurrence of `p|priority=` in `parts`. -fn parse_priority<'a>(parts: &[CommandPart<'a>]) -> ParseResult<'a, Priority> { +fn parse_priority(parts: &[CommandPart<'_>]) -> ParseResult { parts .iter() .filter_map(|part| match part { @@ -302,12 +306,12 @@ fn parse_priority<'a>(parts: &[CommandPart<'a>]) -> ParseResult<'a, Priority> { } /// Parses "@bors p=" -fn parser_priority<'a>(command: &CommandPart<'a>, _parts: &[CommandPart<'a>]) -> ParseResult<'a> { +fn parser_priority(command: &CommandPart<'_>, _parts: &[CommandPart<'_>]) -> ParseResult { parse_priority(std::slice::from_ref(command)).map(|res| res.map(BorsCommand::SetPriority)) } /// Parses the first occurrence of `rollup=` in `parts`. -fn parse_rollup<'a>(parts: &[CommandPart<'a>]) -> ParseResult<'a, RollupMode> { +fn parse_rollup(parts: &[CommandPart<'_>]) -> ParseResult { parts .iter() .filter_map(|part| match part { @@ -326,12 +330,12 @@ fn parse_rollup<'a>(parts: &[CommandPart<'a>]) -> ParseResult<'a, RollupMode> { } /// Parses "rollup=" -fn parser_rollup<'a>(command: &CommandPart<'a>, _parts: &[CommandPart<'a>]) -> ParseResult<'a> { +fn parser_rollup(command: &CommandPart<'_>, _parts: &[CommandPart<'_>]) -> ParseResult { parse_rollup(std::slice::from_ref(command)).map(|res| res.map(BorsCommand::SetRollupMode)) } /// Parses "@bors info" -fn parser_info<'a>(command: &CommandPart<'a>, _parts: &[CommandPart<'a>]) -> ParseResult<'a> { +fn parser_info(command: &CommandPart<'_>, _parts: &[CommandPart<'_>]) -> ParseResult { if *command == CommandPart::Bare("info") { Some(Ok(BorsCommand::Info)) } else { @@ -340,7 +344,7 @@ fn parser_info<'a>(command: &CommandPart<'a>, _parts: &[CommandPart<'a>]) -> Par } /// Parses `@bors treeclosed-`, `@bors treeopen` and `@bors treeclosed=` -fn parser_tree_ops<'a>(command: &CommandPart<'a>, _parts: &[CommandPart<'a>]) -> ParseResult<'a> { +fn parser_tree_ops(command: &CommandPart<'_>, _parts: &[CommandPart<'_>]) -> ParseResult { match command { CommandPart::Bare("treeclosed-") | CommandPart::Bare("treeopen") => { Some(Ok(BorsCommand::OpenTree)) @@ -383,27 +387,32 @@ mod tests { fn unknown_command() { let cmds = parse_commands("@bors foo"); assert_eq!(cmds.len(), 1); - assert!(matches!( + assert_eq!( cmds[0], - Err(CommandParseError::UnknownCommand("foo")) - )); + Err(CommandParseError::UnknownCommand("foo".to_string())) + ); } #[test] fn parse_arg_no_value() { let cmds = parse_commands("@bors ping a="); assert_eq!(cmds.len(), 1); - assert!(matches!( + assert_eq!( cmds[0], - Err(CommandParseError::MissingArgValue { arg: "a" }) - )); + Err(CommandParseError::MissingArgValue { + arg: "a".to_string() + }) + ); } #[test] fn parse_duplicate_key() { let cmds = parse_commands("@bors ping a=b a=c"); assert_eq!(cmds.len(), 1); - assert!(matches!(cmds[0], Err(CommandParseError::DuplicateArg("a")))); + assert_eq!( + cmds[0], + Err(CommandParseError::DuplicateArg("a".to_string())) + ); } #[test] @@ -632,7 +641,10 @@ mod tests { fn parse_duplicate_priority() { let cmds = parse_commands("@bors p=1 p=2"); assert_eq!(cmds.len(), 1); - assert!(matches!(cmds[0], Err(CommandParseError::DuplicateArg("p")))); + assert_eq!( + cmds[0], + Err(CommandParseError::DuplicateArg("p".to_string())) + ); } #[test] @@ -987,14 +999,14 @@ line two fn parse_try_unknown_arg() { let cmds = parse_commands("@bors try a"); assert_eq!(cmds.len(), 1); - assert!(matches!(cmds[0], Err(CommandParseError::UnknownArg("a")))); + assert_eq!(cmds[0], Err(CommandParseError::UnknownArg("a".to_string()))); } #[test] fn parse_try_unknown_kv_arg() { let cmds = parse_commands("@bors try a=b"); assert_eq!(cmds.len(), 1); - assert!(matches!(cmds[0], Err(CommandParseError::UnknownArg("a")))); + assert_eq!(cmds[0], Err(CommandParseError::UnknownArg("a".to_string()))); } #[test] @@ -1117,10 +1129,12 @@ line two fn parse_tree_closed_empty() { let cmds = parse_commands("@bors treeclosed="); assert_eq!(cmds.len(), 1); - assert!(matches!( + assert_eq!( cmds[0], - Err(CommandParseError::MissingArgValue { arg: "treeclosed" }) - )); + Err(CommandParseError::MissingArgValue { + arg: "treeclosed".to_string() + }) + ); } #[test] @@ -1141,10 +1155,10 @@ line two fn parse_tree_closed_unknown_command() { let cmds = parse_commands("@bors tree closed 5"); assert_eq!(cmds.len(), 1); - assert!(matches!( + assert_eq!( cmds[0], - Err(CommandParseError::UnknownCommand("tree")) - )); + Err(CommandParseError::UnknownCommand("tree".to_string())) + ); } fn parse_commands(text: &str) -> Vec> { From 060d30eeb8cef3deabb8ba5c6f4afbbba0bc91b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Wed, 4 Jun 2025 18:14:36 +0200 Subject: [PATCH 2/3] Ignore bors commands in Markdown wrapping elements --- Cargo.lock | 41 +++++++++++++++ Cargo.toml | 1 + src/bors/command/parser.rs | 103 ++++++++++++++++++++++++++++++++----- 3 files changed, 132 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fa64c87b..b496c4e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -322,6 +322,7 @@ dependencies = [ "jsonwebtoken", "octocrab", "parking_lot", + "pulldown-cmark", "regex", "reqwest", "secrecy", @@ -787,6 +788,15 @@ dependencies = [ "version_check", ] +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -1669,6 +1679,25 @@ dependencies = [ "cc", ] +[[package]] +name = "pulldown-cmark" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e8bbe1a966bd2f362681a44f6edce3c2310ac21e4d5067a6e7ec396297a6ea0" +dependencies = [ + "bitflags", + "getopts", + "memchr", + "pulldown-cmark-escape", + "unicase", +] + +[[package]] +name = "pulldown-cmark-escape" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae" + [[package]] name = "quinn" version = "0.11.8" @@ -2893,6 +2922,12 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" +[[package]] +name = "unicase" +version = "2.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" + [[package]] name = "unicode-bidi" version = "0.3.18" @@ -2920,6 +2955,12 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + [[package]] name = "untrusted" version = "0.9.0" diff --git a/Cargo.toml b/Cargo.toml index 1671d1b6..b8ef9071 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,6 +58,7 @@ chrono = "0.4" itertools = "0.14" # Text processing +pulldown-cmark = "0.13" regex = "1" [dev-dependencies] diff --git a/src/bors/command/parser.rs b/src/bors/command/parser.rs index 9004c50e..db2064ba 100644 --- a/src/bors/command/parser.rs +++ b/src/bors/command/parser.rs @@ -1,11 +1,11 @@ //! Defines parsers for bors commands. -use std::collections::HashSet; -use std::str::FromStr; - use crate::bors::command::{Approver, BorsCommand, Parent}; use crate::database::DelegatedPermission; use crate::github::CommitSha; +use pulldown_cmark::{CowStr, Event, Parser, Tag, TagEnd, TextMergeStream}; +use std::collections::HashSet; +use std::str::FromStr; use super::{Priority, RollupMode}; @@ -45,11 +45,11 @@ impl CommandParser { /// /// Assumes that each command spands at most one line and that there are not more commands on /// each line. - pub fn parse_commands<'a>( - &self, - text: &'a str, - ) -> Vec>> { - text.lines() + pub fn parse_commands(&self, text: &str) -> Vec> { + let segments = extract_text_segments(text); + segments + .iter() + .flat_map(|segment| segment.lines()) .filter_map(|line| match line.find(&self.prefix) { Some(index) => { let input = &line[index + self.prefix.len()..]; @@ -61,7 +61,54 @@ impl CommandParser { } } -type ParseResult<'a, T = BorsCommand> = Option>>; +/// Extract text segments from a Markdown `text`. +fn extract_text_segments(text: &str) -> Vec { + let md_parser = TextMergeStream::new(Parser::new(text)); + let mut stack = vec![]; + let mut segments = vec![]; + + for event in md_parser.into_iter() { + match event { + Event::Text(text) => { + // Only consider commands in raw text outside of wrapping elements + if stack.is_empty() { + segments.push(text); + } + } + Event::Start(tag) => match tag { + // Ignore content in the following wrapping elements + Tag::BlockQuote(_) + | Tag::CodeBlock(_) + | Tag::HtmlBlock + | Tag::Link { .. } + | Tag::Heading { .. } + | Tag::Image { .. } => { + stack.push(tag); + } + _ => {} + }, + Event::End(tag) => { + if let Some(start_tag) = stack.last() { + match (start_tag, tag) { + (Tag::BlockQuote(_), TagEnd::BlockQuote(_)) + | (Tag::CodeBlock(_), TagEnd::CodeBlock) + | (Tag::HtmlBlock, TagEnd::HtmlBlock) + | (Tag::Link { .. }, TagEnd::Link) + | (Tag::Heading { .. }, TagEnd::Heading(_)) + | (Tag::Image { .. }, TagEnd::Image) => { + stack.pop(); + } + _ => {} + } + } + } + _ => {} + } + } + segments +} + +type ParseResult = Option>; // The order of the parsers in the vector is important const PARSERS: &[fn(&CommandPart<'_>, &[CommandPart<'_>]) -> ParseResult] = &[ @@ -495,8 +542,8 @@ mod tests { fn parse_multiple_priority_commands() { let cmds = parse_commands( r#" - @bors r+ p=1 - @bors r=user2 p=2 +@bors r+ p=1 +@bors r=user2 p=2 "#, ); assert_eq!(cmds.len(), 2); @@ -714,8 +761,8 @@ mod tests { fn parse_multiple_rollup_commands() { let cmds = parse_commands( r#" - @bors r+ rollup - @bors r=user2 rollup=iffy +@bors r+ rollup +@bors r=user2 rollup=iffy "#, ); assert_eq!(cmds.len(), 2); @@ -1161,6 +1208,36 @@ line two ); } + #[test] + fn ignore_markdown_codeblock() { + let cmds = parse_commands( + r#" +```rust +@bors try +``` +"#, + ); + assert_eq!(cmds.len(), 0); + } + + #[test] + fn ignore_markdown_backtikcs() { + let cmds = parse_commands("Don't do `@bors try`!"); + assert_eq!(cmds.len(), 0); + } + + #[test] + fn ignore_markdown_link() { + let cmds = parse_commands("Ignore [me](@bors-try)."); + assert_eq!(cmds.len(), 0); + } + + #[test] + fn ignore_markdown_html() { + let cmds = parse_commands("
@bors try
"); + assert_eq!(cmds.len(), 0); + } + fn parse_commands(text: &str) -> Vec> { CommandParser::new("@bors".to_string()).parse_commands(text) } From c6e96d4c0c3d74d0769f032b88f8d64bc2dd4ff9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Mon, 9 Jun 2025 09:35:38 +0200 Subject: [PATCH 3/3] Parse commands from HTML blocks --- src/bors/command/parser.rs | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/bors/command/parser.rs b/src/bors/command/parser.rs index db2064ba..8c352573 100644 --- a/src/bors/command/parser.rs +++ b/src/bors/command/parser.rs @@ -69,7 +69,7 @@ fn extract_text_segments(text: &str) -> Vec { for event in md_parser.into_iter() { match event { - Event::Text(text) => { + Event::Text(text) | Event::Html(text) => { // Only consider commands in raw text outside of wrapping elements if stack.is_empty() { segments.push(text); @@ -79,7 +79,6 @@ fn extract_text_segments(text: &str) -> Vec { // Ignore content in the following wrapping elements Tag::BlockQuote(_) | Tag::CodeBlock(_) - | Tag::HtmlBlock | Tag::Link { .. } | Tag::Heading { .. } | Tag::Image { .. } => { @@ -92,7 +91,6 @@ fn extract_text_segments(text: &str) -> Vec { match (start_tag, tag) { (Tag::BlockQuote(_), TagEnd::BlockQuote(_)) | (Tag::CodeBlock(_), TagEnd::CodeBlock) - | (Tag::HtmlBlock, TagEnd::HtmlBlock) | (Tag::Link { .. }, TagEnd::Link) | (Tag::Heading { .. }, TagEnd::Heading(_)) | (Tag::Image { .. }, TagEnd::Image) => { @@ -1208,6 +1206,26 @@ line two ); } + #[test] + fn parse_in_html_command() { + let cmds = parse_commands( + r#" + +"#, + ); + assert_eq!(cmds.len(), 1); + assert_eq!( + cmds[0], + Ok(BorsCommand::Try { + parent: None, + jobs: vec![] + }) + ); + } + #[test] fn ignore_markdown_codeblock() { let cmds = parse_commands( @@ -1232,12 +1250,6 @@ line two assert_eq!(cmds.len(), 0); } - #[test] - fn ignore_markdown_html() { - let cmds = parse_commands("
@bors try
"); - assert_eq!(cmds.len(), 0); - } - fn parse_commands(text: &str) -> Vec> { CommandParser::new("@bors".to_string()).parse_commands(text) }