diff --git a/src/llm-coding-tools-core/benches/tools_read.rs b/src/llm-coding-tools-core/benches/tools_read.rs index c0d682a..4f2aba5 100644 --- a/src/llm-coding-tools-core/benches/tools_read.rs +++ b/src/llm-coding-tools-core/benches/tools_read.rs @@ -5,7 +5,7 @@ //! //! # Benchmark Groups //! -//! - `read_file/with_line_numbers`: Read with `L{n}: ` prefix on each line +//! - `read_file/with_line_numbers`: Read with `{n}: ` prefix on each line //! - `read_file/without_line_numbers`: Read raw content without prefixes //! //! # Test Cases (per mode) diff --git a/src/llm-coding-tools-core/src/context/tool_prompt/mod.rs b/src/llm-coding-tools-core/src/context/tool_prompt/mod.rs index 2ed526c..5e8fab6 100644 --- a/src/llm-coding-tools-core/src/context/tool_prompt/mod.rs +++ b/src/llm-coding-tools-core/src/context/tool_prompt/mod.rs @@ -18,7 +18,7 @@ mod tool_sections; pub(crate) const COMMON_RULES_HEADER: &str = "## Common Rules\n"; /// Largest common-rules section length, including [`COMMON_RULES_HEADER`]. -pub(crate) const COMMON_RULES_SECTION_MAX_SIZE: usize = 494; +pub(crate) const COMMON_RULES_SECTION_MAX_SIZE: usize = 493; /// Describes how a tool accepts paths. #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/src/llm-coding-tools-core/src/system_prompt.rs b/src/llm-coding-tools-core/src/system_prompt.rs index 53be818..abdf63e 100644 --- a/src/llm-coding-tools-core/src/system_prompt.rs +++ b/src/llm-coding-tools-core/src/system_prompt.rs @@ -1161,7 +1161,7 @@ mod tests { assert!(preamble.contains( "Prefer `edit` for targeted changes and `write` for new files or full rewrites." )); - assert!(preamble.contains("copy exact text from `read` and omit any `L{n}: ` prefixes")); + assert!(preamble.contains("copy exact text from `read` and omit any `{n}: ` prefixes")); } #[test] diff --git a/src/llm-coding-tools-core/src/tool_metadata/read.rs b/src/llm-coding-tools-core/src/tool_metadata/read.rs index e78da90..6eac6c6 100644 --- a/src/llm-coding-tools-core/src/tool_metadata/read.rs +++ b/src/llm-coding-tools-core/src/tool_metadata/read.rs @@ -14,11 +14,8 @@ pub const DEFAULT_LIMIT: usize = 2000; /// Maximum characters per output line before truncation. pub const MAX_LINE_LENGTH: usize = 2000; -/// Format prefix for line-numbered output (e.g. `L42: ...`). -pub const LINE_PREFIX_FORMAT: &str = "L{}: "; - /// Display hint for the line-number prefix in prompts. -pub const LINE_PREFIX_DISPLAY: &str = "L{n}: "; +pub const LINE_PREFIX_DISPLAY: &str = "{n}: "; /// Serde-friendly default offset helper. #[must_use] diff --git a/src/llm-coding-tools-core/src/tools/grep.rs b/src/llm-coding-tools-core/src/tools/grep.rs index f8a50e9..e9512ab 100644 --- a/src/llm-coding-tools-core/src/tools/grep.rs +++ b/src/llm-coding-tools-core/src/tools/grep.rs @@ -3,7 +3,7 @@ use crate::error::{ToolError, ToolResult}; use crate::path::PathResolver; use crate::tool_metadata::grep as grep_meta; -use crate::util::{push_usize, truncate_line_with_ellipsis, TRUNCATION_ELLIPSIS}; +use crate::util::{push_padded_usize, truncate_line_with_ellipsis, TRUNCATION_ELLIPSIS}; use globset::Glob; use grep_regex::RegexMatcher; use grep_searcher::sinks::UTF8; @@ -209,11 +209,22 @@ impl GrepOutput { pub fn format(&self, formatting: GrepFormattingSettings) -> String { let line_numbers = formatting.line_numbers(); let max_line_len = formatting.max_line_length(); + let line_number_width = self + .files + .iter() + .flat_map(|f| f.matches.iter().map(|m| m.line_num as usize)) + .max() + .map(|m| m.checked_ilog10().unwrap_or(0) as usize + 1) + .unwrap_or(1); let estimated_capacity = self.match_count * ESTIMATED_CHARS_PER_MATCH; let mut output = String::with_capacity(estimated_capacity); output.push_str("Found "); - push_usize(&mut output, self.match_count); + push_padded_usize( + &mut output, + self.match_count, + self.match_count.checked_ilog10().unwrap_or(0) as usize + 1, + ); output.push_str(" matches\n"); for file in &self.files { @@ -226,8 +237,8 @@ impl GrepOutput { truncate_line_with_ellipsis(&m.line_text, max_line_len); if line_numbers { - output.push_str(" L"); - push_usize(&mut output, m.line_num as usize); + output.push_str(" "); + push_padded_usize(&mut output, m.line_num as usize, line_number_width); output.push_str(": "); output.push_str(display_text); } else { @@ -245,13 +256,22 @@ impl GrepOutput { if self.truncated { output.push_str("\n(Results truncated at "); - push_usize(&mut output, self.effective_limit); + push_padded_usize( + &mut output, + self.effective_limit, + self.effective_limit.checked_ilog10().unwrap_or(0) as usize + 1, + ); output.push_str(" matches)"); } if self.partial { output.push_str("\n(Partial results: "); - push_usize(&mut output, self.errors.len()); + let error_count = self.errors.len(); + push_padded_usize( + &mut output, + error_count, + error_count.checked_ilog10().unwrap_or(0) as usize + 1, + ); output.push_str(" file error(s) encountered)"); } @@ -630,8 +650,8 @@ mod tests { #[rstest] #[case::with_line_numbers_short( 6, // max_len: line "abcdefghij" (10 chars) truncated to 6 - true, // with_line_numbers: yes, shows "L1: " prefix - "L1: abc..." // expected: truncated with line number prefix + true, // with_line_numbers: yes, shows "1: " prefix + "1: abc..." // expected: truncated with line number prefix )] #[case::without_line_numbers_short( 4, // max_len: line truncated to 4 chars @@ -641,12 +661,12 @@ mod tests { #[case::no_truncation_when_fits( 200, // max_len: larger than line length (10 chars) true, // with_line_numbers: yes - "L1: abcdefghij" // expected: full line preserved, no truncation + "1: abcdefghij" // expected: full line preserved, no truncation )] #[case::exact_boundary_no_truncation( 10, // max_len: exactly matches line length (10 chars) true, // with_line_numbers: yes - "L1: abcdefghij" // expected: full line preserved, boundary not exceeded + "1: abcdefghij" // expected: full line preserved, boundary not exceeded )] fn grep_format_handles_line_truncation( #[case] max_len: usize, @@ -684,6 +704,52 @@ mod tests { ); } + #[test] + fn grep_format_aligns_line_numbers_with_padding() { + let output = GrepOutput { + files: vec![GrepFileMatches { + path: "file.txt".to_string(), + matches: vec![ + GrepLineMatch { + line_num: 3, + line_text: "short".to_string(), + }, + GrepLineMatch { + line_num: 15, + line_text: "longer content".to_string(), + }, + ], + mtime: SystemTime::UNIX_EPOCH, + }], + match_count: 2, + truncated: false, + partial: false, + errors: Vec::new(), + effective_limit: 10, + }; + + let formatted = output.format( + GrepFormattingSettings::new() + .with_max_line_length(200) + .unwrap() + .with_line_numbers(true), + ); + + // With max line_num=15, width=2: line 3 is " 3:" (1 space + digit), line 15 is "15:" (2 digits) + assert!( + formatted.contains(" 3: short"), + "Expected padded ' 3: short' in:\n{formatted}" + ); + assert!( + formatted.contains(" 15: longer content"), + "Expected aligned ' 15: longer content' in:\n{formatted}" + ); + assert!( + !formatted.contains("L"), + "No 'L' prefix should appear in output:\n{formatted}" + ); + } + #[test] fn grep_marks_results_partial_when_walker_reports_error() { let temp = tempdir().unwrap(); diff --git a/src/llm-coding-tools-core/src/tools/read.rs b/src/llm-coding-tools-core/src/tools/read.rs index 546303e..2bc5cfb 100644 --- a/src/llm-coding-tools-core/src/tools/read.rs +++ b/src/llm-coding-tools-core/src/tools/read.rs @@ -6,7 +6,7 @@ use crate::output::ToolOutput; use crate::path::PathResolver; use crate::tool_metadata::read as read_meta; use crate::util::{ - push_usize, truncate_line_with_ellipsis, ESTIMATED_CHARS_PER_LINE, TRUNCATION_ELLIPSIS, + push_padded_usize, truncate_line_with_ellipsis, ESTIMATED_CHARS_PER_LINE, TRUNCATION_ELLIPSIS, }; use memchr::{memchr, memchr_iter}; use serde::Deserialize; @@ -192,7 +192,7 @@ type BufFile = tokio::io::BufReader; /// /// The function opens the file at the resolved path, skips to the requested /// 1-indexed `offset`, then streams lines into an output string. Each line -/// can optionally carry a `L{number}: ` prefix and is truncated to +/// can optionally carry a `{number}: ` prefix and is truncated to /// `max_line_length` when necessary. /// /// # Arguments @@ -251,14 +251,16 @@ pub async fn read_file( .min(1_048_576); let mut reader = fs::open_buffered(&path, buf_capacity).await?; - // Compute the width of the "L{number}: " prefix so the output buffer can - // be pre-sized accurately. Derives digit count from the last line number. - let line_prefix_len = if line_numbers { + // Compute the width of the line number and "{number}: " prefix so the + // output buffer can be pre-sized accurately. Derives digit count from + // the last line number. + let line_number_width = if line_numbers { let last_line = offset.saturating_add(limit).saturating_sub(1); - last_line.checked_ilog10().unwrap_or(0) as usize + 3 + last_line.checked_ilog10().unwrap_or(0) as usize + 1 } else { 0 }; + let line_prefix_len = line_number_width + 2; // ": " let estimated_capacity = limit.saturating_mul(ESTIMATED_CHARS_PER_LINE + line_prefix_len); let mut output = String::with_capacity(estimated_capacity); // Holds a partial line that spans multiple buffered chunks. @@ -282,6 +284,7 @@ pub async fn read_file( emit_line( &overflow, line_number, + line_number_width, &mut output, &mut lines_output, max_line_length, @@ -306,6 +309,7 @@ pub async fn read_file( emit_line( &buf[pos..newline_pos], line_number, + line_number_width, &mut output, &mut lines_output, max_line_length, @@ -318,6 +322,7 @@ pub async fn read_file( emit_line( &overflow, line_number, + line_number_width, &mut output, &mut lines_output, max_line_length, @@ -438,6 +443,7 @@ pub(crate) async fn skip_to_line(reader: &mut BufFile, skip_target: usize) -> To fn emit_line( line_bytes: &[u8], line_number: usize, + line_number_width: usize, output: &mut String, lines_output: &mut usize, max_line_length: usize, @@ -447,6 +453,7 @@ fn emit_line( process_line::( line_bytes, line_number, + line_number_width, output, lines_output, max_line_length, @@ -455,6 +462,7 @@ fn emit_line( process_line::( line_bytes, line_number, + line_number_width, output, lines_output, max_line_length, @@ -470,6 +478,7 @@ fn emit_line( fn process_line( line_bytes: &[u8], line_number: usize, + line_number_width: usize, output: &mut String, lines_output: &mut usize, max_line_length: usize, @@ -481,8 +490,7 @@ fn process_line( } if LINE_NUMBERS { - output.push('L'); - push_usize(output, line_number); + push_padded_usize(output, line_number, line_number_width); output.push_str(": "); } @@ -640,7 +648,8 @@ mod tests { let result = read_temp_file(b"hello\nworld\n", 1, 2000, true) .await .unwrap(); - assert_eq!(result.content, "L1: hello\nL2: world"); + // With limit=2000, last_line=2000, width=4: " 1: hello\n 2: world" + assert_eq!(result.content, " 1: hello\n 2: world"); } #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] @@ -651,6 +660,24 @@ mod tests { assert_eq!(result.content, "hello\nworld"); } + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn reads_file_with_multi_digit_line_numbers() { + // 12-line file with limit=12: last_line=12, width=2, so line 1 is " 1:" (space-padded) + let content = (1..=12).map(|i| format!("line{i}\n")).collect::(); + let result = read_temp_file(content.as_bytes(), 1, 12, true) + .await + .unwrap(); + assert!( + result.content.contains(" 1: line1"), + "Expected padded ' 1: line1'" + ); + assert!( + result.content.contains("12: line12"), + "Expected unpadded '12: line12'" + ); + assert!(!result.content.contains("L"), "No 'L' prefix should appear"); + } + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] async fn errors_on_offset_zero() { let err = read_temp_file(b"test\n", 0, 10, true).await.unwrap_err(); @@ -727,7 +754,7 @@ mod tests { .await .unwrap(); - assert_eq!(result.content, "L1: line1"); + assert_eq!(result.content, "1: line1"); } #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] diff --git a/src/llm-coding-tools-core/src/util.rs b/src/llm-coding-tools-core/src/util.rs index 6494470..f30a860 100644 --- a/src/llm-coding-tools-core/src/util.rs +++ b/src/llm-coding-tools-core/src/util.rs @@ -69,25 +69,39 @@ pub(crate) fn truncate_line_with_ellipsis(line: &str, max_chars: usize) -> (&str } } -/// Fast integer-to-string conversion for positive integers. +/// Appends `n` right-aligned with leading spaces to fill `width` characters. +/// E.g. `push_padded_usize(buf, 5, 4)` appends `" 5"`. /// -/// Uses a stack buffer to avoid allocations and converts digit-by-digit -/// for maximum performance. +/// When `width` equals the digit count of `n`, this appends just the digits +/// (no padding), equivalent to a plain integer-to-string conversion. +/// +/// # Safety (caller contract) +/// +/// `width` must be >= the number of digits in `n`. This is guaranteed by +/// construction: callers compute `width` from the maximum line number or +/// from the number's own digit count. #[inline] -pub(crate) fn push_usize(output: &mut String, mut n: usize) { - if n == 0 { - output.push('0'); - return; - } - let mut buf = [0u8; 20]; +pub(crate) fn push_padded_usize(output: &mut String, n: usize, width: usize) { + debug_assert!(width <= 20, "width exceeds stack buffer"); + let mut buf = [b' '; 20]; let mut pos = 20usize; - while n > 0 { + let mut m = n; + if m == 0 { pos -= 1; - buf[pos] = b'0' + (n % 10) as u8; - n /= 10; + buf[pos] = b'0'; + } else { + while m > 0 { + pos -= 1; + buf[pos] = b'0' + (m % 10) as u8; + m /= 10; + } } + // `width >= digit_count(n)` by contract, so `20 - width <= pos`. + // buf[20-width..pos] is already spaces; buf[pos..20] has digits. + let start = 20 - width; + debug_assert!(start <= pos, "width ({width}) < digit count of {n}"); unsafe { - output.push_str(core::str::from_utf8_unchecked(&buf[pos..])); + output.push_str(core::str::from_utf8_unchecked(&buf[start..])); } } @@ -113,4 +127,22 @@ mod tests { assert_eq!(line, expected); assert_eq!(truncated, was_truncated); } + + #[rstest] + #[case::single_digit_no_padding(5, 1, "5")] + #[case::width_greater_than_digits(5, 4, " 5")] + #[case::width_equals_digits(42, 2, "42")] + #[case::zero(0, 1, "0")] + #[case::zero_with_padding(0, 3, " 0")] + #[case::large_number(999, 3, "999")] + #[case::large_number_with_padding(123, 5, " 123")] + fn push_padded_usize_should_right_align_with_spaces( + #[case] n: usize, + #[case] width: usize, + #[case] expected: &str, + ) { + let mut output = String::new(); + push_padded_usize(&mut output, n, width); + assert_eq!(output, expected); + } } diff --git a/src/llm-coding-tools-serdesai/src/tools/grep.rs b/src/llm-coding-tools-serdesai/src/tools/grep.rs index 5408653..7782ebf 100644 --- a/src/llm-coding-tools-serdesai/src/tools/grep.rs +++ b/src/llm-coding-tools-serdesai/src/tools/grep.rs @@ -203,7 +203,7 @@ mod tests { let text = result.as_text().unwrap(); assert!(text.contains("Found 1 matches")); - assert!(text.contains("L1: hello world")); + assert!(text.contains("1: hello world")); } #[tokio::test] @@ -226,7 +226,7 @@ mod tests { let text = result.as_text().unwrap(); assert!(text.contains("Found 1 matches")); - assert!(text.contains("L1: hello world")); + assert!(text.contains("1: hello world")); } #[tokio::test] @@ -347,7 +347,7 @@ mod tests { // Verify the match line doesn't exceed DEFAULT_MAX_LINE_LENGTH for line in text.lines() { if line.contains("prefix_") { - // Line format is " L1: content", so actual content is line.len() - prefix + // Line format is " 1: content", so actual content is line.len() - prefix let content_start = line.find("prefix_").unwrap(); let content = &line[content_start..]; assert!(content.len() <= llm_coding_tools_core::tools::DEFAULT_MAX_LINE_LENGTH); @@ -430,10 +430,10 @@ mod tests { .unwrap(); let text = result.as_text().unwrap(); - // Should only see L1: alpha, not the other lines - assert!(text.contains("L1: alpha")); - assert!(!text.contains("L2: beta")); - assert!(!text.contains("L3: gamma")); + // Should only see 1: alpha, not the other lines + assert!(text.contains("1: alpha")); + assert!(!text.contains("2: beta")); + assert!(!text.contains("3: gamma")); } #[test] diff --git a/src/llm-coding-tools-serdesai/src/tools/read.rs b/src/llm-coding-tools-serdesai/src/tools/read.rs index 5a91440..d698af2 100644 --- a/src/llm-coding-tools-serdesai/src/tools/read.rs +++ b/src/llm-coding-tools-serdesai/src/tools/read.rs @@ -198,10 +198,10 @@ mod tests { .unwrap(); let text = result.as_text().unwrap(); - assert!(text.contains("L2: line2")); - assert!(text.contains("L3: line3")); - assert!(!text.contains("L1:")); - assert!(!text.contains("L4:")); + assert!(text.contains("2: line2")); + assert!(text.contains("3: line3")); + assert!(!text.contains("1:")); + assert!(!text.contains("4:")); } #[tokio::test] @@ -224,8 +224,8 @@ mod tests { .unwrap(); let text = result.as_text().unwrap(); - assert!(text.contains("L1: hello")); - assert!(text.contains("L2: world")); + assert!(text.contains("1: hello")); + assert!(text.contains("2: world")); } #[tokio::test] @@ -269,7 +269,7 @@ mod tests { let text = result.as_text().unwrap(); assert!(text.contains("line1")); - assert!(!text.contains("L1:")); // line numbers disabled + assert!(!text.contains("1:")); // line numbers disabled } #[tokio::test] @@ -294,9 +294,9 @@ mod tests { .unwrap(); let text = result.as_text().unwrap(); - assert!(text.contains("L1: line1")); - assert!(!text.contains("L2: line2")); - assert!(!text.contains("L3: line3")); + assert!(text.contains("1: line1")); + assert!(!text.contains("2: line2")); + assert!(!text.contains("3: line3")); } #[test]