"""Combine C source files and headers into a single file."""

import argparse
import re
from collections import defaultdict, deque
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple


def parse_dependencies(
    files: List[Path], root_path: Path
) -> Tuple[Dict[Path, Set[Path]], Dict[Path, Set[Path]]]:
    """Parse header dependencies between files."""
    dependencies = defaultdict(set)
    reverse_dependencies = defaultdict(set)
    include_pattern = re.compile(r'#include "(.*\.h)"')

    for file in files:
        with file.open() as infile:
            for line in infile:
                match = include_pattern.search(line)
                if match:
                    included_file = root_path / match.group(1)
                    if included_file.exists():
                        dependencies[file].add(included_file)
                        reverse_dependencies[included_file].add(file)
    return dependencies, reverse_dependencies


def topological_sort(files: List[Path], dependencies: Dict[Path, Set[Path]]) -> List[Path]:
    """Sort files based on their dependencies (Kahn's algorithm)."""
    indegree = defaultdict(int)
    for file in files:
        for dep in dependencies[file]:
            indegree[dep] += 1

    queue = deque([file for file in files if indegree[file] == 0])
    sorted_files = []
    while queue:
        file = queue.popleft()
        sorted_files.append(file)
        for dep in dependencies[file]:
            indegree[dep] -= 1
            if indegree[dep] == 0:
                queue.append(dep)
    return sorted_files


def process_file(fname: Path) -> Tuple[List[str], List[str]]:
    """Process a single file, separating includes and other code."""
    with fname.open() as infile:
        lines = [line.rstrip('\n') for line in infile]

    include_lines = []
    processed_lines = []
    inside_guard = False
    preprocessor_stack = []  # open #if/#ifdef/#ifndef blocks in the body
    in_include_section = True
    i = 0
    while i < len(lines):
        line = lines[i]

        # Strip the header include guard (#ifndef FOO_H / #define FOO_H / #endif).
        if re.match(r"#ifndef .*_H", line):
            inside_guard = True
            i += 1
            continue
        if inside_guard and re.match(r"#define .*_H", line):
            i += 1
            continue
        if inside_guard and line.strip() == "#endif" and not preprocessor_stack:
            inside_guard = False
            i += 1
            continue

        if in_include_section and line and not line.startswith("#"):
            in_include_section = False

        if in_include_section:
            if line.startswith("#if"):
                # Scan the whole conditional block; keep it with the includes
                # only if it actually contains an #include directive.
                block_start = i
                depth = 1
                found_include = False
                while i + 1 < len(lines) and depth > 0:
                    i += 1
                    next_line = lines[i]
                    if next_line.startswith("#if"):
                        depth += 1
                    elif next_line.strip() == "#endif":
                        depth -= 1
                    elif next_line.startswith("#include"):
                        found_include = True
                if found_include:
                    include_lines.extend(lines[block_start:i + 1])
                else:
                    processed_lines.extend(lines[block_start:i + 1])
                i += 1
                continue
            elif line.startswith("#include"):
                # Keep system/library includes; drop local .h includes, since
                # their contents are inlined into the combined file.
                if not re.match(r'#include ".*\.h"', line):
                    include_lines.append(line)
                i += 1
                continue

        # Track conditional blocks in the body so a nested #endif is not
        # mistaken for the include guard's closing #endif.
        if line.startswith("#if"):
            preprocessor_stack.append(line)
        elif line.strip() == "#endif" and preprocessor_stack:
            preprocessor_stack.pop()

        processed_lines.append(line)
        i += 1
    return include_lines, processed_lines
def combine_files(
    output_file: Path,
    h_files: List[Path],
    c_files: List[Path],
    root_path: Path,
    start_file: Optional[str] = None,
) -> None:
    """Combine multiple C source files into a single file.

    Args:
        output_file: Path to the output file
        h_files: List of header files
        c_files: List of C source files
        root_path: Root path for resolving includes
        start_file: Name of the file to process last (e.g., "main.c")
    """
    _, reverse_dependencies = parse_dependencies(h_files, root_path)
    # Sorting on reverse dependencies emits each header before the headers that include it.
    sorted_h_files = topological_sort(h_files, reverse_dependencies)

    all_includes = []
    all_code = []

    # Pull the start file (typically main.c) out so it is emitted last,
    # without mutating the caller's list.
    main_file = None
    if start_file:
        for file in c_files:
            if file.name == start_file:
                main_file = file
                break
        if main_file is not None:
            c_files = [f for f in c_files if f is not main_file]

    with output_file.open("w", encoding="utf-8") as outfile:
        for fname in (sorted_h_files + c_files):
            include_lines, processed_lines = process_file(fname)
            all_includes.extend(line for line in include_lines if line not in all_includes)
            all_code.append(f"//{'-' * 20} {fname.name} {'-' * 20}//\n")
            all_code.extend("\n".join(processed_lines).strip().split("\n"))
            all_code.append("")

        if main_file is not None:
            include_lines, processed_lines = process_file(main_file)
            all_includes.extend(line for line in include_lines if line not in all_includes)
            all_code.append(f"//{'-' * 20} {main_file.name} {'-' * 20}//\n")
            all_code.extend("\n".join(processed_lines).strip().split("\n"))
            all_code.append("")

        outfile.write("\n".join(all_includes))
        outfile.write("\n\n")
        outfile.write("\n".join(all_code))


def find_source_files(target_dir: Path) -> Tuple[List[Path], List[Path]]:
    """Find .c and .h files recursively in the target directory,
    excluding directories containing combine.py (this script)."""
    h_files = []
    c_files = []

    # Skip any directory that contains a copy of this script.
    excluded_dirs = set()
    for path in target_dir.rglob(Path(__file__).name):
        excluded_dirs.add(path.parent)

    for path in target_dir.rglob("*.[ch]"):
        if any(excl_dir in path.parents for excl_dir in excluded_dirs):
            continue
        if path.suffix == '.h':
            h_files.append(path)
        else:
            c_files.append(path)
    return h_files, c_files


def main():
    """Main entry point of the script."""
    default_output_file = Path(__file__).parent / "main.c"
    parser = argparse.ArgumentParser(description="Combine C source files into a single file")
    parser.add_argument('--target-dir', '-d', type=Path,
                        help='Target directory containing source files', required=True)
    parser.add_argument('--output-file', '-o', type=Path,
                        help='Output file path', default=default_output_file)
    parser.add_argument('--start-file', '-s', type=str,
                        help='File to process last (e.g., main.c)')
    args = parser.parse_args()

    h_files, c_files = find_source_files(args.target_dir)
    if not h_files and not c_files:
        print(f"No source files found in {args.target_dir}")
        return

    start_file = args.start_file
    if start_file is None and (args.target_dir / "main.c").exists():
        start_file = "main.c"
        print(f"Using main.c in {args.target_dir} as the start file")

    combine_files(args.output_file, h_files, c_files, args.target_dir, start_file)


if __name__ == "__main__":
    main()
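
# Example invocation (a sketch; the script name "combine.py" and the paths
# below are illustrative, only the flags defined above are assumed):
#
#   python combine.py --target-dir ./src --output-file combined.c --start-file main.c
#
# This inlines the headers under ./src in dependency order, then the .c files,
# emitting main.c last, and writes the result to combined.c.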