import re
import argparse
from pathlib import Path
from collections import defaultdict, deque
from typing import List, Set, Tuple, Dict
def parse_dependencies(files: List[Path], root_path: Path) -> Tuple[Dict[Path, Set[Path]], Dict[Path, Set[Path]]]:
"""Parse header dependencies between files."""
dependencies = defaultdict(set)
reverse_dependencies = defaultdict(set)
include_pattern = re.compile(r'#include "(.*\.h)"')
for file in files:
with file.open() as infile:
for line in infile:
match = include_pattern.search(line)
if match:
                    # Resolve quoted includes against the project root first,
                    # then against the including file's own directory.
                    included_file = root_path / match.group(1)
                    if not included_file.exists():
                        included_file = file.parent / match.group(1)
                    if included_file.exists():
                        dependencies[file].add(included_file)
                        reverse_dependencies[included_file].add(file)
return dependencies, reverse_dependencies
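# Illustrative sketch only (hypothetical paths, not used by the combiner): for a
# tree where app.h contains `#include "util.h"`, parse_dependencies(h_files, root)
# would return roughly
#   dependencies         == {root / "app.h": {root / "util.h"}}
#   reverse_dependencies == {root / "util.h": {root / "app.h"}}
# recording only headers that actually exist under the project root.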
def topological_sort(files: List[Path], dependencies: Dict[Path, Set[Path]]) -> List[Path]:
    """Order files so that each file appears after the files it depends on.

    The graph maps each file to the files that must come after it in the
    output; combine_files passes the reverse-dependency map, so headers are
    emitted before the headers that include them.
    """
    indegree = defaultdict(int)
    for file in files:
        for dep in dependencies[file]:
            indegree[dep] += 1
    # Kahn's algorithm: start from nodes with no incoming edges.
    queue = deque([file for file in files if indegree[file] == 0])
    sorted_files = []
    while queue:
        file = queue.popleft()
        sorted_files.append(file)
        for dep in dependencies[file]:
            indegree[dep] -= 1
            if indegree[dep] == 0:
                queue.append(dep)
    # Files caught in an include cycle never reach indegree 0; append them at
    # the end rather than silently dropping them from the output.
    if len(sorted_files) < len(files):
        emitted = set(sorted_files)
        sorted_files.extend(f for f in files if f not in emitted)
    return sorted_files
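# A minimal, self-contained sketch of the ordering above (hypothetical header
# names; this helper is never called by the script). Here util.h is included by
# app.h, and app.h by main.h, so the reverse-dependency map orders util.h first.
def _example_topological_sort() -> List[Path]:
    util, app, main_h = Path("util.h"), Path("app.h"), Path("main.h")
    reverse_deps = defaultdict(set, {util: {app}, app: {main_h}})
    return topological_sort([util, app, main_h], reverse_deps)  # [util.h, app.h, main.h]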
def process_file(fname: Path) -> Tuple[List[str], List[str]]:
"""Process a single file, separating includes and other code."""
with fname.open() as infile:
lines = [line.rstrip('\n') for line in infile]
include_lines = []
processed_lines = []
    inside_guard = False        # between a header's #ifndef X_H guard and its closing #endif
    preprocessor_stack = []     # open #if/#ifdef/#ifndef blocks in the code body
    in_include_section = True   # still in the file's leading block of preprocessor lines
i = 0
while i < len(lines):
line = lines[i]
if re.match(r"#ifndef .*_H", line):
inside_guard = True
i += 1
continue
if inside_guard and re.match(r"#define .*_H", line):
i += 1
continue
if inside_guard and line.strip() == "#endif" and not preprocessor_stack:
inside_guard = False
i += 1
continue
if in_include_section and line and not line.startswith("#"):
in_include_section = False
if in_include_section:
if line.startswith("#if") or re.match(r"#ifdef|#ifndef", line):
block_start = i
depth = 1
found_include = False
while i + 1 < len(lines) and depth > 0:
i += 1
next_line = lines[i]
if next_line.startswith("#if"):
depth += 1
elif next_line.strip() == "#endif":
depth -= 1
elif next_line.startswith("#include"):
found_include = True
if found_include:
include_lines.extend(lines[block_start:i+1])
else:
processed_lines.extend(lines[block_start:i+1])
i += 1
continue
            elif line.startswith("#include"):
                # Keep non-project includes (e.g. <stdio.h>); quoted *.h
                # includes are dropped because those headers are inlined.
                if not re.match(r'#include ".*\.h"', line):
                    include_lines.append(line)
                i += 1
                continue
        # Track #if/#ifdef/#ifndef blocks in the code body so that their
        # closing #endif is not mistaken for the include guard's #endif.
        if line.startswith("#if"):
            preprocessor_stack.append(line)
        elif line.strip() == "#endif" and preprocessor_stack:
            preprocessor_stack.pop()
        processed_lines.append(line)
i += 1
return include_lines, processed_lines
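# Sketch of the split performed above, for a hypothetical header file:
#   #ifndef UTIL_H          -> dropped (include guard)
#   #define UTIL_H          -> dropped (include guard)
#   #include <stddef.h>     -> include_lines (kept, de-duplicated later)
#   #include "other.h"      -> dropped (project headers are inlined elsewhere)
#   size_t count(void);     -> processed_lines
#   #endif                  -> dropped (closes the include guard)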
def combine_files(output_file: Path, h_files: List[Path], c_files: List[Path], root_path: Path, start_file: str = None) -> None:
"""
Combine multiple C source files into a single file.
Args:
output_file: Path to the output file
h_files: List of header files
c_files: List of C source files
root_path: Root path for resolving includes
start_file: Name of the file to process last (e.g., "main.c")
"""
_, reverse_dependencies = parse_dependencies(h_files, root_path)
sorted_h_files = topological_sort(h_files, reverse_dependencies)
all_includes = []
all_code = []
main_file = None
    if start_file:
        for file in c_files:
            if file.name == start_file:
                main_file = file
                break
        if main_file is not None:
            # Exclude the start file from the main pass; it is appended last.
            c_files = [f for f in c_files if f != main_file]
    with output_file.open("w", encoding="utf-8") as outfile:
        files_in_order = sorted_h_files + c_files + ([main_file] if main_file is not None else [])
        for fname in files_in_order:
            include_lines, processed_lines = process_file(fname)
            # De-duplicate plain #include lines only; #if/#endif lines from
            # conditional include blocks are kept so the blocks stay balanced.
            for line in include_lines:
                if not line.startswith("#include") or line not in all_includes:
                    all_includes.append(line)
            all_code.append(f"//{'-' * 20} {fname.name} {'-' * 20}//\n")
            all_code.extend("\n".join(processed_lines).strip().split("\n"))
            all_code.append("")
        outfile.write("\n".join(all_includes))
        outfile.write("\n\n")
        outfile.write("\n".join(all_code))
        outfile.write("\n")
def find_source_files(target_dir: Path) -> Tuple[List[Path], List[Path]]:
"""
    Find .c and .h files recursively in the target directory, skipping any
    directory that contains this script (combine.py).
"""
h_files = []
c_files = []
excluded_dirs = set()
for path in target_dir.rglob(Path(__file__).name):
excluded_dirs.add(path.parent)
for path in target_dir.rglob("*.[ch]"):
if any(excl_dir in path.parents for excl_dir in excluded_dirs):
continue
if path.suffix == '.h':
h_files.append(path)
else:
c_files.append(path)
return h_files, c_files
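# End-to-end sketch of the discovery and ordering steps (hypothetical ./src
# tree; illustration only, not called anywhere).
def _example_header_order() -> List[Path]:
    root = Path("src")
    h_files, _c_files = find_source_files(root)
    _deps, reverse_deps = parse_dependencies(h_files, root)
    return topological_sort(h_files, reverse_deps)  # headers, dependencies first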
def main():
"""Main entry point of the script."""
default_output_file = Path(__file__).parent / "main.c"
parser = argparse.ArgumentParser(description="Combine C source files into a single file")
parser.add_argument('--target-dir', '-d', type=Path, help='Target directory containing source files', required=True)
parser.add_argument('--output-file', '-o', type=Path, help='Output file path', default=default_output_file)
parser.add_argument('--start-file', '-s', type=str, help='File to process last (e.g., main.c)')
args = parser.parse_args()
h_files, c_files = find_source_files(args.target_dir)
if not h_files and not c_files:
print(f"No source files found in {args.target_dir}")
return
start_file = args.start_file
if start_file is None and (args.target_dir / "main.c").exists():
start_file = "main.c"
print(f"main.c was specified as start_file in {args.target_dir}")
combine_files(args.output_file, h_files, c_files, args.target_dir, start_file)
if __name__ == "__main__":
main()
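# Typical invocation (paths are illustrative; --output-file defaults to main.c
# next to this script, and --start-file defaults to main.c when one exists in
# the target directory):
#   python combine.py --target-dir ./src --output-file combined.c --start-file main.c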