08_TSVC
TSVC
安装与编译:
git clone https://github.com/UoB-HPC/TSVC_2.git
cd TSVC_2
make COMPILER=clang \
CC=/path/to/your/clang \
CFLAGS="-O3 -march=native"
编译完成后,在 bin/clang 目录下会生成六个可执行文件,分别是:
- novec(不向量化):
tsvc_novec_default
tsvc_novec_precise
tsvc_novec_relaxed
- vec(启用向量化):
tsvc_vec_default
tsvc_vec_precise
tsvc_vec_relaxed
自动化代码
import argparse, json, logging, os, re, subprocess, sys, tempfile
from datetime import datetime
from pathlib import Path
from rich.console import Console
from rich.table import Table
from rich.panel import Panel
# Shared Rich console used for all user-facing output in this script.
console = Console()
# Logger named "tsvc"; run() checks its level to decide whether to echo commands.
log = logging.getLogger("tsvc")
# ──────────────────── utility helpers ──────────────────────────────────────
def run(cmd: list, **kw):
    """Run an external command and raise if it exits non-zero.

    When the ``tsvc`` logger is enabled for DEBUG, the command line is echoed
    to the console before it is executed.

    Args:
        cmd: Argument vector. Entries may be ``str`` or path-like objects;
            they are stringified only for the debug echo, while
            ``subprocess.run`` receives the list unchanged.
        **kw: Extra keyword arguments forwarded to ``subprocess.run``.

    Returns:
        The ``subprocess.CompletedProcess`` of the finished command.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (``check=True`` is always passed).
    """
    if log.isEnabledFor(logging.DEBUG):
        # Callers pass pathlib.Path entries (e.g. object files); str.join
        # rejects non-str items, so convert each entry before joining.
        shown = " ".join(str(part) for part in cmd)
        console.print(f"[cyan]$ {shown}[/cyan]")
    return subprocess.run(cmd, check=True, text=True, **kw)
def read_lines(p: Path):
    """Return the UTF-8 text of *p* as a list of lines, line endings kept."""
    text = p.read_text(encoding="utf-8")
    return text.splitlines(True)
def write_lines(p: Path, lines):
    """Concatenate *lines* verbatim and store the result in *p* as UTF-8."""
    content = "".join(lines)
    p.write_text(content, encoding="utf-8")
def find_line(lines, pred):
    """Return the index of the first line accepted by *pred*, else ``None``."""
    for index, line in enumerate(lines):
        if pred(line):
            return index
    return None
def grab_block(lines, start):
    """Collect lines from index *start* until the curly braces balance out.

    Every visited line is appended to the result. Collection stops after the
    first line at which the running count of ``{`` minus ``}`` is zero, or at
    the end of *lines* if that never happens. Braces are counted textually.
    """
    block = []
    balance = 0
    pos = start
    end = len(lines)
    while pos < end:
        text = lines[pos]
        block.append(text)
        balance += text.count("{") - text.count("}")
        if balance == 0:
            return block
        pos += 1
    return block
def fetch_func(lines, name):
    """Return the source lines of the ``real_t <name>(...)`` definition.

    The signature is located by a line starting with ``real_t`` followed by
    *name* and an opening parenthesis. The opening brace may sit on the
    signature line or on the line right after it. If the function is not
    found, or no brace is found in either place, an error is printed and the
    process exits with status 1.
    """
    pattern = re.compile(rf"^real_t\s+{re.escape(name)}\s*\(")
    at = find_line(lines, lambda text: pattern.match(text) is not None)
    if at is None:
        console.print(f"[red]✗ 未找到函数 {name}[/red]")
        sys.exit(1)

    header = lines[at]
    if "{" in header:
        return grab_block(lines, at)

    # Brace on its own line: keep the signature and collect from the brace.
    body_at = at + 1
    if body_at >= len(lines) or "{" not in lines[body_at]:
        console.print(f"[red]✗ 解析 {name} 失败: 缺少 '{{' 行[/red]")
        sys.exit(1)
    return [header] + grab_block(lines, body_at)
def fetch_time_function(lines):
    """Return the source lines of the ``void time_function(...)`` definition.

    The opening brace may sit on the signature line or on the line right
    after it. If the function is not found, or no brace is found in either
    place, an error is printed and the process exits with status 1.
    """
    pattern = re.compile(r"^void\s+time_function\s*\(")
    at = find_line(lines, lambda text: bool(pattern.match(text)))
    if at is None:
        console.print("[red]✗ 找不到 time_function[/red]")
        sys.exit(1)
    if "{" in lines[at]:
        return grab_block(lines, at)
    # Validate the following line the same way fetch_func does; without this
    # check a missing brace silently produced a signature-only block.
    if at + 1 < len(lines) and "{" in lines[at + 1]:
        return [lines[at]] + grab_block(lines, at + 1)
    console.print("[red]✗ 解析 time_function 失败: 缺少 '{' 行[/red]")
    sys.exit(1)
# ────────────────────── main ───────────────────────────────────────────────
def _parse_args():
    """Build the command-line parser and return the parsed arguments."""
    parser = argparse.ArgumentParser(description="TSVC per-kernel vectorization benchmark")
    parser.add_argument("kernels", nargs="*", help="内核名字,例如 s000 s1111 …")
    parser.add_argument("--tsvc", default=None, help="tsvc.c 路径(自动查找)")
    parser.add_argument("--cc", default="clang", help="C 编译器 (默认 clang)")
    parser.add_argument("--cflags", default="-O3 -fstrict-aliasing -ffast-math -w",
                        help="通用编译旗标(两版共用)")
    parser.add_argument("--vecflags", default="-O3 -fopenmp=libomp -fvectorize -fslp-vectorize",
                        help="仅向量化版本追加的旗标")
    parser.add_argument("--novecflags", default="-fno-vectorize",
                        help="仅 novec 版本追加的旗标")
    parser.add_argument("--warmup", type=int, default=0)
    parser.add_argument("--max-runs", type=int, default=3)
    parser.add_argument("--time-unit", default="millisecond",
                        choices=["millisecond", "second"])
    parser.add_argument("--debug", action="store_true")
    return parser.parse_args()


def _locate_tsvc(explicit):
    """Return the path to tsvc.c, exiting with status 1 if none exists.

    An existing *explicit* path wins; otherwise ``tsvc.c`` and then
    ``src/tsvc.c`` (relative to the current directory) are tried.
    """
    candidate = Path(explicit) if explicit else None
    if not (candidate and candidate.exists()):
        candidate = Path("tsvc.c") if Path("tsvc.c").exists() else Path("src/tsvc.c")
    if not candidate.exists():
        console.print("[red]✗ tsvc.c 未找到[/red]")
        sys.exit(1)
    return candidate


def _support_sources(tsvc_path):
    """Return ``(include_flags, sources)`` for the files built with tsvc.c.

    ``common.c`` and ``dummy.c`` are taken from the directory of *tsvc_path*
    when present there, falling back to the bare names (current directory).
    The same directory is added to the include path because the generated
    custom_tsvc.c is written to the current directory, so headers located
    beside tsvc.c would otherwise not be found.
    """
    src_dir = tsvc_path.parent
    sources = []
    for name in ("common.c", "dummy.c"):
        sibling = src_dir / name
        sources.append(str(sibling) if sibling.exists() else name)
    return [f"-I{src_dir}"], sources


def _run_full_suite(args, tsvc_path, include_flags, sources):
    """Copy tsvc.c to custom_tsvc.c (ensuring <string.h>), build it and run it."""
    console.rule("[bold yellow]No kernels specified: run full tsvc.c")
    lines = read_lines(tsvc_path)
    # Make sure string.h is included, placing it right after the first
    # existing #include (or at the very top when there is none).
    if not any(l.startswith("#include <string.h>") for l in lines):
        first_include = find_line(lines, lambda l: l.startswith("#include"))
        insert_at = 0 if first_include is None else first_include + 1
        lines.insert(insert_at, "#include <string.h>\n")
    write_lines(Path("custom_tsvc.c"), lines)
    console.print("[green]✔ custom_tsvc.c 生成完毕[/green]")
    # Compile and run, linking common.c and dummy.c.
    exe = Path("custom_tsvc")
    run(args.cc.split() + args.cflags.split() + include_flags
        + ["custom_tsvc.c", *sources, "-lm", "-o", str(exe)])
    console.print(f"[blue]Running {exe}...[/blue]")
    run([f"./{exe.name}"])


def _generate_custom_source(tsvc_path, kernels):
    """Write custom_tsvc.c containing only *kernels* plus a small driver."""
    lines = read_lines(tsvc_path)
    first_kernel = re.compile(r"^real_t\s+s\d{3}\s*\(")
    first_idx = find_line(lines, lambda l: first_kernel.match(l))
    if first_idx is None:
        # lines[:None] would copy the whole file and duplicate every kernel.
        console.print("[red]✗ tsvc.c 中未找到任何内核函数定义[/red]")
        sys.exit(1)

    # Everything before the first kernel is kept verbatim as the preamble.
    out = list(lines[:first_idx])
    out.append('#include <string.h>\n')
    out.append("\ntypedef real_t (*test_function_t)(struct args_t *);\n\n")
    for k in kernels:
        out.extend(fetch_func(lines, k))
        out.append("\n")
    out.extend(fetch_time_function(lines))
    out.append("\n")
    out.append("struct {const char* name; test_function_t fn;} tbl[]={\n")
    out.extend(f' {{"{k}", {k}}},\n' for k in kernels)
    out.append(" {NULL,NULL}};\n\n")
    out.append(
        "int main(int argc,char**argv){\n"
        ' printf("Loop\\tTime(sec)\\tChecksum\\n");\n'
        " if(argc>1){\n"
        " for(int i=0;tbl[i].name;i++) if(!strcmp(tbl[i].name,argv[1]))\n"
        " time_function(tbl[i].fn,NULL);\n"
        " }else{ for(int i=0;tbl[i].name;i++) time_function(tbl[i].fn,NULL);} "
        "return 0;}\n")
    Path("custom_tsvc.c").write_text("".join(out), encoding="utf-8")
    console.print("[green]✔ custom_tsvc.c 生成完毕[/green]")


def _build_variants(args, include_flags, sources, build_dir):
    """Compile and link custom_novec / custom_vec; return their paths.

    Object files go into *build_dir*. Each object gets an explicit tag in its
    name so the vec and novec objects can never overwrite one another.
    """
    def cc_compile(src, extra_flags, tag):
        obj = build_dir / f"{Path(src).stem}_{tag}.o"
        # Pass plain strings so the command can be echoed in debug mode.
        run(args.cc.split() + args.cflags.split() + include_flags + extra_flags
            + ["-c", str(src), "-o", str(obj)])
        return str(obj)

    common_objs = [cc_compile(src, [], "common") for src in sources]
    obj_vec = cc_compile("custom_tsvc.c", args.vecflags.split(), "vec")
    obj_novec = cc_compile("custom_tsvc.c", args.novecflags.split(), "novec")
    exe_vec = Path("custom_vec")
    exe_novec = Path("custom_novec")
    run(args.cc.split() + common_objs + [obj_vec] + ["-lm", "-o", str(exe_vec)])
    run(args.cc.split() + common_objs + [obj_novec] + ["-lm", "-o", str(exe_novec)])
    return exe_novec, exe_vec


def _benchmark(args, kernels, exe_novec, exe_vec):
    """Run hyperfine once per kernel; return (kernel, novec, vec, speed-up) rows."""
    rows = []
    for k in kernels:
        jfile = f"hf_{k}_{datetime.now():%H%M%S}.json"
        cmd = ["hyperfine",
               f"--warmup={args.warmup}",
               f"--max-runs={args.max_runs}",
               f"--time-unit={args.time_unit}",
               "--export-json", jfile,
               f"./{exe_novec.name} {k}",
               f"./{exe_vec.name} {k}"]
        run(cmd)
        data = json.loads(Path(jfile).read_text(encoding="utf-8"))
        # Results follow the order of the commands given to hyperfine.
        novec_mean = data["results"][0]["mean"]
        vec_mean = data["results"][1]["mean"]
        speedup = novec_mean / vec_mean if vec_mean else float('inf')
        rows.append((k, novec_mean, vec_mean, speedup))
    return rows


def _print_table(rows):
    """Render the per-kernel timing rows as a Rich table."""
    tbl = Table(title="Per-Kernel speed-up", header_style="bold magenta")
    tbl.add_column("Kernel")
    tbl.add_column("No-vec (s)")
    tbl.add_column("Vec (s)")
    tbl.add_column("Speed-up ×", style="green")
    for k, nv, v, s in rows:
        tbl.add_row(k, f"{nv:.6f}", f"{v:.6f}", f"{s:0.2f}")
    console.print(tbl)


def main():
    """Entry point: extract TSVC kernels, build vec/novec binaries, benchmark.

    With no kernel names, the whole tsvc.c is copied to custom_tsvc.c, built
    once and run. Otherwise the named kernels are extracted into
    custom_tsvc.c, compiled twice (with --vecflags and with --novecflags),
    and each kernel is timed with hyperfine; the means and their ratio are
    printed as a table.

    Exits with status 1 when tsvc.c or a requested function cannot be found.
    ``subprocess.CalledProcessError`` from a failing compiler, binary or
    hyperfine invocation propagates to the caller.
    """
    args = _parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO,
                        format="%(levelname)s: %(message)s")

    # Drop binaries from a previous run so stale ones are never benchmarked.
    for stale in ("custom_novec", "custom_vec"):
        Path(stale).unlink(missing_ok=True)

    # 1. Locate tsvc.c and the sources that must be compiled with it.
    tsvc_path = _locate_tsvc(args.tsvc)
    include_flags, sources = _support_sources(tsvc_path)

    # Without kernel names, build and run the complete tsvc.c instead.
    if not args.kernels:
        _run_full_suite(args, tsvc_path, include_flags, sources)
        return

    # A repeated name would emit the same C function twice and fail to
    # compile, so keep only the first occurrence (order preserved).
    kernels = list(dict.fromkeys(args.kernels))

    console.rule("[bold green]TSVC Extract & Build")
    console.print(f"[bold]源文件:[/bold] {tsvc_path}")
    console.print(f"[bold]内核:[/bold] {', '.join(kernels)}")

    # 2. Generate custom_tsvc.c for the selected kernels.
    _generate_custom_source(tsvc_path, kernels)

    # 3. Compile the vec / novec variants. The executables are written to the
    #    current directory, so the object directory can be removed right away.
    with tempfile.TemporaryDirectory(prefix="tsvc_build_") as tmp:
        exe_novec, exe_vec = _build_variants(args, include_flags, sources, Path(tmp))
    console.print(Panel.fit("编译完成", border_style="green"))

    # 4. Benchmark every kernel.
    console.rule("[bold blue]Hyperfine Per-Kernel")
    rows = _benchmark(args, kernels, exe_novec, exe_vec)

    # 5. Print the results table.
    _print_table(rows)
    console.print("[bold green]✓ All Done.[/bold green]")
# ────────────────────────────────────────────────────────
# Run the benchmark driver only when executed as a script, not on import.
if __name__ == "__main__":
    main()
评论