[libFuzzer] Remove the data-flow-trace (DFT) Python scripts; their functionality is now part of libFuzzer proper. Also, write functions.txt to disk only if the file is missing or empty.

llvm-svn: 361452
Kostya Serebryany 2019-05-23 01:03:42 +00:00
parent 772176dad1
commit eac9a7830b
4 changed files with 8 additions and 156 deletions

@@ -310,11 +310,14 @@ int CollectDataFlow(const std::string &DFTBinary, const std::string &DirPath,
       OF << C << std::endl;
   }
   RemoveFile(Temp);
-  // Write functions.txt.
-  Command Cmd;
-  Cmd.addArgument(DFTBinary);
-  Cmd.setOutputFile(DirPlusFile(DirPath, "functions.txt"));
-  ExecuteCommand(Cmd);
+  // Write functions.txt if it's currently empty or doesn't exist.
+  auto FunctionsTxtPath = DirPlusFile(DirPath, "functions.txt");
+  if (FileToString(FunctionsTxtPath).empty()) {
+    Command Cmd;
+    Cmd.addArgument(DFTBinary);
+    Cmd.setOutputFile(FunctionsTxtPath);
+    ExecuteCommand(Cmd);
+  }
   return 0;
 }
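
The guard added above amounts to: regenerate functions.txt only when the file is missing or empty, by running the DFT binary with no arguments and capturing its output (that is also how the removed script produced the file). A minimal standalone Python sketch of that logic, with a hypothetical DFT binary path and output directory; the real code uses libFuzzer's FileToString and Command helpers shown above:

import os
import subprocess

def write_functions_txt(dft_binary, dir_path):
  # Only (re)generate functions.txt when it is missing or empty,
  # mirroring the FileToString(FunctionsTxtPath).empty() check above.
  functions_txt = os.path.join(dir_path, "functions.txt")
  if os.path.exists(functions_txt) and os.path.getsize(functions_txt) > 0:
    return
  # Running the DFT binary with no arguments lists the instrumented
  # functions on stdout; redirect that into functions.txt.
  with open(functions_txt, "w") as out:
    subprocess.call([dft_binary], stdout=out)

# Example (hypothetical paths):
# write_functions_txt("./my_fuzzer_dft", "./dft_dir")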

@@ -1,80 +0,0 @@
#!/usr/bin/env python3
#===- lib/fuzzer/scripts/collect_data_flow.py ------------------------------===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#
# Runs the data-flow tracer several times on the same input in order to collect
# the complete trace for all input bytes (running it on all bytes at once
# may fail if DFSan runs out of labels).
# Usage:
#
#   # Collect dataflow for one input, store it in OUTPUT (default is stdout)
#   collect_data_flow.py BINARY INPUT [OUTPUT]
#
#   # Collect dataflow for all inputs in CORPUS_DIR, store them in OUTPUT_DIR
#   collect_data_flow.py BINARY CORPUS_DIR OUTPUT_DIR
#===------------------------------------------------------------------------===#
import atexit
import hashlib
import sys
import os
import subprocess
import tempfile
import shutil

tmpdir = ""

def cleanup(d):
  print("removing: %s" % d)
  shutil.rmtree(d)

def collect_dataflow_for_corpus(self, exe, corpus_dir, output_dir):
  print("Collecting dataflow for corpus: %s output_dir: %s" % (corpus_dir,
                                                               output_dir))
  assert not os.path.exists(output_dir)
  os.mkdir(output_dir)
  for root, dirs, files in os.walk(corpus_dir):
    for f in files:
      path = os.path.join(root, f)
      with open(path, 'rb') as fh:
        data = fh.read()
      sha1 = hashlib.sha1(data).hexdigest()
      output = os.path.join(output_dir, sha1)
      subprocess.call([self, exe, path, output])
  functions_txt = open(os.path.join(output_dir, "functions.txt"), "w")
  subprocess.call([exe], stdout=functions_txt)

def main(argv):
  exe = argv[1]
  inp = argv[2]
  if os.path.isdir(inp):
    return collect_dataflow_for_corpus(argv[0], exe, inp, argv[3])
  size = os.path.getsize(inp)
  q = [[0, size]]
  tmpdir = tempfile.mkdtemp(prefix="libfuzzer-tmp-")
  atexit.register(cleanup, tmpdir)
  print("tmpdir: ", tmpdir)
  outputs = []
  while len(q):
    r = q.pop()
    print("******* Trying: ", r)
    tmpfile = os.path.join(tmpdir, str(r[0]) + "-" + str(r[1]))
    ret = subprocess.call([exe, str(r[0]), str(r[1]), inp, tmpfile])
    if ret and r[1] - r[0] >= 2:
      q.append([r[0], (r[1] + r[0]) // 2])
      q.append([(r[1] + r[0]) // 2, r[1]])
    else:
      outputs.append(tmpfile)
      print("******* Success: ", r)
  f = sys.stdout
  if len(argv) >= 4:
    f = open(argv[3], "w")
  merge = os.path.join(os.path.dirname(argv[0]), "merge_data_flow.py")
  subprocess.call([merge] + outputs, stdout=f)

if __name__ == '__main__':
  main(sys.argv)

@@ -1,58 +0,0 @@
#!/usr/bin/env python3
#===- lib/fuzzer/scripts/merge_data_flow.py ------------------------------===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#
# Merge several data flow traces into one.
# Usage:
#   merge_data_flow.py trace1 trace2 ... > result
#===------------------------------------------------------------------------===#
import sys
import fileinput
from array import array

def Merge(a, b):
  res = array('b')
  for i in range(0, len(a)):
    res.append(ord('1' if a[i] == '1' or b[i] == '1' else '0'))
  return res.tostring().decode('utf-8')

def main(argv):
  D = {}
  C = {}
  # read the lines.
  for line in fileinput.input():
    # collect the coverage.
    if line.startswith('C'):
      COV = line.strip().split(' ')
      F = COV[0];
      if not F in C:
        C[F] = {0}
      for B in COV[1:]:
        C[F].add(int(B))
      continue
    # collect the data flow trace.
    [F,BV] = line.strip().split(' ')
    if F in D:
      D[F] = Merge(D[F], BV)
    else:
      D[F] = BV;
  # print the combined data flow trace.
  for F in D.keys():
    if isinstance(D[F], str):
      value = D[F]
    else:
      value = D[F].decode('utf-8')
    print("%s %s" % (F, value))
  # print the combined coverage
  for F in C.keys():
    print("%s" % F, end="")
    for B in list(C[F])[1:]:
      print(" %s" % B, end="")
    print()

if __name__ == '__main__':
  main(sys.argv)
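
As a concrete illustration of what the deleted script merges: each per-function trace line carries a '0'/'1' vector with one character per input byte, and merging two traces of the same function is a position-wise OR. A tiny self-contained sketch, with made-up example vectors:

# Position-wise OR of two '0'/'1' bit-vector strings, as in Merge() above.
def merge_bitvectors(a, b):
  return ''.join('1' if x == '1' or y == '1' else '0' for x, y in zip(a, b))

# Two traces of the same function over a 4-byte input.
assert merge_bitvectors("0100", "0011") == "0111"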

@@ -69,10 +69,6 @@ RUN: rm -f %t-merge-*
RUN: %t-ThreeFunctionsTestDF 0 2 %t/IN/FUZZMU > %t-merge-1
RUN: %t-ThreeFunctionsTestDF 2 4 %t/IN/FUZZMU > %t-merge-2
RUN: %t-ThreeFunctionsTestDF 4 6 %t/IN/FUZZMU > %t-merge-3
RUN: %libfuzzer_src/scripts/merge_data_flow.py %t-merge-* | sort | FileCheck %s --check-prefix=IN_FUZZMU
# Test collect_data_flow
RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ThreeFunctionsTestDF %t/IN/FUZZMU | sort | FileCheck %s --check-prefix=IN_FUZZMU
# Test libFuzzer's built in DFT collection.
RUN: rm -rf %t-DFT
@@ -90,17 +86,10 @@ OUT_OF_LABELS: ==FATAL: DataFlowSanitizer: out of labels
RUN: %t-ExplodeDFSanLabelsTestDF 0 2 %t/IN/1234567890123456
RUN: %t-ExplodeDFSanLabelsTestDF 2 4 %t/IN/1234567890123456
RUN: %t-ExplodeDFSanLabelsTestDF 4 6 %t/IN/1234567890123456
# Or we can use collect_data_flow
RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ExplodeDFSanLabelsTestDF %t/IN/1234567890123456
# Test libFuzzer's builtin collect_data_flow.
RUN: %t-ThreeFunctionsTest -collect_data_flow=%t-ThreeFunctionsTestDF -data_flow_trace=%t-DFT %t/IN/1234567890123456
# Test that we can run collect_data_flow on the entire corpus dir
RUN: rm -rf %t/OUT
RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ThreeFunctionsTestDF %t/IN %t/OUT
RUN: %t-ThreeFunctionsTest -data_flow_trace=%t/OUT -runs=0 -focus_function=Func2 2>&1 | FileCheck %s --check-prefix=USE_DATA_FLOW_TRACE
RUN: rm -rf %t/OUT
RUN: %t-ThreeFunctionsTest -collect_data_flow=%t-ThreeFunctionsTestDF -data_flow_trace=%t/OUT %t/IN
RUN: %t-ThreeFunctionsTest -data_flow_trace=%t/OUT -runs=0 -focus_function=Func2 2>&1 | FileCheck %s --check-prefix=USE_DATA_FLOW_TRACE
@@ -115,8 +104,6 @@ USE_DATA_FLOW_TRACE: INFO: Focus function is set to 'Func2'
# Test that we can run collect_data_flow on a long input (>2**16 bytes)
RUN: printf "%0.sA" {1..150001} > %t/IN/very_long_input
RUN: rm -rf %t/OUT
RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ThreeFunctionsTestDF %t/IN/very_long_input %t/OUT | FileCheck %s --check-prefix=COLLECT_TRACE_FOR_LONG_INPUT
RUN: rm -rf %t/OUT
RUN: %t-ThreeFunctionsTest -collect_data_flow=%t-ThreeFunctionsTestDF -data_flow_trace=%t/OUT %t/IN/very_long_input 2>&1 | FileCheck %s --check-prefix=COLLECT_TRACE_FOR_LONG_INPUT
RUN: rm %t/IN/very_long_input
COLLECT_TRACE_FOR_LONG_INPUT: ******* Trying:{{[ ]+}}[0, 150001
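
For reference, the test lines above exercise the built-in replacement for the removed scripts: the regular fuzzer binary collects the trace itself via -collect_data_flow= and -data_flow_trace=, then consumes it with -focus_function=. A minimal Python sketch of the same two steps driven from a script, in the spirit of the deleted helpers; the binary and directory names here are hypothetical:

import subprocess

# Step 1: collect a data-flow trace for a corpus directory. The plain
# fuzzer binary drives the DFSan-instrumented DFT binary and writes the
# trace (plus functions.txt) into the trace directory.
subprocess.call(["./my_fuzzer",
                 "-collect_data_flow=./my_fuzzer_dft",
                 "-data_flow_trace=./dft_dir",
                 "./corpus_dir"])

# Step 2: use the trace to focus on one function (with -runs=0, as in the
# test above, this only reports the chosen focus function).
subprocess.call(["./my_fuzzer",
                 "-data_flow_trace=./dft_dir",
                 "-focus_function=MyFunction",
                 "-runs=0"])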