From 42cc44fbc8dc6c66d3e10a2d904133ffcdde3762 Mon Sep 17 00:00:00 2001 From: Peter Waller Date: Wed, 26 Feb 2020 19:50:43 +0000 Subject: [PATCH] [flang] Add script to flatten git history for llvm monorepo submission (flang-compiler/f18#854) This script, when run on a checkout of the f18 repository, takes the current origin/master and makes a branch called "new" with a rewritten history; The "new" branch has a flat git history (that is, a series of commits with only one parent). Flattening is done for merge commits by taking the content of the commit as it is at the merge commit. Original-commit: flang-compiler/f18@d9871fa9eb2304c4761a4a818187553396bb8924 Reviewed-on: https://github.com/flang-compiler/f18/pull/854 --- flang/flatten.cpp | 916 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 916 insertions(+) create mode 100644 flang/flatten.cpp diff --git a/flang/flatten.cpp b/flang/flatten.cpp new file mode 100644 index 000000000000..302bebaa0bad --- /dev/null +++ b/flang/flatten.cpp @@ -0,0 +1,916 @@ +// Copyright (c) 2019, Arm Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Reminder: no warranty with this program. I recommend using a fresh checkout. 
+
+// Compile with:
+//   clang++ -Wall -Werror -O2 flatten.cpp -lgit2
+// Run with f18 in PWD or argv[1]:
+//   time ./a.out ~/.local/src/github.com/flang-compiler/f18/
+
+// To get a rewritten history, do this:
+//
+//   sudo apt install -y libgit2-dev # or equivalent
+//   git clone https://github.com/flang-compiler/f18
+//   cd f18
+//   git remote add llvm-project https://github.com/llvm/llvm-project
+//   git fetch llvm-project
+//   clang++ -Wall -Werror -O2 flatten.cpp -lgit2
+//   ./a.out
+//
+// Issue references (#123) in commit messages are rewritten by default; compile
+// with -DNO_REPLACE_REFERENCES to switch that off.
+
+// Inputs:
+// * a ref called origin/master, representing f18 history
+// * a ref called llvm-project/master, representing llvm upstream (required:
+//   the program exits if it is missing)
+// * (optionally) branches called rebase-{12 digit merge sha}, representing the
+//   manual rebase of tricky cases.
+//
+// Outputs:
+// * A branch called rewritten-history-v4, with a linearized f18 history.
+// * A branch called rewritten-history-v4-llvm-project-merge, a merge commit
+//   whose first parent is llvm-project/master and whose second parent is the
+//   linearized history with the project renamed under /flang/.
+//
+// This program is meant to be idempotent and should not write to the working
+// directory; it simply takes refs as input and produces them as output.
+
+// Key concepts:
+//
+// * The checkout that git gives you for a commit is called a "tree", which is
+//   determined by a recursive checksum of the directory structure. If two
+//   commits have the same tree ("treesame"), then they are by definition
+//   equivalent when you check them out.
+//
+// * Lineage of the "master" branch is taken by following the first parents of
+//   each commit. To see this in git log, run `git log --first-parent`. This
+//   effectively ignores the second-parent history (i.e. commits that happened
+//   on branches).
+//
+// * By construction it is arranged that the trees of the first-parent history
+//   are preserved. This means "the code on the master branch is the same before
+//   and after rewrite".
+// +// * Preserving the non-first-parent commits is trickier, and requires a rebase. +// +// * If nothing changed on the master branch during a feature branch, a rebase +// will not change the trees of the feature branch, so trees of those commits +// will still be the same. It's like rewriting the merge as a fast-forward. +// +// * However, if something happened on the master branch during the feature +// branch, then a rebase *must* create new trees. This implies code which +// might not build. As an example, imagine a case where a class is renamed on +// master, and the old name is used in the feature branch (until it's fixed at +// some point by the time it is merged). +// +// * By the end of the rebase, we assert that the trees are the same as those +// merged into master. So code in the middle of the rebased feature branch may +// not build, but at least the overall result of the feature branch will be as +// good as master was. Thankfully this is relatively rare. +// +// * If a branch exists called rebase-{sha of merge commit}, that branch is +// substituted in place of the merge commit. This allows manually rebasing +// tricky merges. +// +// * For the non-treesame, we can take a second-order diff (diff-of-diff) +// comparing those commits before and after rewrite, and ensure that only line +// numbers and context changed. This is almost totally the case. + +// Using the following script, it is possible to see whether non-TREESAME +// patches still have the same diff, modulo blank lines, by taking a +// second-order diff. 
+//
+//   git log --grep=TREESAME --invert-grep --format="%h %(trailers:key=Original-commit)" rewritten-history-v4 |
+//     sed -n 's|Original-commit: flang-compiler/f18@||p' |
+//     while read NEW ORIG
+//     do
+//       echo ORIG NEW: $ORIG $NEW
+//       git show $ORIG > a
+//       git show $NEW > b
+//       sed -r -i \
+//         -e 's/@@ .* @@/@@ Numbers @@/g' \
+//         -e '/^(commit|index) .*/d' \
+//         -e '/Original-commit.*/d' \
+//         -e '/^\s$/d' \
+//         a b
+//       git diff --color --no-index a b
+//     done |& less -SR
+
+// NOTE(review): the header names on the #include lines were missing from the
+// copy of this patch under review. The list below is reconstructed from the
+// identifiers this file uses; confirm it against the original.
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef NO_REPLACE_REFERENCES
+#include <regex>
+#endif
+
+#include <map>
+#include <sstream>
+
+#include <git2.h>
+
+// check terminates the program if a libgit2 call failed.
+//
+//   error    return code of the call; zero means success and check() returns.
+//   message  description of the failing operation, printed first.
+//   extra    optional detail printed in quotes after message; may be NULL.
+//
+// On failure one line is written to stderr containing the numeric code and,
+// when libgit2 recorded one, the text of the last error. The process then
+// exits with status 1.
+void check(int error, const char *message, const char *extra) {
+  if (error == 0)
+    return;
+
+  const char *detail = "";
+  const char *separator = "";
+  const git_error *last = giterr_last();
+  if (last != NULL && last->message != NULL) {
+    detail = last->message;
+    separator = " - ";
+  }
+
+  if (extra != NULL)
+    fprintf(stderr, "%s '%s' [%d]%s%s\n", message, extra, error, separator, detail);
+  else
+    fprintf(stderr, "%s [%d]%s%s\n", message, error, separator, detail);
+
+  exit(1);
+}
+
+// Running totals reported by main(): merges whose rebase hit a conflict, and
+// the branch commits that were squashed as a result.
+int n_conflicts = 0, n_discards = 0;
+
+// rewrite_issue_references copies src to dst, turning bare issue references
+// ("#123") into "flang-compiler/f18#123".
+//
+// dst must be large enough for the expanded text: every rewritten reference
+// adds 18 bytes. The result is NUL-terminated and the return value points at
+// that terminator. When NO_REPLACE_REFERENCES is defined the text is copied
+// unchanged.
+char *rewrite_issue_references(char *dst, const char *src) {
+#ifndef NO_REPLACE_REFERENCES
+  // Compiled once; building a std::regex for every commit message is wasteful.
+  static const std::regex issue_ref_re("(^|\\b[^a-zA-Z0-9]+)(#[0-9]+)\\b");
+
+  const char *src_end = src + strlen(src);
+  char *new_end = std::regex_replace(dst, src, src_end, issue_ref_re,
+                                     "$1flang-compiler/f18$2");
+  *new_end = '\0';
+  return new_end;
+#else
+  return stpcpy(dst, src);
+#endif
+}
+
+// test_rewrite_issue_references runs some test cases through the string
+// replacement machinery and aborts if anything is awry. It does nothing when
+// NO_REPLACE_REFERENCES is defined.
+void test_rewrite_issue_references() {
+#ifndef NO_REPLACE_REFERENCES
+  static const struct { const char *input, *want; } tests[] = {
+    {"foo#123", "foo#123"},
+    {"Test #123bar", "Test #123bar"},
+    // Special case.
+    // {"commit message #123", "commit message #123"},
+
+    {"#123", "flang-compiler/f18#123"},
+    {"Test #123", "Test flang-compiler/f18#123"},
+    {"Test #123", "Test flang-compiler/f18#123"},
+    {"Test (#123)", "Test (flang-compiler/f18#123)"},
+  };
+
+  bool fail = false;
+  for (const auto &test : tests) {
+    char out[1024];
+    const char *new_end = rewrite_issue_references(out, test.input);
+    if (strcmp(out, test.want) != 0) {
+      fprintf(stderr, "Got : %s\n", out);
+      fprintf(stderr, "Want: %s\n", test.want);
+      fail = true;
+    }
+    // The returned pointer must sit on the terminating NUL.
+    if (new_end != out + strlen(out))
+      abort();
+  }
+  if (fail)
+    abort();
+#endif
+}
+
+static const char mergemsg_prefix[] = "Merge pull request #";
+
+// has_merge_pr_prefix returns true if the commit message begins "Merge pull
+// request #".
+//
+// The whole prefix must be present. A message shorter than the prefix (the
+// empty string included) is not a match, so callers may safely index just past
+// the prefix whenever this returns true.
+bool has_merge_pr_prefix(const char* msg) {
+  // strncmp stops at msg's terminating NUL, so this never reads past the end
+  // of a short message.
+  return strncmp(mergemsg_prefix, msg, sizeof(mergemsg_prefix) - 1) == 0;
+}
+
+// append_n copies at most n bytes of src to dst without writing at or beyond
+// dst_end, NUL-terminates the result and returns a pointer to the terminator.
+// dst must be strictly below dst_end.
+static char *append_n(char *dst, const char *dst_end, const char *src, size_t n) {
+  const size_t room = (size_t)(dst_end - dst) - 1;
+  if (n > room)
+    n = room;
+  memcpy(dst, src, n);
+  dst[n] = '\0';
+  return dst + n;
+}
+
+// append_str is append_n for a whole NUL-terminated string.
+static char *append_str(char *dst, const char *dst_end, const char *src) {
+  return append_n(dst, dst_end, src, strlen(src));
+}
+
+// append_original_commit_trailer appends a blank line followed by
+// "Original-commit: flang-compiler/f18@<40 hex digits>\n".
+static char *append_original_commit_trailer(char *dst, const char *dst_end, const git_oid *id) {
+  char hex[GIT_OID_HEXSZ+1] = {};
+  git_oid_fmt(hex, id);
+  dst = append_str(dst, dst_end, "\n\nOriginal-commit: flang-compiler/f18@");
+  dst = append_str(dst, dst_end, hex);
+  return append_str(dst, dst_end, "\n");
+}
+
+// tweak_commit_message builds the commit message used in the rewritten
+// history:
+//
+//   * "[Flang] " is prepended.
+//   * Issue references are rewritten (see rewrite_issue_references).
+//   * Trailers are appended: "Original-commit" always, "Reviewed-on" when a
+//     pull request number can be found, and "Tree-same-pre-rewrite: false"
+//     when new_tree differs from orig_merge's tree.
+//
+//   orig_commit  commit whose message is rewritten and whose id is recorded.
+//   orig_merge   commit that brought orig_commit onto master. The PR number is
+//                taken from its "Merge pull request #N" message or from a
+//                "(#N)" suffix on its summary line. NULL means orig_commit.
+//   new_tree     tree the rewritten commit will have.
+//
+// Returns a malloc'ed string; the caller must free() it.
+char *tweak_commit_message(git_commit *orig_commit, git_commit *orig_merge, const git_oid *new_tree) {
+  // main() passes the commit itself as its own merge for first-parent commits;
+  // treat a missing merge the same way instead of dereferencing NULL below.
+  if (orig_merge == NULL)
+    orig_merge = orig_commit;
+
+  const char *orig_msg = git_commit_message_raw(orig_commit);
+  const char *prnum = NULL, *prnum_end = NULL;
+
+  // "Merge pull request #123 from ...": the number is the run of digits right
+  // after the prefix. The prefix test and the offset use the same string, and
+  // a prefix with no digits after it yields no PR number.
+  const char *merge_msg = git_commit_message(orig_merge);
+  if (merge_msg != NULL && has_merge_pr_prefix(merge_msg)) {
+    const char *digits = merge_msg + sizeof(mergemsg_prefix) - 1;
+    const size_t n_digits = strspn(digits, "0123456789");
+    if (n_digits > 0) {
+      prnum = digits;
+      prnum_end = digits + n_digits;
+    }
+  }
+
+  #ifndef NO_REPLACE_REFERENCES
+  // Match "foo bar baz (#123)", which is the convention for "Squash" commit
+  // merges on GitHub. When it matches it takes precedence over the prefix form.
+  static const std::regex prnum_re("^(.*\\(#)([0-9]+)\\)$");
+  const char *summary = git_commit_summary(orig_merge);
+  std::cmatch match;
+  if (summary != NULL && std::regex_match(summary, match, prnum_re)) {
+    prnum = summary + match.length(1);
+    prnum_end = prnum + match.length(2);
+  }
+  #endif
+
+  // Size the buffer for the worst case. Every rewritten reference consumes at
+  // least two input bytes and adds 18, so ten times the input length covers
+  // the body; the tag and trailers fit in the fixed remainder.
+  const size_t size = 10 * strlen(orig_msg) + 1024;
+  char *newmsg_start = (char*)malloc(size);
+  if (newmsg_start == NULL) {
+    fprintf(stderr, "tweak_commit_message: out of memory\n");
+    exit(1);
+  }
+  const char *newmsg_end = newmsg_start + size;
+  char *newmsg = newmsg_start; // Pointer tracks the current write position.
+  newmsg[0] = 0;
+
+  // Set to leave message unmodified except for Original-commit, useful for
+  // verifying second-order diffs.
+  const bool use_original_message = false;
+  if (use_original_message) {
+    // These are here to indicate if the checkouts are the same as a commit and/or a merge.
+    if (git_oid_equal(git_commit_tree_id(orig_merge), new_tree)) {
+      newmsg = append_str(newmsg, newmsg_end, "[TREESAME master] ");
+    } else if (git_oid_equal(git_commit_tree_id(orig_commit), new_tree)) {
+      newmsg = append_str(newmsg, newmsg_end, "[TREESAME commit] ");
+    }
+
+    newmsg = append_str(newmsg, newmsg_end, orig_msg);
+    append_original_commit_trailer(newmsg, newmsg_end, git_commit_id(orig_commit));
+    return newmsg_start;
+  }
+
+  // Prepend [Flang] tag.
+  newmsg = append_str(newmsg, newmsg_end, "[Flang] ");
+
+  // Paste in the original message, rewriting references #123 => flang-compiler/f18#123
+  newmsg = rewrite_issue_references(newmsg, orig_msg);
+
+  // If there are newlines at the end, remove them; the Original-commit trailer
+  // always starts with its own blank line. This ensures consistent spacing
+  // before the trailers.
+  while (newmsg > newmsg_start && newmsg[-1] == '\n') {
+    newmsg[-1] = 0;
+    newmsg--;
+  }
+
+  // From here on out, append trailer headers.
+  newmsg = append_original_commit_trailer(newmsg, newmsg_end, git_commit_id(orig_commit));
+
+  if (prnum != NULL) {
+    newmsg = append_str(newmsg, newmsg_end, "Reviewed-on: https://github.com/flang-compiler/f18/pull/");
+    newmsg = append_n(newmsg, newmsg_end, prnum, (size_t)(prnum_end - prnum));
+    newmsg = append_str(newmsg, newmsg_end, "\n");
+  }
+
+  if (!git_oid_equal(git_commit_tree_id(orig_merge), new_tree)) {
+    // If this trailer is present, the tree differs from the one recorded in
+    // the original history, i.e. the patch was rebased. If it is absent, the
+    // tree is identical before and after the rewrite.
+    newmsg = append_str(newmsg, newmsg_end, "Tree-same-pre-rewrite: false\n");
+  }
+
+  return newmsg_start;
+}
+
+// insert_flang_directory sets new_root to a newly created tree with one entry
+// in it: /flang/, which points at orig_root. Any libgit2 failure is fatal.
+void insert_flang_directory(git_repository *repo, git_oid *new_root, const git_oid *orig_root) {
+  git_treebuilder *tb;
+  check(git_treebuilder_new(&tb, repo, NULL), "git_treebuilder_new", NULL);
+  check(git_treebuilder_insert(NULL, tb, "flang", orig_root, GIT_FILEMODE_TREE), "git_treebuilder_insert", NULL);
+  check(git_treebuilder_write(new_root, tb), "git_treebuilder_write", NULL);
+  git_treebuilder_free(tb);
+}
+
+// count_branch_commits counts the number of on-branch (non-merge) commits in
+// the given merge.
+int count_branch_commits(git_commit *merge) {
+  git_revwalk *walk;
+  check(git_revwalk_new(&walk, git_commit_owner(merge)), "git_revwalk_new", NULL);
+  // Everything reachable from the second parent but not from the first.
+  check(git_revwalk_hide(walk, git_commit_parent_id(merge, 0)), "git_revwalk_hide", NULL);
+  check(git_revwalk_push(walk, git_commit_parent_id(merge, 1)), "git_revwalk_push", NULL);
+
+  int count = 0;
+  git_oid ignored;
+  while (git_revwalk_next(&ignored, walk) == 0)
+    count++;
+
+  git_revwalk_free(walk);
+  return count;
+}
+
+// tree_for_commit grabs the git_oid pointing to the tree for a given commit_id.
+// A failed commit lookup is fatal.
+git_oid tree_for_commit(git_repository *repo, const git_oid *commit_id) {
+  git_commit *commit;
+  check(git_commit_lookup(&commit, repo, commit_id), "git_commit_lookup", NULL);
+  git_oid tree_id;
+  git_oid_cpy(&tree_id, git_commit_tree_id(commit));
+  git_commit_free(commit);
+  return tree_id;
+}
+
+// generate_authortime_to_commit_map walks the commits on the second-parent
+// history of the given `merge`, computing a mapping from the author time to the
+// original commit id. Since this is scoped to feature-branch commits, there are
+// not likely to be collisions; if two commits do share an author time the
+// program aborts.
+//
+// NOTE(review): the template arguments of std::map were missing from the copy
+// under review. <git_time_t, git_oid> matches the lookup in try_rebase and
+// avoids narrowing the timestamp to int; confirm against the original.
+void generate_authortime_to_commit_map(std::map<git_time_t, git_oid> &authortime_to_commit, git_commit *merge) {
+  git_repository *repo = git_commit_owner(merge);
+  git_revwalk *walk;
+  check(git_revwalk_new(&walk, repo), "git_revwalk_new", NULL);
+  check(git_revwalk_hide(walk, git_commit_parent_id(merge, 0)), "git_revwalk_hide", NULL);
+  check(git_revwalk_push(walk, git_commit_parent_id(merge, 1)), "git_revwalk_push", NULL);
+
+  // Only walk first parent history on the grounds that most of those which
+  // introduce commits not-already-on-mainline are accidental merges of
+  // rebases, duplicating patches in history. Where patches are missed, they
+  // won't have an entry in the authortime_to_commit.
+  git_revwalk_simplify_first_parent(walk);
+
+  git_oid commit_id;
+  while (git_revwalk_next(&commit_id, walk) == 0) {
+    git_commit *c;
+    check(git_commit_lookup(&c, repo, &commit_id), "git_commit_lookup", NULL);
+    const git_time_t when = git_commit_author(c)->when.time;
+
+    std::map<git_time_t, git_oid>::const_iterator seen = authortime_to_commit.find(when);
+    if (seen != authortime_to_commit.end()) {
+      char buf[GIT_OID_HEXSZ+1] = {};
+      git_oid_nfmt(buf, 12, &commit_id);
+      char buf1[GIT_OID_HEXSZ+1] = {};
+      git_oid_nfmt(buf1, 12, git_commit_id(merge));
+      char buf2[GIT_OID_HEXSZ+1] = {};
+      git_oid_nfmt(buf2, 12, &seen->second);
+      printf("Hit duplicate commit considering %s "
+             "(merge %s, duplicate %s)\n", buf, buf1, buf2);
+      // Duplicate author times. Need another strategy.
+      abort();
+    }
+    authortime_to_commit[when] = commit_id;
+    git_commit_free(c);
+  }
+
+  git_revwalk_free(walk);
+}
+
+// try_rebase attempts to rebase orig_merge onto the new history.
+// It returns true if the rebase succeeds without conflicts, and false otherwise.
+//
+// new_head points at the pointer to the current tip of the rewritten history,
+// which must not be NULL. On success, if at least one patch was committed, the
+// oid it points at is overwritten with the tip of the rebase. On conflict
+// nothing is updated and the global counters n_conflicts and n_discards are
+// increased.
+bool try_rebase(git_oid **new_head, git_commit *orig_merge) {
+  git_repository *repo = git_commit_owner(orig_merge);
+  const git_oid *p0 = git_commit_parent_id(orig_merge, 0);
+  const git_oid *p1 = git_commit_parent_id(orig_merge, 1);
+
+  git_annotated_commit *p0a, *p1a, *new_heada;
+  check(git_annotated_commit_lookup(&p0a, repo, p0), "git_annotated_commit_lookup p0", NULL);
+  check(git_annotated_commit_lookup(&p1a, repo, p1), "git_annotated_commit_lookup p1", NULL);
+  check(git_annotated_commit_lookup(&new_heada, repo, *new_head), "git_annotated_commit_lookup new_head", NULL);
+
+  // The 12 placeholder digits are overwritten with the abbreviated merge id.
+  char branch_name[] = "refs/heads/rebase-0123456789ab";
+  git_oid_nfmt(branch_name+sizeof("refs/heads/rebase-")-1, 12, git_commit_id(orig_merge));
+
+  // Look for a branch called rebase-[12 digit SHA]. If it exists and is
+  // tree-same to the merge, treat it as the branch we're trying to rebase.
+  bool using_manual_rebase = false;
+  git_reference *manual_rebase = NULL;
+  const int lookup_err = git_reference_lookup(&manual_rebase, repo, branch_name);
+  if (lookup_err == 0) {
+    const git_oid *target = git_reference_target(manual_rebase);
+    const git_oid manual_tree = tree_for_commit(repo, target);
+
+    if (git_oid_equal(target, git_commit_id(orig_merge))) {
+      printf("Skip %s because it's pointing at the merge.\n", branch_name);
+    } else if (!git_oid_equal(&manual_tree, git_commit_tree_id(orig_merge))) {
+      printf("Skip %s because the tip of the rebase is not "
+             "treesame to the merge commit\n", branch_name);
+    } else {
+      printf("Using manual rebase branch %s\n", branch_name);
+      using_manual_rebase = true;
+
+      // Update p1a, the commits being rebased, to point at the branch.
+      // Then rebase, and this shouldn't result in any conflicts.
+      git_annotated_commit_free(p1a);
+      check(git_annotated_commit_lookup(&p1a, repo, target), "git_annotated_commit_lookup manual rebase", branch_name);
+    }
+    git_reference_free(manual_rebase);
+  } else if (lookup_err != GIT_ENOTFOUND) {
+    // A missing branch is the normal case; anything else is fatal.
+    check(lookup_err, "git_reference_lookup rebase-...", NULL);
+  }
+
+  git_rebase_options rb_opts;
+  check(git_rebase_init_options(&rb_opts, GIT_REBASE_OPTIONS_VERSION), "git_rebase_init_options", NULL);
+  rb_opts.inmemory = 1;
+  rb_opts.merge_options.flags = GIT_MERGE_FIND_RENAMES;
+  rb_opts.merge_options.rename_threshold = 50;
+
+  git_rebase *rb;
+  check(git_rebase_init(&rb, repo, p1a, p0a, new_heada, &rb_opts), "git_rebase_init", NULL);
+
+  bool is_success = true; // becomes false if conflicts encountered.
+  bool committed_at_least_one_patch = false;
+  git_oid rebase_tip_id;
+  git_oid_cpy(&rebase_tip_id, *new_head);
+
+  std::map<git_time_t, git_oid> authortime_to_commit;
+  if (using_manual_rebase) {
+    generate_authortime_to_commit_map(authortime_to_commit, orig_merge);
+  }
+
+  // Loop over each patch in the rebase, committing it.
+  git_rebase_operation *op;
+  int next_err;
+  while ((next_err = git_rebase_next(&op, rb)) == 0) {
+    git_index *idx;
+    check(git_rebase_inmemory_index(&idx, rb), "git_rebase_inmemory_index", NULL);
+    if (git_index_has_conflicts(idx)) {
+      // Conflicting case. Print a useful message.
+      char buf_patch[GIT_OID_HEXSZ+1] = {};
+      char buf_merge[GIT_OID_HEXSZ+1] = {};
+      git_oid_nfmt(buf_patch, 12, &op->id);
+      git_oid_nfmt(buf_merge, 12, git_commit_id(orig_merge));
+
+      int discarded = count_branch_commits(orig_merge);
+      printf("Conflicts encountered; patch=%s merge=%s - discarding %d commits\n", buf_patch, buf_merge, discarded);
+      printf(" M=%s; git checkout -B rebase-${M} ${M}^2; git rebase ${M}^1\n", buf_merge);
+
+      n_conflicts++;
+      n_discards += discarded;
+
+      git_index_free(idx);
+      is_success = false;
+      // If conflicts are found, abort, fall back to taking the merge
+      // commit.
+      break;
+    }
+
+    // Generate the new tree now (as opposed to within git_rebase_commit) so that it can be used for TREESAME
+    // diagnostics in the commit message. The index is released only after
+    // this last use.
+    git_oid new_tree;
+    check(git_index_write_tree_to(&new_tree, idx, repo), "git_index_write_tree_to", NULL);
+    git_index_free(idx);
+
+    git_commit *orig_commit;
+    check(git_commit_lookup(&orig_commit, repo, &op->id), "git_commit_lookup", NULL);
+
+    if (using_manual_rebase) {
+      // If in a manual rebase, need to lookup original patch.
+      // Use the author timestamp as a heuristic for patch equality.
+      const git_time_t when = git_commit_author(orig_commit)->when.time;
+      git_oid pre_rebase_commit_id = {};
+      std::map<git_time_t, git_oid>::const_iterator found = authortime_to_commit.find(when);
+      if (when == 1518039228) { // Wed Feb 7 13:33:48 2018 -0800
+        // Hack for a single special case, a commit which was merged.
+        check(git_oid_fromstr(&pre_rebase_commit_id, "044148ead21f18e16716d5bc30819525c79065d0"), "git_oid_fromstr", NULL);
+      } else if (found == authortime_to_commit.end()) {
+        char buf[GIT_OID_HEXSZ+1] = {};
+        git_oid_nfmt(buf, 12, &op->id);
+        printf("Unable to find original commit for manual "
+               "rebase: %s\n", buf);
+        git_oid_nfmt(buf, 12, git_commit_id(orig_merge));
+        printf(" Merge: %s\n", buf);
+        abort();
+      } else {
+        pre_rebase_commit_id = found->second;
+      }
+
+      // Replace orig_commit (the rebased commit in this context) with the
+      // true original commit, so that the commit cross-reference
+      // correctly reflects a commit which exists in the f18 repository.
+      git_commit_free(orig_commit);
+      check(git_commit_lookup(&orig_commit, repo, &pre_rebase_commit_id), "git_commit_lookup", NULL);
+    }
+
+    char *msg = tweak_commit_message(orig_commit, orig_merge, &new_tree);
+
+    const int commit_err = git_rebase_commit(
+      &rebase_tip_id,
+      rb,
+      NULL,
+      // Take the committer information from the merge commit if manually rebased.
+      using_manual_rebase ? git_commit_committer(orig_merge) : git_commit_committer(orig_commit),
+      NULL,
+      msg
+    );
+    free(msg);
+    git_commit_free(orig_commit);
+
+    if (commit_err == GIT_EAPPLIED) {
+      // Applying the patch results in the same tree, so the patch is
+      // empty.
+      char buf_patch[GIT_OID_HEXSZ+1] = {};
+      char buf_merge[GIT_OID_HEXSZ+1] = {};
+      git_oid_nfmt(buf_patch, 12, &op->id);
+      git_oid_nfmt(buf_merge, 12, git_commit_id(orig_merge));
+      printf("Patch already exists in history; patch=%s merge=%s\n", buf_patch, buf_merge);
+      continue;
+    }
+    check(commit_err, "git_rebase_commit", NULL);
+    committed_at_least_one_patch = true;
+  }
+
+  // The loop ends normally with GIT_ITEROVER. Any other non-zero code is a
+  // real failure and must not be mistaken for a finished rebase.
+  if (is_success && next_err != GIT_ITEROVER)
+    check(next_err, "git_rebase_next", NULL);
+
+  if (is_success && committed_at_least_one_patch) {
+    // Update the growing new_head to point at our new rebase tip.
+    git_oid_cpy(*new_head, &rebase_tip_id);
+  }
+
+  git_rebase_abort(rb);
+  git_rebase_free(rb);
+
+  git_annotated_commit_free(p0a);
+  git_annotated_commit_free(p1a);
+  git_annotated_commit_free(new_heada);
+
+  return is_success;
+}
+
+// merge_llvm_project_tree generates a new root tree combining the llvm project
+// tree and the flang/ directory found in orig_tree, and stores its id in
+// new_tree_id.
+//
+// new_tree_id and orig_tree may point at the same oid (main() calls it that
+// way): orig_tree is fully read before new_tree_id is written. It is fatal if
+// orig_tree has no "flang" entry.
+void merge_llvm_project_tree(
+    git_oid *new_tree_id,
+    const git_oid *orig_tree,
+    const git_tree *llvm_project_tree) {
+
+  git_repository *repo = git_tree_owner(llvm_project_tree);
+
+  git_tree *flang_tree;
+  check(git_tree_lookup(&flang_tree, repo, orig_tree), "git_tree_lookup", NULL);
+
+  const git_tree_entry *flang_entry = git_tree_entry_byname(flang_tree, "flang");
+  if (flang_entry == NULL) {
+    fprintf(stderr, "merge_llvm_project_tree: tree has no flang/ entry\n");
+    exit(1);
+  }
+  const git_oid *flang_dir_tree_id = git_tree_entry_id(flang_entry);
+
+  // Effectively merges the flang/ directory into the llvm project tree.
+  git_treebuilder *tb;
+  check(git_treebuilder_new(&tb, repo, llvm_project_tree), "git_treebuilder_new", NULL);
+  check(git_treebuilder_insert(NULL, tb, "flang", flang_dir_tree_id, GIT_FILEMODE_TREE), "git_treebuilder_insert", NULL);
+  check(git_treebuilder_write(new_tree_id, tb), "git_treebuilder_write", NULL);
+  git_treebuilder_free(tb);
+
+  git_tree_free(flang_tree);
+}
+
+// generate_squash_message generates a commit message for merges which have been
+// squashed.
+//
+// *newmsg must be a malloc'ed string. It is freed and replaced by a new
+// malloc'ed string: the old text, an explanatory paragraph, and one line
+// ("flang-compiler/f18@<sha> <summary>") per first-parent commit of the merged
+// branch, oldest first.
+void generate_squash_message(char **newmsg, git_commit *merge_commit) {
+  git_repository *repo = git_commit_owner(merge_commit);
+  std::stringstream text;
+
+  // Start the message with the existing rewritten message.
+  text << *newmsg;
+
+  text << "\nDue to a conflicting rebase during the linearizing of "
+          "flang-compiler/f18, this commit squashes a number of "
+          "other commits:\n\n";
+
+  git_revwalk *walk;
+  check(git_revwalk_new(&walk, repo), "allocate git_revwalk", NULL);
+  git_revwalk_simplify_first_parent(walk);
+  git_revwalk_sorting(walk, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE);
+  check(git_revwalk_push(walk, git_commit_parent_id(merge_commit, 1)), "git_revwalk_push", NULL);
+  check(git_revwalk_hide(walk, git_commit_parent_id(merge_commit, 0)), "git_revwalk_hide", NULL);
+
+  git_oid commit_id;
+  while (git_revwalk_next(&commit_id, walk) == 0) {
+    char hex[GIT_OID_HEXSZ+1] = {};
+    git_oid_fmt(hex, &commit_id);
+
+    git_commit *c;
+    check(git_commit_lookup(&c, repo, &commit_id), "git_commit_lookup", NULL);
+
+    // Streaming a NULL char* is undefined; fall back to an empty summary.
+    const char *summary = git_commit_summary(c);
+    text << "flang-compiler/f18@" << hex << " " << (summary != NULL ? summary : "") << "\n";
+    git_commit_free(c);
+  }
+
+  git_revwalk_free(walk);
+
+  // Replace newmsg with the squashed msg.
+  const std::string squashed = text.str();
+  char *squashmsg = (char*)malloc(squashed.size()+1);
+  if (squashmsg == NULL) {
+    fprintf(stderr, "generate_squash_message: out of memory\n");
+    exit(1);
+  }
+  memcpy(squashmsg, squashed.c_str(), squashed.size()+1);
+  free(*newmsg);
+  *newmsg = squashmsg;
+}
+
+// main linearizes origin/master of the repository at argv[1] (default ".")
+// into refs/heads/rewritten-history-v4, then moves every tree of that history
+// under /flang/ and joins the result to llvm-project/master in
+// refs/heads/rewritten-history-v4-llvm-project-merge.
+//
+// Exit status: 0 on success; 1 for libgit2 failures or an empty history; 2 if
+// llvm-project/master is missing; 5 for a commit with more than two parents;
+// 6 if a rebased merge does not reproduce the merge commit's tree.
+int main(int argc, char* argv[]) {
+  test_rewrite_issue_references();
+
+  // git_libgit2_init reports failure with a negative value.
+  const int init_result = git_libgit2_init();
+  if (init_result < 0)
+    check(init_result, "git_libgit2_init", NULL);
+
+  const char *repo_path = ".";
+  if (argc > 1)
+    repo_path = argv[1];
+
+  git_repository *repo;
+  int error = git_repository_open(&repo, repo_path);
+  if (error < 0) {
+    // giterr_last() may return NULL; do not dereference it blindly.
+    const git_error *last = giterr_last();
+    fprintf(stderr, "Could not open repository: %s\n",
+            (last != NULL && last->message != NULL) ? last->message : "unknown error");
+    exit(1);
+  }
+
+  // Walk commits in reverse topological order starting from origin/master.
+  git_revwalk *walk;
+  check(git_revwalk_new(&walk, repo), "allocate git_revwalk", NULL);
+  git_revwalk_simplify_first_parent(walk);
+  git_revwalk_sorting(walk, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE);
+
+  check(git_revwalk_push_ref(walk, "refs/remotes/origin/master"), "git_revwalk_push_ref", NULL);
+
+  bool is_root = true; // First commit has no parents.
+  git_oid old_head = {};
+  git_oid new_commit_id = {};
+  git_oid *new_head = NULL; // NULL until the first commit has been written.
+
+  // For each commit in the first-parent lineage of the original history:
+  //
+  // 1. Take non-merge commits as they were.
+  // 2. Attempt to rebase second-parent of merge commits onto first-parent.
+  // 2a. Otherwise, squash them.
+  //
+  // Merge commits are preserved as empty commits.
+  while (git_revwalk_next(&old_head, walk) == 0) {
+    git_commit *c;
+    check(git_commit_lookup(&c, repo, &old_head), "git_commit_lookup", NULL);
+
+    // Prettify the commit message - rewrite references, add trailer headers.
+    char *newmsg = tweak_commit_message(c, c, git_commit_tree_id(c));
+
+    const unsigned int n_parents = git_commit_parentcount(c);
+    if (n_parents > 2) {
+      fprintf(stderr, "Unexpected number of parents.\n");
+      exit(5);
+    }
+
+    // A merge at the root cannot be rebased (only happens if using a
+    // restricted commit range); it is taken as-is like a non-merge commit.
+    if (n_parents == 2 && !is_root) {
+      if (try_rebase(&new_head, c)) {
+        // Rebase succeeded. Now ensure that at the end of the rebase,
+        // the tree state is the same as if the merge had been done.
+        git_oid old_tree = tree_for_commit(repo, &old_head);
+        git_oid new_tree = tree_for_commit(repo, new_head);
+        if (!git_oid_equal(&old_tree, &new_tree)) {
+          char buf_old_head[GIT_OID_HEXSZ+1] = {};
+          char buf_new_head[GIT_OID_HEXSZ+1] = {};
+          git_oid_nfmt(buf_old_head, 12, &old_head);
+          git_oid_nfmt(buf_new_head, 12, new_head);
+
+          fprintf(stderr, "commits do not have the same tree: (old, "
+                          "new) = %s %s\n", buf_old_head, buf_new_head);
+          exit(6);
+        }
+        // The commit created below is then an empty commit standing in for
+        // the merge: its tree equals the tree of the rebase tip.
+      } else {
+        // Conflicts: keep the merge's tree as one squashed commit.
+        generate_squash_message(&newmsg, c);
+      }
+    }
+
+    // Create the new commit. The id is written to a separate oid so that the
+    // output never aliases the parent being read.
+    git_oid created;
+    const git_oid *parent = new_head;
+    check(git_commit_create_from_ids(
+      &created,
+      repo,
+      NULL,
+      git_commit_author(c),
+      git_commit_committer(c),
+      git_commit_message_encoding(c),
+      newmsg,
+      git_commit_tree_id(c),
+      is_root ? 0 : 1,
+      &parent
+    ), "git_commit_create_from_ids", NULL);
+    git_oid_cpy(&new_commit_id, &created);
+    new_head = &new_commit_id;
+    is_root = false;
+
+    free(newmsg);
+    git_commit_free(c);
+  }
+
+  if (new_head == NULL) {
+    fprintf(stderr, "No commits found on origin/master; nothing to rewrite.\n");
+    exit(1);
+  }
+
+  // First pass now done. Move the directory in a second pass, and merge the
+  // result with llvm-project.
+
+  char buf[GIT_OID_HEXSZ+1] = {};
+  git_oid_nfmt(buf, 12, new_head);
+  printf("\nConflicts encountered: %d, discarding %d commits\n", n_conflicts, n_discards);
+  printf("Done; rewritten-history-v4 => %s\n", buf);
+
+  git_reference *flat_ref;
+  check(git_reference_create(
+      &flat_ref,
+      repo,
+      "refs/heads/rewritten-history-v4",
+      new_head,
+      1,
+      "flatten.cpp update"
+    ),
+    "git_reference_create", NULL);
+  git_reference_free(flat_ref);
+
+  git_revwalk_reset(walk);
+
+  // Now rename everything under flang/.
+  printf("Inserting /flang/...\n");
+
+  git_oid renamed_commit_id = {};
+  bool renamed_is_root = true; // First commit has no parents.
+  git_oid *new_head_renamed = NULL;
+
+  git_revwalk_sorting(walk, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE);
+  check(git_revwalk_push(walk, new_head), "git_revwalk_push", NULL);
+
+  // The upstream must be available at llvm-project/master; its head becomes
+  // the first parent of the final merge commit.
+  git_oid llvm_project_head = {};
+  const int llvm_err = git_reference_name_to_id(&llvm_project_head, repo, "refs/remotes/llvm-project/master");
+  if (llvm_err != 0) {
+    fprintf(stderr, "Require llvm-project/master ref to exist before proceeding. Add llvm-project as a remote and fetch it.\n");
+    exit(2);
+  }
+
+  git_oid_nfmt(buf, 12, &llvm_project_head);
+  printf("Rewriting history on top of llvm-project@%s...\n", buf);
+
+  // Disabled since the merged MLIR root commit has zero parents.
+  // Take the same approach to be consistent (= false).
+  const bool use_llvm_project_head_as_root = false;
+  if (use_llvm_project_head_as_root) {
+    new_head_renamed = &llvm_project_head;
+    renamed_is_root = false;
+  }
+
+  // Grab the llvm_project_tree.
+  git_tree *llvm_project_tree;
+  git_commit *llvm_commit;
+  check(git_commit_lookup(&llvm_commit, repo, &llvm_project_head), "git_commit_lookup", NULL);
+  check(git_commit_tree(&llvm_project_tree, llvm_commit), "git_commit_tree", NULL);
+  git_commit_free(llvm_commit);
+
+  git_oid new_tree = {};
+
+  // For each commit, rewrite its tree.
+  while (git_revwalk_next(&old_head, walk) == 0) {
+    git_commit *c;
+    check(git_commit_lookup(&c, repo, &old_head), "git_commit_lookup", NULL);
+
+    insert_flang_directory(repo, &new_tree, git_commit_tree_id(c));
+
+    git_oid created;
+    const git_oid *parent = new_head_renamed;
+    check(git_commit_create_from_ids(
+      &created,
+      repo,
+      NULL,
+      git_commit_author(c),
+      git_commit_committer(c),
+      git_commit_message_encoding(c),
+      git_commit_message_raw(c),
+      &new_tree,
+      renamed_is_root ? 0 : 1,
+      &parent
+    ), "git_commit_create_from_ids", NULL);
+    git_oid_cpy(&renamed_commit_id, &created);
+    new_head_renamed = &renamed_commit_id;
+    renamed_is_root = false;
+
+    git_commit_free(c);
+  }
+
+  git_signature *merge_commit_author;
+  check(git_signature_default(&merge_commit_author, repo), "git_signature_default", NULL);
+
+  const char *merge_message =
+    "[Flang] Merge flang-compiler/f18\n"
+    "\n"
+    "This is the initial merge of flang-compiler, which is done in this way\n"
+    "principally to preserve the history and git-blame, without generating a large\n"
+    "number of commits on the first-parent history of LLVM.\n"
+    "\n"
+    "If you don't care about the flang history during a bisect remember that you can\n"
+    "supply paths to git-bisect, e.g. `git bisect start clang llvm`.\n"
+    "\n"
+    "The history of f18 was rewritten to:\n"
+    "\n"
+    "* Put the code under /flang/.\n"
+    "* Linearize the history.\n"
+    "* Rewrite commit messages so that issue and PR numbers point to the old repository.\n"
+    "\n"
+    "Updates: flang-compiler/f18#876 (submission into llvm-project)\n"
+    "Mailing-list: http://lists.llvm.org/pipermail/llvm-dev/2020-January/137989.html ([llvm-dev] Flang landing in the monorepo - next Monday!)\n"
+    "Mailing-list: http://lists.llvm.org/pipermail/llvm-dev/2019-December/137661.html ([llvm-dev] Flang landing in the monorepo)\n";
+
+  // new_tree is the last renamed tree (root containing only flang/). Combine
+  // it with the llvm-project tree to form the tree of the merge commit.
+  merge_llvm_project_tree(&new_tree, &new_tree, llvm_project_tree);
+
+  const git_oid *parents[2] = {};
+  parents[0] = &llvm_project_head;
+  parents[1] = &renamed_commit_id;
+
+  git_oid new_head_merged;
+  check(git_commit_create_from_ids(
+    &new_head_merged,
+    repo,
+    NULL,
+    merge_commit_author,
+    merge_commit_author,
+    NULL,
+    merge_message,
+    &new_tree,
+    2,
+    parents
+  ), "git_commit_create_from_ids", NULL);
+
+  git_signature_free(merge_commit_author);
+
+  git_tree_free(llvm_project_tree);
+
+  git_reference *merged_ref;
+  check(git_reference_create(
+      &merged_ref,
+      repo,
+      "refs/heads/rewritten-history-v4-llvm-project-merge",
+      &new_head_merged,
+      1,
+      "flatten.cpp update"
+    ),
+    "git_reference_create", NULL);
+  git_reference_free(merged_ref);
+
+  git_oid_nfmt(buf, 12, &new_head_merged);
+  printf("Done; rewritten-history-v4-llvm-project-merge => %s\n", buf);
+  printf(" ... all done\n");
+
+  git_oid origin_master;
+  check(git_reference_name_to_id(&origin_master, repo, "refs/remotes/origin/master"), "git_reference_name_to_id", NULL);
+  git_oid_nfmt(buf, 12, &origin_master);
+  printf("Start point was origin/master => %s\n", buf);
+
+  git_revwalk_free(walk);
+  git_repository_free(repo);
+  git_libgit2_shutdown();
+
+  return 0;
+}