#!/bin/sh -e
#
# Clean up QEMU #include lines by ensuring that qemu/osdep.h
# is the first include listed in .c files, and no headers provided
# by osdep.h itself are redundantly included in either .c or .h files.
#
# Copyright (c) 2015 Linaro Limited
#
# Authors:
#  Peter Maydell <peter.maydell@linaro.org>
#
# This work is licensed under the terms of the GNU GPL, version 2
# or (at your option) any later version. See the COPYING file in
# the top-level directory.

# Usage:
#   clean-includes [--git subjectprefix] [--check-dup-head] file-or-dir ...
# or
#   clean-includes [--git subjectprefix] [--check-dup-head] --all
#
# If the --git subjectprefix option is given, then after making
# the changes to the files this script will create a git commit
# with the subject line "subjectprefix: Clean up includes"
# and a boilerplate commit message.
#
# If --check-dup-head is specified, additionally check for duplicate
# header includes.
#
# Using --all will cause clean-includes to run on the whole source
# tree (excluding certain directories which are known not to need
# handling). This is equivalent to passing '.' as the directory to
# scan.

# This script requires Coccinelle to be installed.

# .c files will have the osdep.h include added, and redundant
# includes removed.
# .h files will have redundant includes (including includes of osdep.h)
# removed.
# Other files (including C++ and ObjectiveC) can't be handled by this script.

GIT=no
DUPHEAD=no

# Save the original arguments in case we want to put them in
# a git commit message, quoted for the shell so that we handle
# args with spaces/metacharacters correctly.
# The quote_sh() function is the same one we use in configure.

quote_sh() {
    # Print $1 quoted for re-use as a shell word.
    # First sed expression: replace every embedded ' with '\'' (close the
    # quote, emit an escaped quote, reopen the quote).
    # Second sed expression: wrap each line of the result in '...'.
    printf '%s' "$1" | sed -e "s,','\\\\'',g" -e "s,.*,'&',"
}

quote_args() {
    # Print every argument shell-quoted via quote_sh, joined by single
    # spaces, with no leading or trailing separator and no final newline.
    until [ $# -eq 0 ]; do
        printf "%s" "$(quote_sh "$1")"
        shift
        # Emit the separator only when another argument follows.
        [ $# -eq 0 ] || printf " "
    done
}

# Snapshot the command line, shell-quoted, before the option parser
# below starts shifting arguments away.
QUOTEDARGS="$(quote_args "$@")"

# Consume leading options.  Parsing stops at "--" (which is discarded),
# at the first argument that is not a recognised option, or when the
# arguments run out.
while [ $# -gt 0 ]; do
    case "$1" in
    --git)
        # --git takes a mandatory value: the commit subject prefix.
        if [ $# -lt 2 ]; then
            echo "--git option requires an argument"
            exit 1
        fi
        GIT=yes
        GITSUBJ="$2"
        shift 2
        ;;
    --check-dup-head)
        DUPHEAD=yes
        shift
        ;;
    --)
        shift
        break
        ;;
    *)
        break
        ;;
    esac
done

# Something to operate on (files, directories or --all) must remain
# after the options have been consumed; otherwise print usage and fail.
[ $# -ne 0 ] || {
    echo "Usage: clean-includes [--git subjectprefix] [--check-dup-head] [--all | foo.c ...]"
    echo "(modifies the files in place)"
    exit 1
}

# --all means "scan everything starting from the current directory".
# Only the first remaining argument is examined; when it is --all the
# whole argument list is replaced by '.'.
case "$1" in
--all)
    set -- '.'
    ;;
esac

# Coccinelle refuses to read a script file whose name does not end in
# '.cocci', so the temporary file needs that suffix.  A second temporary
# file is created alongside it with a '.regex' suffix.
COCCIFILE="$(mktemp --suffix=.cocci)"
REGEXFILE="$(mktemp --suffix=.regex)"

# Delete both temporary files; installed below for normal exit and for
# the INT, TERM and HUP signals.
cleanup_tempfiles() {
    rm -f -- "$COCCIFILE" "$REGEXFILE"
}

trap cleanup_tempfiles INT TERM HUP EXIT

# List of extended regular expressions defining files to ignore.
# Comments starting with '#' are permitted: grep -v strips those lines
# before the list is stored in $REGEXFILE.
# The here-document delimiter is quoted so that the patterns are stored
# literally; the shell performs no parameter, command or backslash
# expansion on them.
grep -v '^#' >"$REGEXFILE" <<'EOT'
# These tests are generally standalone binaries
^tests/(tcg|multiboot|fp|uefi-test-tools|qtest/migration/s390x)
# BIOS sources and third-party subprojects don't follow our rules
^pc-bios
^subprojects
# headers under rust are only used for input to bindgen
^rust
# plugin binaries are standalone
^contrib/plugins
# the ebpf tool is standalone, and the skeleton header is autogenerated
^tools/ebpf
^ebpf/rss.bpf.skeleton.h
# These files just include some other .c file and have no content themselves
^linux-user/(mips64|x86_64)/(cpu_loop|signal).c
^linux-user/mips64/elfload.c
# These are autogenerated headers
^include/standard-headers/
# osdep.h itself and its friends are expected to include system headers
^include/qemu/osdep.h
^include/qemu/compiler.h
^include/glib-compat.h
^include/system/os-(posix|win32|wasm).h
# This is for use by plugins, which are standalone binaries
^include/qemu/qemu-plugin.h
# standalone tools used in building the hexagon target code
^target/hexagon/(idef-parser|gen_semantics.c|gen_dectree_import.c)
# standalone tool
^target/s390x/gen-features.c
# gen-vdso is a standalone tool
^linux-user/gen-vdso.c
# feature-detection code used by meson.build
^scripts/xen-detect.c
# autogenerated by tracetool
^tests/tracetool/simple.c
# these just include another C file
^tests/unit/test-rcu-(simpleq|slist|tailq).c
EOT

# Replace the positional parameters with the git-tracked .c and .h files
# under the requested paths, minus anything matching the ignore list.
# The command substitution is deliberately unquoted so that it is split
# into one parameter per file: we assume there are no files in the tree
# with spaces in their name.
set -- $(
    git ls-files "$@" |
        grep '\.[ch]$' |
        grep -E -v -f "$REGEXFILE"
)

# Write the Coccinelle semantic patch: it inserts an include of
# qemu/osdep.h in front of a "..." include and in front of a <...>
# include.  The delimiter is quoted so the patch text is copied verbatim.
cat >"$COCCIFILE" <<'EOT'
@@
@@

(
+ #include "qemu/osdep.h"
 #include "..."
|
+ #include "qemu/osdep.h"
 #include <...>
)
EOT

# Remove, in place, every #include of a header that qemu/osdep.h already
# provides.
#   $1 - path of the .c or .h file to rewrite
# Each entry of the qw() list is a single whitespace-separated word and
# must equal the include target exactly, delimiters ("..." or <...>)
# included; a missing or stray quote makes the entry unmatchable.
strip_osdep_provided_includes() {
  perl -n -i -e 'print if !/\s*#\s*include\s*(["<][^>"]*[">])/ ||
                          ! (grep { $_ eq $1 } qw (
           "config-host.h" "config-target.h" "qemu/compiler.h"
           <setjmp.h> <stdarg.h> <stddef.h> <stdbool.h> <stdint.h> <sys/types.h>
           <stdlib.h> <stdio.h> <string.h> <strings.h> <inttypes.h>
           <limits.h> <unistd.h> <time.h> <ctype.h> <errno.h> <fcntl.h>
           <sys/stat.h> <sys/time.h> <assert.h> <signal.h> <glib.h> <sys/mman.h>
           "system/os-posix.h" "system/os-win32.h" "glib-compat.h"
           "qemu/typedefs.h"
            ))' "$1"
}

# Space-separated list of the files that were actually processed; it is
# read again after the loop by the duplicate-include check and by the
# git commit step.
files=
for f in "$@"; do
  if [ -L "$f" ]; then
      echo "SKIPPING $f (symbolic link)"
      continue
  fi
  case "$f" in
    *.c.inc)
      # These aren't standalone C source files
      echo "SKIPPING $f (not a standalone source file)"
      continue
      ;;
    *.c)
      MODE=c
      ;;
    *.h)
      MODE=h
      ;;
    *)
      echo "WARNING: ignoring $f (cannot handle non-C files)"
      continue
      ;;
  esac
  files="$files $f"

  if [ "$MODE" = "c" ]; then
    # First, use Coccinelle to add qemu/osdep.h before the first existing include
    # (this will add two lines if the file uses both "..." and <...> #includes,
    # but we will remove the extras in the next step)
    spatch  --in-place --no-show-diff --cocci-file "$COCCIFILE" "$f"

    # Now remove any duplicate osdep.h includes: only the first matching
    # line is printed, later ones are dropped.
    perl -n -i -e 'print if !/#include "qemu\/osdep.h"/ || !$n++;' "$f"
  else
    # Remove includes of osdep.h itself
    perl -n -i -e 'print if !/\s*#\s*include\s*(["<][^>"]*[">])/ ||
                            ! (grep { $_ eq $1 } qw ("qemu/osdep.h"))' "$f"
  fi

  # Remove includes that osdep.h already provides
  strip_osdep_provided_includes "$f"

done

# Optional duplicate-include check (--check-dup-head): collect every
# #include line from the processed files, strip blanks so differently
# spaced copies compare equal, and report any line seen more than once.
# $files is intentionally unquoted: it is a space-separated file list.
if [ "$DUPHEAD" = "yes" ] && [ -n "$files" ]; then
    # grep -E replaces the obsolescent egrep; the final grep succeeds only
    # when some count printed by uniq -c is not 1.
    if grep -E "^[[:space:]]*#[[:space:]]*include" -- $files | tr -d '[:blank:]' \
        | sort | uniq -c | grep -v '^ *1 '; then
        echo "Found duplicate header file includes. Please check the above files manually."
        exit 1
    fi
fi

# With --git, stage the processed files and commit them using a
# boilerplate message built from the subject prefix and the original
# (shell-quoted) command line.
case "$GIT" in
yes)
    # $files is intentionally unquoted: it is a space-separated file list.
    git add -- $files
    git commit --signoff -F - <<EOF
$GITSUBJ: Clean up includes

This commit was created with scripts/clean-includes:
 ./scripts/clean-includes $QUOTEDARGS

All .c should include qemu/osdep.h first.  The script performs three
related cleanups:

* Ensure .c files include qemu/osdep.h first.
* Including it in a .h is redundant, since the .c  already includes
  it.  Drop such inclusions.
* Likewise, including headers qemu/osdep.h includes is redundant.
  Drop these, too.

EOF
    ;;
esac
