[PATCH 4.4-cip 07/23] ubsan: fix tree-wide -Wmaybe-uninitialized false positives

Ben Hutchings <ben.hutchings@...>
 

From: Andrey Ryabinin <aryabinin@...>

commit dde5cf39d4d2cce71f2997c37210dd624d0e4bf6 upstream.

-fsanitize=* options make GCC less smart than usual and increase the
number of 'maybe-uninitialized' false positives. So this patch does two things:

* Add -Wno-maybe-uninitialized to CFLAGS_UBSAN, which disables all
such warnings for instrumented files.

* Remove CONFIG_UBSAN_SANITIZE_ALL from all[yes|mod]config builds, so
that the all[yes|mod]config builds go without -fsanitize=* and still use
-Wmaybe-uninitialized. (A sketch of the kind of false positive involved
is shown below.)
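
For context, a minimal sketch of the sort of code that becomes a false
positive once the sanitizer's extra control flow is inserted (hypothetical
example and helper name, not taken from the kernel tree):

	/* Hypothetical helper (illustration only): returns 0 on success
	 * and writes the result through 'out'. */
	extern int get_val(int *out);

	int use_val(void)
	{
		int val;	/* written only on the success path */
		int ret = get_val(&val);

		if (ret)
			return ret;

		return val;	/* false positive: 'val' is initialized here */
	}

With -fsanitize=* instrumentation GCC can no longer prove that 'val' is
only read on the success path, hence the -Wno-maybe-uninitialized in
CFLAGS_UBSAN, while dropping UBSAN_SANITIZE_ALL from COMPILE_TEST builds
keeps the warning useful for all[yes|mod]config.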

Signed-off-by: Andrey Ryabinin <aryabinin@...>
Reported-by: Fengguang Wu <fengguang.wu@...>
Signed-off-by: Andrew Morton <akpm@...>
Signed-off-by: Linus Torvalds <torvalds@...>
Signed-off-by: Ben Hutchings <ben.hutchings@...>
---
lib/Kconfig.ubsan | 5 +++++
scripts/Makefile.ubsan | 4 ++++
2 files changed, 9 insertions(+)

diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index e07c1ba9ba13..39494af9a84a 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -13,6 +13,11 @@ config UBSAN_SANITIZE_ALL
bool "Enable instrumentation for the entire kernel"
depends on UBSAN
depends on ARCH_HAS_UBSAN_SANITIZE_ALL
+
+ # We build with -Wno-maybe-uninitilzed, but we still want to
+ # use -Wmaybe-uninitilized in allmodconfig builds.
+ # So dependsy bellow used to disable this option in allmodconfig
+ depends on !COMPILE_TEST
default y
help
This option activates instrumentation for the entire kernel.
diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan
index 8ab68679cfb5..77ce538268b5 100644
--- a/scripts/Makefile.ubsan
+++ b/scripts/Makefile.ubsan
@@ -14,4 +14,8 @@ ifdef CONFIG_UBSAN
ifdef CONFIG_UBSAN_ALIGNMENT
CFLAGS_UBSAN += $(call cc-option, -fsanitize=alignment)
endif
+
+ # -fsanitize=* options makes GCC less smart than usual and
+ # increase number of 'maybe-uninitialized false-positives
+ CFLAGS_UBSAN += $(call cc-option, -Wno-maybe-uninitialized)
endif
--
2.10.2



--
Ben Hutchings
Software Developer, Codethink Ltd.


[PATCH 4.4-cip 06/23] perf/core: Fix Undefined behaviour in rb_alloc()

Ben Hutchings <ben.hutchings@...>
 

From: Peter Zijlstra <peterz@...>

commit 8184059e93c200757f5c0805dae0f14e880eab5d upstream.

Sasha reported:

[ 3494.030114] UBSAN: Undefined behaviour in kernel/events/ring_buffer.c:685:22
[ 3494.030647] shift exponent -1 is negative

Andrey spotted that this is because:

It happens if nr_pages = 0:
rb->page_order = ilog2(nr_pages);

Fix it by making both assignments conditional on nr_pages; when nr_pages
is 0 they should both be 0 anyway, and already are because the structure
is allocated with kzalloc().
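
As a rough illustration of the failure mode (my reading of the report
above, not part of the original changelog): ilog2() is only meaningful
for a non-zero argument, and for a runtime value of 0 it effectively
evaluates to fls(0) - 1 == -1, so the old code stored a negative page
order that later ends up as a shift exponent:

	rb->page_order = ilog2(0);	/* effectively -1, later used as a shift count */

Guarding both assignments simply leaves the kzalloc()-zeroed defaults in
place instead.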

Reported-by: Sasha Levin <sasha.levin@...>
Reported-by: Andrey Ryabinin <ryabinin.a.a@...>
Signed-off-by: Peter Zijlstra (Intel) <peterz@...>
Cc: Alexander Shishkin <alexander.shishkin@...>
Cc: Andy Lutomirski <luto@...>
Cc: Arnaldo Carvalho de Melo <acme@...>
Cc: Arnaldo Carvalho de Melo <acme@...>
Cc: Borislav Petkov <bp@...>
Cc: Brian Gerst <brgerst@...>
Cc: David Ahern <dsahern@...>
Cc: Denys Vlasenko <dvlasenk@...>
Cc: H. Peter Anvin <hpa@...>
Cc: Jiri Olsa <jolsa@...>
Cc: Linus Torvalds <torvalds@...>
Cc: Namhyung Kim <namhyung@...>
Cc: Peter Zijlstra <peterz@...>
Cc: Stephane Eranian <eranian@...>
Cc: Thomas Gleixner <tglx@...>
Cc: Vince Weaver <vincent.weaver@...>
Link: http://lkml.kernel.org/r/20160129141751.GA407@worktop
Signed-off-by: Ingo Molnar <mingo@...>
Signed-off-by: Ben Hutchings <ben.hutchings@...>
---
kernel/events/ring_buffer.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 014b69528194..7beb02f40cb4 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -754,8 +754,10 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)

rb->user_page = all_buf;
rb->data_pages[0] = all_buf + PAGE_SIZE;
- rb->page_order = ilog2(nr_pages);
- rb->nr_pages = !!nr_pages;
+ if (nr_pages) {
+ rb->nr_pages = 1;
+ rb->page_order = ilog2(nr_pages);
+ }

ring_buffer_init(rb, watermark, flags);

--
2.10.2



--
Ben Hutchings
Software Developer, Codethink Ltd.


[PATCH 4.4-cip 05/23] mm/page-writeback: fix dirty_ratelimit calculation

Ben Hutchings <ben.hutchings@...>
 

From: Andrey Ryabinin <aryabinin@...>

commit d59b1087a98e402ed9a7cc577f4da435f9a555f5 upstream.

Calculation of dirty_ratelimit is sometimes incorrect. E.g. the initial
values dirty_ratelimit == INIT_BW and step == 0 lead to the
following result:

UBSAN: Undefined behaviour in ../mm/page-writeback.c:1286:7
shift exponent 25600 is too large for 64-bit type 'long unsigned int'

The fix is straightforward - make step 0 if the shift exponent is too
big.
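
To make the numbers concrete (a rough walk-through, assuming 4 KiB pages
so that INIT_BW works out to 25600 pages/s): on the first update
step == 0 and dirty_ratelimit == INIT_BW, so the old code evaluated

	step >>= dirty_ratelimit / (2 * step + 1);	/* step >>= 25600 */

i.e. a shift of an unsigned long by 25600 places - exactly the exponent
UBSAN reports. The replacement computes the shift count into a separate
variable first and only applies it when it is below BITS_PER_LONG,
otherwise forcing step to 0.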

Signed-off-by: Andrey Ryabinin <aryabinin@...>
Cc: Wu Fengguang <fengguang.wu@...>
Cc: Tejun Heo <tj@...>
Cc: Andy Shevchenko <andy.shevchenko@...>
Signed-off-by: Andrew Morton <akpm@...>
Signed-off-by: Linus Torvalds <torvalds@...>
Signed-off-by: Ben Hutchings <ben.hutchings@...>
---
mm/page-writeback.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index fd51ebfc423f..6d0dbde4503b 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1162,6 +1162,7 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
unsigned long balanced_dirty_ratelimit;
unsigned long step;
unsigned long x;
+ unsigned long shift;

/*
* The dirty rate will match the writeout rate in long term, except
@@ -1286,11 +1287,11 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
* rate itself is constantly fluctuating. So decrease the track speed
* when it gets close to the target. Helps eliminate pointless tremors.
*/
- step >>= dirty_ratelimit / (2 * step + 1);
- /*
- * Limit the tracking speed to avoid overshooting.
- */
- step = (step + 7) / 8;
+ shift = dirty_ratelimit / (2 * step + 1);
+ if (shift < BITS_PER_LONG)
+ step = DIV_ROUND_UP(step >> shift, 8);
+ else
+ step = 0;

if (dirty_ratelimit < balanced_dirty_ratelimit)
dirty_ratelimit += step;
--
2.10.2



--
Ben Hutchings
Software Developer, Codethink Ltd.


[PATCH 4.4-cip 04/23] x86/microcode/intel: Change checksum variables to u32

Ben Hutchings <ben.hutchings@...>
 

From: Chris Bainbridge <chris.bainbridge@...>

commit bc864af13f34d19c911f5691d87bdacc9ce109f5 upstream.

Microcode checksum verification should be done using unsigned 32-bit
values; otherwise the calculation can overflow, which results in
undefined behaviour.

This is also nicely documented in the SDM, section "Microcode Update
Checksum":

"To check for a corrupt microcode update, software must perform a
unsigned DWORD (32-bit) checksum of the microcode update. Even though
some fields are signed, the checksum procedure treats all DWORDs as
unsigned. Microcode updates with a header version equal to 00000001H
must sum all DWORDs that comprise the microcode update. A valid
checksum check will yield a value of 00000000H."

but for some reason the code has been using ints from the very
beginning.
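
As a minimal sketch of the intended arithmetic (hypothetical helper, not
the kernel function itself): the checksum is just the wrap-around sum of
every DWORD in the update, which is well defined for u32 but can
overflow, and therefore invoke undefined behaviour, when done with plain
int as in the report below:

	/* Hypothetical sketch: sum 'dwords' 32-bit words of a microcode
	 * image. Unsigned wrap-around is well defined, and a valid
	 * update sums to exactly 0. */
	static u32 ucode_checksum(const void *mc, unsigned int dwords)
	{
		const u32 *p = mc;
		u32 sum = 0;

		while (dwords--)
			sum += p[dwords];

		return sum;
	}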

In practice, this bug possibly manifested itself only when doing the
microcode data checksum - apparently, currently shipped Intel microcode
doesn't have an extended signature table for which we do checksum
verification too.

UBSAN: Undefined behaviour in arch/x86/kernel/cpu/microcode/intel_lib.c:105:12
signed integer overflow:
-1500151068 + -2125470173 cannot be represented in type 'int'
CPU: 0 PID: 0 Comm: swapper Not tainted 4.5.0-rc5+ #495
...
Call Trace:
dump_stack
? inotify_ioctl
ubsan_epilogue
handle_overflow
__ubsan_handle_add_overflow
microcode_sanity_check
get_matching_model_microcode.isra.2.constprop.8
? early_idt_handler_common
? strlcpy
? find_cpio_data
load_ucode_intel_bsp
load_ucode_bsp
? load_ucode_bsp
x86_64_start_kernel

[ Expand and massage commit message. ]
Signed-off-by: Chris Bainbridge <chris.bainbridge@...>
Signed-off-by: Borislav Petkov <bp@...>
Cc: hmh@...
Link: http://lkml.kernel.org/r/1456834359-5132-1-git-send-email-chris.bainbridge@gmail.com
Signed-off-by: Thomas Gleixner <tglx@...>
Signed-off-by: Ben Hutchings <ben.hutchings@...>
---
arch/x86/kernel/cpu/microcode/intel_lib.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c
index b96896bcbdaf..99ca2c935777 100644
--- a/arch/x86/kernel/cpu/microcode/intel_lib.c
+++ b/arch/x86/kernel/cpu/microcode/intel_lib.c
@@ -49,7 +49,7 @@ int microcode_sanity_check(void *mc, int print_err)
unsigned long total_size, data_size, ext_table_size;
struct microcode_header_intel *mc_header = mc;
struct extended_sigtable *ext_header = NULL;
- int sum, orig_sum, ext_sigcount = 0, i;
+ u32 sum, orig_sum, ext_sigcount = 0, i;
struct extended_signature *ext_sig;

total_size = get_totalsize(mc_header);
@@ -85,8 +85,8 @@ int microcode_sanity_check(void *mc, int print_err)

/* check extended table checksum */
if (ext_table_size) {
- int ext_table_sum = 0;
- int *ext_tablep = (int *)ext_header;
+ u32 ext_table_sum = 0;
+ u32 *ext_tablep = (u32 *)ext_header;

i = ext_table_size / DWSIZE;
while (i--)
@@ -102,7 +102,7 @@ int microcode_sanity_check(void *mc, int print_err)
orig_sum = 0;
i = (MC_HEADER_SIZE + data_size) / DWSIZE;
while (i--)
- orig_sum += ((int *)mc)[i];
+ orig_sum += ((u32 *)mc)[i];
if (orig_sum) {
if (print_err)
pr_err("aborting, bad checksum\n");
--
2.10.2



--
Ben Hutchings
Software Developer, Codethink Ltd.


[PATCH 4.4-cip 03/23] PM / sleep: declare __tracedata symbols as char[] rather than char

Ben Hutchings <ben.hutchings@...>
 

From: Eric Biggers <ebiggers3@...>

commit f97238373b8662a6d580e204df2e7bcbfa43e27a upstream.

Accessing more than one byte from a symbol declared simply 'char' is undefined
behavior, as reported by UBSAN:

UBSAN: Undefined behaviour in drivers/base/power/trace.c:178:18
load of address ffffffff8203fc78 with insufficient space
for an object of type 'char'

Avoid this by declaring the symbols as arrays.
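
In other words (a one-line illustration of the change below): with

	extern char __tracedata_start, __tracedata_end;

each symbol names a one-byte object, so walking a pointer from
&__tracedata_start towards &__tracedata_end reads past the end of that
one-byte object - the "insufficient space" UBSAN complains about.
Declared as incomplete arrays,

	extern char __tracedata_start[], __tracedata_end[];

the names simply denote the section boundary addresses and UBSAN's
object-size check no longer fires.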

Signed-off-by: Eric Biggers <ebiggers3@...>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@...>
Signed-off-by: Ben Hutchings <ben.hutchings@...>
---
drivers/base/power/trace.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c
index a311cfa4c5bd..a6975795e7f3 100644
--- a/drivers/base/power/trace.c
+++ b/drivers/base/power/trace.c
@@ -166,14 +166,14 @@ void generate_pm_trace(const void *tracedata, unsigned int user)
}
EXPORT_SYMBOL(generate_pm_trace);

-extern char __tracedata_start, __tracedata_end;
+extern char __tracedata_start[], __tracedata_end[];
static int show_file_hash(unsigned int value)
{
int match;
char *tracedata;

match = 0;
- for (tracedata = &__tracedata_start ; tracedata < &__tracedata_end ;
+ for (tracedata = __tracedata_start ; tracedata < __tracedata_end ;
tracedata += 2 + sizeof(unsigned long)) {
unsigned short lineno = *(unsigned short *)tracedata;
const char *file = *(const char **)(tracedata + 2);
--
2.10.2



--
Ben Hutchings
Software Developer, Codethink Ltd.


[PATCH 4.4-cip 02/23] ubsan: cosmetic fix to Kconfig text

Ben Hutchings <ben.hutchings@...>
 

From: Yang Shi <yang.shi@...>

commit 7707535ab95e2231b6d7f2bfb4f27558e83c4dc2 upstream.

When enabling UBSAN_SANITIZE_ALL, the kernel image size increases
significantly (~3x), so it is worth having a note about this in Kconfig.

Also fix a typo.

Signed-off-by: Yang Shi <yang.shi@...>
Acked-by: Andrey Ryabinin <aryabinin@...>
Signed-off-by: Andrew Morton <akpm@...>
Signed-off-by: Linus Torvalds <torvalds@...>
Signed-off-by: Ben Hutchings <ben.hutchings@...>
---
lib/Kconfig.ubsan | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 49518fb48cab..e07c1ba9ba13 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -18,6 +18,8 @@ config UBSAN_SANITIZE_ALL
This option activates instrumentation for the entire kernel.
If you don't enable this option, you have to explicitly specify
UBSAN_SANITIZE := y for the files/directories you want to check for UB.
+ Enabling this option will get kernel image size increased
+ significantly.

config UBSAN_ALIGNMENT
bool "Enable checking of pointers alignment"
@@ -25,5 +27,5 @@ config UBSAN_ALIGNMENT
default y if !HAVE_EFFICIENT_UNALIGNED_ACCESS
help
This option enables detection of unaligned memory accesses.
- Enabling this option on architectures that support unalligned
+ Enabling this option on architectures that support unaligned
accesses may produce a lot of false positives.
--
2.10.2



--
Ben Hutchings
Software Developer, Codethink Ltd.


[PATCH 4.4-cip 01/23] UBSAN: run-time undefined behavior sanity checker

Ben Hutchings <ben.hutchings@...>
 

From: Andrey Ryabinin <aryabinin@...>

commit c6d308534aef6c99904bf5862066360ae067abc4 upstream.

UBSAN uses compile-time instrumentation to catch undefined behavior
(UB). The compiler inserts code that performs certain kinds of checks
before operations that could cause UB. If a check fails (i.e. UB is
detected), a __ubsan_handle_* function is called to print an error
message.

So most of the work is done by the compiler. This patch just implements
the ubsan handlers that print the errors. (A rough sketch of what the
inserted check looks like is shown below.)
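
Conceptually (a hand-written sketch of the instrumentation, not the code
GCC actually emits), a checked left shift built with -fsanitize=shift
behaves roughly like:

	/* source line: x = 1 << n; 'desc' stands in for the
	 * compiler-generated struct shift_out_of_bounds_data holding the
	 * file/line and type descriptors (the name is made up here). */
	if (n < 0 || n >= (int)(8 * sizeof(x)))
		__ubsan_handle_shift_out_of_bounds(&desc, 1, n);
	x = 1 << n;

The handler (added by this patch in lib/ubsan.c) prints the report and
execution continues; only __builtin_unreachable() is treated as fatal.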

GCC has had this capability since 4.9.x [1] (see the -fsanitize=undefined
option and its suboptions).
However, GCC 5.x has more checkers implemented [2].
Article [3] has a bit more detail about UBSAN in GCC.

[1] - https://gcc.gnu.org/onlinedocs/gcc-4.9.0/gcc/Debugging-Options.html
[2] - https://gcc.gnu.org/onlinedocs/gcc/Debugging-Options.html
[3] - http://developerblog.redhat.com/2014/10/16/gcc-undefined-behavior-sanitizer-ubsan/

Issues which UBSAN has found thus far are:

Found bugs:

* out-of-bounds access - 97840cb67ff5 ("netfilter: nfnetlink: fix
insufficient validation in nfnetlink_bind")

undefined shifts:

* d48458d4a768 ("jbd2: use a better hash function for the revoke
table")

* 10632008b9e1 ("clockevents: Prevent shift out of bounds")

* 'x << -1' shift in ext4 -
http://lkml.kernel.org/r/<5444EF21.8020501@...>

* undefined rol32(0) -
http://lkml.kernel.org/r/<1449198241-20654-1-git-send-email-sasha.levin@...>

* undefined dirty_ratelimit calculation -
http://lkml.kernel.org/r/<566594E2.3050306@...>

* undefined rounddown_pow_of_two(0) -
http://lkml.kernel.org/r/<1449156616-11474-1-git-send-email-sasha.levin@...>

* [WONTFIX] undefined shift in __bpf_prog_run -
http://lkml.kernel.org/r/<CACT4Y+ZxoR3UjLgcNdUm4fECLMx2VdtfrENMtRRCdgHB2n0bJA@...>

WONTFIX here because it should be fixed in the BPF program, not in the kernel.

signed overflows:

* 32a8df4e0b33f ("sched: Fix odd values in effective_load()
calculations")

* mul overflow in ntp -
http://lkml.kernel.org/r/<1449175608-1146-1-git-send-email-sasha.levin@...>

* incorrect conversion into rtc_time in rtc_time64_to_tm() -
http://lkml.kernel.org/r/<1449187944-11730-1-git-send-email-sasha.levin@...>

* unvalidated timespec in io_getevents() -
http://lkml.kernel.org/r/<CACT4Y+bBxVYLQ6LtOKrKtnLthqLHcw-BMp3aqP3mjdAvr9FULQ@...>

* [NOTABUG] signed overflow in ktime_add_safe() -
http://lkml.kernel.org/r/<CACT4Y+aJ4muRnWxsUe1CMnA6P8nooO33kwG-c8YZg=0Xc8rJqw@...>

[akpm@...: fix unused local warning]
[akpm@...: fix __int128 build woes]
Signed-off-by: Andrey Ryabinin <aryabinin@...>
Cc: Peter Zijlstra <peterz@...>
Cc: Sasha Levin <sasha.levin@...>
Cc: Randy Dunlap <rdunlap@...>
Cc: Rasmus Villemoes <linux@...>
Cc: Jonathan Corbet <corbet@...>
Cc: Michal Marek <mmarek@...>
Cc: Thomas Gleixner <tglx@...>
Cc: Ingo Molnar <mingo@...>
Cc: "H. Peter Anvin" <hpa@...>
Cc: Yury Gribov <y.gribov@...>
Cc: Dmitry Vyukov <dvyukov@...>
Cc: Konstantin Khlebnikov <koct9i@...>
Cc: Kostya Serebryany <kcc@...>
Cc: Johannes Berg <johannes@...>
Signed-off-by: Andrew Morton <akpm@...>
Signed-off-by: Linus Torvalds <torvalds@...>

Signed-off-by: Ben Hutchings <ben.hutchings@...>
---
Documentation/ubsan.txt | 84 +++++++
Makefile | 3 +-
arch/x86/Kconfig | 1 +
arch/x86/boot/Makefile | 1 +
arch/x86/boot/compressed/Makefile | 1 +
arch/x86/entry/vdso/Makefile | 1 +
arch/x86/realmode/rm/Makefile | 1 +
drivers/firmware/efi/libstub/Makefile | 1 +
include/linux/sched.h | 3 +
lib/Kconfig.debug | 1 +
lib/Kconfig.ubsan | 29 +++
lib/Makefile | 3 +
lib/ubsan.c | 456 ++++++++++++++++++++++++++++++++++
lib/ubsan.h | 84 +++++++
mm/kasan/Makefile | 1 +
scripts/Makefile.lib | 6 +
scripts/Makefile.ubsan | 17 ++
17 files changed, 692 insertions(+), 1 deletion(-)
create mode 100644 Documentation/ubsan.txt
create mode 100644 lib/Kconfig.ubsan
create mode 100644 lib/ubsan.c
create mode 100644 lib/ubsan.h
create mode 100644 scripts/Makefile.ubsan

diff --git a/Documentation/ubsan.txt b/Documentation/ubsan.txt
new file mode 100644
index 000000000000..f58215ef5797
--- /dev/null
+++ b/Documentation/ubsan.txt
@@ -0,0 +1,84 @@
+Undefined Behavior Sanitizer - UBSAN
+
+Overview
+--------
+
+UBSAN is a runtime undefined behaviour checker.
+
+UBSAN uses compile-time instrumentation to catch undefined behavior (UB).
+Compiler inserts code that perform certain kinds of checks before operations
+that may cause UB. If check fails (i.e. UB detected) __ubsan_handle_*
+function called to print error message.
+
+GCC has that feature since 4.9.x [1] (see -fsanitize=undefined option and
+its suboptions). GCC 5.x has more checkers implemented [2].
+
+Report example
+---------------
+
+ ================================================================================
+ UBSAN: Undefined behaviour in ../include/linux/bitops.h:110:33
+ shift exponent 32 is to large for 32-bit type 'unsigned int'
+ CPU: 0 PID: 0 Comm: swapper Not tainted 4.4.0-rc1+ #26
+ 0000000000000000 ffffffff82403cc8 ffffffff815e6cd6 0000000000000001
+ ffffffff82403cf8 ffffffff82403ce0 ffffffff8163a5ed 0000000000000020
+ ffffffff82403d78 ffffffff8163ac2b ffffffff815f0001 0000000000000002
+ Call Trace:
+ [<ffffffff815e6cd6>] dump_stack+0x45/0x5f
+ [<ffffffff8163a5ed>] ubsan_epilogue+0xd/0x40
+ [<ffffffff8163ac2b>] __ubsan_handle_shift_out_of_bounds+0xeb/0x130
+ [<ffffffff815f0001>] ? radix_tree_gang_lookup_slot+0x51/0x150
+ [<ffffffff8173c586>] _mix_pool_bytes+0x1e6/0x480
+ [<ffffffff83105653>] ? dmi_walk_early+0x48/0x5c
+ [<ffffffff8173c881>] add_device_randomness+0x61/0x130
+ [<ffffffff83105b35>] ? dmi_save_one_device+0xaa/0xaa
+ [<ffffffff83105653>] dmi_walk_early+0x48/0x5c
+ [<ffffffff831066ae>] dmi_scan_machine+0x278/0x4b4
+ [<ffffffff8111d58a>] ? vprintk_default+0x1a/0x20
+ [<ffffffff830ad120>] ? early_idt_handler_array+0x120/0x120
+ [<ffffffff830b2240>] setup_arch+0x405/0xc2c
+ [<ffffffff830ad120>] ? early_idt_handler_array+0x120/0x120
+ [<ffffffff830ae053>] start_kernel+0x83/0x49a
+ [<ffffffff830ad120>] ? early_idt_handler_array+0x120/0x120
+ [<ffffffff830ad386>] x86_64_start_reservations+0x2a/0x2c
+ [<ffffffff830ad4f3>] x86_64_start_kernel+0x16b/0x17a
+ ================================================================================
+
+Usage
+-----
+
+To enable UBSAN configure kernel with:
+
+ CONFIG_UBSAN=y
+
+and to check the entire kernel:
+
+ CONFIG_UBSAN_SANITIZE_ALL=y
+
+To enable instrumentation for specific files or directories, add a line
+similar to the following to the respective kernel Makefile:
+
+ For a single file (e.g. main.o):
+ UBSAN_SANITIZE_main.o := y
+
+ For all files in one directory:
+ UBSAN_SANITIZE := y
+
+To exclude files from being instrumented even if
+CONFIG_UBSAN_SANITIZE_ALL=y, use:
+
+ UBSAN_SANITIZE_main.o := n
+ and:
+ UBSAN_SANITIZE := n
+
+Detection of unaligned accesses controlled through the separate option -
+CONFIG_UBSAN_ALIGNMENT. It's off by default on architectures that support
+unaligned accesses (CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y). One could
+still enable it in config, just note that it will produce a lot of UBSAN
+reports.
+
+References
+----------
+
+[1] - https://gcc.gnu.org/onlinedocs/gcc-4.9.0/gcc/Debugging-Options.html
+[2] - https://gcc.gnu.org/onlinedocs/gcc/Debugging-Options.html
diff --git a/Makefile b/Makefile
index 98239d56924c..504a8b90cf4e 100644
--- a/Makefile
+++ b/Makefile
@@ -415,7 +415,7 @@ export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE
export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS

export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS
-export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KASAN
+export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KASAN CFLAGS_UBSAN
export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE
export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE
export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL
@@ -794,6 +794,7 @@ endif

include scripts/Makefile.kasan
include scripts/Makefile.extrawarn
+include scripts/Makefile.ubsan

# Add any arch overrides and user supplied CPPFLAGS, AFLAGS and CFLAGS as the
# last assignments
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 436639a31624..803b262eb0bf 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -30,6 +30,7 @@ config X86
select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_MMIO_FLUSH
select ARCH_HAS_SG_CHAIN
+ select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
select ARCH_MIGHT_HAVE_PC_PARPORT
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index c0cc2a6be0bf..4a3a140f26bf 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -60,6 +60,7 @@ clean-files += cpustr.h
KBUILD_CFLAGS := $(USERINCLUDE) $(REALMODE_CFLAGS) -D_SETUP
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
+UBSAN_SANITIZE := n

$(obj)/bzImage: asflags-y := $(SVGA_MODE)

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index efa6073ffa7e..7f6c157e5da5 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -33,6 +33,7 @@ KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)

KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
+UBSAN_SANITIZE :=n

LDFLAGS := -m elf_$(UTS_MACHINE)
ifeq ($(CONFIG_RELOCATABLE),y)
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 265c0ed68118..c854541d93ff 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -4,6 +4,7 @@

KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n
+UBSAN_SANITIZE := n

VDSO64-$(CONFIG_X86_64) := y
VDSOX32-$(CONFIG_X86_X32_ABI) := y
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index 2730d775ef9a..3e75fcf6b836 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -70,3 +70,4 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) $(REALMODE_CFLAGS) -D_SETUP -D_WAKEUP \
-I$(srctree)/arch/x86/boot
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
+UBSAN_SANITIZE := n
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 3c0467d3688c..b2a172d93a08 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -22,6 +22,7 @@ KBUILD_CFLAGS := $(cflags-y) \

GCOV_PROFILE := n
KASAN_SANITIZE := n
+UBSAN_SANITIZE := n

lib-y := efi-stub-helper.o

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1c0193baea2a..90e11e40219a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1632,6 +1632,9 @@ struct task_struct {
struct held_lock held_locks[MAX_LOCK_DEPTH];
gfp_t lockdep_reclaim_gfp;
#endif
+#ifdef CONFIG_UBSAN
+ unsigned int in_ubsan;
+#endif

/* journalling filesystem info */
void *journal_info;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8c15b29d5adc..4bab98fe59a4 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1853,3 +1853,4 @@ source "samples/Kconfig"

source "lib/Kconfig.kgdb"

+source "lib/Kconfig.ubsan"
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
new file mode 100644
index 000000000000..49518fb48cab
--- /dev/null
+++ b/lib/Kconfig.ubsan
@@ -0,0 +1,29 @@
+config ARCH_HAS_UBSAN_SANITIZE_ALL
+ bool
+
+config UBSAN
+ bool "Undefined behaviour sanity checker"
+ help
+ This option enables undefined behaviour sanity checker
+ Compile-time instrumentation is used to detect various undefined
+ behaviours in runtime. Various types of checks may be enabled
+ via boot parameter ubsan_handle (see: Documentation/ubsan.txt).
+
+config UBSAN_SANITIZE_ALL
+ bool "Enable instrumentation for the entire kernel"
+ depends on UBSAN
+ depends on ARCH_HAS_UBSAN_SANITIZE_ALL
+ default y
+ help
+ This option activates instrumentation for the entire kernel.
+ If you don't enable this option, you have to explicitly specify
+ UBSAN_SANITIZE := y for the files/directories you want to check for UB.
+
+config UBSAN_ALIGNMENT
+ bool "Enable checking of pointers alignment"
+ depends on UBSAN
+ default y if !HAVE_EFFICIENT_UNALIGNED_ACCESS
+ help
+ This option enables detection of unaligned memory accesses.
+ Enabling this option on architectures that support unalligned
+ accesses may produce a lot of false positives.
diff --git a/lib/Makefile b/lib/Makefile
index 7f1de26613d2..e710b082d38f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -208,3 +208,6 @@ quiet_cmd_build_OID_registry = GEN $@
clean-files += oid_registry_data.c

obj-$(CONFIG_UCS2_STRING) += ucs2_string.o
+obj-$(CONFIG_UBSAN) += ubsan.o
+
+UBSAN_SANITIZE_ubsan.o := n
diff --git a/lib/ubsan.c b/lib/ubsan.c
new file mode 100644
index 000000000000..8799ae5e2e42
--- /dev/null
+++ b/lib/ubsan.c
@@ -0,0 +1,456 @@
+/*
+ * UBSAN error reporting functions
+ *
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ * Author: Andrey Ryabinin <ryabinin.a.a@...>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+
+#include "ubsan.h"
+
+const char *type_check_kinds[] = {
+ "load of",
+ "store to",
+ "reference binding to",
+ "member access within",
+ "member call on",
+ "constructor call on",
+ "downcast of",
+ "downcast of"
+};
+
+#define REPORTED_BIT 31
+
+#if (BITS_PER_LONG == 64) && defined(__BIG_ENDIAN)
+#define COLUMN_MASK (~(1U << REPORTED_BIT))
+#define LINE_MASK (~0U)
+#else
+#define COLUMN_MASK (~0U)
+#define LINE_MASK (~(1U << REPORTED_BIT))
+#endif
+
+#define VALUE_LENGTH 40
+
+static bool was_reported(struct source_location *location)
+{
+ return test_and_set_bit(REPORTED_BIT, &location->reported);
+}
+
+static void print_source_location(const char *prefix,
+ struct source_location *loc)
+{
+ pr_err("%s %s:%d:%d\n", prefix, loc->file_name,
+ loc->line & LINE_MASK, loc->column & COLUMN_MASK);
+}
+
+static bool suppress_report(struct source_location *loc)
+{
+ return current->in_ubsan || was_reported(loc);
+}
+
+static bool type_is_int(struct type_descriptor *type)
+{
+ return type->type_kind == type_kind_int;
+}
+
+static bool type_is_signed(struct type_descriptor *type)
+{
+ WARN_ON(!type_is_int(type));
+ return type->type_info & 1;
+}
+
+static unsigned type_bit_width(struct type_descriptor *type)
+{
+ return 1 << (type->type_info >> 1);
+}
+
+static bool is_inline_int(struct type_descriptor *type)
+{
+ unsigned inline_bits = sizeof(unsigned long)*8;
+ unsigned bits = type_bit_width(type);
+
+ WARN_ON(!type_is_int(type));
+
+ return bits <= inline_bits;
+}
+
+static s_max get_signed_val(struct type_descriptor *type, unsigned long val)
+{
+ if (is_inline_int(type)) {
+ unsigned extra_bits = sizeof(s_max)*8 - type_bit_width(type);
+ return ((s_max)val) << extra_bits >> extra_bits;
+ }
+
+ if (type_bit_width(type) == 64)
+ return *(s64 *)val;
+
+ return *(s_max *)val;
+}
+
+static bool val_is_negative(struct type_descriptor *type, unsigned long val)
+{
+ return type_is_signed(type) && get_signed_val(type, val) < 0;
+}
+
+static u_max get_unsigned_val(struct type_descriptor *type, unsigned long val)
+{
+ if (is_inline_int(type))
+ return val;
+
+ if (type_bit_width(type) == 64)
+ return *(u64 *)val;
+
+ return *(u_max *)val;
+}
+
+static void val_to_string(char *str, size_t size, struct type_descriptor *type,
+ unsigned long value)
+{
+ if (type_is_int(type)) {
+ if (type_bit_width(type) == 128) {
+#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
+ u_max val = get_unsigned_val(type, value);
+
+ scnprintf(str, size, "0x%08x%08x%08x%08x",
+ (u32)(val >> 96),
+ (u32)(val >> 64),
+ (u32)(val >> 32),
+ (u32)(val));
+#else
+ WARN_ON(1);
+#endif
+ } else if (type_is_signed(type)) {
+ scnprintf(str, size, "%lld",
+ (s64)get_signed_val(type, value));
+ } else {
+ scnprintf(str, size, "%llu",
+ (u64)get_unsigned_val(type, value));
+ }
+ }
+}
+
+static bool location_is_valid(struct source_location *loc)
+{
+ return loc->file_name != NULL;
+}
+
+static DEFINE_SPINLOCK(report_lock);
+
+static void ubsan_prologue(struct source_location *location,
+ unsigned long *flags)
+{
+ current->in_ubsan++;
+ spin_lock_irqsave(&report_lock, *flags);
+
+ pr_err("========================================"
+ "========================================\n");
+ print_source_location("UBSAN: Undefined behaviour in", location);
+}
+
+static void ubsan_epilogue(unsigned long *flags)
+{
+ dump_stack();
+ pr_err("========================================"
+ "========================================\n");
+ spin_unlock_irqrestore(&report_lock, *flags);
+ current->in_ubsan--;
+}
+
+static void handle_overflow(struct overflow_data *data, unsigned long lhs,
+ unsigned long rhs, char op)
+{
+
+ struct type_descriptor *type = data->type;
+ unsigned long flags;
+ char lhs_val_str[VALUE_LENGTH];
+ char rhs_val_str[VALUE_LENGTH];
+
+ if (suppress_report(&data->location))
+ return;
+
+ ubsan_prologue(&data->location, &flags);
+
+ val_to_string(lhs_val_str, sizeof(lhs_val_str), type, lhs);
+ val_to_string(rhs_val_str, sizeof(rhs_val_str), type, rhs);
+ pr_err("%s integer overflow:\n",
+ type_is_signed(type) ? "signed" : "unsigned");
+ pr_err("%s %c %s cannot be represented in type %s\n",
+ lhs_val_str,
+ op,
+ rhs_val_str,
+ type->type_name);
+
+ ubsan_epilogue(&flags);
+}
+
+void __ubsan_handle_add_overflow(struct overflow_data *data,
+ unsigned long lhs,
+ unsigned long rhs)
+{
+
+ handle_overflow(data, lhs, rhs, '+');
+}
+EXPORT_SYMBOL(__ubsan_handle_add_overflow);
+
+void __ubsan_handle_sub_overflow(struct overflow_data *data,
+ unsigned long lhs,
+ unsigned long rhs)
+{
+ handle_overflow(data, lhs, rhs, '-');
+}
+EXPORT_SYMBOL(__ubsan_handle_sub_overflow);
+
+void __ubsan_handle_mul_overflow(struct overflow_data *data,
+ unsigned long lhs,
+ unsigned long rhs)
+{
+ handle_overflow(data, lhs, rhs, '*');
+}
+EXPORT_SYMBOL(__ubsan_handle_mul_overflow);
+
+void __ubsan_handle_negate_overflow(struct overflow_data *data,
+ unsigned long old_val)
+{
+ unsigned long flags;
+ char old_val_str[VALUE_LENGTH];
+
+ if (suppress_report(&data->location))
+ return;
+
+ ubsan_prologue(&data->location, &flags);
+
+ val_to_string(old_val_str, sizeof(old_val_str), data->type, old_val);
+
+ pr_err("negation of %s cannot be represented in type %s:\n",
+ old_val_str, data->type->type_name);
+
+ ubsan_epilogue(&flags);
+}
+EXPORT_SYMBOL(__ubsan_handle_negate_overflow);
+
+
+void __ubsan_handle_divrem_overflow(struct overflow_data *data,
+ unsigned long lhs,
+ unsigned long rhs)
+{
+ unsigned long flags;
+ char rhs_val_str[VALUE_LENGTH];
+
+ if (suppress_report(&data->location))
+ return;
+
+ ubsan_prologue(&data->location, &flags);
+
+ val_to_string(rhs_val_str, sizeof(rhs_val_str), data->type, rhs);
+
+ if (type_is_signed(data->type) && get_signed_val(data->type, rhs) == -1)
+ pr_err("division of %s by -1 cannot be represented in type %s\n",
+ rhs_val_str, data->type->type_name);
+ else
+ pr_err("division by zero\n");
+
+ ubsan_epilogue(&flags);
+}
+EXPORT_SYMBOL(__ubsan_handle_divrem_overflow);
+
+static void handle_null_ptr_deref(struct type_mismatch_data *data)
+{
+ unsigned long flags;
+
+ if (suppress_report(&data->location))
+ return;
+
+ ubsan_prologue(&data->location, &flags);
+
+ pr_err("%s null pointer of type %s\n",
+ type_check_kinds[data->type_check_kind],
+ data->type->type_name);
+
+ ubsan_epilogue(&flags);
+}
+
+static void handle_missaligned_access(struct type_mismatch_data *data,
+ unsigned long ptr)
+{
+ unsigned long flags;
+
+ if (suppress_report(&data->location))
+ return;
+
+ ubsan_prologue(&data->location, &flags);
+
+ pr_err("%s misaligned address %p for type %s\n",
+ type_check_kinds[data->type_check_kind],
+ (void *)ptr, data->type->type_name);
+ pr_err("which requires %ld byte alignment\n", data->alignment);
+
+ ubsan_epilogue(&flags);
+}
+
+static void handle_object_size_mismatch(struct type_mismatch_data *data,
+ unsigned long ptr)
+{
+ unsigned long flags;
+
+ if (suppress_report(&data->location))
+ return;
+
+ ubsan_prologue(&data->location, &flags);
+ pr_err("%s address %pk with insufficient space\n",
+ type_check_kinds[data->type_check_kind],
+ (void *) ptr);
+ pr_err("for an object of type %s\n", data->type->type_name);
+ ubsan_epilogue(&flags);
+}
+
+void __ubsan_handle_type_mismatch(struct type_mismatch_data *data,
+ unsigned long ptr)
+{
+
+ if (!ptr)
+ handle_null_ptr_deref(data);
+ else if (data->alignment && !IS_ALIGNED(ptr, data->alignment))
+ handle_missaligned_access(data, ptr);
+ else
+ handle_object_size_mismatch(data, ptr);
+}
+EXPORT_SYMBOL(__ubsan_handle_type_mismatch);
+
+void __ubsan_handle_nonnull_return(struct nonnull_return_data *data)
+{
+ unsigned long flags;
+
+ if (suppress_report(&data->location))
+ return;
+
+ ubsan_prologue(&data->location, &flags);
+
+ pr_err("null pointer returned from function declared to never return null\n");
+
+ if (location_is_valid(&data->attr_location))
+ print_source_location("returns_nonnull attribute specified in",
+ &data->attr_location);
+
+ ubsan_epilogue(&flags);
+}
+EXPORT_SYMBOL(__ubsan_handle_nonnull_return);
+
+void __ubsan_handle_vla_bound_not_positive(struct vla_bound_data *data,
+ unsigned long bound)
+{
+ unsigned long flags;
+ char bound_str[VALUE_LENGTH];
+
+ if (suppress_report(&data->location))
+ return;
+
+ ubsan_prologue(&data->location, &flags);
+
+ val_to_string(bound_str, sizeof(bound_str), data->type, bound);
+ pr_err("variable length array bound value %s <= 0\n", bound_str);
+
+ ubsan_epilogue(&flags);
+}
+EXPORT_SYMBOL(__ubsan_handle_vla_bound_not_positive);
+
+void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data,
+ unsigned long index)
+{
+ unsigned long flags;
+ char index_str[VALUE_LENGTH];
+
+ if (suppress_report(&data->location))
+ return;
+
+ ubsan_prologue(&data->location, &flags);
+
+ val_to_string(index_str, sizeof(index_str), data->index_type, index);
+ pr_err("index %s is out of range for type %s\n", index_str,
+ data->array_type->type_name);
+ ubsan_epilogue(&flags);
+}
+EXPORT_SYMBOL(__ubsan_handle_out_of_bounds);
+
+void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data,
+ unsigned long lhs, unsigned long rhs)
+{
+ unsigned long flags;
+ struct type_descriptor *rhs_type = data->rhs_type;
+ struct type_descriptor *lhs_type = data->lhs_type;
+ char rhs_str[VALUE_LENGTH];
+ char lhs_str[VALUE_LENGTH];
+
+ if (suppress_report(&data->location))
+ return;
+
+ ubsan_prologue(&data->location, &flags);
+
+ val_to_string(rhs_str, sizeof(rhs_str), rhs_type, rhs);
+ val_to_string(lhs_str, sizeof(lhs_str), lhs_type, lhs);
+
+ if (val_is_negative(rhs_type, rhs))
+ pr_err("shift exponent %s is negative\n", rhs_str);
+
+ else if (get_unsigned_val(rhs_type, rhs) >=
+ type_bit_width(lhs_type))
+ pr_err("shift exponent %s is too large for %u-bit type %s\n",
+ rhs_str,
+ type_bit_width(lhs_type),
+ lhs_type->type_name);
+ else if (val_is_negative(lhs_type, lhs))
+ pr_err("left shift of negative value %s\n",
+ lhs_str);
+ else
+ pr_err("left shift of %s by %s places cannot be"
+ " represented in type %s\n",
+ lhs_str, rhs_str,
+ lhs_type->type_name);
+
+ ubsan_epilogue(&flags);
+}
+EXPORT_SYMBOL(__ubsan_handle_shift_out_of_bounds);
+
+
+void __noreturn
+__ubsan_handle_builtin_unreachable(struct unreachable_data *data)
+{
+ unsigned long flags;
+
+ ubsan_prologue(&data->location, &flags);
+ pr_err("calling __builtin_unreachable()\n");
+ ubsan_epilogue(&flags);
+ panic("can't return from __builtin_unreachable()");
+}
+EXPORT_SYMBOL(__ubsan_handle_builtin_unreachable);
+
+void __ubsan_handle_load_invalid_value(struct invalid_value_data *data,
+ unsigned long val)
+{
+ unsigned long flags;
+ char val_str[VALUE_LENGTH];
+
+ if (suppress_report(&data->location))
+ return;
+
+ ubsan_prologue(&data->location, &flags);
+
+ val_to_string(val_str, sizeof(val_str), data->type, val);
+
+ pr_err("load of value %s is not a valid value for type %s\n",
+ val_str, data->type->type_name);
+
+ ubsan_epilogue(&flags);
+}
+EXPORT_SYMBOL(__ubsan_handle_load_invalid_value);
diff --git a/lib/ubsan.h b/lib/ubsan.h
new file mode 100644
index 000000000000..b2d18d4a53f5
--- /dev/null
+++ b/lib/ubsan.h
@@ -0,0 +1,84 @@
+#ifndef _LIB_UBSAN_H
+#define _LIB_UBSAN_H
+
+enum {
+ type_kind_int = 0,
+ type_kind_float = 1,
+ type_unknown = 0xffff
+};
+
+struct type_descriptor {
+ u16 type_kind;
+ u16 type_info;
+ char type_name[1];
+};
+
+struct source_location {
+ const char *file_name;
+ union {
+ unsigned long reported;
+ struct {
+ u32 line;
+ u32 column;
+ };
+ };
+};
+
+struct overflow_data {
+ struct source_location location;
+ struct type_descriptor *type;
+};
+
+struct type_mismatch_data {
+ struct source_location location;
+ struct type_descriptor *type;
+ unsigned long alignment;
+ unsigned char type_check_kind;
+};
+
+struct nonnull_arg_data {
+ struct source_location location;
+ struct source_location attr_location;
+ int arg_index;
+};
+
+struct nonnull_return_data {
+ struct source_location location;
+ struct source_location attr_location;
+};
+
+struct vla_bound_data {
+ struct source_location location;
+ struct type_descriptor *type;
+};
+
+struct out_of_bounds_data {
+ struct source_location location;
+ struct type_descriptor *array_type;
+ struct type_descriptor *index_type;
+};
+
+struct shift_out_of_bounds_data {
+ struct source_location location;
+ struct type_descriptor *lhs_type;
+ struct type_descriptor *rhs_type;
+};
+
+struct unreachable_data {
+ struct source_location location;
+};
+
+struct invalid_value_data {
+ struct source_location location;
+ struct type_descriptor *type;
+};
+
+#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
+typedef __int128 s_max;
+typedef unsigned __int128 u_max;
+#else
+typedef s64 s_max;
+typedef u64 u_max;
+#endif
+
+#endif
diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile
index 64710148941e..a61460d9f5b0 100644
--- a/mm/kasan/Makefile
+++ b/mm/kasan/Makefile
@@ -1,4 +1,5 @@
KASAN_SANITIZE := n
+UBSAN_SANITIZE_kasan.o := n

CFLAGS_REMOVE_kasan.o = -pg
# Function splitter causes unnecessary splits in __asan_load1/__asan_store1
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 79e86613712f..d7c2032d0fe4 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -129,6 +129,12 @@ _c_flags += $(if $(patsubst n%,, \
$(CFLAGS_KASAN))
endif

+ifeq ($(CONFIG_UBSAN),y)
+_c_flags += $(if $(patsubst n%,, \
+ $(UBSAN_SANITIZE_$(basetarget).o)$(UBSAN_SANITIZE)$(CONFIG_UBSAN_SANITIZE_ALL)), \
+ $(CFLAGS_UBSAN))
+endif
+
# If building the kernel in a separate objtree expand all occurrences
# of -Idir to -I$(srctree)/dir except for absolute paths (starting with '/').

diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan
new file mode 100644
index 000000000000..8ab68679cfb5
--- /dev/null
+++ b/scripts/Makefile.ubsan
@@ -0,0 +1,17 @@
+ifdef CONFIG_UBSAN
+ CFLAGS_UBSAN += $(call cc-option, -fsanitize=shift)
+ CFLAGS_UBSAN += $(call cc-option, -fsanitize=integer-divide-by-zero)
+ CFLAGS_UBSAN += $(call cc-option, -fsanitize=unreachable)
+ CFLAGS_UBSAN += $(call cc-option, -fsanitize=vla-bound)
+ CFLAGS_UBSAN += $(call cc-option, -fsanitize=null)
+ CFLAGS_UBSAN += $(call cc-option, -fsanitize=signed-integer-overflow)
+ CFLAGS_UBSAN += $(call cc-option, -fsanitize=bounds)
+ CFLAGS_UBSAN += $(call cc-option, -fsanitize=object-size)
+ CFLAGS_UBSAN += $(call cc-option, -fsanitize=returns-nonnull-attribute)
+ CFLAGS_UBSAN += $(call cc-option, -fsanitize=bool)
+ CFLAGS_UBSAN += $(call cc-option, -fsanitize=enum)
+
+ifdef CONFIG_UBSAN_ALIGNMENT
+ CFLAGS_UBSAN += $(call cc-option, -fsanitize=alignment)
+endif
+endif
--
2.10.2



--
Ben Hutchings
Software Developer, Codethink Ltd.


[PATCH 4.4-cip 00/23] Undefined Behaviour Sanitizer support

Ben Hutchings <ben.hutchings@...>
 

This is a backport of changes in 4.5 to add support for GCC's Undefined
Behaviour Sanitizer (UBSAN), and fixes for undefined behaviour that it
has found (that are not already in 4.4.30).

UBSAN detects (at run time) many operations that will result in
undefined behaviour, i.e. unpredictable results. These include overflow
of signed integer calculations, bitwise shifts by too many places, and
some array bounds errors. (KASAN detects other kinds of address
errors, but is currently only supported on arm64 and x86_64.) See
<http://developerblog.redhat.com/2014/10/16/gcc-undefined-behavior-sanitizer-ubsan/> for more information.

UBSAN can be enabled globally on x86 (CONFIG_UBSAN_SANITIZE_ALL), but
must be enabled selectively on arm (add 'UBSAN_SANITIZE := y' to the
Makefile in each directory you want to apply it to). I assume there are
a few critical regions of code that must be avoided.
It has a significant performance cost, so it is unlikely to be suitable
for use in production builds.

For this and other backports, I'm looking for feedback like:
- Did I miss a follow-up fix or an earlier dependency?
- Does this cause a regression (other than as explained above)?
- Are you likely to use it?
- Are there related features you want in 4.4?

Ben.

Adam Borowski (2):
perf/x86/amd: Set the size of event map array to PERF_COUNT_HW_MAX
btrfs: fix int32 overflow in shrink_delalloc().

Andrey Ryabinin (3):
UBSAN: run-time undefined behavior sanity checker
mm/page-writeback: fix dirty_ratelimit calculation
ubsan: fix tree-wide -Wmaybe-uninitialized false positives

Bartlomiej Zolnierkiewicz (1):
blk-mq: fix undefined behaviour in order_to_size()

Chris Bainbridge (1):
x86/microcode/intel: Change checksum variables to u32

David Decotigny (1):
mlx4: remove unused fields

Eric Biggers (1):
PM / sleep: declare __tracedata symbols as char[] rather than char

Eric Dumazet (1):
net: get rid of an signed integer overflow in ip_idents_reserve()

Nicolai Stange (2):
mm/filemap: generic_file_read_iter(): check for zero reads
unconditionally
drm/radeon: don't include RADEON_HPD_NONE in HPD IRQ enable bitsets

Nicolas Iooss (1):
UBSAN: fix typo in format string

Oleg Nesterov (1):
signal: move the "sig < SIGRTMIN" check into siginmask(sig)

Peter Zijlstra (1):
perf/core: Fix Undefined behaviour in rb_alloc()

Seung-Woo Kim (2):
mmc: dw_mmc: remove UBSAN warning in dw_mci_setup_bus()
pwm: samsung: Fix to use lowest div for large enough modulation bits

Sven Eckelmann (1):
batman-adv: Fix integer overflow in batadv_iv_ogm_calc_tq

Vegard Nossum (2):
rhashtable: fix shift by 64 when shrinking
time: Avoid undefined behaviour in ktime_add_safe()

Xie XiuQi (2):
drm: fix signed integer overflow
xfs: fix signed integer overflow

Yang Shi (1):
ubsan: cosmetic fix to Kconfig text

Documentation/ubsan.txt | 84 ++++++
Makefile | 3 +-
arch/x86/Kconfig | 1 +
arch/x86/boot/Makefile | 1 +
arch/x86/boot/compressed/Makefile | 1 +
arch/x86/entry/vdso/Makefile | 1 +
arch/x86/kernel/cpu/microcode/intel_lib.c | 8 +-
arch/x86/kernel/cpu/perf_event_amd.c | 2 +-
arch/x86/realmode/rm/Makefile | 1 +
block/blk-mq.c | 2 +-
drivers/base/power/trace.c | 4 +-
drivers/firmware/efi/libstub/Makefile | 1 +
drivers/gpu/drm/drm_hashtab.c | 2 +-
drivers/gpu/drm/radeon/evergreen.c | 6 +-
drivers/gpu/drm/radeon/r100.c | 6 +-
drivers/gpu/drm/radeon/r600.c | 6 +-
drivers/gpu/drm/radeon/rs600.c | 6 +-
drivers/mmc/host/dw_mmc.c | 14 +-
drivers/mmc/host/dw_mmc.h | 4 -
drivers/net/ethernet/mellanox/mlx4/fw.c | 4 -
drivers/net/ethernet/mellanox/mlx4/fw.h | 2 -
drivers/pwm/pwm-samsung.c | 15 +-
fs/btrfs/extent-tree.c | 2 +-
fs/xfs/xfs_buf_item.c | 4 +-
include/linux/ktime.h | 7 +
include/linux/sched.h | 3 +
include/linux/signal.h | 16 +-
kernel/events/ring_buffer.c | 6 +-
kernel/time/hrtimer.c | 2 +-
lib/Kconfig.debug | 1 +
lib/Kconfig.ubsan | 36 +++
lib/Makefile | 3 +
lib/rhashtable.c | 6 +-
lib/ubsan.c | 456 ++++++++++++++++++++++++++++++
lib/ubsan.h | 84 ++++++
mm/filemap.c | 7 +-
mm/kasan/Makefile | 1 +
mm/page-writeback.c | 11 +-
net/batman-adv/bat_iv_ogm.c | 5 +-
net/ipv4/route.c | 10 +-
scripts/Makefile.lib | 6 +
scripts/Makefile.ubsan | 21 ++
42 files changed, 792 insertions(+), 69 deletions(-)
create mode 100644 Documentation/ubsan.txt
create mode 100644 lib/Kconfig.ubsan
create mode 100644 lib/ubsan.c
create mode 100644 lib/ubsan.h
create mode 100644 scripts/Makefile.ubsan

--
2.10.2


--
Ben Hutchings
Software Developer, Codethink Ltd.


[PATCH 4.4-cip 5/5] mm/page_poisoning.c: allow for zero poisoning

Ben Hutchings <ben.hutchings@...>
 

From: Laura Abbott <labbott@...>

commit 1414c7f4f7d72d138fff35f00151d15749b5beda upstream.

By default, page poisoning uses a poison value (0xaa) on free. If this
is changed to 0, the page is not only sanitized but zeroing on alloc
with __GFP_ZERO can be skipped as well. The tradeoff is that corruption
from the poisoning is harder to detect. This feature also cannot be
used with hibernation, since pages are not guaranteed to be zeroed
after hibernation.
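
In other words (paraphrasing the allocator hunk below), prep_new_page()
ends up doing roughly:

	if (!free_pages_prezeroed(poisoned) && (gfp_flags & __GFP_ZERO))
		for (i = 0; i < (1 << order); i++)
			clear_highpage(page + i);

so when the poison value is 0x00 and every page in the block is still
marked poisoned, the __GFP_ZERO clearing is skipped because the memory
is already zero-filled.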

Credit to Grsecurity/PaX team for inspiring this work

Signed-off-by: Laura Abbott <labbott@...>
Acked-by: Rafael J. Wysocki <rjw@...>
Cc: "Kirill A. Shutemov" <kirill.shutemov@...>
Cc: Vlastimil Babka <vbabka@...>
Cc: Michal Hocko <mhocko@...>
Cc: Kees Cook <keescook@...>
Cc: Mathias Krause <minipli@...>
Cc: Dave Hansen <dave.hansen@...>
Cc: Jianyu Zhan <nasa4836@...>
Signed-off-by: Andrew Morton <akpm@...>
Signed-off-by: Linus Torvalds <torvalds@...>
Signed-off-by: Ben Hutchings <ben.hutchings@...>
---
include/linux/mm.h | 2 ++
include/linux/poison.h | 4 ++++
kernel/power/hibernate.c | 17 +++++++++++++++++
mm/Kconfig.debug | 14 ++++++++++++++
mm/page_alloc.c | 11 ++++++++++-
mm/page_ext.c | 10 ++++++++--
mm/page_poison.c | 7 +++++--
7 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 56272e69c9a3..5d8beadd908d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2132,10 +2132,12 @@ static inline void vm_stat_account(struct mm_struct *mm,
#ifdef CONFIG_PAGE_POISONING
extern bool page_poisoning_enabled(void);
extern void kernel_poison_pages(struct page *page, int numpages, int enable);
+extern bool page_is_poisoned(struct page *page);
#else
static inline bool page_poisoning_enabled(void) { return false; }
static inline void kernel_poison_pages(struct page *page, int numpages,
int enable) { }
+static inline bool page_is_poisoned(struct page *page) { return false; }
#endif

#ifdef CONFIG_DEBUG_PAGEALLOC
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 317e16de09e5..199ffec4bdf3 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -30,7 +30,11 @@
#define TIMER_ENTRY_STATIC ((void *) 0x74737461)

/********** mm/debug-pagealloc.c **********/
+#ifdef CONFIG_PAGE_POISONING_ZERO
+#define PAGE_POISON 0x00
+#else
#define PAGE_POISON 0xaa
+#endif

/********** mm/slab.c **********/
/*
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 3124cebaec31..2fc1177383a0 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -1159,6 +1159,22 @@ static int __init kaslr_nohibernate_setup(char *str)
return nohibernate_setup(str);
}

+static int __init page_poison_nohibernate_setup(char *str)
+{
+#ifdef CONFIG_PAGE_POISONING_ZERO
+ /*
+ * The zeroing option for page poison skips the checks on alloc.
+ * since hibernation doesn't save free pages there's no way to
+ * guarantee the pages will still be zeroed.
+ */
+ if (!strcmp(str, "on")) {
+ pr_info("Disabling hibernation due to page poisoning\n");
+ return nohibernate_setup(str);
+ }
+#endif
+ return 1;
+}
+
__setup("noresume", noresume_setup);
__setup("resume_offset=", resume_offset_setup);
__setup("resume=", resume_setup);
@@ -1167,3 +1183,4 @@ __setup("resumewait", resumewait_setup);
__setup("resumedelay=", resumedelay_setup);
__setup("nohibernate", nohibernate_setup);
__setup("kaslr", kaslr_nohibernate_setup);
+__setup("page_poison=", page_poison_nohibernate_setup);
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 35d68954ad34..613437b03ebb 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -51,3 +51,17 @@ config PAGE_POISONING_NO_SANITY

If you are only interested in sanitization, say Y. Otherwise
say N.
+
+config PAGE_POISONING_ZERO
+ bool "Use zero for poisoning instead of random data"
+ depends on PAGE_POISONING
+ ---help---
+ Instead of using the existing poison value, fill the pages with
+ zeros. This makes it harder to detect when errors are occurring
+ due to sanitization but the zeroing at free means that it is
+ no longer necessary to write zeros when GFP_ZERO is used on
+ allocation.
+
+ Enabling page poisoning with this option will disable hibernation
+
+ If unsure, say N
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 94e98734ecd9..4b3b5cb36844 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1384,15 +1384,24 @@ static inline int check_new_page(struct page *page)
return 0;
}

+static inline bool free_pages_prezeroed(bool poisoned)
+{
+ return IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) &&
+ page_poisoning_enabled() && poisoned;
+}
+
static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
int alloc_flags)
{
int i;
+ bool poisoned = true;

for (i = 0; i < (1 << order); i++) {
struct page *p = page + i;
if (unlikely(check_new_page(p)))
return 1;
+ if (poisoned)
+ poisoned &= page_is_poisoned(p);
}

set_page_private(page, 0);
@@ -1403,7 +1412,7 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
kernel_poison_pages(page, 1 << order, 1);
kasan_alloc_pages(page, order);

- if (gfp_flags & __GFP_ZERO)
+ if (!free_pages_prezeroed(poisoned) && (gfp_flags & __GFP_ZERO))
for (i = 0; i < (1 << order); i++)
clear_highpage(page + i);

diff --git a/mm/page_ext.c b/mm/page_ext.c
index 292ca7b8debd..2d864e64f7fe 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -106,12 +106,15 @@ struct page_ext *lookup_page_ext(struct page *page)
struct page_ext *base;

base = NODE_DATA(page_to_nid(page))->node_page_ext;
-#ifdef CONFIG_DEBUG_VM
+#if defined(CONFIG_DEBUG_VM) || defined(CONFIG_PAGE_POISONING)
/*
* The sanity checks the page allocator does upon freeing a
* page can reach here before the page_ext arrays are
* allocated when feeding a range of pages to the allocator
* for the first time during bootup or memory hotplug.
+ *
+ * This check is also necessary for ensuring page poisoning
+ * works as expected when enabled
*/
if (unlikely(!base))
return NULL;
@@ -180,12 +183,15 @@ struct page_ext *lookup_page_ext(struct page *page)
{
unsigned long pfn = page_to_pfn(page);
struct mem_section *section = __pfn_to_section(pfn);
-#ifdef CONFIG_DEBUG_VM
+#if defined(CONFIG_DEBUG_VM) || defined(CONFIG_PAGE_POISONING)
/*
* The sanity checks the page allocator does upon freeing a
* page can reach here before the page_ext arrays are
* allocated when feeding a range of pages to the allocator
* for the first time during bootup or memory hotplug.
+ *
+ * This check is also necessary for ensuring page poisoning
+ * works as expected when enabled
*/
if (!section->page_ext)
return NULL;
diff --git a/mm/page_poison.c b/mm/page_poison.c
index 89d3bc773633..479e7ea2bea6 100644
--- a/mm/page_poison.c
+++ b/mm/page_poison.c
@@ -71,11 +71,14 @@ static inline void clear_page_poison(struct page *page)
__clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
}

-static inline bool page_poison(struct page *page)
+bool page_is_poisoned(struct page *page)
{
struct page_ext *page_ext;

page_ext = lookup_page_ext(page);
+ if (!page_ext)
+ return false;
+
return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
}

@@ -137,7 +140,7 @@ static void unpoison_page(struct page *page)
{
void *addr;

- if (!page_poison(page))
+ if (!page_is_poisoned(page))
return;

addr = kmap_atomic(page);
--
2.10.2


--
Ben Hutchings
Software Developer, Codethink Ltd.


[PATCH 4.4-cip 4/5] mm/page_poison.c: enable PAGE_POISONING as a separate option

Ben Hutchings <ben.hutchings@...>
 

From: Laura Abbott <labbott@...>

commit 8823b1dbc05fab1a8bec275eeae4709257c2661d upstream.

Page poisoning is currently set up as a feature if architectures don't
have architecture debug page_alloc to allow unmapping of pages. It has
uses apart from that though. Clearing of the pages on free provides an
increase in security as it helps to limit the risk of information leaks.
Allow page poisoning to be enabled as a separate option independent of
kernel_map_pages(), since the two features do separate work. Because of
how hibernation is implemented, the checks on alloc cannot occur if
hibernation is enabled. The runtime alloc checks can also be enabled
with an option when !HIBERNATION.
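
The resulting flow (as wired up in the patch below) is roughly: booting
with page_poison=on sets want_page_poisoning, init_page_poisoning() then
latches __page_poisoning_enabled, and the page allocator calls

	kernel_poison_pages(page, 1 << order, 0);	/* on free: fill with PAGE_POISON */
	kernel_poison_pages(page, 1 << order, 1);	/* on alloc: verify the pattern */

with the alloc-time verification skipped when PAGE_POISONING_NO_SANITY
is selected (which HIBERNATION forces).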

Credit to Grsecurity/PaX team for inspiring this work

Signed-off-by: Laura Abbott <labbott@...>
Cc: Rafael J. Wysocki <rjw@...>
Cc: "Kirill A. Shutemov" <kirill.shutemov@...>
Cc: Vlastimil Babka <vbabka@...>
Cc: Michal Hocko <mhocko@...>
Cc: Kees Cook <keescook@...>
Cc: Mathias Krause <minipli@...>
Cc: Dave Hansen <dave.hansen@...>
Cc: Jianyu Zhan <nasa4836@...>
Signed-off-by: Andrew Morton <akpm@...>
Signed-off-by: Linus Torvalds <torvalds@...>
Signed-off-by: Ben Hutchings <ben.hutchings@...>
---
Documentation/kernel-parameters.txt | 5 +++
include/linux/mm.h | 9 +++++
mm/Kconfig.debug | 25 +++++++++++++-
mm/Makefile | 2 +-
mm/page_alloc.c | 2 ++
mm/{debug-pagealloc.c => page_poison.c} | 60 ++++++++++++++++++++++++++-------
6 files changed, 89 insertions(+), 14 deletions(-)
rename mm/{debug-pagealloc.c => page_poison.c} (67%)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c360f80c3473..37180e331185 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2672,6 +2672,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
we can turn it on.
on: enable the feature

+ page_poison= [KNL] Boot-time parameter changing the state of
+ poisoning on the buddy allocator.
+ off: turn off poisoning
+ on: turn on poisoning
+
panic= [KNL] Kernel behaviour on panic: delay <timeout>
timeout > 0: seconds before rebooting
timeout = 0: wait forever
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b93d147439f9..56272e69c9a3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2129,6 +2129,15 @@ static inline void vm_stat_account(struct mm_struct *mm,
}
#endif /* CONFIG_PROC_FS */

+#ifdef CONFIG_PAGE_POISONING
+extern bool page_poisoning_enabled(void);
+extern void kernel_poison_pages(struct page *page, int numpages, int enable);
+#else
+static inline bool page_poisoning_enabled(void) { return false; }
+static inline void kernel_poison_pages(struct page *page, int numpages,
+ int enable) { }
+#endif
+
#ifdef CONFIG_DEBUG_PAGEALLOC
extern bool _debug_pagealloc_enabled;
extern void __kernel_map_pages(struct page *page, int numpages, int enable);
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 957d3da53ddd..35d68954ad34 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -27,4 +27,27 @@ config DEBUG_PAGEALLOC
a resume because free pages are not saved to the suspend image.

config PAGE_POISONING
- bool
+ bool "Poison pages after freeing"
+ select PAGE_EXTENSION
+ select PAGE_POISONING_NO_SANITY if HIBERNATION
+ ---help---
+ Fill the pages with poison patterns after free_pages() and verify
+ the patterns before alloc_pages. The filling of the memory helps
+ reduce the risk of information leaks from freed data. This does
+ have a potential performance impact.
+
+ Note that "poison" here is not the same thing as the "HWPoison"
+ for CONFIG_MEMORY_FAILURE. This is software poisoning only.
+
+ If unsure, say N
+
+config PAGE_POISONING_NO_SANITY
+ depends on PAGE_POISONING
+ bool "Only poison, don't sanity check"
+ ---help---
+ Skip the sanity checking on alloc, only fill the pages with
+ poison on free. This reduces some of the overhead of the
+ poisoning feature.
+
+ If you are only interested in sanitization, say Y. Otherwise
+ say N.
diff --git a/mm/Makefile b/mm/Makefile
index 2ed43191fc3b..cfdd481d27a5 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -48,7 +48,7 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
obj-$(CONFIG_SLOB) += slob.o
obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
obj-$(CONFIG_KSM) += ksm.o
-obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o
+obj-$(CONFIG_PAGE_POISONING) += page_poison.o
obj-$(CONFIG_SLAB) += slab.o
obj-$(CONFIG_SLUB) += slub.o
obj-$(CONFIG_KMEMCHECK) += kmemcheck.o
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2bcdfbf8c36d..94e98734ecd9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1004,6 +1004,7 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
PAGE_SIZE << order);
}
arch_free_page(page, order);
+ kernel_poison_pages(page, 1 << order, 0);
kernel_map_pages(page, 1 << order, 0);

return true;
@@ -1399,6 +1400,7 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,

arch_alloc_page(page, order);
kernel_map_pages(page, 1 << order, 1);
+ kernel_poison_pages(page, 1 << order, 1);
kasan_alloc_pages(page, order);

if (gfp_flags & __GFP_ZERO)
diff --git a/mm/debug-pagealloc.c b/mm/page_poison.c
similarity index 67%
rename from mm/debug-pagealloc.c
rename to mm/page_poison.c
index 5bf5906ce13b..89d3bc773633 100644
--- a/mm/debug-pagealloc.c
+++ b/mm/page_poison.c
@@ -6,22 +6,48 @@
#include <linux/poison.h>
#include <linux/ratelimit.h>

-static bool page_poisoning_enabled __read_mostly;
+static bool __page_poisoning_enabled __read_mostly;
+static bool want_page_poisoning __read_mostly;

-static bool need_page_poisoning(void)
+static int early_page_poison_param(char *buf)
{
- if (!debug_pagealloc_enabled())
- return false;
+ if (!buf)
+ return -EINVAL;
+
+ if (strcmp(buf, "on") == 0)
+ want_page_poisoning = true;
+ else if (strcmp(buf, "off") == 0)
+ want_page_poisoning = false;

- return true;
+ return 0;
+}
+early_param("page_poison", early_page_poison_param);
+
+bool page_poisoning_enabled(void)
+{
+ return __page_poisoning_enabled;
+}
+
+static bool need_page_poisoning(void)
+{
+ return want_page_poisoning;
}

static void init_page_poisoning(void)
{
- if (!debug_pagealloc_enabled())
- return;
+ /*
+ * page poisoning is debug page alloc for some arches. If either
+ * of those options are enabled, enable poisoning
+ */
+ if (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC)) {
+ if (!want_page_poisoning && !debug_pagealloc_enabled())
+ return;
+ } else {
+ if (!want_page_poisoning)
+ return;
+ }

- page_poisoning_enabled = true;
+ __page_poisoning_enabled = true;
}

struct page_ext_operations page_poisoning_ops = {
@@ -83,6 +109,9 @@ static void check_poison_mem(unsigned char *mem, size_t bytes)
unsigned char *start;
unsigned char *end;

+ if (IS_ENABLED(CONFIG_PAGE_POISONING_NO_SANITY))
+ return;
+
start = memchr_inv(mem, PAGE_POISON, bytes);
if (!start)
return;
@@ -95,9 +124,9 @@ static void check_poison_mem(unsigned char *mem, size_t bytes)
if (!__ratelimit(&ratelimit))
return;
else if (start == end && single_bit_flip(*start, PAGE_POISON))
- printk(KERN_ERR "pagealloc: single bit error\n");
+ pr_err("pagealloc: single bit error\n");
else
- printk(KERN_ERR "pagealloc: memory corruption\n");
+ pr_err("pagealloc: memory corruption\n");

print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, start,
end - start + 1, 1);
@@ -125,9 +154,9 @@ static void unpoison_pages(struct page *page, int n)
unpoison_page(page + i);
}

-void __kernel_map_pages(struct page *page, int numpages, int enable)
+void kernel_poison_pages(struct page *page, int numpages, int enable)
{
- if (!page_poisoning_enabled)
+ if (!page_poisoning_enabled())
return;

if (enable)
@@ -135,3 +164,10 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
else
poison_pages(page, numpages);
}
+
+#ifndef CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+ /* This function does nothing, all work is done via poison pages */
+}
+#endif
--
2.10.2



--
Ben Hutchings
Software Developer, Codethink Ltd.


[PATCH 4.4-cip 3/5] mm/slab: clean up DEBUG_PAGEALLOC processing code

Ben Hutchings <ben.hutchings@...>
 

From: Joonsoo Kim <iamjoonsoo.kim@...>

commit 40b44137971c2e5865a78f9f7de274449983ccb5 upstream.

Currently, the open-coded checks for a DEBUG_PAGEALLOC cache are spread
across several sites. This makes the code unreadable and hard to change.

This patch cleans up this code. The following patch will change the
criteria for DEBUG_PAGEALLOC cache so this clean-up will help it, too.
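
As a rough sketch of the resulting shape (simplified from the diff
below, not a verbatim extract), the scattered size/OFF_SLAB/
debug_pagealloc_enabled() tests collapse into one predicate and one
helper:

	static bool is_debug_pagealloc_cache(struct kmem_cache *cachep)
	{
		/* only page-sized, off-slab caches can be (un)mapped
		 * via kernel_map_pages() */
		return debug_pagealloc_enabled() && OFF_SLAB(cachep) &&
		       (cachep->size % PAGE_SIZE) == 0;
	}

	/* call sites then reduce to: */
	if (cachep->flags & SLAB_POISON) {
		check_poison_obj(cachep, objp);
		slab_kernel_map(cachep, objp, 1, 0);
	}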

[akpm@...: fix build with CONFIG_DEBUG_PAGEALLOC=n]
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@...>
Cc: Christoph Lameter <cl@...>
Cc: Pekka Enberg <penberg@...>
Cc: David Rientjes <rientjes@...>
Cc: Joonsoo Kim <iamjoonsoo.kim@...>
Cc: Jesper Dangaard Brouer <brouer@...>
Signed-off-by: Andrew Morton <akpm@...>
Signed-off-by: Linus Torvalds <torvalds@...>
Signed-off-by: Ben Hutchings <ben.hutchings@...>
---
include/linux/mm.h | 12 ++++---
mm/slab.c | 97 +++++++++++++++++++++++++++---------------------------
2 files changed, 57 insertions(+), 52 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f0ffa01c90d9..b93d147439f9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2148,14 +2148,18 @@ kernel_map_pages(struct page *page, int numpages, int enable)
}
#ifdef CONFIG_HIBERNATION
extern bool kernel_page_present(struct page *page);
-#endif /* CONFIG_HIBERNATION */
-#else
+#endif /* CONFIG_HIBERNATION */
+#else /* CONFIG_DEBUG_PAGEALLOC */
static inline void
kernel_map_pages(struct page *page, int numpages, int enable) {}
#ifdef CONFIG_HIBERNATION
static inline bool kernel_page_present(struct page *page) { return true; }
-#endif /* CONFIG_HIBERNATION */
-#endif
+#endif /* CONFIG_HIBERNATION */
+static inline bool debug_pagealloc_enabled(void)
+{
+ return false;
+}
+#endif /* CONFIG_DEBUG_PAGEALLOC */

#ifdef __HAVE_ARCH_GATE_AREA
extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);
diff --git a/mm/slab.c b/mm/slab.c
index 62a7e4d3dde3..aa11af56f139 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1670,6 +1670,14 @@ static void kmem_rcu_free(struct rcu_head *head)
}

#if DEBUG
+static bool is_debug_pagealloc_cache(struct kmem_cache *cachep)
+{
+ if (debug_pagealloc_enabled() && OFF_SLAB(cachep) &&
+ (cachep->size % PAGE_SIZE) == 0)
+ return true;
+
+ return false;
+}

#ifdef CONFIG_DEBUG_PAGEALLOC
static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
@@ -1703,6 +1711,23 @@ static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
}
*addr++ = 0x87654321;
}
+
+static void slab_kernel_map(struct kmem_cache *cachep, void *objp,
+ int map, unsigned long caller)
+{
+ if (!is_debug_pagealloc_cache(cachep))
+ return;
+
+ if (caller)
+ store_stackinfo(cachep, objp, caller);
+
+ kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map);
+}
+
+#else
+static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp,
+ int map, unsigned long caller) {}
+
#endif

static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
@@ -1781,6 +1806,9 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
int size, i;
int lines = 0;

+ if (is_debug_pagealloc_cache(cachep))
+ return;
+
realobj = (char *)objp + obj_offset(cachep);
size = cachep->object_size;

@@ -1846,17 +1874,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
void *objp = index_to_obj(cachep, page, i);

if (cachep->flags & SLAB_POISON) {
-#ifdef CONFIG_DEBUG_PAGEALLOC
- if (debug_pagealloc_enabled() &&
- cachep->size % PAGE_SIZE == 0 &&
- OFF_SLAB(cachep))
- kernel_map_pages(virt_to_page(objp),
- cachep->size / PAGE_SIZE, 1);
- else
- check_poison_obj(cachep, objp);
-#else
check_poison_obj(cachep, objp);
-#endif
+ slab_kernel_map(cachep, objp, 1, 0);
}
if (cachep->flags & SLAB_RED_ZONE) {
if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
@@ -2237,16 +2256,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
if (flags & CFLGS_OFF_SLAB) {
/* really off slab. No need for manual alignment */
freelist_size = calculate_freelist_size(cachep->num, 0);
-
-#ifdef CONFIG_PAGE_POISONING
- /* If we're going to use the generic kernel_map_pages()
- * poisoning, then it's going to smash the contents of
- * the redzone and userword anyhow, so switch them off.
- */
- if (debug_pagealloc_enabled() &&
- size % PAGE_SIZE == 0 && flags & SLAB_POISON)
- flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
-#endif
}

cachep->colour_off = cache_line_size();
@@ -2262,7 +2271,19 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
cachep->size = size;
cachep->reciprocal_buffer_size = reciprocal_value(size);

- if (flags & CFLGS_OFF_SLAB) {
+#if DEBUG
+ /*
+ * If we're going to use the generic kernel_map_pages()
+ * poisoning, then it's going to smash the contents of
+ * the redzone and userword anyhow, so switch them off.
+ */
+ if (IS_ENABLED(CONFIG_PAGE_POISONING) &&
+ (cachep->flags & SLAB_POISON) &&
+ is_debug_pagealloc_cache(cachep))
+ cachep->flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
+#endif
+
+ if (OFF_SLAB(cachep)) {
cachep->freelist_cache = kmalloc_slab(freelist_size, 0u);
/*
* This is a possibility for one of the kmalloc_{dma,}_caches.
@@ -2489,9 +2510,6 @@ static void cache_init_objs(struct kmem_cache *cachep,
for (i = 0; i < cachep->num; i++) {
void *objp = index_to_obj(cachep, page, i);
#if DEBUG
- /* need to poison the objs? */
- if (cachep->flags & SLAB_POISON)
- poison_obj(cachep, objp, POISON_FREE);
if (cachep->flags & SLAB_STORE_USER)
*dbg_userword(cachep, objp) = NULL;

@@ -2515,10 +2533,11 @@ static void cache_init_objs(struct kmem_cache *cachep,
slab_error(cachep, "constructor overwrote the"
" start of an object");
}
- if ((cachep->size % PAGE_SIZE) == 0 &&
- OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
- kernel_map_pages(virt_to_page(objp),
- cachep->size / PAGE_SIZE, 0);
+ /* need to poison the objs? */
+ if (cachep->flags & SLAB_POISON) {
+ poison_obj(cachep, objp, POISON_FREE);
+ slab_kernel_map(cachep, objp, 0, 0);
+ }
#else
if (cachep->ctor)
cachep->ctor(objp);
@@ -2737,18 +2756,8 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,

set_obj_status(page, objnr, OBJECT_FREE);
if (cachep->flags & SLAB_POISON) {
-#ifdef CONFIG_DEBUG_PAGEALLOC
- if (debug_pagealloc_enabled() &&
- (cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) {
- store_stackinfo(cachep, objp, caller);
- kernel_map_pages(virt_to_page(objp),
- cachep->size / PAGE_SIZE, 0);
- } else {
- poison_obj(cachep, objp, POISON_FREE);
- }
-#else
poison_obj(cachep, objp, POISON_FREE);
-#endif
+ slab_kernel_map(cachep, objp, 0, caller);
}
return objp;
}
@@ -2875,16 +2884,8 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
if (!objp)
return objp;
if (cachep->flags & SLAB_POISON) {
-#ifdef CONFIG_DEBUG_PAGEALLOC
- if (debug_pagealloc_enabled() &&
- (cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
- kernel_map_pages(virt_to_page(objp),
- cachep->size / PAGE_SIZE, 1);
- else
- check_poison_obj(cachep, objp);
-#else
check_poison_obj(cachep, objp);
-#endif
+ slab_kernel_map(cachep, objp, 1, 0);
poison_obj(cachep, objp, POISON_INUSE);
}
if (cachep->flags & SLAB_STORE_USER)
--
2.10.2



--
Ben Hutchings
Software Developer, Codethink Ltd.


[PATCH 4.4-cip 2/5] mm/slab: use more appropriate condition check for debug_pagealloc

Ben Hutchings <ben.hutchings@...>
 

From: Joonsoo Kim <iamjoonsoo.kim@...>

commit 40323278b557a5909bbecfa181c91a3af7afbbe3 upstream.

debug_pagealloc debugging is related to the SLAB_POISON flag rather than
the FORCED_DEBUG option, although FORCED_DEBUG will enable SLAB_POISON.
Fix it.

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@...>
Acked-by: Christoph Lameter <cl@...>
Cc: Pekka Enberg <penberg@...>
Cc: David Rientjes <rientjes@...>
Cc: Joonsoo Kim <iamjoonsoo.kim@...>
Cc: Jesper Dangaard Brouer <brouer@...>
Signed-off-by: Andrew Morton <akpm@...>
Signed-off-by: Linus Torvalds <torvalds@...>
Signed-off-by: Ben Hutchings <ben.hutchings@...>
---
mm/slab.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index ed7eb6c92c81..62a7e4d3dde3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2180,7 +2180,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
else
size += BYTES_PER_WORD;
}
-#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
/*
* To activate debug pagealloc, off-slab management is necessary
* requirement. In early phase of initialization, small sized slab
@@ -2188,7 +2187,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
* to check size >= 256. It guarantees that all necessary small
* sized slab is initialized in current slab initialization sequence.
*/
- if (debug_pagealloc_enabled() &&
+ if (debug_pagealloc_enabled() && (flags & SLAB_POISON) &&
!slab_early_init && size >= kmalloc_size(INDEX_NODE) &&
size >= 256 && cachep->object_size > cache_line_size() &&
ALIGN(size, cachep->align) < PAGE_SIZE) {
@@ -2196,7 +2195,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
size = PAGE_SIZE;
}
#endif
-#endif

/*
* Determine if the slab management is 'on' or 'off' slab.
--
2.10.2



--
Ben Hutchings
Software Developer, Codethink Ltd.


[PATCH 4.4-cip 1/5] mm/slab: activate debug_pagealloc in SLAB when it is actually enabled

Ben Hutchings <ben.hutchings@...>
 

From: Joonsoo Kim <iamjoonsoo.kim@...>

commit a307ebd468e0b97c203f5a99a56a6017e4d1991a upstream.

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@...>
Acked-by: Christoph Lameter <cl@...>
Cc: Pekka Enberg <penberg@...>
Cc: David Rientjes <rientjes@...>
Cc: Joonsoo Kim <iamjoonsoo.kim@...>
Cc: Jesper Dangaard Brouer <brouer@...>
Signed-off-by: Andrew Morton <akpm@...>
Signed-off-by: Linus Torvalds <torvalds@...>
Signed-off-by: Ben Hutchings <ben.hutchings@...>
---
mm/slab.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 4765c97ce690..ed7eb6c92c81 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1847,7 +1847,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,

if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
- if (cachep->size % PAGE_SIZE == 0 &&
+ if (debug_pagealloc_enabled() &&
+ cachep->size % PAGE_SIZE == 0 &&
OFF_SLAB(cachep))
kernel_map_pages(virt_to_page(objp),
cachep->size / PAGE_SIZE, 1);
@@ -2187,7 +2188,8 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
* to check size >= 256. It guarantees that all necessary small
* sized slab is initialized in current slab initialization sequence.
*/
- if (!slab_early_init && size >= kmalloc_size(INDEX_NODE) &&
+ if (debug_pagealloc_enabled() &&
+ !slab_early_init && size >= kmalloc_size(INDEX_NODE) &&
size >= 256 && cachep->object_size > cache_line_size() &&
ALIGN(size, cachep->align) < PAGE_SIZE) {
cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align);
@@ -2243,7 +2245,8 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
* poisoning, then it's going to smash the contents of
* the redzone and userword anyhow, so switch them off.
*/
- if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
+ if (debug_pagealloc_enabled() &&
+ size % PAGE_SIZE == 0 && flags & SLAB_POISON)
flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
#endif
}
@@ -2737,7 +2740,8 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
set_obj_status(page, objnr, OBJECT_FREE);
if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
- if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
+ if (debug_pagealloc_enabled() &&
+ (cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) {
store_stackinfo(cachep, objp, caller);
kernel_map_pages(virt_to_page(objp),
cachep->size / PAGE_SIZE, 0);
@@ -2874,7 +2878,8 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
return objp;
if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
- if ((cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
+ if (debug_pagealloc_enabled() &&
+ (cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
kernel_map_pages(virt_to_page(objp),
cachep->size / PAGE_SIZE, 1);
else
--
2.10.2



--
Ben Hutchings
Software Developer, Codethink Ltd.


[PATCH 4.4-cip 0/5] Faster page poisoning for security

Ben Hutchings <ben.hutchings@...>
 

This is a backport of changes in 4.6 to add new config options to page
poisoning. Page poisoning was originally intended as a debug feature for
finding use-after-free bugs in the kernel, but it can also be a security
feature, mitigating the security impact of such bugs. Unfortunately, it
normally has a significant performance cost. The new options
(CONFIG_PAGE_POISONING_NO_SANITY and CONFIG_PAGE_POISONING_ZERO) can be
enabled to reduce the performance cost, though they also reduce the
likelihood of detecting when such a bug has been triggered.

Enabling CONFIG_PAGE_POISONING plus the new options can make some such
bugs harder to exploit, at a small performance cost (you'll need to
measure it yourself).
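
Concretely, a reduced-cost configuration along the lines this series
allows would be roughly (illustrative, not a recommendation;
CONFIG_PAGE_POISONING_ZERO comes from the second Laura Abbott patch):

    CONFIG_PAGE_POISONING=y
    CONFIG_PAGE_POISONING_NO_SANITY=y
    CONFIG_PAGE_POISONING_ZERO=y

plus "page_poison=on" on the kernel command line to actually enable it.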

For this and other backports, I'm looking for feedback like:
- Did I miss a follow-up fix or an earlier dependency?
- Does this cause a regression (other than as explained above)?
- Are you likely to use it?
- Are there related features you want in 4.4?

Ben.

Joonsoo Kim (3):
mm/slab: activate debug_pagealloc in SLAB when it is actually enabled
mm/slab: use more appropriate condition check for debug_pagealloc
mm/slab: clean up DEBUG_PAGEALLOC processing code

Laura Abbott (2):
mm/page_poison.c: enable PAGE_POISONING as a separate option
mm/page_poisoning.c: allow for zero poisoning

Documentation/kernel-parameters.txt | 5 ++
include/linux/mm.h | 23 ++++++--
include/linux/poison.h | 4 ++
kernel/power/hibernate.c | 17 ++++++
mm/Kconfig.debug | 39 ++++++++++++-
mm/Makefile | 2 +-
mm/page_alloc.c | 13 ++++-
mm/page_ext.c | 10 +++-
mm/{debug-pagealloc.c => page_poison.c} | 67 +++++++++++++++++-----
mm/slab.c | 98 +++++++++++++++++----------------
10 files changed, 208 insertions(+), 70 deletions(-)
rename mm/{debug-pagealloc.c => page_poison.c} (64%)

--
2.10.2


--
Ben Hutchings
Software Developer, Codethink Ltd.


[PATCH 4.4-cip 6/6] mm: ASLR: use get_random_long()

Ben Hutchings <ben.hutchings@...>
 

From: Daniel Cashman <dcashman@...>

commit 5ef11c35ce86b94bfb878b684de4cdaf96f54b2f upstream.

Replace calls to get_random_int() followed by a cast to (unsigned long)
with calls to get_random_long(). Also address a shifting bug which, in
the case of x86, removed the entropy mask for mmap_rnd_bits values > 31
bits.
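
The shape of the replacement, as an illustrative before/after rather
than a quote of any one hunk:

	/* before: 32-bit random value, and an int-typed (32-bit) mask */
	rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);

	/* after: full-width random value and an unsigned long mask */
	rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);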

Signed-off-by: Daniel Cashman <dcashman@...>
Acked-by: Kees Cook <keescook@...>
Cc: "Theodore Ts'o" <tytso@...>
Cc: Arnd Bergmann <arnd@...>
Cc: Greg Kroah-Hartman <gregkh@...>
Cc: Catalin Marinas <catalin.marinas@...>
Cc: Will Deacon <will.deacon@...>
Cc: Ralf Baechle <ralf@...>
Cc: Benjamin Herrenschmidt <benh@...>
Cc: Paul Mackerras <paulus@...>
Cc: Michael Ellerman <mpe@...>
Cc: David S. Miller <davem@...>
Cc: Thomas Gleixner <tglx@...>
Cc: Ingo Molnar <mingo@...>
Cc: H. Peter Anvin <hpa@...>
Cc: Al Viro <viro@...>
Cc: Nick Kralevich <nnk@...>
Cc: Jeff Vander Stoep <jeffv@...>
Cc: Mark Salyzyn <salyzyn@...>
Signed-off-by: Andrew Morton <akpm@...>
Signed-off-by: Linus Torvalds <torvalds@...>
Signed-off-by: Ben Hutchings <ben.hutchings@...>
---
arch/arm/mm/mmap.c | 2 +-
arch/arm64/mm/mmap.c | 4 ++--
arch/mips/mm/mmap.c | 4 ++--
arch/powerpc/kernel/process.c | 4 ++--
arch/powerpc/mm/mmap.c | 4 ++--
arch/sparc/kernel/sys_sparc_64.c | 2 +-
arch/x86/mm/mmap.c | 6 +++---
fs/binfmt_elf.c | 2 +-
8 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 4b4058db0781..66353caa35b9 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -173,7 +173,7 @@ unsigned long arch_mmap_rnd(void)
{
unsigned long rnd;

- rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+ rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);

return rnd << PAGE_SHIFT;
}
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 4c893b5189dd..232f787a088a 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -53,10 +53,10 @@ unsigned long arch_mmap_rnd(void)

#ifdef CONFIG_COMPAT
if (test_thread_flag(TIF_32BIT))
- rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
+ rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
else
#endif
- rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+ rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
return rnd << PAGE_SHIFT;
}

diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index 5c81fdd032c3..353037699512 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -146,7 +146,7 @@ unsigned long arch_mmap_rnd(void)
{
unsigned long rnd;

- rnd = (unsigned long)get_random_int();
+ rnd = get_random_long();
rnd <<= PAGE_SHIFT;
if (TASK_IS_32BIT_ADDR)
rnd &= 0xfffffful;
@@ -174,7 +174,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)

static inline unsigned long brk_rnd(void)
{
- unsigned long rnd = get_random_int();
+ unsigned long rnd = get_random_long();

rnd = rnd << PAGE_SHIFT;
/* 8MB for 32bit, 256MB for 64bit */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index cf788d7d7e56..b7abf3cd2a67 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1651,9 +1651,9 @@ static inline unsigned long brk_rnd(void)

/* 8MB for 32bit, 1GB for 64bit */
if (is_32bit_task())
- rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT)));
+ rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT)));
else
- rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT)));
+ rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT)));

return rnd << PAGE_SHIFT;
}
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 0f0502e12f6c..4087705ba90f 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -59,9 +59,9 @@ unsigned long arch_mmap_rnd(void)

/* 8MB for 32bit, 1GB for 64bit */
if (is_32bit_task())
- rnd = (unsigned long)get_random_int() % (1<<(23-PAGE_SHIFT));
+ rnd = get_random_long() % (1<<(23-PAGE_SHIFT));
else
- rnd = (unsigned long)get_random_int() % (1<<(30-PAGE_SHIFT));
+ rnd = get_random_long() % (1UL<<(30-PAGE_SHIFT));

return rnd << PAGE_SHIFT;
}
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index c690c8e16a96..b489e9759518 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -264,7 +264,7 @@ static unsigned long mmap_rnd(void)
unsigned long rnd = 0UL;

if (current->flags & PF_RANDOMIZE) {
- unsigned long val = get_random_int();
+ unsigned long val = get_random_long();
if (test_thread_flag(TIF_32BIT))
rnd = (val % (1UL << (23UL-PAGE_SHIFT)));
else
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 389939f74dd5..d2dc0438d654 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -71,12 +71,12 @@ unsigned long arch_mmap_rnd(void)

if (mmap_is_ia32())
#ifdef CONFIG_COMPAT
- rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
+ rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
#else
- rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+ rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
#endif
else
- rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+ rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);

return rnd << PAGE_SHIFT;
}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 3a93755e880f..0c52941dd62c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -651,7 +651,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top)

if ((current->flags & PF_RANDOMIZE) &&
!(current->personality & ADDR_NO_RANDOMIZE)) {
- random_variable = (unsigned long) get_random_int();
+ random_variable = get_random_long();
random_variable &= STACK_RND_MASK;
random_variable <<= PAGE_SHIFT;
}
--
2.10.2


--
Ben Hutchings
Software Developer, Codethink Ltd.


[PATCH 4.4-cip 5/6] drivers: char: random: add get_random_long()

Ben Hutchings <ben.hutchings@...>
 

From: Daniel Cashman <dcashman@...>

commit ec9ee4acd97c0039a61c0ae4f12705767ae62153 upstream.

Commit d07e22597d1d ("mm: mmap: add new /proc tunable for mmap_base
ASLR") added the ability to choose from a range of values to use for
entropy count in generating the random offset to the mmap_base address.

The maximum value on this range was set to 32 bits for 64-bit x86
systems, but this value could be increased further, requiring more than
the 32 bits of randomness provided by get_random_int(), as is already
possible for arm64. Add a new function: get_random_long() which more
naturally fits with the mmap usage of get_random_int() but operates
exactly the same as get_random_int().

Also, fix the shifting constant in mmap_rnd() to be an unsigned long so
that values greater than 31 bits generate an appropriate mask without
overflow. This is especially important on x86, as its shift instruction
uses a 5-bit mask for the shift operand, which meant that any value for
mmap_rnd_bits over 31 acts as a no-op and effectively disables mmap_base
randomization.
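
A small worked example of that shift problem (illustrative, not from
the commit):

	int bits = 32;
	unsigned long mask;

	mask = (1 << bits) - 1;    /* shifting a 32-bit int by 32 is undefined
	                              in C; on x86 the hardware masks the shift
	                              count to 5 bits, so this typically yields
	                              1, and mask == 0 -- no randomness at all */
	mask = (1UL << bits) - 1;  /* 64-bit shift: 0xffffffff, as intended */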

Finally, replace calls to get_random_int() with get_random_long() where
appropriate.

This patch (of 2):

Add get_random_long().

Signed-off-by: Daniel Cashman <dcashman@...>
Acked-by: Kees Cook <keescook@...>
Cc: "Theodore Ts'o" <tytso@...>
Cc: Arnd Bergmann <arnd@...>
Cc: Greg Kroah-Hartman <gregkh@...>
Cc: Catalin Marinas <catalin.marinas@...>
Cc: Will Deacon <will.deacon@...>
Cc: Ralf Baechle <ralf@...>
Cc: Benjamin Herrenschmidt <benh@...>
Cc: Paul Mackerras <paulus@...>
Cc: Michael Ellerman <mpe@...>
Cc: David S. Miller <davem@...>
Cc: Thomas Gleixner <tglx@...>
Cc: Ingo Molnar <mingo@...>
Cc: H. Peter Anvin <hpa@...>
Cc: Al Viro <viro@...>
Cc: Nick Kralevich <nnk@...>
Cc: Jeff Vander Stoep <jeffv@...>
Cc: Mark Salyzyn <salyzyn@...>
Signed-off-by: Andrew Morton <akpm@...>
Signed-off-by: Linus Torvalds <torvalds@...>
Signed-off-by: Ben Hutchings <ben.hutchings@...>
---
drivers/char/random.c | 22 ++++++++++++++++++++++
include/linux/random.h | 1 +
2 files changed, 23 insertions(+)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 491a4dce13fe..d93dfebae0bb 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1825,6 +1825,28 @@ unsigned int get_random_int(void)
EXPORT_SYMBOL(get_random_int);

/*
+ * Same as get_random_int(), but returns unsigned long.
+ */
+unsigned long get_random_long(void)
+{
+ __u32 *hash;
+ unsigned long ret;
+
+ if (arch_get_random_long(&ret))
+ return ret;
+
+ hash = get_cpu_var(get_random_int_hash);
+
+ hash[0] += current->pid + jiffies + random_get_entropy();
+ md5_transform(hash, random_int_secret);
+ ret = *(unsigned long *)hash;
+ put_cpu_var(get_random_int_hash);
+
+ return ret;
+}
+EXPORT_SYMBOL(get_random_long);
+
+/*
* randomize_range() returns a start address such that
*
* [...... <range> .....]
diff --git a/include/linux/random.h b/include/linux/random.h
index a75840c1aa71..9c29122037f9 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -34,6 +34,7 @@ extern const struct file_operations random_fops, urandom_fops;
#endif

unsigned int get_random_int(void);
+unsigned long get_random_long(void);
unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len);

u32 prandom_u32(void);
--
2.10.2



--
Ben Hutchings
Software Developer, Codethink Ltd.


[PATCH 4.4-cip 4/6] x86: mm: support ARCH_MMAP_RND_BITS

Ben Hutchings <ben.hutchings@...>
 

From: Daniel Cashman <dcashman@...>

commit 9e08f57d684ac2f40685f55f659564bfd91a971e upstream.

x86: arch_mmap_rnd() uses hard-coded values, 8 for 32-bit and 28 for
64-bit, to generate the random offset for the mmap base address. This
value represents a compromise between increased ASLR effectiveness and
avoiding address-space fragmentation. Replace it with a Kconfig option,
which is sensibly bounded, so that platform developers may choose where
to place this compromise. Keep default values as new minimums.
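
For a sense of scale (spelled out here for illustration, not part of
the original commit message): with 4 KiB pages, the old hard-coded
8 bits randomise mmap_base within a 2^8 * 2^12 = 1 MiB window on
32-bit, and 28 bits within a 2^28 * 2^12 = 1 TiB window on 64-bit; the
new 64-bit maximum of 32 bits widens that window to 16 TiB.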

Signed-off-by: Daniel Cashman <dcashman@...>
Cc: Russell King <linux@...>
Acked-by: Kees Cook <keescook@...>
Cc: Ingo Molnar <mingo@...>
Cc: Jonathan Corbet <corbet@...>
Cc: Don Zickus <dzickus@...>
Cc: Eric W. Biederman <ebiederm@...>
Cc: Heinrich Schuchardt <xypron.glpk@...>
Cc: Josh Poimboeuf <jpoimboe@...>
Cc: Kirill A. Shutemov <kirill.shutemov@...>
Cc: Naoya Horiguchi <n-horiguchi@...>
Cc: Andrea Arcangeli <aarcange@...>
Cc: Mel Gorman <mgorman@...>
Cc: Thomas Gleixner <tglx@...>
Cc: David Rientjes <rientjes@...>
Cc: Mark Salyzyn <salyzyn@...>
Cc: Jeff Vander Stoep <jeffv@...>
Cc: Nick Kralevich <nnk@...>
Cc: Catalin Marinas <catalin.marinas@...>
Cc: Will Deacon <will.deacon@...>
Cc: "H. Peter Anvin" <hpa@...>
Cc: Hector Marco-Gisbert <hecmargi@...>
Cc: Borislav Petkov <bp@...>
Cc: Ralf Baechle <ralf@...>
Cc: Heiko Carstens <heiko.carstens@...>
Cc: Martin Schwidefsky <schwidefsky@...>
Cc: Benjamin Herrenschmidt <benh@...>
Signed-off-by: Andrew Morton <akpm@...>
Signed-off-by: Linus Torvalds <torvalds@...>
Signed-off-by: Ben Hutchings <ben.hutchings@...>
---
arch/x86/Kconfig | 16 ++++++++++++++++
arch/x86/mm/mmap.c | 12 ++++++------
2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 436639a31624..ffbfa85271a3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -82,6 +82,8 @@ config X86
select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP
select HAVE_ARCH_KGDB
select HAVE_ARCH_KMEMCHECK
+ select HAVE_ARCH_MMAP_RND_BITS if MMU
+ select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_SOFT_DIRTY if X86_64
select HAVE_ARCH_TRACEHOOK
@@ -183,6 +185,20 @@ config HAVE_LATENCYTOP_SUPPORT
config MMU
def_bool y

+config ARCH_MMAP_RND_BITS_MIN
+ default 28 if 64BIT
+ default 8
+
+config ARCH_MMAP_RND_BITS_MAX
+ default 32 if 64BIT
+ default 16
+
+config ARCH_MMAP_RND_COMPAT_BITS_MIN
+ default 8
+
+config ARCH_MMAP_RND_COMPAT_BITS_MAX
+ default 16
+
config SBUS
bool

diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 307f60ecfc6d..389939f74dd5 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -69,14 +69,14 @@ unsigned long arch_mmap_rnd(void)
{
unsigned long rnd;

- /*
- * 8 bits of randomness in 32bit mmaps, 20 address space bits
- * 28 bits of randomness in 64bit mmaps, 40 address space bits
- */
if (mmap_is_ia32())
- rnd = (unsigned long)get_random_int() % (1<<8);
+#ifdef CONFIG_COMPAT
+ rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
+#else
+ rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+#endif
else
- rnd = (unsigned long)get_random_int() % (1<<28);
+ rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);

return rnd << PAGE_SHIFT;
}
--
2.10.2



--
Ben Hutchings
Software Developer, Codethink Ltd.


[PATCH 4.4-cip 3/6] arm64: mm: support ARCH_MMAP_RND_BITS

Ben Hutchings <ben.hutchings@...>
 

From: Daniel Cashman <dcashman@...>

commit 8f0d3aa9de57662fe35d8bacfbd9d7ef85ffe98f upstream.

arm64: arch_mmap_rnd() uses STACK_RND_MASK to generate the random offset
for the mmap base address. This value represents a compromise between
increased ASLR effectiveness and avoiding address-space fragmentation.
Replace it with a Kconfig option, which is sensibly bounded, so that
platform developers may choose where to place this compromise. Keep
default values as new minimums.
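
The maxima follow the formula quoted in the Kconfig comment below,
VA_BITS - PAGE_SHIFT - 3. For example: 39-bit VAs with 4 KiB pages give
39 - 12 - 3 = 24 bits, 48-bit VAs give 48 - 12 - 3 = 33 bits, and
48-bit VAs with 64 KiB pages give 48 - 16 - 3 = 29 bits, matching the
new ARCH_MMAP_RND_BITS_MAX defaults.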

Signed-off-by: Daniel Cashman <dcashman@...>
Cc: Russell King <linux@...>
Acked-by: Kees Cook <keescook@...>
Cc: Ingo Molnar <mingo@...>
Cc: Jonathan Corbet <corbet@...>
Cc: Don Zickus <dzickus@...>
Cc: Eric W. Biederman <ebiederm@...>
Cc: Heinrich Schuchardt <xypron.glpk@...>
Cc: Josh Poimboeuf <jpoimboe@...>
Cc: Kirill A. Shutemov <kirill.shutemov@...>
Cc: Naoya Horiguchi <n-horiguchi@...>
Cc: Andrea Arcangeli <aarcange@...>
Cc: Mel Gorman <mgorman@...>
Cc: Thomas Gleixner <tglx@...>
Cc: David Rientjes <rientjes@...>
Cc: Mark Salyzyn <salyzyn@...>
Cc: Jeff Vander Stoep <jeffv@...>
Cc: Nick Kralevich <nnk@...>
Cc: Catalin Marinas <catalin.marinas@...>
Cc: Will Deacon <will.deacon@...>
Cc: "H. Peter Anvin" <hpa@...>
Cc: Hector Marco-Gisbert <hecmargi@...>
Cc: Borislav Petkov <bp@...>
Cc: Ralf Baechle <ralf@...>
Cc: Heiko Carstens <heiko.carstens@...>
Cc: Martin Schwidefsky <schwidefsky@...>
Cc: Benjamin Herrenschmidt <benh@...>
Signed-off-by: Andrew Morton <akpm@...>
Signed-off-by: Linus Torvalds <torvalds@...>
Signed-off-by: Ben Hutchings <ben.hutchings@...>
---
arch/arm64/Kconfig | 29 +++++++++++++++++++++++++++++
arch/arm64/mm/mmap.c | 8 ++++++--
2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 14cdc6dea493..edf62be35adc 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -51,6 +51,8 @@ config ARM64
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
select HAVE_ARCH_KGDB
+ select HAVE_ARCH_MMAP_RND_BITS
+ select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_BPF_JIT
@@ -104,6 +106,33 @@ config ARCH_PHYS_ADDR_T_64BIT
config MMU
def_bool y

+config ARCH_MMAP_RND_BITS_MIN
+ default 14 if ARM64_64K_PAGES
+ default 16 if ARM64_16K_PAGES
+ default 18
+
+# max bits determined by the following formula:
+# VA_BITS - PAGE_SHIFT - 3
+config ARCH_MMAP_RND_BITS_MAX
+ default 19 if ARM64_VA_BITS=36
+ default 24 if ARM64_VA_BITS=39
+ default 27 if ARM64_VA_BITS=42
+ default 30 if ARM64_VA_BITS=47
+ default 29 if ARM64_VA_BITS=48 && ARM64_64K_PAGES
+ default 31 if ARM64_VA_BITS=48 && ARM64_16K_PAGES
+ default 33 if ARM64_VA_BITS=48
+ default 14 if ARM64_64K_PAGES
+ default 16 if ARM64_16K_PAGES
+ default 18
+
+config ARCH_MMAP_RND_COMPAT_BITS_MIN
+ default 7 if ARM64_64K_PAGES
+ default 9 if ARM64_16K_PAGES
+ default 11
+
+config ARCH_MMAP_RND_COMPAT_BITS_MAX
+ default 16
+
config NO_IOPORT_MAP
def_bool y if !PCI

diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index ed177475dd8c..4c893b5189dd 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -51,8 +51,12 @@ unsigned long arch_mmap_rnd(void)
{
unsigned long rnd;

- rnd = (unsigned long)get_random_int() & STACK_RND_MASK;
-
+#ifdef CONFIG_COMPAT
+ if (test_thread_flag(TIF_32BIT))
+ rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
+ else
+#endif
+ rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
return rnd << PAGE_SHIFT;
}

--
2.10.2



--
Ben Hutchings
Software Developer, Codethink Ltd.


[PATCH 4.4-cip 2/6] arm: mm: support ARCH_MMAP_RND_BITS

Ben Hutchings <ben.hutchings@...>
 

From: Daniel Cashman <dcashman@...>

commit e0c25d958f78acfd5c97df5776eeba3e0684101b upstream.

arm: arch_mmap_rnd() uses a hard-coded value of 8 to generate the random
offset for the mmap base address. This value represents a compromise
between increased ASLR effectiveness and avoiding address-space
fragmentation. Replace it with a Kconfig option, which is sensibly
bounded, so that platform developers may choose where to place this
compromise. Keep 8 as the minimum acceptable value.
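
For scale (spelled out for illustration): with 4 KiB pages the existing
8 bits give a 1 MiB randomisation window; the new maxima of 14, 15 and
16 bits correspond to 64 MiB, 128 MiB and 256 MiB windows for the
1 GiB, 2 GiB and 3 GiB user address-space splits respectively.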

[arnd@...: ARM: avoid ARCH_MMAP_RND_BITS for NOMMU]
Signed-off-by: Daniel Cashman <dcashman@...>
Cc: Russell King <linux@...>
Acked-by: Kees Cook <keescook@...>
Cc: Ingo Molnar <mingo@...>
Cc: Jonathan Corbet <corbet@...>
Cc: Don Zickus <dzickus@...>
Cc: Eric W. Biederman <ebiederm@...>
Cc: Heinrich Schuchardt <xypron.glpk@...>
Cc: Josh Poimboeuf <jpoimboe@...>
Cc: Kirill A. Shutemov <kirill.shutemov@...>
Cc: Naoya Horiguchi <n-horiguchi@...>
Cc: Andrea Arcangeli <aarcange@...>
Cc: Mel Gorman <mgorman@...>
Cc: Thomas Gleixner <tglx@...>
Cc: David Rientjes <rientjes@...>
Cc: Mark Salyzyn <salyzyn@...>
Cc: Jeff Vander Stoep <jeffv@...>
Cc: Nick Kralevich <nnk@...>
Cc: Catalin Marinas <catalin.marinas@...>
Cc: Will Deacon <will.deacon@...>
Cc: "H. Peter Anvin" <hpa@...>
Cc: Hector Marco-Gisbert <hecmargi@...>
Cc: Borislav Petkov <bp@...>
Cc: Ralf Baechle <ralf@...>
Cc: Heiko Carstens <heiko.carstens@...>
Cc: Martin Schwidefsky <schwidefsky@...>
Cc: Benjamin Herrenschmidt <benh@...>
Signed-off-by: Arnd Bergmann <arnd@...>
Signed-off-by: Andrew Morton <akpm@...>
Signed-off-by: Linus Torvalds <torvalds@...>
[bwh: Backported to 4.4: adjust context]
Signed-off-by: Ben Hutchings <ben.hutchings@...>
---
arch/arm/Kconfig | 9 +++++++++
arch/arm/mm/mmap.c | 3 +--
2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 34e1569a11ee..a1b8ca129a8a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -35,6 +35,7 @@ config ARM
select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32
+ select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
select HAVE_ARCH_TRACEHOOK
select HAVE_BPF_JIT
@@ -308,6 +309,14 @@ config MMU
Select if you want MMU-based virtualised addressing space
support by paged memory management. If unsure, say 'Y'.

+config ARCH_MMAP_RND_BITS_MIN
+ default 8
+
+config ARCH_MMAP_RND_BITS_MAX
+ default 14 if PAGE_OFFSET=0x40000000
+ default 15 if PAGE_OFFSET=0x80000000
+ default 16
+
#
# The "ARM system type" choice list is ordered alphabetically by option
# text. Please add new entries in the option alphabetic order.
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 407dc786583a..4b4058db0781 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -173,8 +173,7 @@ unsigned long arch_mmap_rnd(void)
{
unsigned long rnd;

- /* 8 bits of randomness in 20 address space bits */
- rnd = (unsigned long)get_random_int() % (1 << 8);
+ rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);

return rnd << PAGE_SHIFT;
}
--
2.10.2



--
Ben Hutchings
Software Developer, Codethink Ltd.


[PATCH 4.4-cip 1/6] mm: mmap: add new /proc tunable for mmap_base ASLR

Ben Hutchings <ben.hutchings@...>
 

From: Daniel Cashman <dcashman@...>

commit d07e22597d1d355829b7b18ac19afa912cf758d1 upstream.

Address Space Layout Randomization (ASLR) provides a barrier to
exploitation of user-space processes in the presence of security
vulnerabilities by making it more difficult to find desired code/data
which could help an attack. This is done by adding a random offset to
the location of regions in the process address space, with a greater
range of potential offset values corresponding to better protection/a
larger search-space for brute force, but also to greater potential for
fragmentation.

The offset added to the mmap_base address, which provides the basis for
the majority of the mappings for a process, is set once on process exec
in arch_pick_mmap_layout() and is done via hard-coded per-arch values,
which reflect, hopefully, the best compromise for all systems. The
trade-off between increased entropy in the offset value generation and
the corresponding increased variability in address space fragmentation
is not absolute, however, and some platforms may tolerate higher amounts
of entropy. This patch introduces both new Kconfig values and a sysctl
interface which may be used to change the amount of entropy used for
offset generation on a system.
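
For example (illustrative), on a running system the value can then be
inspected and raised within the per-arch bounds:

    # cat /proc/sys/vm/mmap_rnd_bits
    28
    # echo 32 > /proc/sys/vm/mmap_rnd_bits

Writes outside the ARCH_MMAP_RND_BITS_MIN..MAX range are rejected by
the proc_dointvec_minmax handler.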

The direct motivation for this change was in response to the
libstagefright vulnerabilities that affected Android, specifically to
information provided by Google's project zero at:

http://googleprojectzero.blogspot.com/2015/09/stagefrightened.html

The attack presented therein, by Google's project zero, specifically
targeted the limited randomness used to generate the offset added to the
mmap_base address in order to craft a brute-force-based attack.
Concretely, the attack was against the mediaserver process, which was
limited to respawning every 5 seconds, on an arm device. The hard-coded
8 bits used resulted in an average expected success rate of defeating
the mmap ASLR after just over 10 minutes (128 tries at 5 seconds a
piece). With this patch, and an accompanying increase in the entropy
value to 16 bits, the same attack would take an average expected time of
over 45 hours (32768 tries), which makes it both less feasible and more
likely to be noticed.
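
Spelling the arithmetic out (for illustration): 8 bits give 2^8 = 256
possible offsets, so on average about 128 tries are needed; at one try
per 5-second respawn that is roughly 640 seconds, just over 10 minutes.
16 bits give 2^16 = 65536 offsets, about 32768 tries on average, or
roughly 163840 seconds -- about 45.5 hours.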

The introduced Kconfig and sysctl options are limited by per-arch
minimum and maximum values, the minimum of which was chosen to match the
current hard-coded value and the maximum of which was chosen so as to
give the greatest flexibility without generating an invalid mmap_base
address, generally 3-4 bits less than the number of bits in the
user-space accessible virtual address space.

When deciding whether or not to change the default value, a system
developer should consider that mmap_base address could be placed
anywhere up to 2^(value) bits away from the non-randomized location,
which would introduce variable-sized areas above and below the mmap_base
address such that the maximum vm_area_struct size may be reduced,
preventing very large allocations.

This patch (of 4):

ASLR only uses as few as 8 bits to generate the random offset for the
mmap base address on 32 bit architectures. This value was chosen to
prevent a poorly chosen value from dividing the address space in such a
way as to prevent large allocations. This may not be an issue on all
platforms. Allow the specification of a minimum number of bits so that
platforms desiring greater ASLR protection may determine where to place
the trade-off.

Signed-off-by: Daniel Cashman <dcashman@...>
Cc: Russell King <linux@...>
Acked-by: Kees Cook <keescook@...>
Cc: Ingo Molnar <mingo@...>
Cc: Jonathan Corbet <corbet@...>
Cc: Don Zickus <dzickus@...>
Cc: Eric W. Biederman <ebiederm@...>
Cc: Heinrich Schuchardt <xypron.glpk@...>
Cc: Josh Poimboeuf <jpoimboe@...>
Cc: Kirill A. Shutemov <kirill.shutemov@...>
Cc: Naoya Horiguchi <n-horiguchi@...>
Cc: Andrea Arcangeli <aarcange@...>
Cc: Mel Gorman <mgorman@...>
Cc: Thomas Gleixner <tglx@...>
Cc: David Rientjes <rientjes@...>
Cc: Mark Salyzyn <salyzyn@...>
Cc: Jeff Vander Stoep <jeffv@...>
Cc: Nick Kralevich <nnk@...>
Cc: Catalin Marinas <catalin.marinas@...>
Cc: Will Deacon <will.deacon@...>
Cc: "H. Peter Anvin" <hpa@...>
Cc: Hector Marco-Gisbert <hecmargi@...>
Cc: Borislav Petkov <bp@...>
Cc: Ralf Baechle <ralf@...>
Cc: Heiko Carstens <heiko.carstens@...>
Cc: Martin Schwidefsky <schwidefsky@...>
Cc: Benjamin Herrenschmidt <benh@...>
Signed-off-by: Andrew Morton <akpm@...>
Signed-off-by: Linus Torvalds <torvalds@...>
Signed-off-by: Ben Hutchings <ben.hutchings@...>
---
Documentation/sysctl/vm.txt | 29 +++++++++++++++++++
arch/Kconfig | 68 +++++++++++++++++++++++++++++++++++++++++++++
include/linux/mm.h | 11 ++++++++
kernel/sysctl.c | 22 +++++++++++++++
mm/mmap.c | 12 ++++++++
5 files changed, 142 insertions(+)

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index f72370b440b1..ee763f3d3b52 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -42,6 +42,8 @@ Currently, these files are in /proc/sys/vm:
- min_slab_ratio
- min_unmapped_ratio
- mmap_min_addr
+- mmap_rnd_bits
+- mmap_rnd_compat_bits
- nr_hugepages
- nr_overcommit_hugepages
- nr_trim_pages (only if CONFIG_MMU=n)
@@ -485,6 +487,33 @@ against future potential kernel bugs.

==============================================================

+mmap_rnd_bits:
+
+This value can be used to select the number of bits to use to
+determine the random offset to the base address of vma regions
+resulting from mmap allocations on architectures which support
+tuning address space randomization. This value will be bounded
+by the architecture's minimum and maximum supported values.
+
+This value can be changed after boot using the
+/proc/sys/vm/mmap_rnd_bits tunable
+
+==============================================================
+
+mmap_rnd_compat_bits:
+
+This value can be used to select the number of bits to use to
+determine the random offset to the base address of vma regions
+resulting from mmap allocations for applications run in
+compatibility mode on architectures which support tuning address
+space randomization. This value will be bounded by the
+architecture's minimum and maximum supported values.
+
+This value can be changed after boot using the
+/proc/sys/vm/mmap_rnd_compat_bits tunable
+
+==============================================================
+
nr_hugepages

Change the minimum size of the hugepage pool.
diff --git a/arch/Kconfig b/arch/Kconfig
index 4e949e58b192..ba1b626bca00 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -511,6 +511,74 @@ config ARCH_HAS_ELF_RANDOMIZE
- arch_mmap_rnd()
- arch_randomize_brk()

+config HAVE_ARCH_MMAP_RND_BITS
+ bool
+ help
+ An arch should select this symbol if it supports setting a variable
+ number of bits for use in establishing the base address for mmap
+ allocations, has MMU enabled and provides values for both:
+ - ARCH_MMAP_RND_BITS_MIN
+ - ARCH_MMAP_RND_BITS_MAX
+
+config ARCH_MMAP_RND_BITS_MIN
+ int
+
+config ARCH_MMAP_RND_BITS_MAX
+ int
+
+config ARCH_MMAP_RND_BITS_DEFAULT
+ int
+
+config ARCH_MMAP_RND_BITS
+ int "Number of bits to use for ASLR of mmap base address" if EXPERT
+ range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX
+ default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT
+ default ARCH_MMAP_RND_BITS_MIN
+ depends on HAVE_ARCH_MMAP_RND_BITS
+ help
+ This value can be used to select the number of bits to use to
+ determine the random offset to the base address of vma regions
+ resulting from mmap allocations. This value will be bounded
+ by the architecture's minimum and maximum supported values.
+
+ This value can be changed after boot using the
+ /proc/sys/vm/mmap_rnd_bits tunable
+
+config HAVE_ARCH_MMAP_RND_COMPAT_BITS
+ bool
+ help
+ An arch should select this symbol if it supports running applications
+ in compatibility mode, supports setting a variable number of bits for
+ use in establishing the base address for mmap allocations, has MMU
+ enabled and provides values for both:
+ - ARCH_MMAP_RND_COMPAT_BITS_MIN
+ - ARCH_MMAP_RND_COMPAT_BITS_MAX
+
+config ARCH_MMAP_RND_COMPAT_BITS_MIN
+ int
+
+config ARCH_MMAP_RND_COMPAT_BITS_MAX
+ int
+
+config ARCH_MMAP_RND_COMPAT_BITS_DEFAULT
+ int
+
+config ARCH_MMAP_RND_COMPAT_BITS
+ int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT
+ range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX
+ default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT
+ default ARCH_MMAP_RND_COMPAT_BITS_MIN
+ depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS
+ help
+ This value can be used to select the number of bits to use to
+ determine the random offset to the base address of vma regions
+ resulting from mmap allocations for compatible applications. This
+ value will be bounded by the architecture's minimum and maximum
+ supported values.
+
+ This value can be changed after boot using the
+ /proc/sys/vm/mmap_rnd_compat_bits tunable
+
config HAVE_COPY_THREAD_TLS
bool
help
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f0ffa01c90d9..ee18791531e4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -51,6 +51,17 @@ extern int sysctl_legacy_va_layout;
#define sysctl_legacy_va_layout 0
#endif

+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
+extern const int mmap_rnd_bits_min;
+extern const int mmap_rnd_bits_max;
+extern int mmap_rnd_bits __read_mostly;
+#endif
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
+extern const int mmap_rnd_compat_bits_min;
+extern const int mmap_rnd_compat_bits_max;
+extern int mmap_rnd_compat_bits __read_mostly;
+#endif
+
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 999e025bf68e..9bc9f56b8b3d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1568,6 +1568,28 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
+ {
+ .procname = "mmap_rnd_bits",
+ .data = &mmap_rnd_bits,
+ .maxlen = sizeof(mmap_rnd_bits),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&mmap_rnd_bits_min,
+ .extra2 = (void *)&mmap_rnd_bits_max,
+ },
+#endif
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
+ {
+ .procname = "mmap_rnd_compat_bits",
+ .data = &mmap_rnd_compat_bits,
+ .maxlen = sizeof(mmap_rnd_compat_bits),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&mmap_rnd_compat_bits_min,
+ .extra2 = (void *)&mmap_rnd_compat_bits_max,
+ },
+#endif
{ }
};

diff --git a/mm/mmap.c b/mm/mmap.c
index 455772a05e54..d99cef70ad18 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -58,6 +58,18 @@
#define arch_rebalance_pgtables(addr, len) (addr)
#endif

+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
+const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
+const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
+int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
+#endif
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
+const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
+const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
+int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
+#endif
+
+
static void unmap_region(struct mm_struct *mm,
struct vm_area_struct *vma, struct vm_area_struct *prev,
unsigned long start, unsigned long end);
--
2.10.2



--
Ben Hutchings
Software Developer, Codethink Ltd.
