* [PATCH] Quick trial on tracing host inputs
@ 2022-03-08 8:56 Elena Reshetova
2022-03-08 11:34 ` Dan Carpenter
0 siblings, 1 reply; 4+ messages in thread
From: Elena Reshetova @ 2022-03-08 8:56 UTC (permalink / raw)
To: dan.carpenter; +Cc: smatch, Elena Reshetova
This is just a quick trial to trace inputs received
from the host/VMM in the same way as user inputs.
Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
---
smatch_kernel_host_data.c | 1320 ++++++++++++++++++++++++++++++++++
smatch_points_to_host_data.c | 334 +++++++++
2 files changed, 1654 insertions(+)
create mode 100755 smatch_kernel_host_data.c
create mode 100755 smatch_points_to_host_data.c
diff --git a/smatch_kernel_host_data.c b/smatch_kernel_host_data.c
new file mode 100755
index 00000000..540875c5
--- /dev/null
+++ b/smatch_kernel_host_data.c
@@ -0,0 +1,1320 @@
+/*
+ * Copyright (C) 2011 Dan Carpenter.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see http://www.gnu.org/copyleft/gpl.txt
+ */
+
+/* Note: The below code is just a quick trial to modify
+ * smatch_kernel_user_data.c to work on data received from an
+ * untrusted host/VMM.
+ * Similar to smatch_kernel_user_data.c, it works together with the
+ * smatch_points_to_host_data.c code. It also uses some helper functions
+ * from the check_host_input.c pattern.
+ */
+
+#include "smatch.h"
+#include "smatch_slist.h"
+#include "smatch_extra.h"
+#include <math.h>
+
+/*
+ * Names of the functions treated as sources of host/VMM data.
+ * is_host_fn() does an exact string match against this table and
+ * register_host_input_funcs() installs a function hook for every entry.
+ */
+const char *host_input_funcs[] = {
+ "inb", "inw", "inl", "inb_p", "inw_p", "inl_p", "insb", "insw", "insl", "get_dma_residue", "ioread8", "ioread16", "ioread32",
+ "ioread16be", "ioread32be", "ioread64_lo_hi", "ioread64_hi_lo", "ioread64be_lo_hi", "ioread64be_hi_lo", "ioread8_rep",
+ "ioread16_rep", "ioread32_rep", "__ioread32_copy", "iomap_readq", "iomap_readb", "iomap_readw", "iomap_readl", "memcpy_fromio",
+ "mmio_insb", "mmio_insw", "mmio_insl", "readb", "readw", "readl", "readq", "readsb", "readsw", "readsl", "readsq", "__readb", "__readw",
+ "__readl", "__readq", "__readsb", "__readsw", "__readsl", "__readsq", "__raw_readb", "__raw_readw", "__raw_readl", "__raw_readq",
+ "lo_hi_readq", "hi_lo_readq", "lo_hi_readq_relaxed", "hi_lo_readq_relaxed", "readb_relaxed", "readw_relaxed", "readl_relaxed",
+ "readq_relaxed", "native_read_msr", "native_read_msr_safe", "__rdmsr", "rdmsrl", "rdmsrl_safe", "rdmsr_on_cpu", "rdmsrl_on_cpu",
+ "rdmsr_on_cpus", "rdmsr_safe_on_cpu", "rdmsrl_safe_on_cpu", "paravirt_read_msr", "paravirt_read_msr_safe", "read_msr", "msr_read",
+ "native_apic_msr_read", "native_apic_mem_read", "native_apic_icr_read", "apic_read", "apic_icr_read", "native_x2apic_icr_read",
+ "io_apic_read", "native_io_apic_read", "__ioapic_read_entry", "ioapic_read_entry", "vp_ioread8", "vp_ioread16", "vp_ioread32",
+ "__virtio_cread_many", "virtio_cread", "virtio_cread_le", "virtio_cread8", "virtio_cread16", "virtio_cread32", "virtio_cread64",
+ "virtio_cread_bytes", "virtio16_to_cpu", "virtio32_to_cpu", "virtio64_to_cpu", "__virtio16_to_cpu", "__virtio32_to_cpu",
+ "__virtio64_to_cpu", "virtqueue_get_buf", "vringh16_to_cpu", "vringh32_to_cpu", "vringh64_to_cpu", "tap16_to_cpu", "tun16_to_cpu",
+ "read_pci_config", "read_pci_config_byte", "read_pci_config_16", "raw_pci_read", "pci_read", "pci_read_config_byte",
+ "pci_read_config_word", "pci_read_config_dword", "pci_bus_read_config_byte", "pci_bus_read_config_word",
+ "pci_bus_read_config_dword", "pci_generic_config_read", "pci_generic_config_read32", "pci_user_read_config_byte",
+ "pci_user_read_config_word", "pci_user_read_config_dword", "pcie_capability_read_word", "pcie_capability_read_dword",
+ "pci_read_vpd", "serial8250_early_in", "serial_dl_read", "serial8250_in_MCR", "serial_in", "serial_port_in", "serial_icr_read",
+ "serial8250_rx_chars", "dw8250_readl_ext", "udma_readl", "sio_read_reg", "irq_readl_be", "irq_reg_readl", "fw_cfg_read_blob",
+ "acpi_os_read_iomem", "acpi_os_read_port", "acpi_hw_read_multiple", "acpi_hw_read", "acpi_hw_read_port", "acpi_hw_register_read",
+ "acpi_hw_gpe_read", "apei_read", "acpi_read", "__apei_exec_read_register", "cpc_read", "hv_get_register", "iosf_mbi_read",
+ "cpuid", "cpuid_count", "cpuid_eax", "cpuid_ebx", "cpuid_ecx", "cpuid_edx"
+
+};
+
+
+/* Check ids; assigned by register_kernel_host_data() and register_kernel_host_data2(). */
+static int my_id;
+static int my_call_id;
+
+/* State stored under the "this_function" key by set_called(). */
+STATE(called);
+/* Set to true when the function being analysed is seen to receive host data. */
+static unsigned long func_gets_host_data;
+/* Copy of the state tree taken by save_start_states(), released by free_start_states(). */
+static struct stree *start_states;
+
+/*
+ * Hook callback: keep a private clone of the state tree as it looks at the
+ * moment the hook fires.  @stmt is not used.
+ */
+static void save_start_states(struct statement *stmt)
+{
+	struct stree *snapshot = clone_stree(__get_cur_stree());
+
+	start_states = snapshot;
+}
+
+/* Hook callback: release the clone made by save_start_states(). */
+static void free_start_states(void)
+{
+ free_stree(&start_states);
+}
+
+/* Unmatched-state hook: supplies an empty estate; @sm is not used. */
+static struct smatch_state *empty_state(struct sm_state *sm)
+{
+ return alloc_estate_empty();
+}
+
+/*
+ * Build an estate that spans the whole range of @type and flag it as "new".
+ * Returns NULL when @type is NULL or is a pointer type.
+ */
+static struct smatch_state *new_state(struct symbol *type)
+{
+	struct smatch_state *fresh;
+
+	if (!type)
+		return NULL;
+	if (type_is_ptr(type))
+		return NULL;
+
+	fresh = alloc_estate_whole(type);
+	estate_set_new(fresh);
+	return fresh;
+}
+
+/*
+ * Pre-merge hook: narrow the host-data state of @cur to its intersection with
+ * the SMATCH_EXTRA range for the same variable, carrying over the capped and
+ * treat-untagged flags.  Nothing is written when there is no SMATCH_EXTRA
+ * state or when the narrowed state is equivalent to the current one.
+ * @other is not used.
+ */
+static void pre_merge_hook(struct sm_state *cur, struct sm_state *other)
+{
+	struct smatch_state *host = cur->state;
+	struct smatch_state *extra_state;
+	struct smatch_state *narrowed;
+	struct range_list *common;
+
+	extra_state = __get_state(SMATCH_EXTRA, cur->name, cur->sym);
+	if (!extra_state)
+		return;
+
+	common = rl_intersection(estate_rl(host), estate_rl(extra_state));
+	narrowed = alloc_estate_rl(clone_rl(common));
+
+	if (estate_capped(host) || is_capped_var_sym(cur->name, cur->sym))
+		estate_set_capped(narrowed);
+	if (estate_treat_untagged(host))
+		estate_set_treat_untagged(narrowed);
+
+	if (!estates_equiv(narrowed, cur->state))
+		set_state(my_id, cur->name, cur->sym, narrowed);
+}
+
+/*
+ * Extra "nomod" hook: when a host-data state exists for @name/@sym, replace it
+ * with its intersection with @state, unless that intersection is equivalent
+ * to the range already stored.  Capped / treat-untagged flags are preserved.
+ * @expr is not used.
+ */
+static void extra_nomod_hook(const char *name, struct symbol *sym, struct expression *expr, struct smatch_state *state)
+{
+	struct smatch_state *host;
+	struct smatch_state *narrowed;
+	struct range_list *common;
+
+	host = __get_state(my_id, name, sym);
+	if (!host)
+		return;
+
+	common = rl_intersection(estate_rl(host), estate_rl(state));
+	if (rl_equiv(common, estate_rl(host)))
+		return;
+
+	narrowed = alloc_estate_rl(common);
+	if (estate_capped(host))
+		estate_set_capped(narrowed);
+	if (estate_treat_untagged(host))
+		estate_set_treat_untagged(narrowed);
+
+	set_state(my_id, name, sym, narrowed);
+}
+
+/*
+ * Record "<struct type> <member> <state name>" as HOST_DATA function type
+ * info when @expr is a member dereference of a named type and @state has a
+ * range.  Does nothing during fake assignments.
+ */
+static void store_type_info(struct expression *expr, struct smatch_state *state)
+{
+	struct symbol *base_type;
+	char *type_name;
+	char *member_name;
+
+	if (__in_fake_assign || !estate_rl(state))
+		return;
+
+	expr = strip_expr(expr);
+	if (!expr)
+		return;
+	if (expr->type != EXPR_DEREF || !expr->member)
+		return;
+
+	base_type = get_type(expr->deref);
+	if (!base_type || !base_type->ident)
+		return;
+
+	type_name = type_to_str(base_type);
+	if (!type_name)
+		return;
+
+	member_name = get_member_name(expr);
+	if (!member_name)
+		return;
+
+	sql_insert_function_type_info(HOST_DATA, type_name, member_name, state->name);
+}
+
+/*
+ * Store @state as the host-data state of @expr, first giving
+ * store_type_info() the chance to record struct-member type info.
+ */
+static void set_host_data(struct expression *expr, struct smatch_state *state)
+{
+ store_type_info(expr, state);
+ set_state_expr(my_id, expr, state);
+}
+
+/*
+ * Returns false only when @expr has a host range whose maximum lies within
+ * 100 of the type maximum.  Expressions without a host range count as known.
+ */
+static bool host_rl_known(struct expression *expr)
+{
+	struct range_list *rl;
+	sval_t threshold;
+
+	if (!get_host_rl(expr, &rl))
+		return true;
+
+	threshold = sval_type_max(rl_type(rl));
+	threshold.value -= 100;
+
+	return sval_cmp(rl_max(rl), threshold) < 0;
+}
+
+/*
+ * True when the expression last assigned to @expr is a call to
+ * array_index_mask_nospec().
+ */
+static bool is_array_index_mask_nospec(struct expression *expr)
+{
+	struct expression *assigned = get_assigned_expr(expr);
+
+	if (!assigned)
+		return false;
+	if (assigned->type != EXPR_CALL)
+		return false;
+
+	return sym_name_is("array_index_mask_nospec", assigned->fn);
+}
+
+/*
+ * Decide whether the binary operation @expr yields a capped value.
+ * Called from host_rl_capped() for EXPR_BINOP expressions.  Subtraction and
+ * '&' / '%' have dedicated rules; every other operator falls through to the
+ * generic "both operands capped or known" rule at the end.
+ */
+static bool binop_capped(struct expression *expr)
+{
+ struct range_list *left_rl;
+ int comparison;
+ sval_t sval;
+
+ /* host - x: capped if the left side is capped or is known to be > right */
+ if (expr->op == '-' && get_host_rl(expr->left, &left_rl)) {
+ if (host_rl_capped(expr->left))
+ return true;
+ comparison = get_comparison(expr->left, expr->right);
+ if (comparison && show_special(comparison)[0] == '>')
+ return true;
+ return false;
+ }
+
+ if (expr->op == '&' || expr->op == '%') {
+ bool left_user, left_capped, right_user, right_capped;
+
+ if (!get_value(expr->right, &sval) && is_capped(expr->right))
+ return true;
+ if (is_array_index_mask_nospec(expr->right))
+ return true;
+ if (is_capped(expr->left))
+ return true;
+ left_user = is_host_rl(expr->left);
+ right_user = is_host_rl(expr->right);
+ /* neither operand is host data */
+ if (!left_user && !right_user)
+ return true;
+
+ left_capped = host_rl_capped(expr->left);
+ right_capped = host_rl_capped(expr->right);
+
+ if (left_user && left_capped) {
+ if (!right_user)
+ return true;
+ if (right_user && right_capped)
+ return true;
+ return false;
+ }
+ if (right_user && right_capped) {
+ if (!left_user)
+ return true;
+ return false;
+ }
+ return false;
+ }
+
+ /*
+ * Generally "capped" means that we capped it to an unknown value.
+ * This is useful because if Smatch doesn't know what the value is then
+ * we have to trust that it is correct. But if we know the cap value is
+ * 100 then we can check if 100 is correct and complain if it's wrong.
+ *
+ * So then the problem is with BINOP when we take a capped variable
+ * plus a user variable which is clamped to a known range (uncapped)
+ * the result should be capped.
+ */
+ if ((host_rl_capped(expr->left) || host_rl_known(expr->left)) &&
+ (host_rl_capped(expr->right) || host_rl_known(expr->right)))
+ return true;
+
+ return false;
+}
+
+/*
+ * Report whether @expr counts as "capped" host data.
+ *
+ * Constants are capped, binary operations are delegated to binop_capped(),
+ * ++/-- follow their operand, and a stored host-data state answers through
+ * its capped flag.  Without a stored state, an expression that has no host
+ * range at all, or whose host range is a single value, is capped.
+ * Returns false for a NULL expression.
+ */
+bool host_rl_capped(struct expression *expr)
+{
+	struct smatch_state *state;
+	struct range_list *rl;
+	sval_t sval;
+	bool is_unary, is_incdec;
+
+	expr = strip_expr(expr);
+	if (!expr)
+		return false;
+	if (get_value(expr, &sval))
+		return true;
+	if (expr->type == EXPR_BINOP)
+		return binop_capped(expr);
+
+	is_unary = expr->type == EXPR_PREOP || expr->type == EXPR_POSTOP;
+	is_incdec = expr->op == SPECIAL_INCREMENT || expr->op == SPECIAL_DECREMENT;
+	if (is_unary && is_incdec)
+		return host_rl_capped(expr->unop);
+
+	state = get_state_expr(my_id, expr);
+	if (state)
+		return estate_capped(state);
+
+	/*
+	 * The non host data parts of a binop are capped and
+	 * also empty host rl states are capped.
+	 */
+	if (!get_host_rl(expr, &rl))
+		return true;
+
+	/* a single known value is capped; anything else is uncapped host data */
+	return !!rl_to_sval(rl, &sval);
+}
+
+/*
+ * Report whether @expr should be treated as untagged.  Constants and
+ * expressions that are not host data answer true; a stored host-data state
+ * answers through its treat-untagged flag; host data without a stored state
+ * answers false.  Returns false for a NULL expression.
+ */
+bool host_rl_treat_untagged(struct expression *expr)
+{
+	struct smatch_state *state;
+	struct range_list *rl;
+	sval_t sval;
+
+	expr = strip_expr(expr);
+	if (!expr)
+		return false;
+	if (get_value(expr, &sval))
+		return true;
+
+	state = get_state_expr(my_id, expr);
+	if (state)
+		return estate_treat_untagged(state);
+
+	/* host data without a state => false; not host data at all => true */
+	return !get_host_rl(expr, &rl);
+}
+
+/*
+ * Recursively tag the members of the nested struct/union @member (reached
+ * through @expr) as host data.  Anonymous members are skipped, and nested
+ * aggregates are descended into.
+ */
+static void tag_inner_struct_members(struct expression *expr, struct symbol *member)
+{
+	struct symbol *aggregate = get_real_base_type(member);
+	struct expression *leaf;
+	struct symbol *field;
+
+	if (member->ident)
+		expr = member_expression(expr, '.', member->ident);
+
+	FOR_EACH_PTR(aggregate->symbol_list, field) {
+		struct symbol *field_type = get_real_base_type(field);
+
+		if (!field_type)
+			continue;
+
+		if (field_type->type == SYM_UNION || field_type->type == SYM_STRUCT) {
+			tag_inner_struct_members(expr, field);
+			continue;
+		}
+
+		if (!field->ident)
+			continue;
+
+		leaf = member_expression(expr, '.', field->ident);
+		set_host_data(leaf, new_state(field_type));
+	} END_FOR_EACH_PTR(field);
+}
+
+/*
+ * Tag every member of the struct/union @type, accessed through @expr, as
+ * host data.  "&obj" is accessed with '.', anything else with '*'.  Array
+ * members are marked as pointing to host data; nested aggregates go through
+ * tag_inner_struct_members().
+ */
+static void tag_struct_members(struct symbol *type, struct expression *expr)
+{
+	struct symbol *field;
+	struct symbol *field_type;
+	struct expression *member;
+	int op;
+
+	if (expr->type == EXPR_PREOP && expr->op == '&') {
+		expr = strip_expr(expr->unop);
+		op = '.';
+	} else {
+		op = '*';
+	}
+
+	FOR_EACH_PTR(type->symbol_list, field) {
+		field_type = get_real_base_type(field);
+		if (!field_type)
+			continue;
+
+		if (field_type->type == SYM_UNION || field_type->type == SYM_STRUCT) {
+			tag_inner_struct_members(expr, field);
+			continue;
+		}
+
+		if (!field->ident)
+			continue;
+
+		member = member_expression(expr, op, field->ident);
+		if (field_type->type == SYM_ARRAY)
+			set_points_to_host_data(member);
+		else
+			set_host_data(member, new_state(get_type(member)));
+	} END_FOR_EACH_PTR(field);
+}
+
+/*
+ * Tag the object that @expr refers to as host data: for "&x" that is x,
+ * for anything else it is the dereference of @expr.
+ */
+static void tag_base_type(struct expression *expr)
+{
+	struct expression *target;
+
+	if (expr->type == EXPR_PREOP && expr->op == '&')
+		target = strip_expr(expr->unop);
+	else
+		target = deref_expression(expr);
+
+	set_host_data(target, new_state(get_type(target)));
+}
+
+/*
+ * Mark the memory that pointer expression @expr points to as host data.
+ *
+ * Nothing happens unless @expr has a pointer type with a resolvable base
+ * type.  Then:
+ *   - void *:       the dereference is tagged with an unsigned long state;
+ *   - base types:   the pointee is tagged, and unless @expr is an
+ *                   address-of expression the pointer itself is also marked
+ *                   as pointing to host data;
+ *   - struct/union: every member is tagged via tag_struct_members().
+ */
+static void tag_as_host_data(struct expression *expr)
+{
+	struct symbol *type;
+	bool is_address_of;
+
+	expr = strip_expr(expr);
+	type = get_type(expr);
+
+	if (!type || type->type != SYM_PTR)
+		return;
+
+	type = get_real_base_type(type);
+	if (!type)
+		return;
+
+	if (type == &void_ctype) {
+		set_host_data(deref_expression(expr), new_state(&ulong_ctype));
+		return;
+	}
+
+	is_address_of = expr->type == EXPR_PREOP && expr->op == '&';
+
+	if (type->type == SYM_BASETYPE) {
+		/*
+		 * The original test was "type != EXPR_PREOP && op != '&'",
+		 * which is not the negation of "is an address-of" and so
+		 * skipped other unary expressions such as "*pp".  Use the
+		 * same negation as the struct/union branch below.
+		 */
+		if (!is_address_of)
+			set_points_to_host_data(expr);
+		tag_base_type(expr);
+		return;
+	}
+
+	if (type->type == SYM_STRUCT || type->type == SYM_UNION) {
+		if (!is_address_of)
+			expr = deref_expression(expr);
+		else
+			set_host_data(deref_expression(expr), new_state(&ulong_ctype));
+		tag_struct_members(type, expr);
+	}
+}
+
+/*
+ * Tag argument number @arg of the call @expr as host data.  Silently does
+ * nothing when the call has no such argument.
+ */
+static void tag_argument(struct expression *expr, int arg)
+{
+	struct expression *target;
+
+	target = strip_expr(get_argument_from_call_expr(expr->args, arg));
+	if (target)
+		tag_as_host_data(target);
+}
+
+/* Defined outside this file; presumably in check_host_input.c, per the header note. */
+extern uint get_arg_bitmask(struct expression *expr);
+/* Call most recently handled by match_host_input(); db_param_set() skips it. */
+static struct expression *ignore_param_set;
+extern bool is_ignored_func(struct expression *expr);
+
+/*
+ * Function hook installed for every entry of host_input_funcs[]: tag the
+ * output arguments of the call @expr as host data.
+ *
+ * get_arg_bitmask() describes which arguments receive host data.  A mask of
+ * zero means the data comes back in the return value, which is not handled
+ * here.  Calls for which is_ignored_func() is true are skipped.
+ *
+ * The multi-argument masks 0xC, 0x36 and 0x74 are matched against the raw
+ * mask value.  The original code switched on (uint)log2(arg_bitmask), so
+ * those case labels could never match the masks they name and only the
+ * highest argument was tagged.  The argument lists attached to each mask are
+ * kept exactly as written; any other mask still tags the single argument
+ * whose index is the position of its highest set bit.
+ *
+ * @fn and @unused are not used; @unused gives this hook the same three
+ * parameters as match_returns_host_rl(), which is registered the same way.
+ */
+static void match_host_input(const char *fn, struct expression *expr, void *unused)
+{
+	uint arg_bitmask;
+
+	if (!expr)
+		return;
+
+	arg_bitmask = get_arg_bitmask(expr);
+
+	if (!arg_bitmask) /* function returns host data, handled via match_returns_host_rl */
+		return;
+
+	if (is_ignored_func(expr))
+		return;
+
+	func_gets_host_data = true;
+	ignore_param_set = expr;
+
+	switch (arg_bitmask) {
+	case 0xC:
+		tag_argument(expr, 2);
+		tag_argument(expr, 3);
+		break;
+	case 0x36:
+		tag_argument(expr, 1);
+		tag_argument(expr, 2);
+		tag_argument(expr, 3);
+		tag_argument(expr, 4);
+		break;
+	case 0x74:
+		tag_argument(expr, 2);
+		tag_argument(expr, 3);
+		tag_argument(expr, 4);
+		tag_argument(expr, 5);
+		break;
+	default:
+		tag_argument(expr, (uint)log2(arg_bitmask));
+		break;
+	}
+}
+
+/*
+ * Returns true when @fn_name exactly matches an entry of host_input_funcs[].
+ * A NULL name never matches.
+ */
+bool is_host_fn(char *fn_name)
+{
+	int idx = 0;
+
+	if (!fn_name)
+		return false;
+
+	while (idx < ARRAY_SIZE(host_input_funcs)) {
+		if (!strcmp(fn_name, host_input_funcs[idx]))
+			return true;
+		idx++;
+	}
+
+	return false;
+}
+
+
+/*
+ * If @expr is a direct call to one of the host input functions, store the
+ * whole range of the call's type in *@rl and return 1.  Returns 0 (leaving
+ * *@rl alone) otherwise.
+ */
+static int get_rl_from_function(struct expression *expr, struct range_list **rl)
+{
+	if (!expr)
+		return 0;
+	if (expr->type != EXPR_CALL)
+		return 0;
+	if (expr->fn->type != EXPR_SYMBOL)
+		return 0;
+	if (!expr->fn->symbol_name || !expr->fn->symbol_name->name)
+		return 0;
+
+	if (!is_host_fn(expr->fn->symbol_name->name))
+		return 0;
+
+	*rl = alloc_whole_rl(get_type(expr));
+	return 1;
+}
+
+/*
+ * True when the host-data state of @expr is flagged "new", or, for binary
+ * operations, when that holds for either operand (checked recursively).
+ */
+static bool state_is_new(struct expression *expr)
+{
+	if (estate_new(get_state_expr(my_id, expr)))
+		return true;
+
+	if (expr->type != EXPR_BINOP)
+		return false;
+
+	return state_is_new(expr->left) || state_is_new(expr->right);
+}
+
+/*
+ * Handle compound assignments (+=, -=, &=, %=, <<=, >>=, |=, ^=, *=, /=).
+ *
+ * Returns false when @expr uses any other operator so the caller can carry
+ * on.  Otherwise returns true; if the equivalent binary operation
+ * "left <op> right" has a host range, the left side receives that range
+ * (cast to its own type) as its new host-data state.
+ */
+static bool handle_op_assign(struct expression *expr)
+{
+	struct expression *binop;
+	struct smatch_state *result;
+	struct range_list *rl;
+
+	switch (expr->op) {
+	case SPECIAL_ADD_ASSIGN:
+	case SPECIAL_SUB_ASSIGN:
+	case SPECIAL_AND_ASSIGN:
+	case SPECIAL_MOD_ASSIGN:
+	case SPECIAL_SHL_ASSIGN:
+	case SPECIAL_SHR_ASSIGN:
+	case SPECIAL_OR_ASSIGN:
+	case SPECIAL_XOR_ASSIGN:
+	case SPECIAL_MUL_ASSIGN:
+	case SPECIAL_DIV_ASSIGN:
+		break;
+	default:
+		return false;
+	}
+
+	binop = binop_expression(expr->left, op_remove_assign(expr->op), expr->right);
+	if (!get_host_rl(binop, &rl))
+		return true;
+
+	rl = cast_rl(get_type(expr->left), rl);
+	result = alloc_estate_rl(rl);
+
+	/* &= and %= always cap the result */
+	if (expr->op == SPECIAL_AND_ASSIGN ||
+	    expr->op == SPECIAL_MOD_ASSIGN ||
+	    host_rl_capped(binop))
+		estate_set_capped(result);
+	if (host_rl_treat_untagged(expr->left))
+		estate_set_treat_untagged(result);
+	if (state_is_new(binop))
+		estate_set_new(result);
+
+	set_host_data(expr->left, result);
+	return true;
+}
+
+/*
+ * ASSIGNMENT_HOOK callback: propagate host data from the right-hand side of
+ * an assignment to the left-hand side.
+ *
+ * Three outcomes are possible:
+ *   - "set": the left side receives a host-data state built from the
+ *     right-hand range (pointer destinations are instead marked as pointing
+ *     to host data when the source is an array);
+ *   - "clear_old_state": a non-pointer left side that already had a
+ *     host-data state has it replaced with an empty one;
+ *   - plain return: compound assignments are handled by handle_op_assign(),
+ *     and real call results are left to the DB return-state hooks.
+ */
+static void match_assign_host(struct expression *expr)
+{
+ struct symbol *left_type, *right_type;
+ struct range_list *rl;
+ static struct expression *handled;
+ struct smatch_state *state;
+ struct expression *faked;
+ bool is_capped = false;
+ bool is_new = false;
+
+ if (!expr)
+ return;
+ left_type = get_type(expr->left);
+ if (left_type == &void_ctype)
+ return;
+
+ faked = get_faked_expression();
+
+ /* FIXME: handle fake array assignments frob(&user_array[x]); */
+
+ /* fake assignment whose original source points to host data */
+ if (is_fake_call(expr->right) && faked &&
+ faked->type == EXPR_ASSIGNMENT &&
+ points_to_host_data(faked->right)) {
+ //if (is_skb_data(faked->right))
+ // func_gets_host_data = true;
+ rl = alloc_whole_rl(get_type(expr->left));
+ is_new = true;
+ goto set;
+ }
+
+ if (faked && faked == handled)
+ return;
+ if (is_fake_call(expr->right))
+ goto clear_old_state;
+ if (points_to_host_data(expr->right) &&
+ is_struct_ptr(get_type(expr->left))) {
+ handled = expr;
+ // This should be handled by smatch_points_to_user_data.c
+ //set_points_to_user_data(expr->left);
+ }
+
+ if (handle_op_assign(expr))
+ return;
+
+ if (expr->op != '=')
+ goto clear_old_state;
+
+ /* Handled by DB code */
+ if (expr->right->type == EXPR_CALL)
+ return;
+
+ if (!get_host_rl(expr->right, &rl))
+ goto clear_old_state;
+
+ is_capped = host_rl_capped(expr->right);
+ is_new = state_is_new(expr->right);
+
+set:
+ if (type_is_ptr(left_type)) {
+ right_type = get_type(expr->right);
+ if (right_type && right_type->type == SYM_ARRAY)
+ set_points_to_host_data(expr->left);
+ return;
+ }
+
+ rl = cast_rl(left_type, rl);
+ state = alloc_estate_rl(rl);
+ if (is_new)
+ estate_set_new(state);
+ if (is_capped)
+ estate_set_capped(state);
+ if (host_rl_treat_untagged(expr->right))
+ estate_set_treat_untagged(state);
+
+ set_host_data(expr->left, state);
+ return;
+
+clear_old_state:
+
+ /*
+ * HACK ALERT!!! This should be at the start of the function. The
+ * problem is that handling "pointer = array;" assignments is
+ * handled in this function instead of in kernel_points_to_user_data.c.
+ */
+ if (type_is_ptr(left_type))
+ return;
+ if (get_state_expr(my_id, expr->left))
+ set_host_data(expr->left, alloc_estate_empty());
+}
+
+/*
+ * Handle "==" and "!=" conditions.  When exactly one side has a host-data
+ * state, that side gets an empty state on the branch where the two sides are
+ * equal (the true branch of "==", the false branch of "!=").  The other
+ * branch is left untouched.
+ */
+static void handle_eq_noteq(struct expression *expr)
+{
+	struct smatch_state *left_state, *right_state;
+	struct smatch_state *on_true = NULL;
+	struct smatch_state *on_false = NULL;
+	struct expression *tracked;
+
+	left_state = get_state_expr(my_id, expr->left);
+	right_state = get_state_expr(my_id, expr->right);
+
+	/* act only when exactly one side is tracked */
+	if (!left_state == !right_state)
+		return;
+
+	if (expr->op == SPECIAL_EQUAL)
+		on_true = alloc_estate_empty();
+	else
+		on_false = alloc_estate_empty();
+
+	tracked = left_state ? expr->left : expr->right;
+	set_true_false_states_expr(my_id, tracked, on_true, on_false);
+}
+
+/*
+ * Remove the "negative" part of @rl.
+ *
+ * For unsigned types wider than 31 bits that is everything from INT_MAX + 1
+ * up to the type maximum; for all other types it is the span from the list
+ * minimum up to -1.  Returns NULL for a NULL list.
+ *
+ * The NULL check now comes first: the original queried rl_min(), rl_type()
+ * and sval_type_max() on the list before testing it.
+ */
+static struct range_list *strip_negatives(struct range_list *rl)
+{
+	struct symbol *type;
+
+	if (!rl)
+		return NULL;
+
+	type = rl_type(rl);
+
+	if (type_unsigned(type) && type_bits(type) > 31) {
+		sval_t over = { .type = type, .value = INT_MAX + 1ULL };
+
+		return remove_range(rl, over, sval_type_max(type));
+	} else {
+		sval_t minus_one = { .type = type, .value = -1 };
+
+		return remove_range(rl, rl_min(rl), minus_one);
+	}
+}
+
+/*
+ * Handle ordered comparisons (<, <=, >, >= and their unsigned forms).
+ *
+ * When exactly one side has a host range and neither side has a known
+ * implied value, that side gets a capped copy of its host range on the
+ * branch where it is the smaller operand.
+ */
+static void handle_compare(struct expression *expr)
+{
+ struct expression *left, *right;
+ struct range_list *left_rl = NULL;
+ struct range_list *right_rl = NULL;
+ struct range_list *user_rl;
+ struct smatch_state *capped_state;
+ struct smatch_state *left_true = NULL;
+ struct smatch_state *left_false = NULL;
+ struct smatch_state *right_true = NULL;
+ struct smatch_state *right_false = NULL;
+ struct symbol *type;
+ sval_t sval;
+
+ left = strip_expr(expr->left);
+ right = strip_expr(expr->right);
+
+ /* for "(a = b) < c" compare against the assigned-to variable */
+ while (left->type == EXPR_ASSIGNMENT)
+ left = strip_expr(left->left);
+
+ /*
+ * Conditions are mostly handled by smatch_extra.c, but there are some
+ * times where the exact values are not known so we can't do that.
+ *
+ * Normally, we might consider using smatch_capped.c to supplement smatch
+ * extra but that doesn't work when we merge unknown uncapped kernel
+ * data with unknown capped user data. The result is uncapped user
+ * data. We need to keep it separate and say that the user data is
+ * capped. In the past, I would have marked this as just regular
+ * kernel data (not user data) but we can't do that these days because
+ * we need to track user data for Spectre.
+ *
+ * The other situation which we have to handle is when we do have an
+ * int and we compare against an unknown unsigned kernel variable. In
+ * that situation we assume that the kernel data is less than INT_MAX.
+ * Otherwise then we get all sorts of array underflow false positives.
+ *
+ */
+
+ /* Handled in smatch_extra.c */
+ if (get_implied_value(left, &sval) ||
+ get_implied_value(right, &sval))
+ return;
+
+ get_host_rl(left, &left_rl);
+ get_host_rl(right, &right_rl);
+
+ /* nothing to do */
+ if (!left_rl && !right_rl)
+ return;
+ /* if both sides are user data that's not a good limit */
+ if (left_rl && right_rl)
+ return;
+
+ if (left_rl)
+ user_rl = left_rl;
+ else
+ user_rl = right_rl;
+
+ type = get_type(expr);
+ if (type_unsigned(type))
+ user_rl = strip_negatives(user_rl);
+ capped_state = alloc_estate_rl(user_rl);
+ estate_set_capped(capped_state);
+
+ switch (expr->op) {
+ case '<':
+ case SPECIAL_UNSIGNED_LT:
+ case SPECIAL_LTE:
+ case SPECIAL_UNSIGNED_LTE:
+ /* host < x is true, or x < host is false: host is bounded by x */
+ if (left_rl)
+ left_true = capped_state;
+ else
+ right_false = capped_state;
+ break;
+ case '>':
+ case SPECIAL_UNSIGNED_GT:
+ case SPECIAL_GTE:
+ case SPECIAL_UNSIGNED_GTE:
+ if (left_rl)
+ left_false = capped_state;
+ else
+ right_true = capped_state;
+ break;
+ }
+
+ set_true_false_states_expr(my_id, left, left_true, left_false);
+ set_true_false_states_expr(my_id, right, right_true, right_false);
+}
+
+/*
+ * CONDITION_HOOK callback: dispatch comparison expressions to
+ * handle_eq_noteq() for ==/!= and to handle_compare() for everything else.
+ * Non-comparison conditions are ignored.
+ */
+static void match_condition_host(struct expression *expr)
+{
+	if (!expr || expr->type != EXPR_COMPARE)
+		return;
+
+	switch (expr->op) {
+	case SPECIAL_EQUAL:
+	case SPECIAL_NOTEQUAL:
+		handle_eq_noteq(expr);
+		break;
+	default:
+		handle_compare(expr);
+		break;
+	}
+}
+
+/*
+ * Function hook installed for every entry of host_input_funcs[]: records
+ * that the current function receives host data.  No argument is used.
+ */
+static void match_returns_host_rl(const char *fn, struct expression *expr, void *unused)
+{
+ func_gets_host_data = true;
+}
+
+/*
+ * Returns 1 when any host-data state in the current state tree belongs to
+ * @sym, 0 otherwise.
+ */
+static int has_host_data(struct symbol *sym)
+{
+	struct sm_state *sm;
+
+	FOR_EACH_MY_SM(my_id, __get_cur_stree(), sm) {
+		if (sm->sym == sym)
+			return 1;
+	} END_FOR_EACH_SM(sm);
+
+	return 0;
+}
+
+
+/*
+ * True when at least one argument of @call either points to host data or is
+ * rooted at a symbol that has a host-data state.
+ */
+bool we_pass_host_data(struct expression *call)
+{
+	struct expression *arg;
+	struct symbol *arg_sym;
+
+	FOR_EACH_PTR(call->args, arg) {
+		if (points_to_host_data(arg))
+			return true;
+
+		arg_sym = expr_to_sym(arg);
+		if (arg_sym && has_host_data(arg_sym))
+			return true;
+	} END_FOR_EACH_PTR(arg);
+
+	return false;
+}
+
+/*
+ * Look up the host range stored for the return value of @call under the key
+ * "return <call pointer>".  Stores it in *@rl and returns 1 when a state
+ * with a range exists; returns 0 otherwise, and always for fake calls.
+ */
+static int db_returned_host_rl(struct expression *call, struct range_list **rl)
+{
+	struct smatch_state *ret_state;
+	char key[48];
+
+	if (is_fake_call(call))
+		return 0;
+
+	snprintf(key, sizeof(key), "return %p", call);
+	ret_state = get_state(my_id, key, NULL);
+	if (!ret_state)
+		return 0;
+	if (!estate_rl(ret_state))
+		return 0;
+
+	*rl = estate_rl(ret_state);
+	return 1;
+}
+
+/* Returns get_all_states_stree() for this check's id. */
+struct stree *get_host_stree(void)
+{
+ return get_all_states_stree(my_id);
+}
+
+/*
+ * Scratch flags shared between get_host_rl() and its callback var_host_rl():
+ * get_host_rl() clears both before the walk, and var_host_rl() sets
+ * host_data_flag when it finds host data and no_host_data_flag when it
+ * decides the expression must not count as host data.
+ */
+static int host_data_flag;
+static int no_host_data_flag;
+
+/*
+ * Callback passed to custom_get_absolute_rl() by get_host_rl().
+ *
+ * Returns the host range of @expr intersected with its absolute range, and
+ * sets host_data_flag, when @expr is recognised as host data.  Returns NULL
+ * otherwise; on some paths it also sets no_host_data_flag.
+ */
+struct range_list *var_host_rl(struct expression *expr)
+{
+ struct smatch_state *state;
+ struct range_list *rl;
+ struct range_list *absolute_rl;
+
+ /* an address-of expression is never host data */
+ if (expr->type == EXPR_PREOP && expr->op == '&') {
+ no_host_data_flag = 1;
+ return NULL;
+ }
+ /* x % host: only the right operand needs to be host data */
+ if (expr->type == EXPR_BINOP && expr->op == '%') {
+ struct range_list *left, *right;
+
+ if (!get_host_rl(expr->right, &right))
+ return NULL;
+ get_absolute_rl(expr->left, &left);
+ rl = rl_binop(left, '%', right);
+ goto found;
+ }
+
+ if (expr->type == EXPR_BINOP && expr->op == '/') {
+ struct range_list *left = NULL;
+ struct range_list *right = NULL;
+ struct range_list *abs_right;
+
+ /*
+ * The specific bug I'm dealing with is:
+ *
+ * foo = capped_user / unknown;
+ *
+ * Instead of just saying foo is now entirely user_rl we should
+ * probably say instead that it is not at all user data.
+ *
+ */
+
+ get_host_rl(expr->left, &left);
+ get_host_rl(expr->right, &right);
+ get_absolute_rl(expr->right, &abs_right);
+
+ if (left && !right) {
+ rl = rl_binop(left, '/', abs_right);
+ if (sval_cmp(rl_max(left), rl_max(rl)) < 0)
+ no_host_data_flag = 1;
+ }
+
+ /* division never reports a host range from this callback */
+ return NULL;
+ }
+
+ /* direct call to one of the host input functions */
+ if (get_rl_from_function(expr, &rl))
+ goto found;
+
+ state = get_state_expr(my_id, expr);
+ if (state && estate_rl(state)) {
+ rl = estate_rl(state);
+ goto found;
+ }
+
+ if (expr->type == EXPR_CALL && db_returned_host_rl(expr, &rl))
+ goto found;
+
+ if (expr->type == EXPR_PREOP && expr->op == '*' &&
+ points_to_host_data(expr->unop)) {
+ rl = var_to_absolute_rl(expr);
+ goto found;
+ }
+
+ if (is_array(expr)) {
+ struct expression *array = get_array_base(expr);
+
+ if (!get_state_expr(my_id, array)) {
+ no_host_data_flag = 1;
+ return NULL;
+ }
+ }
+
+ return NULL;
+found:
+ host_data_flag = 1;
+ absolute_rl = var_to_absolute_rl(expr);
+ return clone_rl(rl_intersection(rl, absolute_rl));
+}
+
+/* True when @expr is a '-' binary operation whose left operand is a pointer. */
+static bool is_ptr_subtract(struct expression *expr)
+{
+	expr = strip_expr(expr);
+	if (!expr)
+		return false;
+	if (expr->type != EXPR_BINOP || expr->op != '-')
+		return false;
+
+	return type_is_ptr(get_type(expr->left)) ? true : false;
+}
+
+/*
+ * Compute the host-controlled range of @expr.
+ *
+ * Returns 1 and stores the range in *@rl when @expr is host data.  Returns 0
+ * and stores NULL otherwise; pointer subtraction is never host data.
+ *
+ * *@rl is now cleared up front, so it is well defined on every path.  The
+ * original left it untouched on the pointer-subtract early return, and
+ * several callers pass an uninitialised local.
+ */
+int get_host_rl(struct expression *expr, struct range_list **rl)
+{
+	*rl = NULL;
+
+	if (is_ptr_subtract(expr))
+		return 0;
+
+	/* var_host_rl() reports what it found through these two flags */
+	host_data_flag = 0;
+	no_host_data_flag = 0;
+	custom_get_absolute_rl(expr, &var_host_rl, rl);
+	if (!host_data_flag || no_host_data_flag)
+		*rl = NULL;
+
+	return !!*rl;
+}
+
+/* Non-zero when get_host_rl() finds a non-NULL host range for @expr. */
+int is_host_rl(struct expression *expr)
+{
+	struct range_list *found;
+
+	if (!get_host_rl(expr, &found))
+		return 0;
+
+	return found != NULL;
+}
+
+/*
+ * Fetch the stored host range for the variable @name/@sym.  Returns 1 and
+ * sets *@rl when a state with a range exists; returns 0 and leaves *@rl
+ * alone otherwise.
+ */
+int get_host_rl_var_sym(const char *name, struct symbol *sym, struct range_list **rl)
+{
+	struct smatch_state *host = get_state(my_id, name, sym);
+
+	if (!host || !estate_rl(host))
+		return 0;
+
+	*rl = estate_rl(host);
+	return 1;
+}
+
+/*
+ * Return-info callback: write the host range of @sm into the return_states
+ * table as HOST_DATA, or HOST_DATA_SET when the state is flagged "new".
+ *
+ * For parameters (@param >= 0) the bare "$" key and parameters that were
+ * not set are skipped.  The range is narrowed with SMATCH_EXTRA when that
+ * has a range, and "[c]" / "[h]" are appended for capped / treat-untagged
+ * states.
+ */
+static void return_info_callback_host(int return_id, char *return_ranges,
+				      struct expression *returned_expr,
+				      int param,
+				      const char *printed_name,
+				      struct sm_state *sm)
+{
+	struct smatch_state *extra_state;
+	struct range_list *host_rl;
+	char value[64];
+
+	if (param >= 0 &&
+	    (strcmp(printed_name, "$") == 0 ||
+	     !param_was_set_var_sym(sm->name, sm->sym)))
+		return;
+
+	host_rl = estate_rl(sm->state);
+	if (!host_rl)
+		return;
+
+	extra_state = get_state(SMATCH_EXTRA, sm->name, sm->sym);
+	if (estate_rl(extra_state))
+		host_rl = rl_intersection(estate_rl(sm->state), estate_rl(extra_state));
+	if (!host_rl)
+		return;
+
+	snprintf(value, sizeof(value), "%s%s%s",
+		 show_rl(host_rl),
+		 estate_capped(sm->state) ? "[c]" : "",
+		 estate_treat_untagged(sm->state) ? "[h]" : "");
+
+	if (estate_new(sm->state))
+		sql_insert_return_states(return_id, return_ranges, HOST_DATA_SET,
+					 param, printed_name, value);
+	else
+		sql_insert_return_states(return_id, return_ranges, HOST_DATA,
+					 param, printed_name, value);
+}
+
+/*
+ * Caller-info callback: write the host range of @sm, passed as parameter
+ * @param of @call, into the caller_info table as HOST_DATA.  The range is
+ * narrowed with SMATCH_EXTRA when available, and "[c]" / "[h]" are appended
+ * for capped / treat-untagged states.  States with an empty name are
+ * skipped.
+ */
+static void caller_info_callback_host(struct expression *call, int param, char *printed_name, struct sm_state *sm)
+{
+	struct smatch_state *extra_state;
+	struct range_list *host_rl;
+	struct symbol *param_type;
+	char value[64];
+
+	/*
+	 * Smatch uses a hack where if we get an unsigned long we say it's
+	 * both user data and it points to user data. But if we pass it to a
+	 * function which takes an int, then it's just user data. There's not
+	 * enough bytes for it to be a pointer.
+	 *
+	 */
+	param_type = get_arg_type(call->fn, param);
+	if (strcmp(printed_name, "$") != 0 && param_type &&
+	    type_bits(param_type) < type_bits(&ptr_ctype))
+		return;
+
+	if (strcmp(sm->state->name, "") == 0)
+		return;
+
+	extra_state = __get_state(SMATCH_EXTRA, sm->name, sm->sym);
+	if (extra_state && estate_rl(extra_state))
+		host_rl = rl_intersection(estate_rl(sm->state), estate_rl(extra_state));
+	else
+		host_rl = estate_rl(sm->state);
+
+	if (!host_rl)
+		return;
+
+	snprintf(value, sizeof(value), "%s%s%s", show_rl(host_rl),
+		 estate_capped(sm->state) ? "[c]" : "",
+		 estate_treat_untagged(sm->state) ? "[h]" : "");
+	sql_insert_caller_info(call, HOST_DATA, param, printed_name, value);
+}
+
+/*
+ * PARAM_SET return-state hook: when the callee overwrote the variable named
+ * by @param/@key and that variable has a host-data state, reset the state to
+ * empty.  The call most recently handled by match_host_input() is skipped.
+ * @value is not used.
+ */
+static void db_param_set(struct expression *expr, int param, char *key, char *value)
+{
+	struct expression *arg;
+	struct symbol *sym;
+	char *name;
+
+	while (expr->type == EXPR_ASSIGNMENT)
+		expr = strip_expr(expr->right);
+	if (expr->type != EXPR_CALL || expr == ignore_param_set)
+		return;
+
+	arg = get_argument_from_call_expr(expr->args, param);
+	if (!arg)
+		return;
+
+	name = get_variable_from_key(arg, key, &sym);
+	if (name && sym && get_state(my_id, name, sym))
+		set_state(my_id, name, sym, alloc_estate_empty());
+
+	free_string(name);
+}
+
+/* True when the DB value string @value contains ",c" or "[c". */
+static bool param_data_capped(const char *value)
+{
+	return strstr(value, ",c") != NULL || strstr(value, "[c") != NULL;
+}
+
+/* True when the DB value string @value contains ",h" or "[h". */
+static bool param_data_treat_untagged(const char *value)
+{
+	return strstr(value, ",h") != NULL || strstr(value, "[h") != NULL;
+}
+
+/*
+ * Caller-info hook for HOST_DATA rows: seed the host-data state of the
+ * parameter (or parameter member) described by @sym/@key from the DB string
+ * @value.  Keys whose type cannot be resolved or is a struct are skipped.
+ * @name is not used.
+ *
+ * The string from get_variable_from_key() is now released on every path.
+ * The original never freed it, whereas db_param_set() and
+ * set_to_host_data() free theirs after calling set_state().
+ */
+static void set_param_host_data(const char *name, struct symbol *sym, char *key, char *value)
+{
+	struct expression *expr;
+	struct range_list *rl = NULL;
+	struct smatch_state *state;
+	struct symbol *type;
+	char *fullname;
+
+	expr = symbol_expression(sym);
+	fullname = get_variable_from_key(expr, key, NULL);
+	if (!fullname)
+		return;
+
+	type = get_member_type_from_key(expr, key);
+	if (!type || type->type == SYM_STRUCT)
+		goto free;
+
+	str_to_rl(type, value, &rl);
+	rl = swap_mtag_seed(expr, rl);
+	state = alloc_estate_rl(rl);
+	if (param_data_capped(value) || is_capped(expr))
+		estate_set_capped(state);
+	/* NOTE(review): meaning of address space 5 is not visible here; confirm */
+	if (param_data_treat_untagged(value) || sym->ctype.as == 5)
+		estate_set_treat_untagged(state);
+	set_state(my_id, fullname, sym, state);
+free:
+	free_string(fullname);
+}
+
+/*
+ * Caller-info hook for INTERNAL rows: stores the "called" state under the
+ * key "this_function" for my_call_id.  No argument is used.
+ */
+static void set_called(const char *name, struct symbol *sym, char *key, char *value)
+{
+ set_state(my_call_id, "this_function", NULL, &called);
+}
+
+
+/* Values for the is_new argument of store_host_data_return() and set_to_host_data(). */
+#define OLD 0
+#define NEW 1
+
+/*
+ * Remember the host range returned by the call @expr when its result is not
+ * assigned anywhere.  The state is stored under "return <expr pointer><key
+ * without the leading '$'>".  Keys that do not start with '$' are ignored.
+ * When @is_new is set the state is flagged "new".
+ *
+ * rl now starts out NULL, as it does in set_to_host_data(), so it is never
+ * read uninitialised if call_results_to_rl() leaves it alone.
+ */
+static void store_host_data_return(struct expression *expr, char *key, char *value, bool is_new)
+{
+	struct smatch_state *state;
+	struct range_list *rl = NULL;
+	struct symbol *type;
+	char buf[48];
+
+	if (key[0] != '$')
+		return;
+
+	type = get_type(expr);
+	snprintf(buf, sizeof(buf), "return %p%s", expr, key + 1);
+	call_results_to_rl(expr, type, value, &rl);
+
+	state = alloc_estate_rl(rl);
+	if (is_new)
+		estate_set_new(state);
+
+	set_state(my_id, buf, NULL, state);
+}
+
+/*
+ * Give the variable named by @expr/@key a host-data state whose range is
+ * parsed from the DB string @value.  Capped / treat-untagged markers in
+ * @value are applied, and the state is flagged "new" when @is_new is set.
+ * Nothing is stored when the key cannot be resolved to a name and symbol.
+ */
+static void set_to_host_data(struct expression *expr, char *key, char *value, bool is_new)
+{
+	struct range_list *rl = NULL;
+	struct smatch_state *host;
+	struct symbol *member_type;
+	struct symbol *sym;
+	char *name;
+
+	member_type = get_member_type_from_key(expr, key);
+	name = get_variable_from_key(expr, key, &sym);
+
+	if (name && sym) {
+		call_results_to_rl(expr, member_type, value, &rl);
+
+		host = alloc_estate_rl(rl);
+		if (param_data_capped(value))
+			estate_set_capped(host);
+		if (param_data_treat_untagged(value))
+			estate_set_treat_untagged(host);
+		if (is_new)
+			estate_set_new(host);
+		set_state(my_id, name, sym, host);
+	}
+
+	free_string(name);
+}
+
+/*
+ * HOST_DATA return-state hook: the callee passes host data through.  It is
+ * applied only when the call itself was given host data, and the resulting
+ * state is not flagged "new" (OLD).  @param == -1 refers to the return
+ * value; any other value refers to that argument of the call.
+ */
+static void returns_param_host_data(struct expression *expr, int param, char *key, char *value)
+{
+	struct expression *call = expr;
+	struct expression *target;
+
+	while (call->type == EXPR_ASSIGNMENT)
+		call = strip_expr(call->right);
+	if (call->type != EXPR_CALL || !we_pass_host_data(call))
+		return;
+
+	if (param == -1) {
+		if (expr->type == EXPR_ASSIGNMENT)
+			set_to_host_data(expr->left, key, value, OLD);
+		else
+			store_host_data_return(expr, key, value, OLD);
+		return;
+	}
+
+	target = get_argument_from_call_expr(call->args, param);
+	if (target)
+		set_to_host_data(target, key, value, OLD);
+}
+
+/*
+ * HOST_DATA_SET return-state hook: the callee produced host data itself.
+ * Marks the current function as receiving host data and stores the state
+ * flagged "new" (NEW).  @param == -1 refers to the return value; any other
+ * value refers to that argument of the call.
+ */
+static void returns_param_host_data_set(struct expression *expr, int param, char *key, char *value)
+{
+	struct expression *target;
+
+	func_gets_host_data = true;
+
+	if (param == -1) {
+		if (expr->type == EXPR_ASSIGNMENT)
+			set_to_host_data(expr->left, key, value, NEW);
+		else
+			store_host_data_return(expr, key, value, NEW);
+		return;
+	}
+
+	while (expr->type == EXPR_ASSIGNMENT)
+		expr = strip_expr(expr->right);
+	if (expr->type != EXPR_CALL)
+		return;
+
+	target = get_argument_from_call_expr(expr->args, param);
+	if (target)
+		set_to_host_data(target, key, value, NEW);
+}
+
+/*
+ * CAPPED_DATA return-state hook: when the variable named by @param/@key has
+ * an uncapped host-data state, replace it with a capped clone.
+ * @value is not used.
+ */
+static void returns_param_capped_host(struct expression *expr, int param, char *key, char *value)
+{
+	struct smatch_state *current, *capped;
+	struct symbol *sym;
+	char *name;
+
+	name = get_name_sym_from_param_key(expr, param, key, &sym);
+	if (name && sym) {
+		current = get_state(my_id, name, sym);
+		if (current && !estate_capped(current)) {
+			capped = clone_estate(current);
+			estate_set_capped(capped);
+			set_state(my_id, name, sym, capped);
+		}
+	}
+
+	free_string(name);
+}
+
+/*
+ * FUNC_DEF_HOOK callback: a function whose own name is in the host input
+ * table is itself marked as receiving host data.
+ */
+static void match_function_def(struct symbol *sym)
+{
+	char *fn_name = sym->ident->name;
+
+	if (!is_host_fn(fn_name))
+		return;
+
+	func_gets_host_data = true;
+}
+
+/*
+ * Install @fun_ptr as a function hook, with NULL hook data, for every name
+ * in host_input_funcs[].
+ */
+void register_host_input_funcs(const void* fun_ptr)
+{
+	int idx = 0;
+
+	while (idx < ARRAY_SIZE(host_input_funcs)) {
+		add_function_hook(host_input_funcs[idx], fun_ptr, NULL);
+		idx++;
+	}
+}
+
+/*
+ * Check registration entry point.  Records @id as this check's id and, for
+ * kernel projects only, installs all hooks and DB callbacks used in this
+ * file.
+ */
+void register_kernel_host_data(int id)
+{
+ my_id = id;
+
+ if (option_project != PROJ_KERNEL)
+ return;
+
+ set_dynamic_states(my_id);
+
+ add_function_data(&func_gets_host_data);
+ add_hook(&match_function_def, FUNC_DEF_HOOK);
+
+ /* snapshot of the state tree taken after the definition, freed after the function */
+ add_hook(&save_start_states, AFTER_DEF_HOOK);
+ add_hook(&free_start_states, AFTER_FUNC_HOOK);
+ add_function_data((unsigned long *)&start_states);
+
+ add_unmatched_state_hook(my_id, &empty_state);
+ add_extra_nomod_hook(&extra_nomod_hook);
+ add_pre_merge_hook(my_id, &pre_merge_hook);
+ add_merge_hook(my_id, &merge_estates);
+
+ /* every host input function gets both hooks */
+ register_host_input_funcs(&match_host_input);
+ register_host_input_funcs(&match_returns_host_rl);
+
+ add_hook(&match_assign_host, ASSIGNMENT_HOOK);
+ select_return_states_hook(PARAM_SET, &db_param_set);
+ add_hook(&match_condition_host, CONDITION_HOOK);
+
+ /* DB writers (caller/return info) and DB readers (caller info, return states) */
+ add_caller_info_callback(my_id, caller_info_callback_host);
+ add_return_info_callback(my_id, return_info_callback_host);
+ select_caller_info_hook(set_param_host_data, HOST_DATA);
+ select_return_states_hook(HOST_DATA, &returns_param_host_data);
+ select_return_states_hook(HOST_DATA_SET, &returns_param_host_data_set);
+ select_return_states_hook(CAPPED_DATA, &returns_param_capped_host);
+
+
+}
+
+/*
+ * Second registration entry point.  Records @id as my_call_id and, for
+ * kernel projects only, hooks set_called() to INTERNAL caller-info rows.
+ */
+void register_kernel_host_data2(int id)
+{
+ my_call_id = id;
+
+ if (option_project != PROJ_KERNEL)
+ return;
+
+ select_caller_info_hook(set_called, INTERNAL);
+}
+
diff --git a/smatch_points_to_host_data.c b/smatch_points_to_host_data.c
new file mode 100755
index 00000000..58e516d2
--- /dev/null
+++ b/smatch_points_to_host_data.c
@@ -0,0 +1,334 @@
+/*
+ * Copyright (C) 2020 Oracle.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see http://www.gnu.org/copyleft/gpl.txt
+ */
+
+/* Note: The below code is just a quick trial to modify
+ * smatch_points_to_user_data.c to work on data received from an
+ * untrusted host/VMM.
+ * Just as smatch_points_to_user_data.c works with
+ * smatch_kernel_user_data.c, this file works with the
+ * smatch_kernel_host_data.c code. It also uses some helper functions
+ * from the check_host_input.c pattern.
+ */
+
+#include "smatch.h"
+#include "smatch_slist.h"
+#include "smatch_extra.h"
+#include <math.h>
+
+static int my_id; /* check id, assigned in register_points_to_host_data() */
+STATE(host_data); /* state recorded for expressions that point to host data */
+
+/* Helpers defined outside this file. NOTE(review): presumably in check_host_input.c -- confirm. */
+extern bool is_host_fn(char *fn_name);
+extern uint get_arg_bitmask(struct expression *expr);
+
+
+/*
+ * Return true when expr (after strip_expr()) is a direct call through a
+ * named symbol and is_host_fn() recognises that symbol's name.
+ * A NULL expression yields false.
+ */
+static bool is_points_to_host_data_fn(struct expression *expr)
+{
+	struct expression *call;
+	struct symbol *callee;
+
+	call = expr ? strip_expr(expr) : NULL;
+	if (!call || call->type != EXPR_CALL)
+		return false;
+	if (call->fn->type != EXPR_SYMBOL)
+		return false;
+
+	callee = call->fn->symbol;
+	if (!callee)
+		return false;
+
+	return is_host_fn(callee->ident->name);
+}
+
+/*
+ * Decide whether expr refers to array storage reached through a pointer
+ * that points_to_host_data() accepts.  Two shapes are recognised:
+ *   - element addresses such as &foo->data[4], seen as a '+' expression
+ *     whose left or right operand points to host data;
+ *   - an array-typed member reached via a dereference, as in
+ *     foo = skb->data; frob(foo->array);
+ * The caller must pass a non-NULL expression.
+ */
+static bool is_array_of_host_data(struct expression *expr)
+{
+	struct expression *base;
+	struct symbol *type;
+
+	/* Look through a leading '&', and a '*' directly beneath it. */
+	if (expr->type == EXPR_PREOP && expr->op == '&') {
+		expr = strip_expr(expr->unop);
+		if (expr->type == EXPR_PREOP && expr->op == '*')
+			expr = strip_expr(expr->unop);
+	}
+
+	/* Array element: either side of the addition may be the host pointer. */
+	if (expr->type == EXPR_BINOP && expr->op == '+' &&
+	    (points_to_host_data(expr->left) || points_to_host_data(expr->right)))
+		return true;
+
+	/* Array member: only an array-typed EXPR_DEREF qualifies. */
+	type = get_type(expr);
+	if (!type || type->type != SYM_ARRAY || expr->type != EXPR_DEREF)
+		return false;
+
+	base = strip_expr(expr->deref);
+	if (base->type == EXPR_PREOP && base->op == '*')
+		return points_to_host_data(strip_expr(base->unop));
+
+	return false;
+}
+
+/*
+ * Return true when expr is considered to point to host data.
+ *
+ * NULL expressions and fake calls are rejected.  An assignment is judged
+ * by its left-hand side.  Otherwise the expression qualifies if it is an
+ * array of host data, a call to a host function, or has host_data among
+ * the possible states recorded under my_id.  For a '+' expression the
+ * last two checks are applied to its left operand.
+ */
+bool points_to_host_data(struct expression *expr)
+{
+	struct sm_state *sm;
+
+	expr = expr ? strip_expr(expr) : NULL;
+	if (!expr || is_fake_call(expr))
+		return false;
+
+	if (expr->type == EXPR_ASSIGNMENT)
+		return points_to_host_data(expr->left);
+
+	if (is_array_of_host_data(expr))
+		return true;
+
+	/* Pointer arithmetic: continue with the base pointer. */
+	if (expr->type == EXPR_BINOP && expr->op == '+')
+		expr = strip_expr(expr->left);
+
+	if (is_points_to_host_data_fn(expr))
+		return true;
+
+	sm = get_sm_state_expr(my_id, expr);
+	return sm && slist_has_state(sm->possible, &host_data);
+}
+
+/* Record the host_data state for expr under this check's id. */
+void set_points_to_host_data(struct expression *expr)
+{
+	struct smatch_state *mark = &host_data;
+
+	set_state_expr(my_id, expr, mark);
+}
+
+/*
+ * ASSIGNMENT_HOOK callback for pointer-typed destinations.
+ *
+ * If the right-hand side points to host data, the left-hand side is
+ * marked too.  Otherwise any state already recorded for the left-hand
+ * side is reset to undefined.  Assignments whose right-hand side is a
+ * fake call are ignored.
+ */
+static void match_assign_host(struct expression *expr)
+{
+	struct expression *lhs = expr->left;
+	struct expression *rhs = expr->right;
+
+	if (is_fake_call(rhs) || !is_ptr_type(get_type(lhs)))
+		return;
+
+	if (points_to_host_data(rhs)) {
+		set_points_to_host_data(lhs);
+	} else if (get_state_expr(my_id, lhs)) {
+		set_state_expr(my_id, lhs, &undefined);
+	}
+}
+
+/*
+ * Function hook for memcpy-style calls (argument 0 = destination,
+ * argument 1 = source).  The destination is marked as host data when the
+ * source points to host data.  Otherwise a state already recorded for
+ * the destination is reset to undefined.
+ */
+static void match_memcpy_host(const char *fn, struct expression *expr, void *_unused)
+{
+	struct expression *dest = get_argument_from_call_expr(expr->args, 0);
+	struct expression *src = get_argument_from_call_expr(expr->args, 1);
+
+	if (points_to_host_data(src)) {
+		set_points_to_host_data(dest);
+	} else if (get_state_expr(my_id, dest)) {
+		set_state_expr(my_id, dest, &undefined);
+	}
+}
+
+
+/*
+ * Mark argument number `arg` of the call `expr` as pointing to host data.
+ * Does nothing when the call has no such argument.
+ *
+ * TODO: the size of the destination is not checked.  An earlier draft
+ * fetched argument 2 as a size and returned early when
+ * get_implied_value() succeeded on it; that check is still missing.
+ */
+static void set_state_expr_arg(struct expression *expr, int arg)
+{
+	struct expression *dest;
+
+	dest = strip_expr(get_argument_from_call_expr(expr->args, arg));
+	if (dest) {
+		set_state_expr(my_id, dest, &host_data);
+	}
+}
+
+/*
+ * Function hook for the host input functions: marks the output
+ * argument(s) of the call as pointing to host data.
+ *
+ * get_arg_bitmask() selects the arguments.  A zero mask means the
+ * function returns host data and there is nothing to mark here.
+ *
+ * NOTE(review): the switch is driven by (uint)log2(mask), yet the case
+ * labels 0xC, 0x36 and 0x74 look like multi-bit mask values rather than
+ * bit indexes.  Confirm against get_arg_bitmask() whether switching on
+ * the mask itself was intended.  Behaviour is left as it was.
+ */
+static void match_host_function(const char *fn, struct expression *expr, void *_unused)
+{
+	uint mask;
+	uint selector;
+	int first, last, arg;
+
+	if (!expr)
+		return;
+
+	mask = get_arg_bitmask(expr);
+	if (!mask)	/* function returns host data, nothing to do here */
+		return;
+
+	selector = (uint)log2(mask);
+	switch (selector) {
+	case 0xC:
+		first = 2;
+		last = 3;
+		break;
+	case 0x36:
+		first = 1;
+		last = 4;
+		break;
+	case 0x74:
+		first = 2;
+		last = 5;
+		break;
+	default:
+		/* single argument whose index is the computed selector */
+		first = last = selector;
+		break;
+	}
+
+	for (arg = first; arg <= last; arg++)
+		set_state_expr_arg(expr, arg);
+}
+
+
+/*
+ * Return-info callback: emits a return_states row for every state that
+ * may be host_data.
+ *
+ * For param >= 0 nothing is emitted if the variable already had a state
+ * under my_id in the start states.  For param < 0 the row type is
+ * HOST_PTR when the variable was not set, and HOST_PTR_SET otherwise.
+ * Variables whose parent is gone are skipped.
+ */
+static void return_info_callback_host(int return_id, char *return_ranges,
+				      struct expression *returned_expr,
+				      int param,
+				      const char *printed_name,
+				      struct sm_state *sm)
+{
+	int type;
+
+	if (!slist_has_state(sm->possible, &host_data))
+		return;
+
+	if (param >= 0 &&
+	    get_state_stree(get_start_states(), my_id, sm->name, sm->sym))
+		return;
+
+	type = HOST_PTR_SET;
+	if (param < 0 && !param_was_set_var_sym(sm->name, sm->sym))
+		type = HOST_PTR;
+
+	if (parent_is_gone_var_sym(sm->name, sm->sym))
+		return;
+
+	sql_insert_return_states(return_id, return_ranges, type,
+				 param, printed_name, "");
+}
+
+/*
+ * Shared worker for the HOST_PTR and HOST_PTR_SET return-state hooks.
+ *
+ * Finds the call underneath any assignments in expr, then marks the
+ * variable described by (param, key) as host_data.  With param == -1 the
+ * variable is derived from the left-hand side of the assignment;
+ * otherwise from the call's argument number `param`.  When `set` is
+ * false the state is only applied if we_pass_host_data() accepts the
+ * call.  `value` is not used.
+ */
+static void returns_host_ptr_helper(struct expression *expr, int param, char *key, char *value, bool set)
+{
+	struct expression *call, *arg;
+	struct symbol *sym;
+	char *name;
+
+	/* Dig through (possibly chained) assignments to reach the call. */
+	for (call = expr; call->type == EXPR_ASSIGNMENT; call = strip_expr(call->right))
+		;
+	if (call->type != EXPR_CALL)
+		return;
+
+	if (!set && !we_pass_host_data(call))
+		return;
+
+	if (param == -1) {
+		/* Nothing to do. Fake assignments should handle it */
+		if (expr->type != EXPR_ASSIGNMENT)
+			return;
+		arg = expr->left;
+	} else {
+		arg = get_argument_from_call_expr(call->args, param);
+		if (!arg)
+			return;
+	}
+
+	name = get_variable_from_key(arg, key, &sym);
+	if (name && sym) {
+		set_state(my_id, name, sym, &host_data);
+	}
+	free_string(name);
+}
+
+/*
+ * Return-state hook for HOST_PTR rows: runs the helper with set == false,
+ * so the state is applied only if we_pass_host_data() accepts the call.
+ */
+static void returns_host_ptr(struct expression *expr, int param, char *key, char *value)
+{
+	const bool set = false;
+
+	returns_host_ptr_helper(expr, param, key, value, set);
+}
+
+/*
+ * Return-state hook for HOST_PTR_SET rows: runs the helper with
+ * set == true, which skips the we_pass_host_data() check.
+ */
+static void returns_host_ptr_set(struct expression *expr, int param, char *key, char *value)
+{
+	const bool set = true;
+
+	returns_host_ptr_helper(expr, param, key, value, set);
+}
+
+/*
+ * Caller-info hook for HOST_PTR rows: marks the variable described by
+ * (sym, key) as host_data at the start of the called function.
+ *
+ * `name` and `value` are not used.  The string returned by
+ * get_variable_from_key() is released with free_string() once the state
+ * has been recorded, the same way returns_host_ptr_helper() does it.
+ */
+static void set_param_host_ptr(const char *name, struct symbol *sym, char *key, char *value)
+{
+	struct expression *expr;
+	char *fullname;
+
+	expr = symbol_expression(sym);
+	fullname = get_variable_from_key(expr, key, NULL);
+	if (!fullname)
+		return;
+
+	set_state(my_id, fullname, sym, &host_data);
+	free_string(fullname);	/* previously leaked on every call */
+}
+
+/*
+ * Caller-info callback: emits a HOST_PTR caller_info row for a passed
+ * parameter whose state may be host_data.
+ */
+static void caller_info_callback_host(struct expression *call, int param, char *printed_name, struct sm_state *sm)
+{
+	if (slist_has_state(sm->possible, &host_data)) {
+		sql_insert_caller_info(call, HOST_PTR, param, printed_name, "");
+	}
+}
+
+extern void register_host_input_funcs(const void* fun_ptr); /* defined in smatch_kernel_host_data.c */
+/*
+ * Registration entry point for the points-to-host-data check: stores the
+ * check id in my_id and, for kernel projects only, installs the hooks below.
+ */
+void register_points_to_host_data(int id)
+{
+ my_id = id;
+
+ if (option_project != PROJ_KERNEL) /* nothing is registered for other projects */
+ return;
+
+ add_hook(&match_assign_host, ASSIGNMENT_HOOK);
+
+ register_host_input_funcs(&match_host_function); /* one hook per name in host_input_funcs[] */
+ /* Copies propagate the state from source to destination. */
+ add_function_hook("memcpy", &match_memcpy_host, NULL);
+ add_function_hook("__memcpy", &match_memcpy_host, NULL);
+
+ add_caller_info_callback(my_id, caller_info_callback_host);
+ add_return_info_callback(my_id, return_info_callback_host);
+ /* Hooks keyed on the HOST_PTR and HOST_PTR_SET row types. */
+ select_caller_info_hook(set_param_host_ptr, HOST_PTR);
+ select_return_states_hook(HOST_PTR, &returns_host_ptr);
+ select_return_states_hook(HOST_PTR_SET, &returns_host_ptr_set);
+
+}
--
2.25.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] Quick trial on tracing host inputs
2022-03-08 8:56 [PATCH] Quick trial on tracing host inputs Elena Reshetova
@ 2022-03-08 11:34 ` Dan Carpenter
2022-03-08 11:37 ` Dan Carpenter
2022-03-08 12:38 ` Dan Carpenter
0 siblings, 2 replies; 4+ messages in thread
From: Dan Carpenter @ 2022-03-08 11:34 UTC (permalink / raw)
To: Elena Reshetova; +Cc: smatch
On Tue, Mar 08, 2022 at 10:56:30AM +0200, Elena Reshetova wrote:
> This is just a quick trial to trace inputs received
> from the host/VMM in the same way as user inputs.
>
> Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
> ---
> smatch_kernel_host_data.c | 1320 ++++++++++++++++++++++++++++++++++
> smatch_points_to_host_data.c | 334 +++++++++
The changes to smatch.h and check_list.h are missing.
> 2 files changed, 1654 insertions(+)
> create mode 100755 smatch_kernel_host_data.c
> create mode 100755 smatch_points_to_host_data.c
>
> diff --git a/smatch_kernel_host_data.c b/smatch_kernel_host_data.c
> new file mode 100755
> index 00000000..540875c5
> --- /dev/null
> +++ b/smatch_kernel_host_data.c
> @@ -0,0 +1,1320 @@
> +/*
> + * Copyright (C) 2011 Dan Carpenter.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, see http://www.gnu.org/copyleft/gpl.txt
> + */
> +
> +/* Note: The below code is just a quick trial to modify the
> + * smatch_kernel_host_data.c to work on data received from a
> + * untrusted host/VMM.
> + * Similar as smatch_kernel_user_data.c it works with
> + * smatch_points_to_host_data.c code. It also uses some helper functions
> + * from the check_host_input.c pattern.
> + */
> +
> +#include "smatch.h"
> +#include "smatch_slist.h"
> +#include "smatch_extra.h"
> +#include <math.h>
> +
> +const char *host_input_funcs[] = {
> + "inb", "inw", "inl", "inb_p", "inw_p", "inl_p", "insb", "insw", "insl", "get_dma_residue", "ioread8", "ioread16", "ioread32",
> + "ioread16be", "ioread32be", "ioread64_lo_hi", "ioread64_hi_lo", "ioread64be_lo_hi", "ioread64be_hi_lo", "ioread8_rep",
> + "ioread16_rep", "ioread32_rep", "__ioread32_copy", "iomap_readq", "iomap_readb", "iomap_readw", "iomap_readl", "memcpy_fromio",
> + "mmio_insb", "mmio_insw", "mmio_insl", "readb", "readw", "readl", "readq", "readsb", "readsw", "readsl", "readsq", "__readb", "__readw",
> + "__readl", "__readq", "__readsb", "__readsw", "__readsl", "__readsq", "__raw_readb", "__raw_readw", "__raw_readl", "__raw_readq",
> + "lo_hi_readq", "hi_lo_readq", "lo_hi_readq_relaxed", "hi_lo_readq_relaxed", "readb_relaxed", "readw_relaxed", "readl_relaxed",
> + "readq_relaxed", "native_read_msr", "native_read_msr_safe", "__rdmsr", "rdmsrl", "rdmsrl_safe", "rdmsr_on_cpu", "rdmsrl_on_cpu",
> + "rdmsr_on_cpus", "rdmsr_safe_on_cpu", "rdmsrl_safe_on_cpu", "paravirt_read_msr", "paravirt_read_msr_safe", "read_msr", "msr_read",
> + "native_apic_msr_read", "native_apic_mem_read", "native_apic_icr_read", "apic_read", "apic_icr_read", "native_x2apic_icr_read",
> + "io_apic_read", "native_io_apic_read", "__ioapic_read_entry", "ioapic_read_entry", "vp_ioread8", "vp_ioread16", "vp_ioread32",
> + "__virtio_cread_many", "virtio_cread", "virtio_cread_le", "virtio_cread8", "virtio_cread16", "virtio_cread32", "virtio_cread64",
> + "virtio_cread_bytes", "virtio16_to_cpu", "virtio32_to_cpu", "virtio64_to_cpu", "__virtio16_to_cpu", "__virtio32_to_cpu",
> + "__virtio64_to_cpu", "virtqueue_get_buf", "vringh16_to_cpu", "vringh32_to_cpu", "vringh64_to_cpu", "tap16_to_cpu", "tun16_to_cpu",
> + "read_pci_config", "read_pci_config_byte", "read_pci_config_16", "raw_pci_read", "pci_read", "pci_read_config_byte",
> + "pci_read_config_word", "pci_read_config_dword", "pci_bus_read_config_byte", "pci_bus_read_config_word",
> + "pci_bus_read_config_dword", "pci_generic_config_read", "pci_generic_config_read32", "pci_user_read_config_byte",
> + "pci_user_read_config_word", "pci_user_read_config_dword", "pcie_capability_read_word", "pcie_capability_read_dword",
> + "pci_read_vpd", "serial8250_early_in", "serial_dl_read", "serial8250_in_MCR", "serial_in", "serial_port_in", "serial_icr_read",
> + "serial8250_rx_chars", "dw8250_readl_ext", "udma_readl", "sio_read_reg", "irq_readl_be", "irq_reg_readl", "fw_cfg_read_blob",
> + "acpi_os_read_iomem", "acpi_os_read_port", "acpi_hw_read_multiple", "acpi_hw_read", "acpi_hw_read_port", "acpi_hw_register_read",
> + "acpi_hw_gpe_read", "apei_read", "acpi_read", "__apei_exec_read_register", "cpc_read", "hv_get_register", "iosf_mbi_read",
> + "cpuid", "cpuid_count", "cpuid_eax", "cpuid_ebx", "cpuid_ecx", "cpuid_edx"
> +
> +};
> +
> +
> +static int my_id;
> +static int my_call_id;
> +
> +STATE(called);
This state is not used here. Even in smatch_kernel_user_data.c it
should be moved out of there into a separate file...
> +static unsigned long func_gets_host_data;
> +static struct stree *start_states;
> +
> +static void save_start_states(struct statement *stmt)
> +{
> + start_states = clone_stree(__get_cur_stree());
> +}
> +
> +static void free_start_states(void)
> +{
> + free_stree(&start_states);
> +}
> +
No need to do this these days. It's stored in get_start_states().
> +static struct smatch_state *empty_state(struct sm_state *sm)
> +{
> + return alloc_estate_empty();
> +}
> +
> +static struct smatch_state *new_state(struct symbol *type)
> +{
> + struct smatch_state *state;
> +
> + if (!type || type_is_ptr(type))
> + return NULL;
> +
> + state = alloc_estate_whole(type);
> + estate_set_new(state);
> +
> + return state;
This code is supposed to differentiate between places where we return
user data because it was passed to us or we got fresh user data inside
the function. In smatch_kernel_user_data.c it's only a single question
"did we receive user data? Y/N." But it should be "Was variable foo->bar
user data?"
I would probably pull returned user data out into a separate file these
days. And have a different file for if a variable currently holds user
data or if we pass user data to a function.
> +}
> +
> +static void pre_merge_hook(struct sm_state *cur, struct sm_state *other)
> +{
> + struct smatch_state *user = cur->state;
> + struct smatch_state *extra;
> + struct smatch_state *state;
> + struct range_list *rl;
> +
> + extra = __get_state(SMATCH_EXTRA, cur->name, cur->sym);
> + if (!extra)
> + return;
> + rl = rl_intersection(estate_rl(user), estate_rl(extra));
> + state = alloc_estate_rl(clone_rl(rl));
> + if (estate_capped(user) || is_capped_var_sym(cur->name, cur->sym))
> + estate_set_capped(state);
> + if (estate_treat_untagged(user))
> + estate_set_treat_untagged(state);
Tagged is for ARM tagged pointers. It's probably not something you
need. Search for "tag" and delete.
> + if (estates_equiv(state, cur->state))
> + return;
> + set_state(my_id, cur->name, cur->sym, state);
> +}
> +
[ snip ]
> +
> +extern uint get_arg_bitmask(struct expression *expr);
> +static struct expression *ignore_param_set;
> +extern bool is_ignored_func(struct expression *expr);
> +
> +static void match_host_input(const char *fn, struct expression *expr)
> +{
> +
> + uint arg_bitmask = 0;
> +
> + if (!expr)
> + return;
> +
> + arg_bitmask = get_arg_bitmask(expr);
> +
> + if (!arg_bitmask) /* function returns host data, handled via match_returns_host_rl */
> + return;
> +
> + if (is_ignored_func(expr))
> + return;
> +
> + func_gets_host_data = true;
> + ignore_param_set = expr;
> +
> + switch((uint)log2(arg_bitmask)) {
> + case 0xC:
> + tag_argument(expr, 2);
> + tag_argument(expr, 3);
> + break;
> + case 0x36:
> + tag_argument(expr, 1);
> + tag_argument(expr, 2);
> + tag_argument(expr, 3);
> + tag_argument(expr, 4);
> + break;
> + case 0x74:
> + tag_argument(expr, 2);
> + tag_argument(expr, 3);
> + tag_argument(expr, 4);
> + tag_argument(expr, 5);
> + break;
> + default:
> + tag_argument(expr, (uint)log2(arg_bitmask));
> + break;
> + }
> +
> + return;
> +}
This function would be better rewritten with the param key API. Sorry
that Smatch doesn't have documentation. :( An example of how that
works is in check_unwind.c; instead of a bitmap you would just have
multiple entries in the table.
I'm typing directly into my email client so this might not compile...
struct host_fn_info {
const char *name;
int type;
int param;
const char *key;
const sval_t *implies_start, *implies_end;
func_hook *call_back;
};
static struct host_fn_info func_table[] = {
{ "memcpy_fromio", HOST_DATA, 0, "*$" },
...
{ "cpuid", HOST_DATA, 1, "*$" },
{ "cpuid", HOST_DATA, 2, "*$" },
{ "cpuid", HOST_DATA, 3, "*$" },
{ "cpuid", HOST_DATA, 4, "*$" },
};
static void set_param_host_data(struct expression *expr, const char *name, struct symbol *sym, void *data)
{
struct expression *arg;
struct range_list *rl;
arg = gen_expression_from_name_sym(name, sym);
if (strcmp(key, "*$") == 0) {
tag_as_user_data(arg);
return;
}
// make sure that smatch_extra was run first
get_absolute_rl(arg, &rl);
set_state(my_id, name, sym, alloc_estate_rl(rl));
}
The code you have should work though and it should propagate.
What we need is some simple test cases.
//---
void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
unsigned int a, b, c, d e;
unsigned int test(void)
{
cpuid(0, &a, &b, &c, &d);
__smatch_states("host");
return a;
}
unsigned int test2(void)
{
unsigned int x = test();
__smatch_states("host");
}
regards,
dan carpenter
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] Quick trial on tracing host inputs
2022-03-08 11:34 ` Dan Carpenter
@ 2022-03-08 11:37 ` Dan Carpenter
2022-03-08 12:38 ` Dan Carpenter
1 sibling, 0 replies; 4+ messages in thread
From: Dan Carpenter @ 2022-03-08 11:37 UTC (permalink / raw)
To: Elena Reshetova; +Cc: smatch
On Tue, Mar 08, 2022 at 02:34:16PM +0300, Dan Carpenter wrote:
>
> //---
The test case needs to #include "check_debug.h"
>
> void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx,
> unsigned int *ecx, unsigned int *edx);
>
> unsigned int a, b, c, d e;
> unsigned int test(void)
> {
> cpuid(0, &a, &b, &c, &d);
> __smatch_states("host");
> return a;
> }
>
> unsigned int test2(void)
> {
> unsigned int x = test();
> __smatch_states("host");
> }
>
> regards,
> dan carpenter
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] Quick trial on tracing host inputs
2022-03-08 11:34 ` Dan Carpenter
2022-03-08 11:37 ` Dan Carpenter
@ 2022-03-08 12:38 ` Dan Carpenter
1 sibling, 0 replies; 4+ messages in thread
From: Dan Carpenter @ 2022-03-08 12:38 UTC (permalink / raw)
To: Elena Reshetova; +Cc: smatch
On Tue, Mar 08, 2022 at 02:34:16PM +0300, Dan Carpenter wrote:
> void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx,
> unsigned int *ecx, unsigned int *edx);
>
> unsigned int a, b, c, d e;
> unsigned int test(void)
> {
> cpuid(0, &a, &b, &c, &d);
> __smatch_states("host");
> return a;
> }
>
> unsigned int test2(void)
> {
> unsigned int x = test();
> __smatch_states("host");
> }
I had to hack it a bit to get it to compile but my test seems to work.
"x" is correctly marked as host data in test2().
regards,
dan carpenter
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2022-03-08 12:40 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-08 8:56 [PATCH] Quick trial on tracing host inputs Elena Reshetova
2022-03-08 11:34 ` Dan Carpenter
2022-03-08 11:37 ` Dan Carpenter
2022-03-08 12:38 ` Dan Carpenter
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.