From ceafd85298e8f4f6bf0a436a05f372110b442ce6 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 29 Jan 2018 15:28:53 +0000 Subject: [PATCH] Java string solver: ensure base types are loaded This adds a mechanism to the Java frontend class loader for its caller to advise it of extra class dependencies to load, and uses it from the String preprocessor to indicate that since it will add base classes to String, StringBuilder etc, then the class loader should create symbols for those bases. This restores the invariant that holds when --string-refine is not passed, that if a class references another as a base, then that other class has an entry in the symbol table (although that entry may be incomplete). --- src/java_bytecode/java_bytecode_language.cpp | 10 ++++ src/java_bytecode/java_class_loader.cpp | 20 +++++++ src/java_bytecode/java_class_loader.h | 7 +++ .../java_string_library_preprocess.cpp | 52 ++++++++++++++----- .../java_string_library_preprocess.h | 3 ++ 5 files changed, 78 insertions(+), 14 deletions(-) diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 4c12e0b362f..2fd1173d034 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -147,6 +147,16 @@ bool java_bytecode_languaget::parse( java_class_loader.set_message_handler(get_message_handler()); java_class_loader.set_java_cp_include_files(java_cp_include_files); java_class_loader.add_load_classes(java_load_classes); + if(string_refinement_enabled) + { + string_preprocess.initialize_known_type_table(); + + auto get_string_base_classes = [this](const irep_idt &id) { // NOLINT (*) + return string_preprocess.get_string_type_base_classes(id); + }; + + java_class_loader.set_extra_class_refs_function(get_string_base_classes); + } // look at extension if(has_suffix(path, ".class")) diff --git a/src/java_bytecode/java_class_loader.cpp b/src/java_bytecode/java_class_loader.cpp index 838d47f9970..f4c2ffb5d88 100644 --- a/src/java_bytecode/java_class_loader.cpp +++ b/src/java_bytecode/java_class_loader.cpp @@ -72,6 +72,16 @@ java_bytecode_parse_treet &java_class_loadert::operator()( it!=parse_tree.class_refs.end(); it++) queue.push(*it); + + // Add any extra dependencies provided by our caller: + if(get_extra_class_refs) + { + std::vector extra_class_refs = + get_extra_class_refs(c); + + for(const irep_idt &id : extra_class_refs) + queue.push(id); + } } return class_map[class_name]; @@ -83,6 +93,16 @@ void java_class_loadert::set_java_cp_include_files( java_cp_include_files=_java_cp_include_files; } +/// Sets a function that provides extra dependencies for a particular class. +/// Currently used by the string preprocessor to note that even if we don't +/// have a definition of core string types, it will nontheless give them +/// certain known superclasses and interfaces, such as Serializable. +void java_class_loadert::set_extra_class_refs_function( + java_class_loadert::get_extra_class_refs_functiont func) +{ + get_extra_class_refs = func; +} + /// Retrieves a class file from a jar and loads it into the tree bool java_class_loadert::get_class_file( java_class_loader_limitt &class_loader_limit, diff --git a/src/java_bytecode/java_class_loader.h b/src/java_bytecode/java_class_loader.h index 0fbacb26a65..58241e6536a 100644 --- a/src/java_bytecode/java_class_loader.h +++ b/src/java_bytecode/java_class_loader.h @@ -34,6 +34,12 @@ class java_class_loadert:public messaget void set_java_cp_include_files(std::string &); void add_load_classes(const std::vector &); + /// A function that yields a list of extra dependencies based on a class name. + typedef std::function(const irep_idt &)> + get_extra_class_refs_functiont; + + void set_extra_class_refs_function(get_extra_class_refs_functiont func); + static std::string file_to_class_name(const std::string &); static std::string class_name_to_file(const irep_idt &); @@ -133,6 +139,7 @@ class java_class_loadert:public messaget private: std::map m_archives; std::vector java_load_classes; + get_extra_class_refs_functiont get_extra_class_refs; }; #endif // CPROVER_JAVA_BYTECODE_JAVA_CLASS_LOADER_H diff --git a/src/java_bytecode/java_string_library_preprocess.cpp b/src/java_bytecode/java_string_library_preprocess.cpp index 0d57077964f..936ea85ac5c 100644 --- a/src/java_bytecode/java_string_library_preprocess.cpp +++ b/src/java_bytecode/java_string_library_preprocess.cpp @@ -186,6 +186,34 @@ typet string_length_type() return java_int_type(); } +/// Gets the base classes for known String and String-related types, or returns +/// an empty list for other types. +/// \param class_name: class identifier, without "java::" prefix. +/// \return a list of base classes, again without "java::" prefix. +std::vector +java_string_library_preprocesst::get_string_type_base_classes( + const irep_idt &class_name) +{ + if(!is_known_string_type(class_name)) + return {}; + + std::vector bases; + bases.reserve(3); + if(class_name != "java.lang.CharSequence") + { + bases.push_back("java.io.Serializable"); + bases.push_back("java.lang.CharSequence"); + } + if(class_name == "java.lang.String") + bases.push_back("java.lang.Comparable"); + + if(class_name == "java.lang.StringBuilder" || + class_name == "java.lang.StringBuffer") + bases.push_back("java.lang.AbstractStringBuilder"); + + return bases; +} + /// Add to the symbol table type declaration for a String-like Java class. /// \param class_name: a name for the class such as "java.lang.String" /// \param symbol_table: symbol table to which the class will be added @@ -206,17 +234,10 @@ void java_string_library_preprocesst::add_string_type( string_type.components()[2].type() = pointer_type(java_char_type()); string_type.set_access(ID_public); string_type.add_base(symbol_typet("java::java.lang.Object")); - if(class_name!="java.lang.CharSequence") - { - string_type.add_base(symbol_typet("java::java.io.Serializable")); - string_type.add_base(symbol_typet("java::java.lang.CharSequence")); - } - if(class_name=="java.lang.String") - string_type.add_base(symbol_typet("java::java.lang.Comparable")); - if(class_name=="java.lang.StringBuilder" || - class_name=="java.lang.StringBuffer") - string_type.add_base(symbol_typet("java::java.lang.AbstractStringBuilder")); + std::vector bases = get_string_type_base_classes(class_name); + for(const irep_idt &base_name : bases) + string_type.add_base(symbol_typet("java::" + id2string(base_name))); symbolt tmp_string_symbol; tmp_string_symbol.name="java::"+id2string(class_name); @@ -1848,16 +1869,19 @@ bool java_string_library_preprocesst::is_known_string_type( return string_types.find(class_name)!=string_types.end(); } -/// fill maps with correspondence from java method names to conversion functions -void java_string_library_preprocesst::initialize_conversion_table() +void java_string_library_preprocesst::initialize_known_type_table() { - character_preprocess.initialize_conversion_table(); - string_types= std::unordered_set{"java.lang.String", "java.lang.StringBuilder", "java.lang.CharSequence", "java.lang.StringBuffer"}; +} + +/// fill maps with correspondence from java method names to conversion functions +void java_string_library_preprocesst::initialize_conversion_table() +{ + character_preprocess.initialize_conversion_table(); // The following list of function is organized by libraries, with // constructors first and then methods in alphabetic order. diff --git a/src/java_bytecode/java_string_library_preprocess.h b/src/java_bytecode/java_string_library_preprocess.h index 59b05304f28..c3bedd13734 100644 --- a/src/java_bytecode/java_string_library_preprocess.h +++ b/src/java_bytecode/java_string_library_preprocess.h @@ -43,6 +43,7 @@ class java_string_library_preprocesst:public messaget { } + void initialize_known_type_table(); void initialize_conversion_table(); void initialize_refined_string_type(); @@ -56,6 +57,8 @@ class java_string_library_preprocesst:public messaget { return character_preprocess.replace_character_call(call); } + std::vector get_string_type_base_classes( + const irep_idt &class_name); void add_string_type(const irep_idt &class_name, symbol_tablet &symbol_table); bool is_known_string_type(irep_idt class_name);