From 4eda8ad48999cfaedf510768903f37700ed80ff9 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Thu, 24 May 2018 18:07:48 +0100 Subject: [PATCH] Java class literals: init using a library hook when possible The Java library definition of java.lang.Class, when nondet initialized, can be quite time consuming due to the enumeration constant dictionary's internal array. Instead, let's give the library a chance to set Class constants (literals like MyClass.class) itself. While we're at it we might as well gain the ability to prove some properties of the literals. We currently supply those class attributes that are easy to come by in the parse tree; with a little more effort in the parser IsLocalClass and IsMemberClass could be determined. --- .../class-literals/ExampleAnnotation.class | Bin 0 -> 154 bytes .../class-literals/ExampleAnnotation.java | 4 + .../jbmc/class-literals/ExampleEnum.class | Bin 0 -> 668 bytes .../jbmc/class-literals/ExampleEnum.java | 5 + .../class-literals/ExampleInterface.class | Bin 0 -> 113 bytes .../jbmc/class-literals/ExampleInterface.java | 4 + .../class-literals/ExampleSynthetic.class | Bin 0 -> 110 bytes .../jbmc/class-literals/ExampleSynthetic.j | 4 + .../regression/jbmc/class-literals/Test.class | Bin 0 -> 1263 bytes jbmc/regression/jbmc/class-literals/Test.java | 35 ++++++ .../jbmc/class-literals/java/lang/Class.class | Bin 0 -> 1025 bytes .../jbmc/class-literals/java/lang/Class.java | 45 +++++++ jbmc/regression/jbmc/class-literals/test.desc | 8 ++ .../jbmc/class-literals/test_lazy.desc | 10 ++ jbmc/src/java_bytecode/ci_lazy_methods.cpp | 42 ++++++- .../java_bytecode_convert_class.cpp | 3 + .../java_bytecode/java_bytecode_language.cpp | 5 +- .../java_bytecode/java_bytecode_language.h | 2 + .../java_bytecode/java_bytecode_parse_tree.h | 3 + .../java_bytecode/java_bytecode_parser.cpp | 5 + jbmc/src/java_bytecode/java_entry_point.cpp | 114 +++++++++++++++++- jbmc/src/java_bytecode/java_entry_point.h | 5 +- src/util/irep_ids.def | 3 + 23 files changed, 288 insertions(+), 9 deletions(-) create mode 100644 jbmc/regression/jbmc/class-literals/ExampleAnnotation.class create mode 100644 jbmc/regression/jbmc/class-literals/ExampleAnnotation.java create mode 100644 jbmc/regression/jbmc/class-literals/ExampleEnum.class create mode 100644 jbmc/regression/jbmc/class-literals/ExampleEnum.java create mode 100644 jbmc/regression/jbmc/class-literals/ExampleInterface.class create mode 100644 jbmc/regression/jbmc/class-literals/ExampleInterface.java create mode 100644 jbmc/regression/jbmc/class-literals/ExampleSynthetic.class create mode 100644 jbmc/regression/jbmc/class-literals/ExampleSynthetic.j create mode 100644 jbmc/regression/jbmc/class-literals/Test.class create mode 100644 jbmc/regression/jbmc/class-literals/Test.java create mode 100644 jbmc/regression/jbmc/class-literals/java/lang/Class.class create mode 100644 jbmc/regression/jbmc/class-literals/java/lang/Class.java create mode 100644 jbmc/regression/jbmc/class-literals/test.desc create mode 100644 jbmc/regression/jbmc/class-literals/test_lazy.desc diff --git a/jbmc/regression/jbmc/class-literals/ExampleAnnotation.class b/jbmc/regression/jbmc/class-literals/ExampleAnnotation.class new file mode 100644 index 0000000000000000000000000000000000000000..ab1a979353344d43509e2474ef3ed0e848504b81 GIT binary patch literal 154 zcmX^0Z`VEs1_l!bPId-1b_RBK1`b9BuHgLAqU2P!%$!t41~J!)#N2|MRL8u${F20y z%=|pPti-ZJMg~C)2}TA1kd%H-VqUtwe^ORzatR}YJVGcDZiGIYwP)~{!Vh~6^SibZ$*OJMcp-%d<`T-R|;d6gf zbiWxkwGHm}-Fxo2=iKk>`~4HZ5wZb@*p#t_goJ>EZ5a}hJnTr=Wsvq-r}e8^lOc3n zuidM?J5#S&cCW^SV=HEtO)xq4R} zt3^|_I>qK_U|5|}esM(Eu#8cKA^s2eJl|pnG>wj>j_d((dvZMEFKgy+SIRYki!RU6 zSIC3h?ArrPKQp+th|Avr*QUVFcM?#LLYg7ysw)T~qacD++RUF1K83-nnSF~0W4XLb zL8IN(wGjTp4 z=YdWPD-?ZBO(1*-;Tc|VvT{@`yumwxxQrLLLkJFk3Lbg^sj#1Xh3|2RJW3e>Ya}j4 aaVje9i~#(e4~WlT%UuGded~xj{Tn~}+=My+ literal 0 HcmV?d00001 diff --git a/jbmc/regression/jbmc/class-literals/ExampleEnum.java b/jbmc/regression/jbmc/class-literals/ExampleEnum.java new file mode 100644 index 00000000000..9151252d7f0 --- /dev/null +++ b/jbmc/regression/jbmc/class-literals/ExampleEnum.java @@ -0,0 +1,5 @@ + +public enum ExampleEnum { + +} + diff --git a/jbmc/regression/jbmc/class-literals/ExampleInterface.class b/jbmc/regression/jbmc/class-literals/ExampleInterface.class new file mode 100644 index 0000000000000000000000000000000000000000..3669f3afa5c7c995b597616cd25ef0beeef84b91 GIT binary patch literal 113 zcmX^0Z`VEs1_l!bc6J6@1{MG|fgE1| literal 0 HcmV?d00001 diff --git a/jbmc/regression/jbmc/class-literals/ExampleInterface.java b/jbmc/regression/jbmc/class-literals/ExampleInterface.java new file mode 100644 index 00000000000..5f6dafbc4d2 --- /dev/null +++ b/jbmc/regression/jbmc/class-literals/ExampleInterface.java @@ -0,0 +1,4 @@ + +interface ExampleInterface { + +} diff --git a/jbmc/regression/jbmc/class-literals/ExampleSynthetic.class b/jbmc/regression/jbmc/class-literals/ExampleSynthetic.class new file mode 100644 index 0000000000000000000000000000000000000000..3be8707e65e122d1407cb9531d586c87d67f3726 GIT binary patch literal 110 zcmX^0Z`VEsW(Hjbc6J69b_P~P1|ipq#N2|M)Zogzl8n@n%w)YRMh1bb#Ii*FoW#6z segCAa)Z`MN5V|@>2Cm@z(xT*4x6GVW0YwHT21W)5U}RteGME^c0p431p8x;= literal 0 HcmV?d00001 diff --git a/jbmc/regression/jbmc/class-literals/ExampleSynthetic.j b/jbmc/regression/jbmc/class-literals/ExampleSynthetic.j new file mode 100644 index 00000000000..0a18d25c87b --- /dev/null +++ b/jbmc/regression/jbmc/class-literals/ExampleSynthetic.j @@ -0,0 +1,4 @@ +; Compile me with Jasmin 2.1+ (https://sourceforge.net/projects/jasmin/) + +.class public synthetic ExampleSynthetic +.super java/lang/Object diff --git a/jbmc/regression/jbmc/class-literals/Test.class b/jbmc/regression/jbmc/class-literals/Test.class new file mode 100644 index 0000000000000000000000000000000000000000..ef7d26bc773c3f56d66c6ea0686d47653139b1ed GIT binary patch literal 1263 zcmZvb%Tg0T6o&uKg`Ok>BomUL7!*Y%phOh$0&-6j1YCr&R8~XM#KB}j%}gxW`xMz* zZoO26Dy`C;3tzyEPvHYto*olIk(tFg-Cv*XKIi;BKmUID24Du8VWg1>VF=?K6JcD# zb&km}g2-@W<@^T6l=L?_Zb?7Qaa;NsjyoK8Iqq@X=a}V~<9NU^FBcaSOen}InBrKJ zV2NXyAwH_xwy+$dX4K+6|2ZG49ehaMK{aYrFO14%c@xn(bl##(-GF5UKA?wa!aJeb7sA& z;-N(C(88hVycCX6RPji1-wxfg_J356(=}<wfNvHib^VJD8;q-hKk68fOz;gojA284tvkSc|;279gUM%hO-v}wp_dp!zl3*8HEDkT)<>1j=gJvnp~WO zi638hQ`Yk~z7CU5ej=y3a1>3w;5hIW3xPGcc<0Y2etdg3%E| zjI$g^AK9xulA(XPR0SWrNx)2J`Z6+S_K8wm#2bits}eZNCO?|p(AZ{q?3*`W3(XDc+>~Hbr=O> z+5G|WodO(xDD)fSuRmMYin4;jeQxx4!ijwP;Qr0`=6o%^ #include #include @@ -52,6 +53,31 @@ ci_lazy_methodst::ci_lazy_methodst( class_hierarchy(symbol_table); } +/// Checks if an expression refers to any class literals (e.g. MyType.class) +/// These are expressed as ldc instructions in Java bytecode, and as symbols +/// of the form `MyType@class_model` in GOTO programs. +/// \param expr: expression to check +/// \return true if the expression or any of its subexpressions refer to a +/// class +static bool references_class_model(const exprt &expr) +{ + static const symbol_typet class_type("java::java.lang.Class"); + + for(auto it = expr.depth_begin(); it != expr.depth_end(); ++it) + { + if(can_cast_expr(*it) && + it->type() == class_type && + has_suffix( + id2string(to_symbol_expr(*it).get_identifier()), + JAVA_CLASS_MODEL_SUFFIX)) + { + return true; + } + } + + return false; +} + /// Uses a simple context-insensitive ('ci') analysis to determine which methods /// may be reachable from the main entry point. In brief, static methods are /// reachable if we find a callsite in another reachable site, while virtual @@ -122,6 +148,7 @@ bool ci_lazy_methodst::operator()( std::unordered_set methods_already_populated; std::unordered_set virtual_function_calls; + bool class_initializer_seen = false; bool any_new_classes = true; while(any_new_classes) @@ -149,8 +176,19 @@ bool ci_lazy_methodst::operator()( // Couldn't convert this function continue; } - gather_virtual_callsites( - symbol_table.lookup_ref(mname).value, virtual_function_calls); + const exprt &method_body = symbol_table.lookup_ref(mname).value; + + gather_virtual_callsites(method_body, virtual_function_calls); + + if(!class_initializer_seen && references_class_model(method_body)) + { + class_initializer_seen = true; + irep_idt initializer_signature = + get_java_class_literal_initializer_signature(); + if(symbol_table.has_symbol(initializer_signature)) + methods_to_convert_later.insert(initializer_signature); + } + any_new_methods=true; } } diff --git a/jbmc/src/java_bytecode/java_bytecode_convert_class.cpp b/jbmc/src/java_bytecode/java_bytecode_convert_class.cpp index 706b61a0ecc..dbc8054390c 100644 --- a/jbmc/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_convert_class.cpp @@ -265,6 +265,9 @@ void java_bytecode_convert_classt::convert( class_type.set_tag(c.name); class_type.set(ID_base_name, c.name); class_type.set(ID_abstract, c.is_abstract); + class_type.set(ID_is_annotation, c.is_annotation); + class_type.set(ID_interface, c.is_interface); + class_type.set(ID_synthetic, c.is_synthetic); class_type.set_final(c.is_final); if(c.is_enum) { diff --git a/jbmc/src/java_bytecode/java_bytecode_language.cpp b/jbmc/src/java_bytecode/java_bytecode_language.cpp index f96b23a38ba..a3130545edb 100644 --- a/jbmc/src/java_bytecode/java_bytecode_language.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_language.cpp @@ -299,7 +299,7 @@ static symbol_exprt get_or_create_class_literal_symbol( { symbol_typet java_lang_Class("java::java.lang.Class"); symbol_exprt symbol_expr( - id2string(class_id)+"@class_model", + id2string(class_id) + JAVA_CLASS_MODEL_SUFFIX, java_lang_Class); if(!symbol_table.has_symbol(symbol_expr.get_identifier())) { @@ -785,7 +785,8 @@ bool java_bytecode_languaget::generate_support_functions( get_message_handler(), assume_inputs_non_null, object_factory_parameters, - get_pointer_type_selector()); + get_pointer_type_selector(), + string_refinement_enabled); } /// Uses a simple context-insensitive ('ci') analysis to determine which methods diff --git a/jbmc/src/java_bytecode/java_bytecode_language.h b/jbmc/src/java_bytecode/java_bytecode_language.h index 29c347fe227..0aabc7afc14 100644 --- a/jbmc/src/java_bytecode/java_bytecode_language.h +++ b/jbmc/src/java_bytecode/java_bytecode_language.h @@ -68,6 +68,8 @@ enum lazy_methods_modet LAZY_METHODS_MODE_EXTERNAL_DRIVER }; +#define JAVA_CLASS_MODEL_SUFFIX "@class_model" + class java_bytecode_languaget:public languaget { public: diff --git a/jbmc/src/java_bytecode/java_bytecode_parse_tree.h b/jbmc/src/java_bytecode/java_bytecode_parse_tree.h index 4ff281c12b2..c5762137152 100644 --- a/jbmc/src/java_bytecode/java_bytecode_parse_tree.h +++ b/jbmc/src/java_bytecode/java_bytecode_parse_tree.h @@ -209,6 +209,9 @@ class java_bytecode_parse_treet bool is_enum=false; bool is_public=false, is_protected=false, is_private=false; bool is_final = false; + bool is_interface = false; + bool is_synthetic = false; + bool is_annotation = false; bool attribute_bootstrapmethods_read = false; size_t enum_elements=0; diff --git a/jbmc/src/java_bytecode/java_bytecode_parser.cpp b/jbmc/src/java_bytecode/java_bytecode_parser.cpp index 18fb937234a..735824954f6 100644 --- a/jbmc/src/java_bytecode/java_bytecode_parser.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_parser.cpp @@ -451,9 +451,11 @@ bool java_bytecode_parsert::parse() #define ACC_BRIDGE 0x0040 #define ACC_VARARGS 0x0080 #define ACC_NATIVE 0x0100 +#define ACC_INTERFACE 0x0200 #define ACC_ABSTRACT 0x0400 #define ACC_STRICT 0x0800 #define ACC_SYNTHETIC 0x1000 +#define ACC_ANNOTATION 0x2000 #define ACC_ENUM 0x4000 #ifdef _MSC_VER @@ -496,6 +498,9 @@ void java_bytecode_parsert::rClassFile() parsed_class.is_protected=(access_flags&ACC_PROTECTED)!=0; parsed_class.is_private=(access_flags&ACC_PRIVATE)!=0; parsed_class.is_final = (access_flags & ACC_FINAL) != 0; + parsed_class.is_interface = (access_flags & ACC_INTERFACE) != 0; + parsed_class.is_synthetic = (access_flags & ACC_SYNTHETIC) != 0; + parsed_class.is_annotation = (access_flags & ACC_ANNOTATION) != 0; parsed_class.name= constant(this_class).type().get(ID_C_base_name); diff --git a/jbmc/src/java_bytecode/java_entry_point.cpp b/jbmc/src/java_bytecode/java_entry_point.cpp index 62b18d3dcf7..9181dc48026 100644 --- a/jbmc/src/java_bytecode/java_entry_point.cpp +++ b/jbmc/src/java_bytecode/java_entry_point.cpp @@ -17,6 +17,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include "java_object_factory.h" +#include "java_string_literals.h" #include "java_utils.h" static void create_initialize(symbol_table_baset &symbol_table) @@ -65,12 +66,60 @@ static bool should_init_symbol(const symbolt &sym) return is_java_string_literal_id(sym.name); } +/// Get the symbol name of java.lang.Class' initializer method. This method +/// should initialize a Class instance with known properties of the type it +/// represents, such as its name, whether it is abstract, or an enumeration, +/// etc. The method may or may not exist in any particular symbol table; it is +/// up to the caller to check. +/// The method's Java signature is: +/// void cproverInitializeClassLiteral( +/// String name, +/// boolean isAnnotation, +/// boolean isArray, +/// boolean isInterface, +/// boolean isSynthetic, +/// boolean isLocalClass, +/// boolean isMemberClass, +/// boolean isEnum); +/// \return Class initializer method's symbol name. +irep_idt get_java_class_literal_initializer_signature() +{ + static irep_idt signature = + "java::java.lang.Class.cproverInitializeClassLiteral:" + "(Ljava/lang/String;ZZZZZZZ)V"; + return signature; +} + +/// If symbol is a class literal, and an appropriate initializer method exists, +/// return a pointer to its symbol. If not, return null. +/// \param symbol: possible class literal symbol +/// \param symbol_table: table to search +/// \return pointer to the initializer method symbol or null +static const symbolt *get_class_literal_initializer( + const symbolt &symbol, + const symbol_table_baset &symbol_table) +{ + if(symbol.value.is_not_nil()) + return nullptr; + if(symbol.type != symbol_typet("java::java.lang.Class")) + return nullptr; + if(!has_suffix(id2string(symbol.name), JAVA_CLASS_MODEL_SUFFIX)) + return nullptr; + return symbol_table.lookup(get_java_class_literal_initializer_signature()); +} + +static constant_exprt constant_bool(bool val) +{ + return from_integer(val ? 1 : 0, java_boolean_type()); +} + static void java_static_lifetime_init( symbol_table_baset &symbol_table, const source_locationt &source_location, bool assume_init_pointers_not_null, const object_factory_parameterst &object_factory_parameters, - const select_pointer_typet &pointer_type_selector) + const select_pointer_typet &pointer_type_selector, + bool string_refinement_enabled) { symbolt &initialize_symbol=*symbol_table.get_writeable(INITIALIZE_FUNCTION); code_blockt &code_block=to_code_block(to_code(initialize_symbol.value)); @@ -88,7 +137,62 @@ static void java_static_lifetime_init( const symbolt &sym=*symbol_table.lookup(symname); if(should_init_symbol(sym)) { - if(sym.value.is_nil() && sym.type!=empty_typet()) + if(const symbolt *class_literal_init_method = + get_class_literal_initializer(sym, symbol_table)) + { + const std::string &name_str = id2string(sym.name); + irep_idt class_name = + name_str.substr(0, name_str.size() - strlen(JAVA_CLASS_MODEL_SUFFIX)); + const symbolt &class_symbol = symbol_table.lookup_ref(class_name); + + bool class_is_array = is_java_array_tag(sym.name); + + exprt name_literal(ID_java_string_literal); + name_literal.set(ID_value, to_class_type(class_symbol.type).get_tag()); + + symbol_exprt class_name_literal = + get_or_create_string_literal_symbol( + name_literal, symbol_table, string_refinement_enabled); + + // Call the literal initializer method instead of a nondet initializer: + + // For arguments we can't parse yet: + side_effect_expr_nondett nondet_bool(java_boolean_type()); + + // Argument order is: name, isAnnotation, isArray, isInterface, + // isSynthetic, isLocalClass, isMemberClass, isEnum + + code_function_callt initializer_call; + initializer_call.function() = class_literal_init_method->symbol_expr(); + + code_function_callt::argumentst &args = initializer_call.arguments(); + + // this: + args.push_back(address_of_exprt(sym.symbol_expr())); + // name: + args.push_back(address_of_exprt(class_name_literal)); + // isAnnotation: + args.push_back( + constant_bool(class_symbol.type.get_bool(ID_is_annotation))); + // isArray: + args.push_back(constant_bool(class_is_array)); + // isInterface: + args.push_back( + constant_bool(class_symbol.type.get_bool(ID_interface))); + // isSynthetic: + args.push_back( + constant_bool(class_symbol.type.get_bool(ID_synthetic))); + // isLocalClass: + args.push_back(nondet_bool); + // isMemberClass: + args.push_back(nondet_bool); + // isEnum: + args.push_back( + constant_bool(class_symbol.type.get_bool(ID_enumeration))); + + code_block.move_to_operands(initializer_call); + } + else if(sym.value.is_nil() && sym.type!=empty_typet()) { bool allow_null=!assume_init_pointers_not_null; if(allow_null) @@ -403,7 +507,8 @@ bool java_entry_point( message_handlert &message_handler, bool assume_init_pointers_not_null, const object_factory_parameterst &object_factory_parameters, - const select_pointer_typet &pointer_type_selector) + const select_pointer_typet &pointer_type_selector, + bool string_refinement_enabled) { // check if the entry point is already there if(symbol_table.symbols.find(goto_functionst::entry_point())!= @@ -426,7 +531,8 @@ bool java_entry_point( symbol.location, assume_init_pointers_not_null, object_factory_parameters, - pointer_type_selector); + pointer_type_selector, + string_refinement_enabled); return generate_java_start_function( symbol, diff --git a/jbmc/src/java_bytecode/java_entry_point.h b/jbmc/src/java_bytecode/java_entry_point.h index 006334ff99c..3fcdee49388 100644 --- a/jbmc/src/java_bytecode/java_entry_point.h +++ b/jbmc/src/java_bytecode/java_entry_point.h @@ -25,7 +25,8 @@ bool java_entry_point( class message_handlert &message_handler, bool assume_init_pointers_not_null, const object_factory_parameterst &object_factory_parameters, - const select_pointer_typet &pointer_type_selector); + const select_pointer_typet &pointer_type_selector, + bool string_refinement_enabled); struct main_function_resultt { @@ -60,6 +61,8 @@ struct main_function_resultt } }; +irep_idt get_java_class_literal_initializer_signature(); + /// Figures out the entry point of the code to verify main_function_resultt get_main_symbol( const symbol_table_baset &symbol_table, diff --git a/src/util/irep_ids.def b/src/util/irep_ids.def index be80594c8ce..7f5df683308 100644 --- a/src/util/irep_ids.def +++ b/src/util/irep_ids.def @@ -661,10 +661,13 @@ IREP_ID_TWO(overflow_shl, overflow-shl) IREP_ID_TWO(C_no_initialization_required, #no_initialization_required) IREP_ID_TWO(overlay_class, java::com.diffblue.OverlayClassImplementation) IREP_ID_TWO(overlay_method, java::com.diffblue.OverlayMethodImplementation) +IREP_ID_ONE(is_annotation) IREP_ID_TWO(C_annotations, #annotations) IREP_ID_ONE(final) IREP_ID_ONE(bits_per_byte) IREP_ID_TWO(C_abstract, #abstract) +IREP_ID_ONE(synthetic) +IREP_ID_ONE(interface) // Projects depending on this code base that wish to extend the list of // available ids should provide a file local_irep_ids.h in their source tree and