From 2903cddb58f6ee99116e0751a2305f75f9a86461 Mon Sep 17 00:00:00 2001 From: Tianjie Xu Date: Fri, 18 Aug 2017 18:15:47 -0700 Subject: Improve imgdiff for large zip files Due to the cache size limit for OTA generation, we used to split large zip files linearly into pieces and do bsdiff on them. As a result, i) we lose the advantage of imgdiff; ii) if there's an accidental order change of some huge files inside the zip, we'll create an insanely large patch. This patch splits the src&tgt more smartly based on the zip entry_name. If the entry_name is empty or no matching source is found for a target chunk, we'll skip adding its source and later do a bsdiff against the whole split source image (this rarely happens in our use cases except for the metadata inside a ziparchive). After the split, the target pieces are continuous and block aligned, while the sources pieces are mutually exclusive. (Some of the source blocks may not be used if there's no matching entry_name in the target.) Then we will generate patches accordingly between each split image pairs. Afterwards, if we apply imgpatch to each pair of split source/target images and add up the patched result, we can get back the original target image. For example: Input: [src_image, tgt_image] Split: [src-0,tgt-0; src-1,tgt-1, src-2,tgt-2] Diff: [ patch-0; patch-1; patch-2] Patch: [(src-0,patch-0)=tgt-0; (src-1,patch-1)=tgt-1; (src-2,patch-2)=tgt-2;] Append: [tgt-0 + tgt-1 + tgt-2 = tgt_image] Peformance: For the small package in b/34220646, we decrease the patch size of chrome.apk dramatically from 30M to 400K due to the order change of two big .so files. On two versions of angler, I also observe decent patch size decrease. For chrome.apk, we reduced the size from 5.9M to 3.2M; and for vevlet.apk from 8.0M to 6.5M. Bug: 34220646 Test: recovery component test && apply imgdiff & imgpatch on two chrome.apk Change-Id: I145d802984fa805efbbac9d01a2e64d82ef9728b --- tests/component/imgdiff_test.cpp | 339 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 334 insertions(+), 5 deletions(-) (limited to 'tests') diff --git a/tests/component/imgdiff_test.cpp b/tests/component/imgdiff_test.cpp index bf25aebb0..3163a57cf 100644 --- a/tests/component/imgdiff_test.cpp +++ b/tests/component/imgdiff_test.cpp @@ -16,13 +16,17 @@ #include +#include #include +#include #include #include #include +#include #include #include +#include #include #include #include @@ -75,15 +79,20 @@ static void verify_patch_header(const std::string& patch, size_t* num_normal, si if (num_deflate != nullptr) *num_deflate = deflate; } -static void verify_patched_image(const std::string& src, const std::string& patch, - const std::string& tgt) { - std::string patched; +static void GenerateTarget(const std::string& src, const std::string& patch, std::string* patched) { + patched->clear(); ASSERT_EQ(0, ApplyImagePatch(reinterpret_cast(src.data()), src.size(), reinterpret_cast(patch.data()), patch.size(), - [&patched](const unsigned char* data, size_t len) { - patched.append(reinterpret_cast(data), len); + [&](const unsigned char* data, size_t len) { + patched->append(reinterpret_cast(data), len); return len; })); +} + +static void verify_patched_image(const std::string& src, const std::string& patch, + const std::string& tgt) { + std::string patched; + GenerateTarget(src, patch, &patched); ASSERT_EQ(tgt, patched); } @@ -623,3 +632,323 @@ TEST(ImgpatchTest, image_mode_patch_corruption) { reinterpret_cast(patch.data()), patch.size(), [](const unsigned char* /*data*/, size_t len) { return len; })); } + +static void construct_store_entry(const std::vector>& info, + ZipWriter* writer) { + for (auto& t : info) { + // Create t(1) blocks of t(2), and write the data to t(0) + ASSERT_EQ(0, writer->StartEntry(std::get<0>(t).c_str(), 0)); + const std::string content(std::get<1>(t) * 4096, std::get<2>(t)); + ASSERT_EQ(0, writer->WriteBytes(content.data(), content.size())); + ASSERT_EQ(0, writer->FinishEntry()); + } +} + +static void construct_deflate_entry(const std::vector>& info, + ZipWriter* writer, const std::string& data) { + for (auto& t : info) { + // t(0): entry_name; t(1): block offset; t(2) length in blocks. + ASSERT_EQ(0, writer->StartEntry(std::get<0>(t).c_str(), ZipWriter::kCompress)); + ASSERT_EQ(0, writer->WriteBytes(data.data() + std::get<1>(t) * 4096, std::get<2>(t) * 4096)); + ASSERT_EQ(0, writer->FinishEntry()); + } +} + +// Look for the generated source and patch pieces in the debug_dir and generate the target on +// each pair. Concatenate the split target and match against the orignal one. +static void GenerateAndCheckSplitTarget(const std::string& debug_dir, size_t count, + const std::string& tgt) { + std::string patched; + for (size_t i = 0; i < count; i++) { + std::string split_src_path = android::base::StringPrintf("%s/src-%zu", debug_dir.c_str(), i); + std::string split_patch_path = android::base::StringPrintf("%s/patch-%zu", debug_dir.c_str(), i); + + std::string split_src; + std::string split_patch; + ASSERT_TRUE(android::base::ReadFileToString(split_src_path, &split_src)); + ASSERT_TRUE(android::base::ReadFileToString(split_patch_path, &split_patch)); + + std::string split_tgt; + GenerateTarget(split_src, split_patch, &split_tgt); + patched += split_tgt; + } + + // Verify we can get back the original target image. + ASSERT_EQ(tgt, patched); +} + +std::vector ConstructImageChunks( + const std::vector& content, const std::vector>& info) { + std::vector chunks; + size_t start = 0; + for (const auto& t : info) { + size_t length = std::get<1>(t); + chunks.emplace_back(CHUNK_NORMAL, start, &content, length, std::get<0>(t)); + start += length; + } + + return chunks; +} + +TEST(ImgdiffTest, zip_mode_split_image_smoke) { + std::vector content; + content.reserve(4096 * 50); + uint8_t n = 0; + generate_n(back_inserter(content), 4096 * 50, [&n]() { return n++ / 4096; }); + + ZipModeImage tgt_image(false, 4096 * 10); + std::vector tgt_chunks = ConstructImageChunks(content, { { "a", 100 }, + { "b", 4096 * 2 }, + { "c", 4096 * 3 }, + { "d", 300 }, + { "e-0", 4096 * 10 }, + { "e-1", 4096 * 5 }, + { "CD", 200 } }); + tgt_image.Initialize(std::move(tgt_chunks), + std::vector(content.begin(), content.begin() + 82520)); + + tgt_image.DumpChunks(); + + ZipModeImage src_image(true, 4096 * 10); + std::vector src_chunks = ConstructImageChunks(content, { { "b", 4096 * 3 }, + { "c-0", 4096 * 10 }, + { "c-1", 4096 * 2 }, + { "a", 4096 * 5 }, + { "e-0", 4096 * 10 }, + { "e-1", 10000 }, + { "CD", 5000 } }); + src_image.Initialize(std::move(src_chunks), + std::vector(content.begin(), content.begin() + 137880)); + + std::vector split_tgt_images; + std::vector split_src_images; + std::vector split_src_ranges; + + ZipModeImage::SplitZipModeImageWithLimit(tgt_image, src_image, &split_tgt_images, + &split_src_images, &split_src_ranges); + + // src_piece 1: a 5 blocks, b 3 blocks + // src_piece 2: c-0 10 blocks + // src_piece 3: d 0 block, e-0 10 blocks + // src_piece 4: e-1 2 blocks; CD 2 blocks + ASSERT_EQ(split_tgt_images.size(), split_src_images.size()); + ASSERT_EQ(static_cast(4), split_tgt_images.size()); + + ASSERT_EQ(static_cast(1), split_tgt_images[0].NumOfChunks()); + ASSERT_EQ(static_cast(12288), split_tgt_images[0][0].DataLengthForPatch()); + ASSERT_EQ("4,0,3,15,20", split_src_ranges[0].ToString()); + + ASSERT_EQ(static_cast(1), split_tgt_images[1].NumOfChunks()); + ASSERT_EQ(static_cast(12288), split_tgt_images[1][0].DataLengthForPatch()); + ASSERT_EQ("2,3,13", split_src_ranges[1].ToString()); + + ASSERT_EQ(static_cast(1), split_tgt_images[2].NumOfChunks()); + ASSERT_EQ(static_cast(40960), split_tgt_images[2][0].DataLengthForPatch()); + ASSERT_EQ("2,20,30", split_src_ranges[2].ToString()); + + ASSERT_EQ(static_cast(1), split_tgt_images[3].NumOfChunks()); + ASSERT_EQ(static_cast(16984), split_tgt_images[3][0].DataLengthForPatch()); + ASSERT_EQ("2,30,34", split_src_ranges[3].ToString()); +} + +TEST(ImgdiffTest, zip_mode_store_large_apk) { + // Construct src and tgt zip files with limit = 10 blocks. + // src tgt + // 12 blocks 'd' 3 blocks 'a' + // 8 blocks 'c' 3 blocks 'b' + // 3 blocks 'b' 8 blocks 'c' (exceeds limit) + // 3 blocks 'a' 12 blocks 'd' (exceeds limit) + // 3 blocks 'e' + TemporaryFile tgt_file; + FILE* tgt_file_ptr = fdopen(tgt_file.fd, "wb"); + ZipWriter tgt_writer(tgt_file_ptr); + construct_store_entry( + { { "a", 3, 'a' }, { "b", 3, 'b' }, { "c", 8, 'c' }, { "d", 12, 'd' }, { "e", 3, 'e' } }, + &tgt_writer); + ASSERT_EQ(0, tgt_writer.Finish()); + ASSERT_EQ(0, fclose(tgt_file_ptr)); + + TemporaryFile src_file; + FILE* src_file_ptr = fdopen(src_file.fd, "wb"); + ZipWriter src_writer(src_file_ptr); + construct_store_entry({ { "d", 12, 'd' }, { "c", 8, 'c' }, { "b", 3, 'b' }, { "a", 3, 'a' } }, + &src_writer); + ASSERT_EQ(0, src_writer.Finish()); + ASSERT_EQ(0, fclose(src_file_ptr)); + + // Compute patch. + TemporaryFile patch_file; + TemporaryDir debug_dir; + std::vector args = { + "imgdiff", "-z", "--block-limit=10", android::base::StringPrintf( + "--debug-dir=%s", debug_dir.path).c_str(), src_file.path, tgt_file.path, patch_file.path, + }; + ASSERT_EQ(0, imgdiff(args.size(), args.data())); + + std::string tgt; + ASSERT_TRUE(android::base::ReadFileToString(tgt_file.path, &tgt)); + + // Expect 4 pieces of patch.(Rougly 3'a',3'b'; 8'c'; 10'd'; 2'd'3'e') + GenerateAndCheckSplitTarget(debug_dir.path, 4, tgt); +} + +TEST(ImgdiffTest, zip_mode_deflate_large_apk) { + // Generate 50 blocks of random data. + std::string random_data; + random_data.reserve(4096 * 50); + generate_n(back_inserter(random_data), 4096 * 50, []() { return rand() % 256; }); + + // Construct src and tgt zip files with limit = 10 blocks. + // src tgt + // 22 blocks, "d" 4 blocks, "a" + // 5 blocks, "b" 4 blocks, "b" + // 3 blocks, "a" 7 blocks, "c" (exceeds limit) + // 8 blocks, "c" 20 blocks, "d" (exceeds limit) + // 1 block, "f" 2 blocks, "e" + TemporaryFile tgt_file; + FILE* tgt_file_ptr = fdopen(tgt_file.fd, "wb"); + ZipWriter tgt_writer(tgt_file_ptr); + + construct_deflate_entry( + { { "a", 0, 4 }, { "b", 5, 4 }, { "c", 12, 8 }, { "d", 21, 20 }, { "e", 45, 2 }, + { "f", 48, 1 } }, &tgt_writer, random_data); + + ASSERT_EQ(0, tgt_writer.Finish()); + ASSERT_EQ(0, fclose(tgt_file_ptr)); + + TemporaryFile src_file; + FILE* src_file_ptr = fdopen(src_file.fd, "wb"); + ZipWriter src_writer(src_file_ptr); + + construct_deflate_entry( + { { "d", 21, 22 }, { "b", 5, 5 }, { "a", 0, 3 }, { "g", 9, 1 }, { "c", 11, 8 }, + { "f", 45, 1 } }, &src_writer, random_data); + + ASSERT_EQ(0, src_writer.Finish()); + ASSERT_EQ(0, fclose(src_file_ptr)); + + ZipModeImage src_image(true, 10 * 4096); + ZipModeImage tgt_image(false, 10 * 4096); + ASSERT_TRUE(src_image.Initialize(src_file.path)); + ASSERT_TRUE(tgt_image.Initialize(tgt_file.path)); + ASSERT_TRUE(ZipModeImage::CheckAndProcessChunks(&tgt_image, &src_image)); + + src_image.DumpChunks(); + tgt_image.DumpChunks(); + + std::vector split_tgt_images; + std::vector split_src_images; + std::vector split_src_ranges; + ZipModeImage::SplitZipModeImageWithLimit(tgt_image, src_image, &split_tgt_images, + &split_src_images, &split_src_ranges); + + // src_piece 1: a 3 blocks, b 5 blocks + // src_piece 2: c 8 blocks + // src_piece 3: d-0 10 block + // src_piece 4: d-1 10 blocks + // src_piece 5: e 1 block, CD + ASSERT_EQ(split_tgt_images.size(), split_src_images.size()); + ASSERT_EQ(static_cast(5), split_tgt_images.size()); + + ASSERT_EQ(static_cast(2), split_src_images[0].NumOfChunks()); + ASSERT_EQ("a", split_src_images[0][0].GetEntryName()); + ASSERT_EQ("b", split_src_images[0][1].GetEntryName()); + + ASSERT_EQ(static_cast(1), split_src_images[1].NumOfChunks()); + ASSERT_EQ("c", split_src_images[1][0].GetEntryName()); + + ASSERT_EQ(static_cast(0), split_src_images[2].NumOfChunks()); + ASSERT_EQ(static_cast(0), split_src_images[3].NumOfChunks()); + ASSERT_EQ(static_cast(0), split_src_images[4].NumOfChunks()); + + // Compute patch. + TemporaryFile patch_file; + TemporaryDir debug_dir; + ASSERT_TRUE(ZipModeImage::GeneratePatches(split_tgt_images, split_src_images, split_src_ranges, + patch_file.path, debug_dir.path)); + + std::string tgt; + ASSERT_TRUE(android::base::ReadFileToString(tgt_file.path, &tgt)); + + // Expect 5 pieces of patch. ["a","b"; "c"; "d-0"; "d-1"; "e"] + GenerateAndCheckSplitTarget(debug_dir.path, 5, tgt); +} + +TEST(ImgdiffTest, zip_mode_no_match_source) { + // Generate 20 blocks of random data. + std::string random_data; + random_data.reserve(4096 * 20); + generate_n(back_inserter(random_data), 4096 * 20, []() { return rand() % 256; }); + + TemporaryFile tgt_file; + FILE* tgt_file_ptr = fdopen(tgt_file.fd, "wb"); + ZipWriter tgt_writer(tgt_file_ptr); + + construct_deflate_entry({ { "a", 0, 4 }, { "b", 5, 5 }, { "c", 11, 5 } }, &tgt_writer, + random_data); + + ASSERT_EQ(0, tgt_writer.Finish()); + ASSERT_EQ(0, fclose(tgt_file_ptr)); + + // We don't have a matching source entry. + TemporaryFile src_file; + FILE* src_file_ptr = fdopen(src_file.fd, "wb"); + ZipWriter src_writer(src_file_ptr); + construct_store_entry({ { "d", 1, 'd' } }, &src_writer); + ASSERT_EQ(0, src_writer.Finish()); + ASSERT_EQ(0, fclose(src_file_ptr)); + + // Compute patch. + TemporaryFile patch_file; + TemporaryDir debug_dir; + std::vector args = { + "imgdiff", "-z", "--block-limit=10", android::base::StringPrintf( + "--debug-dir=%s", debug_dir.path).c_str(), src_file.path, tgt_file.path, patch_file.path, + }; + ASSERT_EQ(0, imgdiff(args.size(), args.data())); + + std::string tgt; + ASSERT_TRUE(android::base::ReadFileToString(tgt_file.path, &tgt)); + + // Expect 1 pieces of patch due to no matching source entry. + GenerateAndCheckSplitTarget(debug_dir.path, 1, tgt); +} + +TEST(ImgdiffTest, zip_mode_large_enough_limit) { + // Generate 20 blocks of random data. + std::string random_data; + random_data.reserve(4096 * 20); + generate_n(back_inserter(random_data), 4096 * 20, []() { return rand() % 256; }); + + TemporaryFile tgt_file; + FILE* tgt_file_ptr = fdopen(tgt_file.fd, "wb"); + ZipWriter tgt_writer(tgt_file_ptr); + + construct_deflate_entry({ { "a", 0, 10 }, { "b", 10, 5 } }, &tgt_writer, random_data); + + ASSERT_EQ(0, tgt_writer.Finish()); + ASSERT_EQ(0, fclose(tgt_file_ptr)); + + // Construct 10 blocks of source. + TemporaryFile src_file; + FILE* src_file_ptr = fdopen(src_file.fd, "wb"); + ZipWriter src_writer(src_file_ptr); + construct_deflate_entry({ { "a", 1, 10 } }, &src_writer, random_data); + ASSERT_EQ(0, src_writer.Finish()); + ASSERT_EQ(0, fclose(src_file_ptr)); + + // Compute patch with a limit of 20 blocks. + TemporaryFile patch_file; + TemporaryDir debug_dir; + std::vector args = { + "imgdiff", "-z", "--block-limit=20", android::base::StringPrintf( + "--debug-dir=%s", debug_dir.path).c_str(), src_file.path, tgt_file.path, patch_file.path, + }; + ASSERT_EQ(0, imgdiff(args.size(), args.data())); + + std::string tgt; + ASSERT_TRUE(android::base::ReadFileToString(tgt_file.path, &tgt)); + + // Expect 1 pieces of patch since limit is larger than the zip file size. + GenerateAndCheckSplitTarget(debug_dir.path, 1, tgt); +} -- cgit v1.2.3