mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-06-12 02:30:30 +09:00
Userland: Speed up the execution of the cut command.
Avoid creating SingleIndexes in case of byte ranges. This, boosts the performance significantly in case a byte range is too big(e.g 666-123123). Also, claim copyright over this mess since I am the one responsible for it.
This commit is contained in:
parent
4dd4dd2f3c
commit
86812af077
Notes:
sideshowbarker
2024-07-19 08:55:00 +09:00
Author: https://github.com/marprok
Commit: 86812af077
Pull-request: https://github.com/SerenityOS/serenity/pull/1339
Reviewed-by: https://github.com/awesomekling
1 changed files with 37 additions and 23 deletions
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
|
* Copyright (c) 2018-2020, Marios Prokopakis <mariosprokopakis@gmail.com>
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,6 +25,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <AK/QuickSort.h>
|
#include <AK/QuickSort.h>
|
||||||
|
#include <AK/StdLibExtras.h>
|
||||||
#include <AK/String.h>
|
#include <AK/String.h>
|
||||||
#include <AK/Vector.h>
|
#include <AK/Vector.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
@ -34,11 +35,20 @@
|
||||||
struct Index {
|
struct Index {
|
||||||
enum class Type {
|
enum class Type {
|
||||||
SingleIndex,
|
SingleIndex,
|
||||||
|
SliceIndex,
|
||||||
RangedIndex
|
RangedIndex
|
||||||
};
|
};
|
||||||
ssize_t m_from { -1 };
|
ssize_t m_from { -1 };
|
||||||
ssize_t m_to { -1 };
|
ssize_t m_to { -1 };
|
||||||
Type m_type { Type::SingleIndex };
|
Type m_type { Type::SingleIndex };
|
||||||
|
|
||||||
|
bool intersects(const Index& other)
|
||||||
|
{
|
||||||
|
if (m_type != Type::RangedIndex)
|
||||||
|
return m_from == other.m_from;
|
||||||
|
|
||||||
|
return !(other.m_from > m_to || other.m_to < m_from);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static void print_usage_and_exit(int ret)
|
static void print_usage_and_exit(int ret)
|
||||||
|
@ -49,8 +59,18 @@ static void print_usage_and_exit(int ret)
|
||||||
|
|
||||||
static void add_if_not_exists(Vector<Index>& indexes, Index data)
|
static void add_if_not_exists(Vector<Index>& indexes, Index data)
|
||||||
{
|
{
|
||||||
auto find = [data](auto& other) { return other.m_from == data.m_from && other.m_to == data.m_to; };
|
bool append_to_vector = true;
|
||||||
if (indexes.find(find) == indexes.end()) {
|
for (auto& index : indexes) {
|
||||||
|
if (index.intersects(data)) {
|
||||||
|
if (index.m_type == Index::Type::RangedIndex) {
|
||||||
|
index.m_from = AK::min(index.m_from, data.m_from);
|
||||||
|
index.m_to = AK::max(index.m_to, data.m_to);
|
||||||
|
}
|
||||||
|
append_to_vector = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (append_to_vector) {
|
||||||
indexes.append(data);
|
indexes.append(data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -81,10 +101,8 @@ static void expand_list(Vector<String>& tokens, Vector<Index>& indexes)
|
||||||
print_usage_and_exit(1);
|
print_usage_and_exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (ssize_t i = 1; i <= index; ++i) {
|
Index tmp = { 1, index, Index::Type::RangedIndex };
|
||||||
Index tmp = { i, i, Index::Type::SingleIndex };
|
add_if_not_exists(indexes, tmp);
|
||||||
add_if_not_exists(indexes, tmp);
|
|
||||||
}
|
|
||||||
} else if (token[token.length() - 1] == '-') {
|
} else if (token[token.length() - 1] == '-') {
|
||||||
bool ok = true;
|
bool ok = true;
|
||||||
ssize_t index = token.substring(0, token.length() - 1).to_int(ok);
|
ssize_t index = token.substring(0, token.length() - 1).to_int(ok);
|
||||||
|
@ -97,7 +115,7 @@ static void expand_list(Vector<String>& tokens, Vector<Index>& indexes)
|
||||||
fprintf(stderr, "cut: byte/character positions are numbered from 1\n");
|
fprintf(stderr, "cut: byte/character positions are numbered from 1\n");
|
||||||
print_usage_and_exit(1);
|
print_usage_and_exit(1);
|
||||||
}
|
}
|
||||||
Index tmp = { index, -1, Index::Type::RangedIndex };
|
Index tmp = { index, -1, Index::Type::SliceIndex };
|
||||||
add_if_not_exists(indexes, tmp);
|
add_if_not_exists(indexes, tmp);
|
||||||
} else {
|
} else {
|
||||||
auto range = token.split('-');
|
auto range = token.split('-');
|
||||||
|
@ -123,10 +141,8 @@ static void expand_list(Vector<String>& tokens, Vector<Index>& indexes)
|
||||||
print_usage_and_exit(1);
|
print_usage_and_exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (; index1 <= index2; ++index1) {
|
Index tmp = { index1, index2, Index::Type::RangedIndex };
|
||||||
Index tmp = { index1, index1, Index::Type::SingleIndex };
|
add_if_not_exists(indexes, tmp);
|
||||||
add_if_not_exists(indexes, tmp);
|
|
||||||
}
|
|
||||||
} else if (range.size() == 1) {
|
} else if (range.size() == 1) {
|
||||||
bool ok = true;
|
bool ok = true;
|
||||||
ssize_t index = range[0].to_int(ok);
|
ssize_t index = range[0].to_int(ok);
|
||||||
|
@ -167,14 +183,15 @@ static void cut_file(const String& file, const Vector<Index>& byte_vector)
|
||||||
line[line_length - 1] = '\0';
|
line[line_length - 1] = '\0';
|
||||||
line_length--;
|
line_length--;
|
||||||
for (auto& i : byte_vector) {
|
for (auto& i : byte_vector) {
|
||||||
if (i.m_type == Index::Type::RangedIndex && i.m_from < line_length) {
|
if (i.m_type == Index::Type::SliceIndex && i.m_from < line_length)
|
||||||
printf("%s", line + i.m_from - 1);
|
printf("%s", line + i.m_from - 1);
|
||||||
break;
|
else if (i.m_type == Index::Type::SingleIndex && i.m_from <= line_length)
|
||||||
}
|
|
||||||
|
|
||||||
if (i.m_from <= line_length)
|
|
||||||
printf("%c", line[i.m_from - 1]);
|
printf("%c", line[i.m_from - 1]);
|
||||||
else
|
else if (i.m_type == Index::Type::RangedIndex && i.m_from <= line_length) {
|
||||||
|
auto to = i.m_to > line_length ? line_length : i.m_to;
|
||||||
|
auto sub_string = String(line).substring(i.m_from - 1, to - i.m_from + 1);
|
||||||
|
printf("%s", sub_string.characters());
|
||||||
|
} else
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
@ -207,7 +224,6 @@ int main(int argc, char** argv)
|
||||||
} else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h")) {
|
} else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h")) {
|
||||||
print_usage_and_exit(1);
|
print_usage_and_exit(1);
|
||||||
} else if (argv[i][0] != '-') {
|
} else if (argv[i][0] != '-') {
|
||||||
//file = argv[i++];
|
|
||||||
files.append(argv[i++]);
|
files.append(argv[i++]);
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "cut: invalid argument %s\n", argv[i]);
|
fprintf(stderr, "cut: invalid argument %s\n", argv[i]);
|
||||||
|
@ -215,17 +231,15 @@ int main(int argc, char** argv)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (files.is_empty() || byte_list == "") {
|
if (files.is_empty() || byte_list == "")
|
||||||
print_usage_and_exit(1);
|
print_usage_and_exit(1);
|
||||||
}
|
|
||||||
|
|
||||||
Vector<Index> byte_vector;
|
Vector<Index> byte_vector;
|
||||||
expand_list(tokens, byte_vector);
|
expand_list(tokens, byte_vector);
|
||||||
quick_sort(byte_vector, [](auto& a, auto& b) { return a.m_from < b.m_from; });
|
quick_sort(byte_vector, [](auto& a, auto& b) { return a.m_from < b.m_from; });
|
||||||
/* Process each file */
|
/* Process each file */
|
||||||
for (auto& file : files) {
|
for (auto& file : files)
|
||||||
cut_file(file, byte_vector);
|
cut_file(file, byte_vector);
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue