1
0
Fork 0

subcmds: reduce multiprocessing serialization overhead

Follow the same approach as 39ffd9977e to reduce serialization overhead.

Below benchmarks are tested with 2.7k projects on my workstation
(warm cache). git tracing is disabled for benchmark.

(seconds)              | v2.48 | v2.48 | this CL | this CL
	               |       |  -j32 |         |    -j32
-----------------------------------------------------------
with clean tree state:
branches (none)        |   5.6 |   5.9 |    1.0  |    0.9
status (clean)         |  21.3 |   9.4 |   19.4  |    4.7
diff (none)            |   7.6 |   7.2 |    5.7  |    2.2
prune (none)           |   5.7 |   6.1 |    1.3  |    1.2
abandon (none)         |  19.4 |  18.6 |    0.9  |    0.8
upload (none)          |  19.7 |  18.7 |    0.9  |    0.8
forall -c true         |   7.5 |   7.6 |    0.6  |    0.6
forall -c "git log -1" |  11.3 |  11.1 |    0.6  |    0.6

with branches:
start BRANCH --all     |  21.9 |  20.3 |   13.6  |    2.6
checkout BRANCH        |  29.1 |  27.8 |    1.1  |    1.0
branches (2)           |  28.0 |  28.6 |    1.5  |    1.3
abandon BRANCH         |  29.2 |  27.5 |    9.7  |    2.2

Bug: b/371638995
Change-Id: I53989a3d1e43063587b3f52f852b1c2c56b49412
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/440221
Reviewed-by: Josip Sokcevic <sokcevic@google.com>
Tested-by: Kuang-che Wu <kcwu@google.com>
Commit-Queue: Kuang-che Wu <kcwu@google.com>
This commit is contained in:
Kuang-che Wu 2024-10-22 21:04:41 +08:00 committed by LUCI
parent 39ffd9977e
commit 8da4861b38
11 changed files with 230 additions and 174 deletions

View file

@ -15,7 +15,6 @@
import errno
import functools
import io
import multiprocessing
import os
import re
import signal
@ -26,7 +25,6 @@ from color import Coloring
from command import Command
from command import DEFAULT_LOCAL_JOBS
from command import MirrorSafeCommand
from command import WORKER_BATCH_SIZE
from error import ManifestInvalidRevisionError
from repo_logging import RepoLogger
@ -241,7 +239,6 @@ without iterating through the remaining projects.
cmd.insert(cmd.index(cn) + 1, "--color")
mirror = self.manifest.IsMirror
rc = 0
smart_sync_manifest_name = "smart_sync_override.xml"
smart_sync_manifest_path = os.path.join(
@ -264,32 +261,41 @@ without iterating through the remaining projects.
os.environ["REPO_COUNT"] = str(len(projects))
def _ProcessResults(_pool, _output, results):
rc = 0
first = True
for r, output in results:
if output:
if first:
first = False
elif opt.project_header:
print()
# To simplify the DoWorkWrapper, take care of automatic
# newlines.
end = "\n"
if output[-1] == "\n":
end = ""
print(output, end=end)
rc = rc or r
if r != 0 and opt.abort_on_errors:
raise Exception("Aborting due to previous error")
return rc
try:
config = self.manifest.manifestProject.config
with multiprocessing.Pool(opt.jobs, InitWorker) as pool:
results_it = pool.imap(
with self.ParallelContext():
self.get_parallel_context()["projects"] = projects
rc = self.ExecuteInParallel(
opt.jobs,
functools.partial(
DoWorkWrapper, mirror, opt, cmd, shell, config
self.DoWorkWrapper, mirror, opt, cmd, shell, config
),
enumerate(projects),
chunksize=WORKER_BATCH_SIZE,
range(len(projects)),
callback=_ProcessResults,
ordered=True,
initializer=self.InitWorker,
chunksize=1,
)
first = True
for r, output in results_it:
if output:
if first:
first = False
elif opt.project_header:
print()
# To simplify the DoWorkWrapper, take care of automatic
# newlines.
end = "\n"
if output[-1] == "\n":
end = ""
print(output, end=end)
rc = rc or r
if r != 0 and opt.abort_on_errors:
raise Exception("Aborting due to previous error")
except (KeyboardInterrupt, WorkerKeyboardInterrupt):
# Catch KeyboardInterrupt raised inside and outside of workers
rc = rc or errno.EINTR
@ -304,31 +310,31 @@ without iterating through the remaining projects.
if rc != 0:
sys.exit(rc)
@classmethod
def InitWorker(cls):
signal.signal(signal.SIGINT, signal.SIG_IGN)
@classmethod
def DoWorkWrapper(cls, mirror, opt, cmd, shell, config, project_idx):
"""A wrapper around the DoWork() method.
Catch the KeyboardInterrupt exceptions here and re-raise them as a
different, ``Exception``-based exception to stop it flooding the console
with stacktraces and making the parent hang indefinitely.
"""
project = cls.get_parallel_context()["projects"][project_idx]
try:
return DoWork(project, mirror, opt, cmd, shell, project_idx, config)
except KeyboardInterrupt:
print("%s: Worker interrupted" % project.name)
raise WorkerKeyboardInterrupt()
class WorkerKeyboardInterrupt(Exception):
"""Keyboard interrupt exception for worker processes."""
def InitWorker():
signal.signal(signal.SIGINT, signal.SIG_IGN)
def DoWorkWrapper(mirror, opt, cmd, shell, config, args):
"""A wrapper around the DoWork() method.
Catch the KeyboardInterrupt exceptions here and re-raise them as a
different, ``Exception``-based exception to stop it flooding the console
with stacktraces and making the parent hang indefinitely.
"""
cnt, project = args
try:
return DoWork(project, mirror, opt, cmd, shell, cnt, config)
except KeyboardInterrupt:
print("%s: Worker interrupted" % project.name)
raise WorkerKeyboardInterrupt()
def DoWork(project, mirror, opt, cmd, shell, cnt, config):
env = os.environ.copy()