| | from loguru import logger
|
| |
|
| | import torch
|
| | import torch.backends.cudnn as cudnn
|
| |
|
| | from yolox.core import Trainer, launch
|
| | from yolox.exp import get_exp
|
| |
|
| | import argparse
|
| | import random
|
| | import warnings
|
| |
|
| |
|
def make_parser():
    """Build the command-line parser for the YOLOX training script.

    The parser exposes:

    * experiment selection: ``-expn/--experiment-name``, ``-n/--name``,
      ``-f/--exp_file``;
    * distributed settings: ``--dist-backend``, ``--dist-url``,
      ``--num_machines``, ``--machine_rank``, ``--local_rank``;
    * training settings: ``-b/--batch-size``, ``-d/--devices``, ``--resume``,
      ``-c/--ckpt``, ``-e/--start_epoch``, ``--fp16`` / ``--no-fp16``,
      ``-o/--occupy``;
    * ``opts``: every remaining command-line token, collected verbatim.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser("YOLOX train parser")
    parser.add_argument("-expn", "--experiment-name", type=str, default=None)
    parser.add_argument("-n", "--name", type=str, default=None, help="model name")

    # Distributed-training options.
    parser.add_argument(
        "--dist-backend", default="nccl", type=str, help="distributed backend"
    )
    parser.add_argument(
        "--dist-url",
        default=None,
        type=str,
        help="url used to set up distributed training",
    )
    parser.add_argument("-b", "--batch-size", type=int, default=64, help="batch size")
    parser.add_argument(
        "-d", "--devices", default=None, type=int, help="device for training"
    )
    parser.add_argument(
        "--local_rank", default=0, type=int, help="local rank for dist training"
    )
    parser.add_argument(
        "-f",
        "--exp_file",
        default=None,
        type=str,
        help="plz input your expriment description file",
    )
    parser.add_argument(
        "--resume", default=False, action="store_true", help="resume training"
    )
    parser.add_argument("-c", "--ckpt", default=None, type=str, help="checkpoint file")
    parser.add_argument(
        "-e",
        "--start_epoch",
        default=None,
        type=int,
        help="resume training start epoch",
    )
    parser.add_argument(
        "--num_machines", default=1, type=int, help="num of node for training"
    )
    parser.add_argument(
        "--machine_rank", default=0, type=int, help="node rank for multi-node training"
    )
    # ``--fp16`` defaults to True and is a store_true flag, so passing it is a
    # no-op kept for backward compatibility; ``--no-fp16`` below is the only
    # way to switch mixed precision off from the command line.
    parser.add_argument(
        "--fp16",
        dest="fp16",
        default=True,
        action="store_true",
        help="Adopting mix precision training.",
    )
    parser.add_argument(
        "--no-fp16",
        dest="fp16",
        action="store_false",
        help="Disable mix precision training.",
    )
    parser.add_argument(
        "-o",
        "--occupy",
        dest="occupy",
        default=False,
        action="store_true",
        help="occupy GPU memory first for training.",
    )
    # Must stay last: REMAINDER swallows everything after the known options.
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    return parser
|
| |
|
| |
|
@logger.catch
def main(exp, args):
    """Configure seeding and cuDNN flags, then run training.

    Args:
        exp: experiment object; its ``seed`` attribute is read here and the
            object is handed to ``Trainer``.
        args: parsed command-line namespace, forwarded unchanged to ``Trainer``.
    """
    seed = exp.seed
    if seed is not None:
        seed_notice = (
            "You have chosen to seed training. This will turn on the CUDNN deterministic setting, "
            "which can slow down your training considerably! You may see unexpected behavior "
            "when restarting from checkpoints."
        )
        # Seed both Python's and PyTorch's generators and request
        # deterministic cuDNN behaviour.
        random.seed(seed)
        torch.manual_seed(seed)
        cudnn.deterministic = True
        warnings.warn(seed_notice)

    # Enabled unconditionally, whether or not a seed was configured.
    cudnn.benchmark = True

    Trainer(exp, args).train()
|
| |
|
| |
|
if __name__ == "__main__":
    # Parse the CLI, load the experiment description, and merge the trailing
    # ``opts`` tokens into it.
    args = make_parser().parse_args()
    exp = get_exp(args.exp_file, args.name)
    exp.merge(args.opts)

    # Fall back to the experiment's own name when none was given on the CLI.
    if not args.experiment_name:
        args.experiment_name = exp.exp_name

    # Use every visible CUDA device unless -d/--devices requests a number.
    available_gpus = torch.cuda.device_count()
    num_gpu = available_gpus if args.devices is None else args.devices
    if num_gpu > available_gpus:
        # Raised explicitly rather than via ``assert`` so the check still runs
        # under ``python -O`` and tells the user what went wrong.
        raise ValueError(
            f"Requested {num_gpu} devices, but only {available_gpus} "
            "CUDA devices are available."
        )

    launch(
        main,
        num_gpu,
        args.num_machines,
        args.machine_rank,
        backend=args.dist_backend,
        dist_url=args.dist_url,
        args=(exp, args),
    )
|
| |
|