From 2aaef2e9c863cf421ede75ae2cc28e8125a05306 Mon Sep 17 00:00:00 2001 From: LielinJiang Date: Mon, 14 Sep 2020 10:12:55 +0000 Subject: [PATCH 1/6] add srmodel --- ppgan/datasets/__init__.py | 1 + ppgan/datasets/base_dataset.py | 5 ++- ppgan/datasets/builder.py | 4 ++ ppgan/engine/trainer.py | 58 ++++++++++++++++++++++++++++- ppgan/models/__init__.py | 2 + ppgan/models/generators/__init__.py | 3 +- ppgan/utils/visual.py | 4 +- 7 files changed, 72 insertions(+), 5 deletions(-) diff --git a/ppgan/datasets/__init__.py b/ppgan/datasets/__init__.py index 9b807e9be0c83..0aeb70936b581 100644 --- a/ppgan/datasets/__init__.py +++ b/ppgan/datasets/__init__.py @@ -1,3 +1,4 @@ from .unpaired_dataset import UnpairedDataset from .single_dataset import SingleDataset from .paired_dataset import PairedDataset +from .sr_image_dataset import SRImageDataset \ No newline at end of file diff --git a/ppgan/datasets/base_dataset.py b/ppgan/datasets/base_dataset.py index 87e996925477c..c0036b40d4413 100644 --- a/ppgan/datasets/base_dataset.py +++ b/ppgan/datasets/base_dataset.py @@ -95,6 +95,9 @@ def get_transform(cfg, if convert: transform_list += [transforms.Permute(to_rgb=True)] transform_list += [ - transforms.Normalize((127.5, 127.5, 127.5), (127.5, 127.5, 127.5)) + transforms.Normalize((0., 0., 0.), (255., 255., 255.)) ] + # transform_list += [ + # transforms.Normalize((127.5, 127.5, 127.5), (127.5, 127.5, 127.5)) + # ] return transforms.Compose(transform_list) diff --git a/ppgan/datasets/builder.py b/ppgan/datasets/builder.py index 62b5346795c13..284c774214371 100644 --- a/ppgan/datasets/builder.py +++ b/ppgan/datasets/builder.py @@ -111,4 +111,8 @@ def build_dataloader(cfg, is_train=True): dataloader = DictDataLoader(dataset, batch_size, is_train, num_workers) + # for i, item in enumerate(dataloader): + # print(i, item.keys()) + # # break + # print('dataset build success!') return dataloader diff --git a/ppgan/engine/trainer.py b/ppgan/engine/trainer.py index f7f456962e61c..2e1f839fdeff9 100644 --- a/ppgan/engine/trainer.py +++ b/ppgan/engine/trainer.py @@ -1,5 +1,6 @@ import os import time +import copy import logging import paddle @@ -10,7 +11,7 @@ from ..models.builder import build_model from ..utils.visual import tensor2img, save_image from ..utils.filesystem import save, load, makedirs - +from ..metric.psnr_ssim import calculate_psnr, calculate_ssim class Trainer: def __init__(self, cfg): @@ -45,9 +46,11 @@ def __init__(self, cfg): # time count self.time_count = {} + self.best_metric = {} + def distributed_data_parallel(self): - strategy = paddle.prepare_context() + strategy = paddle.distributed.prepare_context() for name in self.model.model_names: if isinstance(name, str): net = getattr(self.model, 'net' + name) @@ -78,11 +81,61 @@ def train(self): step_start_time = time.time() self.logger.info('train one epoch time: {}'.format(time.time() - start_time)) + self.validate() self.model.lr_scheduler.step() if epoch % self.weight_interval == 0: self.save(epoch, 'weight', keep=-1) self.save(epoch) + def validate(self): + if not hasattr(self, 'val_dataloader'): + self.val_dataloader = build_dataloader(self.cfg.dataset.val, is_train=False) + + metric_result = {} + + for i, data in enumerate(self.val_dataloader): + self.batch_id = i + + self.model.set_input(data) + self.model.test() + + visual_results = {} + current_paths = self.model.get_image_paths() + current_visuals = self.model.get_current_visuals() + + # print('debug1:', self.cfg.validate.metrics) + for j in range(len(current_paths)): + short_path = 
os.path.basename(current_paths[j])
+                basename = os.path.splitext(short_path)[0]
+                for k, img_tensor in current_visuals.items():
+                    name = '%s_%s' % (basename, k)
+                    visual_results.update({name: img_tensor[j]})
+                # print('debug2:', self.cfg.validate.metrics)
+                if 'psnr' in self.cfg.validate.metrics:
+                    # args = copy.deepcopy(self.cfg.validate.metrics.pnsr)
+                    # args.pop('name')
+                    if 'psnr' not in metric_result:
+                        metric_result['psnr'] = calculate_psnr(tensor2img(current_visuals['output'][j]), tensor2img(current_visuals['gt'][j]), **self.cfg.validate.metrics.psnr)
+                    else:
+                        metric_result['psnr'] += calculate_psnr(tensor2img(current_visuals['output'][j]), tensor2img(current_visuals['gt'][j]), **self.cfg.validate.metrics.psnr)
+                if 'ssim' in self.cfg.validate.metrics:
+                    if 'ssim' not in metric_result:
+                        metric_result['ssim'] = calculate_ssim(tensor2img(current_visuals['output'][j]), tensor2img(current_visuals['gt'][j]), **self.cfg.validate.metrics.ssim)
+                    else:
+                        metric_result['ssim'] += calculate_ssim(tensor2img(current_visuals['output'][j]), tensor2img(current_visuals['gt'][j]), **self.cfg.validate.metrics.ssim)
+
+            self.visual('visual_val', visual_results=visual_results)
+
+            if i % self.log_interval == 0:
+                self.logger.info('val iter: [%d/%d]' %
+                                 (i, len(self.val_dataloader)))
+
+        for metric_name in metric_result.keys():
+            metric_result[metric_name] /= len(self.val_dataloader.dataset)
+
+        self.logger.info('Epoch {} validate end: {}'.format(self.current_epoch, metric_result))
+
+
     def test(self):
         if not hasattr(self, 'test_dataloader'):
             self.test_dataloader = build_dataloader(self.cfg.dataset.test,
@@ -210,5 +263,6 @@ def load(self, weight_path):
 
         for name in self.model.model_names:
             if isinstance(name, str):
+                self.logger.info('load model {} {} params!'.format(self.cfg.model.name, 'net' + name))
                 net = getattr(self.model, 'net' + name)
                 net.set_dict(state_dicts['net' + name])
diff --git a/ppgan/models/__init__.py b/ppgan/models/__init__.py
index 621e8edf33783..1fb4e96098b6e 100644
--- a/ppgan/models/__init__.py
+++ b/ppgan/models/__init__.py
@@ -1,4 +1,6 @@
 from .base_model import BaseModel
 from .cycle_gan_model import CycleGANModel
 from .pix2pix_model import Pix2PixModel
+from .srgan_model import SRGANModel
+from .sr_model import SRModel
 
diff --git a/ppgan/models/generators/__init__.py b/ppgan/models/generators/__init__.py
index 840d716a13990..15ac59d156f85 100644
--- a/ppgan/models/generators/__init__.py
+++ b/ppgan/models/generators/__init__.py
@@ -1,2 +1,3 @@
 from .resnet import ResnetGenerator
-from .unet import UnetGenerator
\ No newline at end of file
+from .unet import UnetGenerator
+from .rrdb_net import RRDBNet
\ No newline at end of file
diff --git a/ppgan/utils/visual.py b/ppgan/utils/visual.py
index a50c59eb673a3..56639acb52b68 100644
--- a/ppgan/utils/visual.py
+++ b/ppgan/utils/visual.py
@@ -15,7 +15,9 @@ def tensor2img(input_image, imtype=np.uint8):
             image_numpy = image_numpy[0]
         if image_numpy.shape[0] == 1:  # grayscale to RGB
             image_numpy = np.tile(image_numpy, (3, 1, 1))
-        image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0  # post-processing: tranpose and scaling
+        # image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0  # post-processing: tranpose and scaling
+        image_numpy = image_numpy.clip(0, 1)
+        image_numpy = (np.transpose(image_numpy, (1, 2, 0))) * 255.0  # post-processing: transpose and scaling
     else:  # if it is a numpy array, do nothing
         image_numpy = input_image
     return image_numpy.astype(imtype)
From d78530cf9c82a0a495f3cbf3846d7ed4879c34d8 
Mon Sep 17 00:00:00 2001 From: LielinJiang Date: Wed, 16 Sep 2020 10:50:15 +0000 Subject: [PATCH 2/6] move some model to ppgan --- applications/DAIN/predict.py | 6 +- applications/DAIN/util.py | 60 --- applications/DeOldify/predict.py | 54 +-- applications/DeOldify/spectral_norm.py | 63 --- applications/DeepRemaster/predict.py | 2 +- applications/EDVR/predict.py | 53 +-- applications/RealSR/predict.py | 54 +-- configs/cyclegan_cityscapes.yaml | 10 + configs/cyclegan_horse2zebra.yaml | 11 +- configs/pix2pix_cityscapes.yaml | 10 + configs/pix2pix_cityscapes_2gpus.yaml | 10 + configs/pix2pix_facades.yaml | 10 + ppgan/datasets/base_dataset.py | 11 +- ppgan/datasets/sr_image_dataset.py | 243 +++++++++++ ppgan/engine/trainer.py | 23 +- ppgan/metric/metric_util.py | 78 ++++ ppgan/metric/psnr_ssim.py | 137 +++++++ ppgan/models/backbones/__init__.py | 1 + .../models/backbones}/resnet_backbone.py | 0 .../models/generators/deoldify.py | 21 +- .../models/generators}/hook.py | 0 .../models/generators/remaster.py | 0 .../models/generators/rrdb_net.py | 12 +- ppgan/models/sr_model.py | 247 +++++++++++ ppgan/models/srgan_model.py | 388 ++++++++++++++++++ ppgan/modules/nn.py | 68 ++- ppgan/utils/video.py | 44 ++ ppgan/utils/visual.py | 6 +- requirments.txt | 1 + setup.py | 49 +++ 30 files changed, 1346 insertions(+), 326 deletions(-) delete mode 100644 applications/DeOldify/spectral_norm.py create mode 100644 ppgan/datasets/sr_image_dataset.py create mode 100644 ppgan/metric/metric_util.py create mode 100644 ppgan/metric/psnr_ssim.py create mode 100644 ppgan/models/backbones/__init__.py rename {applications/DeOldify => ppgan/models/backbones}/resnet_backbone.py (100%) rename applications/DeOldify/model.py => ppgan/models/generators/deoldify.py (96%) rename {applications/DeOldify => ppgan/models/generators}/hook.py (100%) rename applications/DeepRemaster/remasternet.py => ppgan/models/generators/remaster.py (100%) rename applications/RealSR/sr_model.py => ppgan/models/generators/rrdb_net.py (91%) create mode 100644 ppgan/models/sr_model.py create mode 100644 ppgan/models/srgan_model.py create mode 100644 ppgan/utils/video.py create mode 100644 requirments.txt create mode 100644 setup.py diff --git a/applications/DAIN/predict.py b/applications/DAIN/predict.py index 38c1d6baa4c83..f43a794150d52 100644 --- a/applications/DAIN/predict.py +++ b/applications/DAIN/predict.py @@ -13,8 +13,8 @@ import paddle.fluid as fluid from paddle.utils.download import get_path_from_url +from ppgan.utils.video import video2frames, frames2video -import networks from util import * from my_args import parser @@ -129,7 +129,7 @@ def run(self): r2 = str(int(fps) * times_interp) print("New fps (frame rate): ", r2) - out_path = dump_frames_ffmpeg(vid, frame_path_input) + out_path = video2frames(vid, frame_path_input) vidname = vid.split('/')[-1].split('.')[0] @@ -266,7 +266,7 @@ def run(self): vidname + '.mp4') if os.path.exists(video_pattern_output): os.remove(video_pattern_output) - frames_to_video_ffmpeg(frame_pattern_combined, video_pattern_output, + frames2video(frame_pattern_combined, video_pattern_output, r2) return frame_pattern_combined, video_pattern_output diff --git a/applications/DAIN/util.py b/applications/DAIN/util.py index 3efbfe0dc7cac..24ea274151766 100644 --- a/applications/DAIN/util.py +++ b/applications/DAIN/util.py @@ -21,66 +21,6 @@ def update(self, val, n=1): self.avg = self.sum / self.count -def dump_frames_ffmpeg(vid_path, outpath, r=None, ss=None, t=None): - ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error '] - 
vid_name = vid_path.split('/')[-1].split('.')[0] - out_full_path = os.path.join(outpath, vid_name) - - if not os.path.exists(out_full_path): - os.makedirs(out_full_path) - - # video file name - outformat = out_full_path + '/%08d.png' - - if ss is not None and t is not None and r is not None: - cmd = ffmpeg + [ - ' -ss ', - ss, - ' -t ', - t, - ' -i ', - vid_path, - ' -r ', - r, - # ' -f ', ' image2 ', - # ' -s ', ' 960*540 ', - ' -qscale:v ', - ' 0.1 ', - ' -start_number ', - ' 0 ', - # ' -qmax ', ' 1 ', - outformat - ] - else: - cmd = ffmpeg + [' -i ', vid_path, ' -start_number ', ' 0 ', outformat] - - cmd = ''.join(cmd) - - if os.system(cmd) == 0: - pass - else: - print('ffmpeg process video: {} error'.format(vid_name)) - - sys.stdout.flush() - return out_full_path - - -def frames_to_video_ffmpeg(framepath, videopath, r): - ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error '] - cmd = ffmpeg + [ - ' -r ', r, ' -f ', ' image2 ', ' -i ', framepath, ' -vcodec ', - ' libx264 ', ' -pix_fmt ', ' yuv420p ', ' -crf ', ' 16 ', videopath - ] - cmd = ''.join(cmd) - - if os.system(cmd) == 0: - pass - else: - print('ffmpeg process video: {} error'.format(videopath)) - - sys.stdout.flush() - - def combine_frames(input, interpolated, combined, num_frames): frames1 = sorted(glob.glob(os.path.join(input, '*.png'))) frames2 = sorted(glob.glob(os.path.join(interpolated, '*.png'))) diff --git a/applications/DeOldify/predict.py b/applications/DeOldify/predict.py index ce637fefb2c49..35ca9c9ab5351 100644 --- a/applications/DeOldify/predict.py +++ b/applications/DeOldify/predict.py @@ -14,8 +14,10 @@ from PIL import Image from tqdm import tqdm from paddle import fluid -from model import build_model from paddle.utils.download import get_path_from_url +from ppgan.utils.video import frames2video, video2frames +from ppgan.models.generators.deoldify import build_model + parser = argparse.ArgumentParser(description='DeOldify') parser.add_argument('--input', type=str, default='none', help='Input video') @@ -32,22 +34,6 @@ DeOldify_weight_url = 'https://paddlegan.bj.bcebos.com/applications/DeOldify_stable.pdparams' -def frames_to_video_ffmpeg(framepath, videopath, r): - ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error '] - cmd = ffmpeg + [ - ' -r ', r, ' -f ', ' image2 ', ' -i ', framepath, ' -vcodec ', - ' libx264 ', ' -pix_fmt ', ' yuv420p ', ' -crf ', ' 16 ', videopath - ] - cmd = ''.join(cmd) - - if os.system(cmd) == 0: - pass - else: - print('ffmpeg process video: {} error'.format(videopath)) - - sys.stdout.flush() - - class DeOldifyPredictor(): def __init__(self, input, @@ -127,7 +113,7 @@ def run(self): cap = cv2.VideoCapture(vid) fps = cap.get(cv2.CAP_PROP_FPS) - out_path = dump_frames_ffmpeg(vid, output_path) + out_path = video2frames(vid, output_path) frames = sorted(glob.glob(os.path.join(out_path, '*.png'))) @@ -141,42 +127,12 @@ def run(self): vid_out_path = os.path.join(output_path, '{}_deoldify_out.mp4'.format(base_name)) - frames_to_video_ffmpeg(frame_pattern_combined, vid_out_path, + frames2video(frame_pattern_combined, vid_out_path, str(int(fps))) return frame_pattern_combined, vid_out_path -def dump_frames_ffmpeg(vid_path, outpath, r=None, ss=None, t=None): - ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error '] - vid_name = vid_path.split('/')[-1].split('.')[0] - out_full_path = os.path.join(outpath, 'frames_input') - - if not os.path.exists(out_full_path): - os.makedirs(out_full_path) - - # video file name - outformat = out_full_path + '/%08d.png' - - if ss is not None and t is not None and r is not None: - 
cmd = ffmpeg + [ - ' -ss ', ss, ' -t ', t, ' -i ', vid_path, ' -r ', r, ' -qscale:v ', - ' 0.1 ', ' -start_number ', ' 0 ', outformat - ] - else: - cmd = ffmpeg + [' -i ', vid_path, ' -start_number ', ' 0 ', outformat] - - cmd = ''.join(cmd) - - if os.system(cmd) == 0: - pass - else: - print('ffmpeg process video: {} error'.format(vid_name)) - - sys.stdout.flush() - return out_full_path - - if __name__ == '__main__': paddle.disable_static() args = parser.parse_args() diff --git a/applications/DeOldify/spectral_norm.py b/applications/DeOldify/spectral_norm.py deleted file mode 100644 index 81500a51d48c4..0000000000000 --- a/applications/DeOldify/spectral_norm.py +++ /dev/null @@ -1,63 +0,0 @@ -import numpy as np -from paddle import fluid -from paddle.fluid import dygraph -from paddle.fluid import layers as F -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.data_feeder import check_variable_and_dtype - -import paddle -import paddle.nn as nn - -class _SpectralNorm(nn.SpectralNorm): - def __init__(self, - weight_shape, - dim=0, - power_iters=1, - eps=1e-12, - dtype='float32'): - super(_SpectralNorm, self).__init__(weight_shape, dim, power_iters, eps, dtype) - - def forward(self, weight): - check_variable_and_dtype(weight, "weight", ['float32', 'float64'], - 'SpectralNorm') - inputs = {'Weight': weight, 'U': self.weight_u, 'V': self.weight_v} - out = self._helper.create_variable_for_type_inference(self._dtype) - _power_iters = self._power_iters if self.training else 0 - self._helper.append_op( - type="spectral_norm", - inputs=inputs, - outputs={"Out": out, }, - attrs={ - "dim": self._dim, - "power_iters": _power_iters, #self._power_iters, - "eps": self._eps, - }) - - return out - - -class Spectralnorm(nn.Layer): - - def __init__(self, - layer, - dim=0, - power_iters=1, - eps=1e-12, - dtype='float32'): - super(Spectralnorm, self).__init__() - self.spectral_norm = _SpectralNorm(layer.weight.shape, dim, power_iters, eps, dtype) - self.dim = dim - self.power_iters = power_iters - self.eps = eps - self.layer = layer - weight = layer._parameters['weight'] - del layer._parameters['weight'] - self.weight_orig = self.create_parameter(weight.shape, dtype=weight.dtype) - self.weight_orig.set_value(weight) - - - def forward(self, x): - weight = self.spectral_norm(self.weight_orig) - self.layer.weight = weight - out = self.layer(x) - return out diff --git a/applications/DeepRemaster/predict.py b/applications/DeepRemaster/predict.py index 3ad54b31eff7b..8a4777fbf3ca1 100644 --- a/applications/DeepRemaster/predict.py +++ b/applications/DeepRemaster/predict.py @@ -14,7 +14,7 @@ import argparse import subprocess import utils -from remasternet import NetworkR, NetworkC +from ppgan.models.generators.remaster import NetworkR, NetworkC from paddle.utils.download import get_path_from_url DeepRemaster_weight_url = 'https://paddlegan.bj.bcebos.com/applications/deep_remaster.pdparams' diff --git a/applications/EDVR/predict.py b/applications/EDVR/predict.py index 11ab8928e877b..8a7f5aa6adc6a 100644 --- a/applications/EDVR/predict.py +++ b/applications/EDVR/predict.py @@ -30,6 +30,7 @@ from tqdm import tqdm from data import EDVRDataset from paddle.utils.download import get_path_from_url +from ppgan.utils.video import frames2video, video2frames EDVR_weight_url = 'https://paddlegan.bj.bcebos.com/applications/edvr_infer_model.tar' @@ -71,52 +72,6 @@ def save_img(img, framename): cv2.imwrite(framename, img) -def dump_frames_ffmpeg(vid_path, outpath, r=None, ss=None, t=None): - ffmpeg = ['ffmpeg ', ' -y 
-loglevel ', ' error '] - vid_name = vid_path.split('/')[-1].split('.')[0] - out_full_path = os.path.join(outpath, 'frames_input') - - if not os.path.exists(out_full_path): - os.makedirs(out_full_path) - - # video file name - outformat = out_full_path + '/%08d.png' - - if ss is not None and t is not None and r is not None: - cmd = ffmpeg + [ - ' -ss ', ss, ' -t ', t, ' -i ', vid_path, ' -r ', r, ' -qscale:v ', - ' 0.1 ', ' -start_number ', ' 0 ', outformat - ] - else: - cmd = ffmpeg + [' -i ', vid_path, ' -start_number ', ' 0 ', outformat] - - cmd = ''.join(cmd) - - if os.system(cmd) == 0: - pass - else: - print('ffmpeg process video: {} error'.format(vid_name)) - - sys.stdout.flush() - return out_full_path - - -def frames_to_video_ffmpeg(framepath, videopath, r): - ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error '] - cmd = ffmpeg + [ - ' -r ', r, ' -f ', ' image2 ', ' -i ', framepath, ' -vcodec ', - ' libx264 ', ' -pix_fmt ', ' yuv420p ', ' -crf ', ' 16 ', videopath - ] - cmd = ''.join(cmd) - - if os.system(cmd) == 0: - pass - else: - print('ffmpeg process video: {} error'.format(videopath)) - - sys.stdout.flush() - - class EDVRPredictor: def __init__(self, input, output, weight_path=None): self.input = input @@ -129,8 +84,6 @@ def __init__(self, input, output, weight_path=None): if weight_path is None: weight_path = get_path_from_url(EDVR_weight_url, cur_path) - print(weight_path) - model_filename = 'EDVR_model.pdmodel' params_filename = 'EDVR_params.pdparams' @@ -155,7 +108,7 @@ def run(self): cap = cv2.VideoCapture(vid) fps = cap.get(cv2.CAP_PROP_FPS) - out_path = dump_frames_ffmpeg(vid, output_path) + out_path = video2frames(vid, output_path) frames = sorted(glob.glob(os.path.join(out_path, '*.png'))) @@ -188,7 +141,7 @@ def run(self): frame_pattern_combined = os.path.join(pred_frame_path, '%08d.png') vid_out_path = os.path.join(self.output, '{}_edvr_out.mp4'.format(base_name)) - frames_to_video_ffmpeg(frame_pattern_combined, vid_out_path, + frames2video(frame_pattern_combined, vid_out_path, str(int(fps))) return frame_pattern_combined, vid_out_path diff --git a/applications/RealSR/predict.py b/applications/RealSR/predict.py index a05bf788e0f11..e74e0ca0b0b16 100644 --- a/applications/RealSR/predict.py +++ b/applications/RealSR/predict.py @@ -13,7 +13,9 @@ from PIL import Image from tqdm import tqdm -from sr_model import RRDBNet + +from ppgan.models.generators import RRDBNet +from ppgan.utils.video import frames2video, video2frames from paddle.utils.download import get_path_from_url parser = argparse.ArgumentParser(description='RealSR') @@ -27,22 +29,6 @@ RealSR_weight_url = 'https://paddlegan.bj.bcebos.com/applications/DF2K_JPEG.pdparams' -def frames_to_video_ffmpeg(framepath, videopath, r): - ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error '] - cmd = ffmpeg + [ - ' -r ', r, ' -f ', ' image2 ', ' -i ', framepath, ' -vcodec ', - ' libx264 ', ' -pix_fmt ', ' yuv420p ', ' -crf ', ' 16 ', videopath - ] - cmd = ''.join(cmd) - - if os.system(cmd) == 0: - pass - else: - print('ffmpeg process video: {} error'.format(videopath)) - - sys.stdout.flush() - - class RealSRPredictor(): def __init__(self, input, output, batch_size=1, weight_path=None): self.input = input @@ -88,7 +74,7 @@ def run(self): cap = cv2.VideoCapture(vid) fps = cap.get(cv2.CAP_PROP_FPS) - out_path = dump_frames_ffmpeg(vid, output_path) + out_path = video2frames(vid, output_path) frames = sorted(glob.glob(os.path.join(out_path, '*.png'))) @@ -102,42 +88,12 @@ def run(self): vid_out_path = os.path.join(output_path, 
'{}_realsr_out.mp4'.format(base_name)) - frames_to_video_ffmpeg(frame_pattern_combined, vid_out_path, + frames2video(frame_pattern_combined, vid_out_path, str(int(fps))) return frame_pattern_combined, vid_out_path -def dump_frames_ffmpeg(vid_path, outpath, r=None, ss=None, t=None): - ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error '] - vid_name = vid_path.split('/')[-1].split('.')[0] - out_full_path = os.path.join(outpath, 'frames_input') - - if not os.path.exists(out_full_path): - os.makedirs(out_full_path) - - # video file name - outformat = out_full_path + '/%08d.png' - - if ss is not None and t is not None and r is not None: - cmd = ffmpeg + [ - ' -ss ', ss, ' -t ', t, ' -i ', vid_path, ' -r ', r, ' -qscale:v ', - ' 0.1 ', ' -start_number ', ' 0 ', outformat - ] - else: - cmd = ffmpeg + [' -i ', vid_path, ' -start_number ', ' 0 ', outformat] - - cmd = ''.join(cmd) - - if os.system(cmd) == 0: - pass - else: - print('ffmpeg process video: {} error'.format(vid_name)) - - sys.stdout.flush() - return out_full_path - - if __name__ == '__main__': paddle.disable_static() args = parser.parse_args() diff --git a/configs/cyclegan_cityscapes.yaml b/configs/cyclegan_cityscapes.yaml index c4facd8bc817f..f74d9e3bdd35e 100644 --- a/configs/cyclegan_cityscapes.yaml +++ b/configs/cyclegan_cityscapes.yaml @@ -41,6 +41,11 @@ dataset: crop_size: 256 preprocess: resize_and_crop no_flip: False + normalize: + mean: + (127.5, 127.5, 127.5) + std: + (127.5, 127.5, 127.5) test: name: SingleDataset dataroot: data/cityscapes/testB @@ -55,6 +60,11 @@ dataset: crop_size: 256 preprocess: resize_and_crop no_flip: True + normalize: + mean: + (127.5, 127.5, 127.5) + std: + (127.5, 127.5, 127.5) optimizer: diff --git a/configs/cyclegan_horse2zebra.yaml b/configs/cyclegan_horse2zebra.yaml index 1ea5c6d1687c3..0e845bd518342 100644 --- a/configs/cyclegan_horse2zebra.yaml +++ b/configs/cyclegan_horse2zebra.yaml @@ -40,6 +40,11 @@ dataset: crop_size: 256 preprocess: resize_and_crop no_flip: False + normalize: + mean: + (127.5, 127.5, 127.5) + std: + (127.5, 127.5, 127.5) test: name: SingleDataset dataroot: data/horse2zebra/testA @@ -54,7 +59,11 @@ dataset: crop_size: 256 preprocess: resize_and_crop no_flip: True - + normalize: + mean: + (127.5, 127.5, 127.5) + std: + (127.5, 127.5, 127.5) optimizer: name: Adam diff --git a/configs/pix2pix_cityscapes.yaml b/configs/pix2pix_cityscapes.yaml index 06577f7f1a503..5919ff2e5a5c2 100644 --- a/configs/pix2pix_cityscapes.yaml +++ b/configs/pix2pix_cityscapes.yaml @@ -38,6 +38,11 @@ dataset: crop_size: 256 preprocess: resize_and_crop no_flip: False + normalize: + mean: + (127.5, 127.5, 127.5) + std: + (127.5, 127.5, 127.5) test: name: PairedDataset dataroot: data/cityscapes/ @@ -53,6 +58,11 @@ dataset: crop_size: 256 preprocess: resize_and_crop no_flip: True + normalize: + mean: + (127.5, 127.5, 127.5) + std: + (127.5, 127.5, 127.5) optimizer: name: Adam diff --git a/configs/pix2pix_cityscapes_2gpus.yaml b/configs/pix2pix_cityscapes_2gpus.yaml index a64b57a8c5e7b..20f494c6fb136 100644 --- a/configs/pix2pix_cityscapes_2gpus.yaml +++ b/configs/pix2pix_cityscapes_2gpus.yaml @@ -37,6 +37,11 @@ dataset: crop_size: 256 preprocess: resize_and_crop no_flip: False + normalize: + mean: + (127.5, 127.5, 127.5) + std: + (127.5, 127.5, 127.5) test: name: PairedDataset dataroot: data/cityscapes/ @@ -52,6 +57,11 @@ dataset: crop_size: 256 preprocess: resize_and_crop no_flip: True + normalize: + mean: + (127.5, 127.5, 127.5) + std: + (127.5, 127.5, 127.5) optimizer: name: Adam diff --git 
a/configs/pix2pix_facades.yaml b/configs/pix2pix_facades.yaml index ede78386fdd09..31b5f145dccdf 100644 --- a/configs/pix2pix_facades.yaml +++ b/configs/pix2pix_facades.yaml @@ -37,6 +37,11 @@ dataset: crop_size: 256 preprocess: resize_and_crop no_flip: False + normalize: + mean: + (127.5, 127.5, 127.5) + std: + (127.5, 127.5, 127.5) test: name: PairedDataset dataroot: data/facades/ @@ -52,6 +57,11 @@ dataset: crop_size: 256 preprocess: resize_and_crop no_flip: True + normalize: + mean: + (127.5, 127.5, 127.5) + std: + (127.5, 127.5, 127.5) optimizer: name: Adam diff --git a/ppgan/datasets/base_dataset.py b/ppgan/datasets/base_dataset.py index c0036b40d4413..fe93c71e718fa 100644 --- a/ppgan/datasets/base_dataset.py +++ b/ppgan/datasets/base_dataset.py @@ -94,10 +94,9 @@ def get_transform(cfg, if convert: transform_list += [transforms.Permute(to_rgb=True)] - transform_list += [ - transforms.Normalize((0., 0., 0.), (255., 255., 255.)) - ] - # transform_list += [ - # transforms.Normalize((127.5, 127.5, 127.5), (127.5, 127.5, 127.5)) - # ] + if cfg.get('normalize', None): + transform_list += [ + transforms.Normalize(cfg.normalize.mean, cfg.normalize.std) + ] + return transforms.Compose(transform_list) diff --git a/ppgan/datasets/sr_image_dataset.py b/ppgan/datasets/sr_image_dataset.py new file mode 100644 index 0000000000000..977d2657ce10b --- /dev/null +++ b/ppgan/datasets/sr_image_dataset.py @@ -0,0 +1,243 @@ +# import mmcv +import os +import cv2 +import random +import numpy as np +import paddle.vision.transforms as transform + +from pathlib import Path +from paddle.io import Dataset +from .builder import DATASETS + + +def scandir(dir_path, suffix=None, recursive=False): + """Scan a directory to find the interested files. + """ + if isinstance(dir_path, (str, Path)): + dir_path = str(dir_path) + else: + raise TypeError('"dir_path" must be a string or Path object') + + if (suffix is not None) and not isinstance(suffix, (str, tuple)): + raise TypeError('"suffix" must be a string or tuple of strings') + + root = dir_path + + def _scandir(dir_path, suffix, recursive): + for entry in os.scandir(dir_path): + if not entry.name.startswith('.') and entry.is_file(): + rel_path = os.path.relpath(entry.path, root) + if suffix is None: + yield rel_path + elif rel_path.endswith(suffix): + yield rel_path + else: + if recursive: + yield from _scandir( + entry.path, suffix=suffix, recursive=recursive) + else: + continue + + return _scandir(dir_path, suffix=suffix, recursive=recursive) + +def paired_paths_from_folder(folders, keys, filename_tmpl): + """Generate paired paths from folders. + """ + assert len(folders) == 2, ( + 'The len of folders should be 2 with [input_folder, gt_folder]. ' + f'But got {len(folders)}') + assert len(keys) == 2, ( + 'The len of keys should be 2 with [input_key, gt_key]. 
' f'But got {len(keys)}')
+    input_folder, gt_folder = folders
+    input_key, gt_key = keys
+
+    input_paths = list(scandir(input_folder))
+    gt_paths = list(scandir(gt_folder))
+    assert len(input_paths) == len(gt_paths), (
+        f'{input_key} and {gt_key} datasets have different number of images: '
+        f'{len(input_paths)}, {len(gt_paths)}.')
+    paths = []
+    for gt_path in gt_paths:
+        basename, ext = os.path.splitext(os.path.basename(gt_path))
+        input_name = f'{filename_tmpl.format(basename)}{ext}'
+        input_path = os.path.join(input_folder, input_name)
+        assert input_name in input_paths, (f'{input_name} is not in '
+                                           f'{input_key}_paths.')
+        gt_path = os.path.join(gt_folder, gt_path)
+        paths.append(
+            dict([(f'{input_key}_path', input_path),
+                  (f'{gt_key}_path', gt_path)]))
+    return paths
+
+def paired_random_crop(img_gts, img_lqs, gt_patch_size, scale, gt_path):
+    """Paired random crop.
+
+    It crops lists of lq and gt images with corresponding locations.
+
+    Args:
+        img_gts (list[ndarray] | ndarray): GT images. Note that all images
+            should have the same shape. If the input is an ndarray, it will
+            be transformed to a list containing itself.
+        img_lqs (list[ndarray] | ndarray): LQ images. Note that all images
+            should have the same shape. If the input is an ndarray, it will
+            be transformed to a list containing itself.
+        gt_patch_size (int): GT patch size.
+        scale (int): Scale factor.
+        gt_path (str): Path to ground-truth.
+
+    Returns:
+        list[ndarray] | ndarray: GT images and LQ images. If returned results
+            only have one element, just return ndarray.
+    """
+
+    if not isinstance(img_gts, list):
+        img_gts = [img_gts]
+    if not isinstance(img_lqs, list):
+        img_lqs = [img_lqs]
+
+    h_lq, w_lq, _ = img_lqs[0].shape
+    h_gt, w_gt, _ = img_gts[0].shape
+    lq_patch_size = gt_patch_size // scale
+
+    if h_gt != h_lq * scale or w_gt != w_lq * scale:
+        raise ValueError(
+            f'Scale mismatches. GT ({h_gt}, {w_gt}) is not {scale}x '
+            f'multiplication of LQ ({h_lq}, {w_lq}).')
+    if h_lq < lq_patch_size or w_lq < lq_patch_size:
+        raise ValueError(f'LQ ({h_lq}, {w_lq}) is smaller than patch size '
+                         f'({lq_patch_size}, {lq_patch_size}). '
+                         f'Please remove {gt_path}.')
+
+    # randomly choose top and left coordinates for lq patch
+    top = random.randint(0, h_lq - lq_patch_size)
+    left = random.randint(0, w_lq - lq_patch_size)
+
+    # crop lq patch
+    img_lqs = [
+        v[top:top + lq_patch_size, left:left + lq_patch_size, ...]
+        for v in img_lqs
+    ]
+
+    # crop corresponding gt patch
+    top_gt, left_gt = int(top * scale), int(left * scale)
+    img_gts = [
+        v[top_gt:top_gt + gt_patch_size, left_gt:left_gt + gt_patch_size, ...]
+        for v in img_gts
+    ]
+    if len(img_gts) == 1:
+        img_gts = img_gts[0]
+    if len(img_lqs) == 1:
+        img_lqs = img_lqs[0]
+    return img_gts, img_lqs
+
+
+def augment(imgs, hflip=True, rotation=True, flows=None):
+    """Augment: horizontal flips OR rotate (0, 90, 180, 270 degrees).
+ """ + hflip = hflip and random.random() < 0.5 + vflip = rotation and random.random() < 0.5 + rot90 = rotation and random.random() < 0.5 + + def _augment(img): + if hflip: + cv2.flip(img, 1, img) + if vflip: + cv2.flip(img, 0, img) + if rot90: + img = img.transpose(1, 0, 2) + return img + + def _augment_flow(flow): + if hflip: + cv2.flip(flow, 1, flow) + flow[:, :, 0] *= -1 + if vflip: + cv2.flip(flow, 0, flow) + flow[:, :, 1] *= -1 + if rot90: + flow = flow.transpose(1, 0, 2) + flow = flow[:, :, [1, 0]] + return flow + + if not isinstance(imgs, list): + imgs = [imgs] + imgs = [_augment(img) for img in imgs] + if len(imgs) == 1: + imgs = imgs[0] + + if flows is not None: + if not isinstance(flows, list): + flows = [flows] + flows = [_augment_flow(flow) for flow in flows] + if len(flows) == 1: + flows = flows[0] + return imgs, flows + else: + return imgs + + +@DATASETS.register() +class SRImageDataset(Dataset): + """Paired image dataset for image restoration.""" + + def __init__(self, cfg): + super(SRImageDataset, self).__init__() + self.cfg = cfg + + self.file_client = None + self.io_backend_opt = cfg['io_backend'] + + self.gt_folder, self.lq_folder = cfg['dataroot_gt'], cfg['dataroot_lq'] + if 'filename_tmpl' in cfg: + self.filename_tmpl = cfg['filename_tmpl'] + else: + self.filename_tmpl = '{}' + + if self.io_backend_opt['type'] == 'lmdb': + #TODO: LielinJiang support lmdb to accelerate io + pass + elif 'meta_info_file' in self.cfg and self.cfg[ + 'meta_info_file'] is not None: + #TODO: LielinJiang support lmdb to accelerate io + pass + else: + self.paths = paired_paths_from_folder( + [self.lq_folder, self.gt_folder], ['lq', 'gt'], + self.filename_tmpl) + + def __getitem__(self, index): + scale = self.cfg['scale'] + + # Load gt and lq images. Dimension order: HWC; channel order: BGR; + # image range: [0, 1], float32. + gt_path = self.paths[index]['gt_path'] + lq_path = self.paths[index]['lq_path'] + + img_gt = cv2.imread(gt_path).astype(np.float32) / 255. + img_lq = cv2.imread(lq_path).astype(np.float32) / 255. 
+
+        # augmentation for training
+        if self.cfg['phase'] == 'train':
+            gt_size = self.cfg['gt_size']
+            # random crop
+            img_gt, img_lq = paired_random_crop(img_gt, img_lq, gt_size, scale,
+                                                gt_path)
+            # flip, rotation
+            img_gt, img_lq = augment([img_gt, img_lq], self.cfg['use_flip'],
+                                     self.cfg['use_rot'])
+
+        # TODO: color space transform
+        # BGR to RGB, HWC to CHW, numpy to tensor
+        permute = transform.Permute()
+        img_gt = permute(img_gt)
+        img_lq = permute(img_lq)
+        return {
+            'lq': img_lq,
+            'gt': img_gt,
+            'lq_path': lq_path,
+            'gt_path': gt_path
+        }
+
+    def __len__(self):
+        return len(self.paths)
diff --git a/ppgan/engine/trainer.py b/ppgan/engine/trainer.py
index 2e1f839fdeff9..650aab765e484 100644
--- a/ppgan/engine/trainer.py
+++ b/ppgan/engine/trainer.py
@@ -40,6 +40,9 @@ def __init__(self, cfg):
         self.weight_interval = cfg.snapshot_config.interval
         self.log_interval = cfg.log_config.interval
         self.visual_interval = cfg.log_config.visiual_interval
+        self.validate_interval = -1
+        if cfg.get('validate', None) is not None:
+            self.validate_interval = cfg.validate.get('interval', -1)
         self.cfg = cfg
 
         self.local_rank = ParallelEnv().local_rank
@@ -81,7 +84,8 @@ def train(self):
                 step_start_time = time.time()
 
             self.logger.info('train one epoch time: {}'.format(time.time() -
                                                                start_time))
-            self.validate()
+            if self.validate_interval > -1 and epoch % self.validate_interval == 0:
+                self.validate()
             self.model.lr_scheduler.step()
             if epoch % self.weight_interval == 0:
                 self.save(epoch, 'weight', keep=-1)
                 self.save(epoch)
@@ -103,26 +107,22 @@ def validate(self):
             current_paths = self.model.get_image_paths()
             current_visuals = self.model.get_current_visuals()
 
-            # print('debug1:', self.cfg.validate.metrics)
             for j in range(len(current_paths)):
                 short_path = os.path.basename(current_paths[j])
                 basename = os.path.splitext(short_path)[0]
                 for k, img_tensor in current_visuals.items():
                     name = '%s_%s' % (basename, k)
                     visual_results.update({name: img_tensor[j]})
-                # print('debug2:', self.cfg.validate.metrics)
                 if 'psnr' in self.cfg.validate.metrics:
-                    # args = copy.deepcopy(self.cfg.validate.metrics.pnsr)
-                    # args.pop('name')
                     if 'psnr' not in metric_result:
-                        metric_result['psnr'] = calculate_psnr(tensor2img(current_visuals['output'][j]), tensor2img(current_visuals['gt'][j]), **self.cfg.validate.metrics.psnr)
+                        metric_result['psnr'] = calculate_psnr(tensor2img(current_visuals['output'][j], (0., 1.)), tensor2img(current_visuals['gt'][j], (0., 1.)), **self.cfg.validate.metrics.psnr)
                     else:
-                        metric_result['psnr'] += calculate_psnr(tensor2img(current_visuals['output'][j]), tensor2img(current_visuals['gt'][j]), **self.cfg.validate.metrics.psnr)
+                        metric_result['psnr'] += calculate_psnr(tensor2img(current_visuals['output'][j], (0., 1.)), tensor2img(current_visuals['gt'][j], (0., 1.)), **self.cfg.validate.metrics.psnr)
                 if 'ssim' in self.cfg.validate.metrics:
                     if 'ssim' not in metric_result:
-                        metric_result['ssim'] = calculate_ssim(tensor2img(current_visuals['output'][j]), tensor2img(current_visuals['gt'][j]), **self.cfg.validate.metrics.ssim)
+                        metric_result['ssim'] = calculate_ssim(tensor2img(current_visuals['output'][j], (0., 1.)), tensor2img(current_visuals['gt'][j], (0., 1.)), **self.cfg.validate.metrics.ssim)
                     else:
-                        metric_result['ssim'] += calculate_ssim(tensor2img(current_visuals['output'][j]), tensor2img(current_visuals['gt'][j]), **self.cfg.validate.metrics.ssim)
+                        metric_result['ssim'] += calculate_ssim(tensor2img(current_visuals['output'][j], (0., 1.)), tensor2img(current_visuals['gt'][j], (0., 1.)), **self.cfg.validate.metrics.ssim)
 
             self.visual('visual_val', visual_results=visual_results)
 
             if i % self.log_interval == 0:
                 self.logger.info('val iter: [%d/%d]' %
                                  (i, len(self.val_dataloader)))
 
@@ -200,8 +200,11 @@ def visual(self, results_dir, visual_results=None):
         msg = ''
         makedirs(os.path.join(self.output_dir, results_dir))
 
+        min_max = self.cfg.get('min_max', None)
+        if min_max is None:
+            min_max = (-1., 1.)
         for label, image in visual_results.items():
-            image_numpy = tensor2img(image)
+            image_numpy = tensor2img(image, min_max)
             img_path = os.path.join(self.output_dir, results_dir,
                                     msg + '%s.png' % (label))
             save_image(image_numpy, img_path)
diff --git a/ppgan/metric/metric_util.py b/ppgan/metric/metric_util.py
new file mode 100644
index 0000000000000..d81c2f64086b2
--- /dev/null
+++ b/ppgan/metric/metric_util.py
@@ -0,0 +1,78 @@
+import numpy as np
+
+
+def reorder_image(img, input_order='HWC'):
+    """Reorder images to 'HWC' order.
+
+    If the input_order is (h, w), return (h, w, 1);
+    If the input_order is (c, h, w), return (h, w, c);
+    If the input_order is (h, w, c), return as it is.
+
+    Args:
+        img (ndarray): Input image.
+        input_order (str): Whether the input order is 'HWC' or 'CHW'.
+            If the input image shape is (h, w), input_order will not have
+            effects. Default: 'HWC'.
+
+    Returns:
+        ndarray: reordered image.
+    """
+
+    if input_order not in ['HWC', 'CHW']:
+        raise ValueError(
+            f'Wrong input_order {input_order}. Supported input_orders are '
+            "'HWC' and 'CHW'")
+    if len(img.shape) == 2:
+        img = img[..., None]
+        return img
+    if input_order == 'CHW':
+        img = img.transpose(1, 2, 0)
+    return img
+
+def _convert_input_type_range(img):
+    """Convert the type and range of the input image to float32, [0, 1].
+
+    Required by bgr2ycbcr below. Accepts np.uint8 images in [0, 255] and
+    np.float32 images in [0, 1].
+    """
+    img_type = img.dtype
+    img = img.astype(np.float32)
+    if img_type == np.float32:
+        pass
+    elif img_type == np.uint8:
+        img /= 255.
+    else:
+        raise TypeError('The img type should be np.float32 or np.uint8, '
+                        f'but got {img_type}')
+    return img
+
+def _convert_output_type_range(img, dst_type):
+    """Convert a [0, 255] float image back to the requested output type.
+
+    Required by bgr2ycbcr below. Rounds when converting to np.uint8 and
+    rescales to [0, 1] when converting to np.float32.
+    """
+    if dst_type not in (np.uint8, np.float32):
+        raise TypeError('The dst_type should be np.float32 or np.uint8, '
+                        f'but got {dst_type}')
+    if dst_type == np.uint8:
+        img = img.round()
+    else:
+        img /= 255.
+    return img.astype(dst_type)
+
+def bgr2ycbcr(img, y_only=False):
+    """Convert a BGR image to YCbCr image.
+
+    The bgr version of rgb2ycbcr.
+    It implements the ITU-R BT.601 conversion for standard-definition
+    television. See more details in
+    https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
+
+    It differs from a similar function in cv2.cvtColor: `BGR <-> YCrCb`.
+    In OpenCV, it implements a JPEG conversion. See more details in
+    https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
+
+    Args:
+        img (ndarray): The input image. It accepts:
+            1. np.uint8 type with range [0, 255];
+            2. np.float32 type with range [0, 1].
+        y_only (bool): Whether to only return Y channel. Default: False.
+
+    Returns:
+        ndarray: The converted YCbCr image. The output image has the same type
+            and range as input image.
+    """
+    img_type = img.dtype
+    img = _convert_input_type_range(img)
+    if y_only:
+        out_img = np.dot(img, [24.966, 128.553, 65.481]) + 16.0
+    else:
+        out_img = np.matmul(
+            img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786],
+                  [65.481, -37.797, 112.0]]) + [16, 128, 128]
+    out_img = _convert_output_type_range(out_img, img_type)
+    return out_img
+
+def to_y_channel(img):
+    """Change to Y channel of YCbCr.
+
+    Args:
+        img (ndarray): Images with range [0, 255].
+
+    Returns:
+        (ndarray): Images with range [0, 255] (float type) without round.
+    """
+    img = img.astype(np.float32) / 255.
+    if img.ndim == 3 and img.shape[2] == 3:
+        img = bgr2ycbcr(img, y_only=True)
+        img = img[..., None]
+    return img * 255.
diff --git a/ppgan/metric/psnr_ssim.py b/ppgan/metric/psnr_ssim.py
new file mode 100644
index 0000000000000..65cfde310cc1e
--- /dev/null
+++ b/ppgan/metric/psnr_ssim.py
@@ -0,0 +1,137 @@
+import cv2
+import numpy as np
+
+from .metric_util import reorder_image, to_y_channel
+
+
+def calculate_psnr(img1,
+                   img2,
+                   crop_border,
+                   input_order='HWC',
+                   test_y_channel=False):
+    """Calculate PSNR (Peak Signal-to-Noise Ratio).
+
+    Ref: https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio
+
+    Args:
+        img1 (ndarray): Images with range [0, 255].
+        img2 (ndarray): Images with range [0, 255].
+        crop_border (int): Cropped pixels in each edge of an image. These
+            pixels are not involved in the PSNR calculation.
+        input_order (str): Whether the input order is 'HWC' or 'CHW'.
+            Default: 'HWC'.
+        test_y_channel (bool): Test on Y channel of YCbCr. Default: False.
+
+    Returns:
+        float: psnr result.
+    """
+
+    assert img1.shape == img2.shape, (
+        f'Image shapes are different: {img1.shape}, {img2.shape}.')
+    if input_order not in ['HWC', 'CHW']:
+        raise ValueError(
+            f'Wrong input_order {input_order}. Supported input_orders are '
+            '"HWC" and "CHW"')
+    img1 = reorder_image(img1, input_order=input_order)
+    img2 = reorder_image(img2, input_order=input_order)
+
+    if crop_border != 0:
+        img1 = img1[crop_border:-crop_border, crop_border:-crop_border, ...]
+        img2 = img2[crop_border:-crop_border, crop_border:-crop_border, ...]
+
+    if test_y_channel:
+        img1 = to_y_channel(img1)
+        img2 = to_y_channel(img2)
+
+    mse = np.mean((img1 - img2)**2)
+    if mse == 0:
+        return float('inf')
+    return 20. * np.log10(255. / np.sqrt(mse))
+
+
+def _ssim(img1, img2):
+    """Calculate SSIM (structural similarity) for one channel images.
+
+    It is called by func:`calculate_ssim`.
+
+    Args:
+        img1 (ndarray): Images with range [0, 255] with order 'HWC'.
+        img2 (ndarray): Images with range [0, 255] with order 'HWC'.
+
+    Returns:
+        float: ssim result.
+    """
+
+    C1 = (0.01 * 255)**2
+    C2 = (0.03 * 255)**2
+
+    img1 = img1.astype(np.float64)
+    img2 = img2.astype(np.float64)
+    kernel = cv2.getGaussianKernel(11, 1.5)
+    window = np.outer(kernel, kernel.transpose())
+
+    mu1 = cv2.filter2D(img1, -1, window)[5:-5, 5:-5]
+    mu2 = cv2.filter2D(img2, -1, window)[5:-5, 5:-5]
+    mu1_sq = mu1**2
+    mu2_sq = mu2**2
+    mu1_mu2 = mu1 * mu2
+    sigma1_sq = cv2.filter2D(img1**2, -1, window)[5:-5, 5:-5] - mu1_sq
+    sigma2_sq = cv2.filter2D(img2**2, -1, window)[5:-5, 5:-5] - mu2_sq
+    sigma12 = cv2.filter2D(img1 * img2, -1, window)[5:-5, 5:-5] - mu1_mu2
+
+    ssim_map = ((2 * mu1_mu2 + C1) *
+                (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) *
+                                       (sigma1_sq + sigma2_sq + C2))
+    return ssim_map.mean()
+
+
+def calculate_ssim(img1,
+                   img2,
+                   crop_border,
+                   input_order='HWC',
+                   test_y_channel=False):
+    """Calculate SSIM (structural similarity).
+
+    Ref:
+    Image quality assessment: From error visibility to structural similarity
+
+    The results are the same as that of the official released MATLAB code in
+    https://ece.uwaterloo.ca/~z70wang/research/ssim/.
+
+    For three-channel images, SSIM is calculated for each channel and then
+    averaged.
+
+    Args:
+        img1 (ndarray): Images with range [0, 255].
+        img2 (ndarray): Images with range [0, 255].
+        crop_border (int): Cropped pixels in each edge of an image. These
+            pixels are not involved in the SSIM calculation.
+        input_order (str): Whether the input order is 'HWC' or 'CHW'.
+            Default: 'HWC'.
+        test_y_channel (bool): Test on Y channel of YCbCr. Default: False.
+
+    Returns:
+        float: ssim result.
+    """
+
+    assert img1.shape == img2.shape, (
+        f'Image shapes are different: {img1.shape}, {img2.shape}.')
+    if input_order not in ['HWC', 'CHW']:
+        raise ValueError(
+            f'Wrong input_order {input_order}. Supported input_orders are '
+            '"HWC" and "CHW"')
+    img1 = reorder_image(img1, input_order=input_order)
+    img2 = reorder_image(img2, input_order=input_order)
+
+    if crop_border != 0:
+        img1 = img1[crop_border:-crop_border, crop_border:-crop_border, ...]
+        img2 = img2[crop_border:-crop_border, crop_border:-crop_border, ...]
+ + if test_y_channel: + img1 = to_y_channel(img1) + img2 = to_y_channel(img2) + + ssims = [] + for i in range(img1.shape[2]): + ssims.append(_ssim(img1[..., i], img2[..., i])) + return np.array(ssims).mean() diff --git a/ppgan/models/backbones/__init__.py b/ppgan/models/backbones/__init__.py new file mode 100644 index 0000000000000..c876f93de18a3 --- /dev/null +++ b/ppgan/models/backbones/__init__.py @@ -0,0 +1 @@ +from .resnet_backbone import resnet18, resnet34, resnet50, resnet101, resnet152 \ No newline at end of file diff --git a/applications/DeOldify/resnet_backbone.py b/ppgan/models/backbones/resnet_backbone.py similarity index 100% rename from applications/DeOldify/resnet_backbone.py rename to ppgan/models/backbones/resnet_backbone.py diff --git a/applications/DeOldify/model.py b/ppgan/models/generators/deoldify.py similarity index 96% rename from applications/DeOldify/model.py rename to ppgan/models/generators/deoldify.py index 9f97ed8667a70..b7f875364dee3 100644 --- a/applications/DeOldify/model.py +++ b/ppgan/models/generators/deoldify.py @@ -3,10 +3,9 @@ import paddle.nn as nn import paddle.nn.functional as F -from resnet_backbone import resnet34, resnet101 -from hook import hook_outputs, model_sizes, dummy_eval -from spectral_norm import Spectralnorm -from paddle import fluid +from .hook import hook_outputs, model_sizes, dummy_eval +from ..backbones import resnet34, resnet101 +from ...modules.nn import Spectralnorm class SequentialEx(nn.Layer): @@ -206,7 +205,7 @@ def forward(self, up_in): return self.conv(cat_x) -class UnetBlockDeep(paddle.fluid.Layer): +class UnetBlockDeep(nn.Layer): "A quasi-UNet block, using `PixelShuffle_ICNR upsampling`." def __init__( @@ -319,7 +318,7 @@ def conv_layer(ni: int, return nn.Sequential(*layers) -class CustomPixelShuffle_ICNR(paddle.fluid.Layer): +class CustomPixelShuffle_ICNR(nn.Layer): "Upsample by `scale` from `ni` filters to `nf` (default `ni`), using `nn.PixelShuffle`, `icnr` init, and `weight_norm`." def __init__(self, @@ -349,7 +348,7 @@ def forward(self, x): return self.blur(self.pad(x)) if self.blur else x -class MergeLayer(paddle.fluid.Layer): +class MergeLayer(nn.Layer): "Merge a shortcut with the result of the module by adding them or concatenating thme if `dense=True`." 
def __init__(self, dense: bool = False): @@ -379,7 +378,7 @@ def res_block(nf, MergeLayer(dense)) -class SigmoidRange(paddle.fluid.Layer): +class SigmoidRange(nn.Layer): "Sigmoid module with range `(low,x_max)`" def __init__(self, low, high): @@ -395,13 +394,13 @@ def sigmoid_range(x, low, high): return F.sigmoid(x) * (high - low) + low -class PixelShuffle(paddle.fluid.Layer): +class PixelShuffle(nn.Layer): def __init__(self, upscale_factor): super(PixelShuffle, self).__init__() self.upscale_factor = upscale_factor def forward(self, x): - return paddle.fluid.layers.pixel_shuffle(x, self.upscale_factor) + return F.pixel_shuffle(x, self.upscale_factor) class ReplicationPad2d(nn.Layer): @@ -410,7 +409,7 @@ def __init__(self, size): self.size = size def forward(self, x): - return paddle.fluid.layers.pad2d(x, self.size, mode="edge") + return F.pad2d(x, self.size, mode="edge") def conv1d(ni: int, diff --git a/applications/DeOldify/hook.py b/ppgan/models/generators/hook.py similarity index 100% rename from applications/DeOldify/hook.py rename to ppgan/models/generators/hook.py diff --git a/applications/DeepRemaster/remasternet.py b/ppgan/models/generators/remaster.py similarity index 100% rename from applications/DeepRemaster/remasternet.py rename to ppgan/models/generators/remaster.py diff --git a/applications/RealSR/sr_model.py b/ppgan/models/generators/rrdb_net.py similarity index 91% rename from applications/RealSR/sr_model.py rename to ppgan/models/generators/rrdb_net.py index c8a730bea00c3..008da739e38e1 100644 --- a/applications/RealSR/sr_model.py +++ b/ppgan/models/generators/rrdb_net.py @@ -3,6 +3,8 @@ import paddle.nn as nn import paddle.nn.functional as F +from .builder import GENERATORS + class ResidualDenseBlock_5C(nn.Layer): def __init__(self, nf=64, gc=32, bias=True): @@ -15,6 +17,7 @@ def __init__(self, nf=64, gc=32, bias=True): self.conv5 = nn.Conv2d(nf + 4 * gc, nf, 3, 1, 1, bias_attr=bias) self.lrelu = nn.LeakyReLU(negative_slope=0.2) + def forward(self, x): x1 = self.lrelu(self.conv1(x)) x2 = self.lrelu(self.conv2(paddle.concat((x, x1), 1))) @@ -26,6 +29,7 @@ def forward(self, x): class RRDB(nn.Layer): '''Residual in Residual Dense Block''' + def __init__(self, nf, gc=32): super(RRDB, self).__init__() self.RDB1 = ResidualDenseBlock_5C(nf, gc) @@ -38,7 +42,6 @@ def forward(self, x): out = self.RDB3(out) return out * 0.2 + x - def make_layer(block, n_layers): layers = [] for _ in range(n_layers): @@ -46,6 +49,7 @@ def make_layer(block, n_layers): return nn.Sequential(*layers) +@GENERATORS.register() class RRDBNet(nn.Layer): def __init__(self, in_nc, out_nc, nf, nb, gc=32): super(RRDBNet, self).__init__() @@ -67,10 +71,8 @@ def forward(self, x): trunk = self.trunk_conv(self.RRDB_trunk(fea)) fea = fea + trunk - fea = self.lrelu( - self.upconv1(F.interpolate(fea, scale_factor=2, mode='nearest'))) - fea = self.lrelu( - self.upconv2(F.interpolate(fea, scale_factor=2, mode='nearest'))) + fea = self.lrelu(self.upconv1(F.interpolate(fea, scale_factor=2, mode='nearest'))) + fea = self.lrelu(self.upconv2(F.interpolate(fea, scale_factor=2, mode='nearest'))) out = self.conv_last(self.lrelu(self.HRconv(fea))) return out diff --git a/ppgan/models/sr_model.py b/ppgan/models/sr_model.py new file mode 100644 index 0000000000000..bd255a7397b66 --- /dev/null +++ b/ppgan/models/sr_model.py @@ -0,0 +1,247 @@ +from collections import OrderedDict +import paddle +import paddle.nn as nn +# import torch.nn.parallel as P +# from torch.nn.parallel import DataParallel, DistributedDataParallel +# import 
models.networks as networks
+# import models.lr_scheduler as lr_scheduler
+from .generators.builder import build_generator
+from .discriminators.builder import build_discriminator
+from ..solver import build_optimizer
+from .base_model import BaseModel
+from .losses import GANLoss
+from .builder import MODELS
+
+from collections import OrderedDict
+from copy import deepcopy
+from os import path as osp
+
+
+@MODELS.register()
+class SRModel(BaseModel):
+    """Base SR model for single image super-resolution."""
+
+    def __init__(self, cfg):
+        super(SRModel, self).__init__(cfg)
+
+        self.model_names = ['G']
+
+        self.netG = build_generator(cfg.model.generator)
+        self.visual_names = ['lq', 'output', 'gt']
+
+        self.loss_names = ['l_total']
+        # define network
+        # self.net_g = networks.define_net_g(deepcopy(opt['network_g']))
+        # self.net_g = self.model_to_device(self.net_g)
+        # self.print_network(self.net_g)
+
+        # load pretrained models
+        # load_path = self.opt['path'].get('pretrain_model_g', None)
+        # if load_path is not None:
+        #     self.load_network(self.net_g, load_path,
+        #                       self.opt['path']['strict_load'])
+        self.optimizers = []
+        if self.isTrain:
+            self.criterionL1 = paddle.nn.L1Loss()
+
+            self.build_lr_scheduler()
+            self.optimizer_G = build_optimizer(
+                cfg.optimizer,
+                self.lr_scheduler,
+                parameter_list=self.netG.parameters())
+            self.optimizers.append(self.optimizer_G)
+            # self.optimizer_D = build_optimizer(
+            #     opt.optimizer,
+            #     self.lr_scheduler,
+            #     parameter_list=self.netD.parameters())
+
+        # self.init_training_settings()
+
+    # def init_training_settings(self):
+    #     self.net_g.train()
+    #     train_opt = self.opt['train']
+
+    #     # define losses
+    #     if train_opt.get('pixel_opt'):
+    #         pixel_type = train_opt['pixel_opt'].pop('type')
+    #         cri_pix_cls = getattr(loss_module, pixel_type)
+    #         self.cri_pix = cri_pix_cls(**train_opt['pixel_opt']).to(
+    #             self.device)
+    #     else:
+    #         self.cri_pix = None
+
+    #     if train_opt.get('perceptual_opt'):
+    #         percep_type = train_opt['perceptual_opt'].pop('type')
+    #         cri_perceptual_cls = getattr(loss_module, percep_type)
+    #         self.cri_perceptual = cri_perceptual_cls(
+    #             **train_opt['perceptual_opt']).to(self.device)
+    #     else:
+    #         self.cri_perceptual = None
+
+    #     if self.cri_pix is None and self.cri_perceptual is None:
+    #         raise ValueError('Both pixel and perceptual losses are None.')
+
+    #     # set up optimizers and schedulers
+    #     self.setup_optimizers()
+    #     self.setup_schedulers()
+
+    # def setup_optimizers(self):
+    #     train_opt = self.opt['train']
+    #     optim_params = []
+    #     for k, v in self.net_g.named_parameters():
+    #         if v.requires_grad:
+    #             optim_params.append(v)
+    #         else:
+    #             logger = get_root_logger()
+    #             logger.warning(f'Params {k} will not be optimized.')
+
+    #     optim_type = train_opt['optim_g'].pop('type')
+    #     if optim_type == 'Adam':
+    #         self.optimizer_g = torch.optim.Adam(optim_params,
+    #                                             **train_opt['optim_g'])
+    #     else:
+    #         raise NotImplementedError(
+    #             f'optimizer {optim_type} is not supported yet.')
+    #     self.optimizers.append(self.optimizer_g)
+
+    def set_input(self, input):
+        self.lq = paddle.to_tensor(input['lq'])
+        if 'gt' in input:
+            self.gt = paddle.to_tensor(input['gt'])
+        self.image_paths = input['lq_path']
+        # self.lq = data['lq'].to(self.device)
+        # if 'gt' in data:
+        #     self.gt = data['gt'].to(self.device)
+
+    def forward(self):
+        pass
+
+    def test(self):
+        """Forward function used in test time.
+ """ + with paddle.no_grad(): + self.output = self.netG(self.lq) + + def optimize_parameters(self): + self.optimizer_G.clear_grad() + self.output = self.netG(self.lq) + + l_total = 0 + loss_dict = OrderedDict() + # pixel loss + if self.criterionL1: + l_pix = self.criterionL1(self.output, self.gt) + l_total += l_pix + loss_dict['l_pix'] = l_pix + # perceptual loss + # if self.cri_perceptual: + # l_percep, l_style = self.cri_perceptual(self.output, self.gt) + # if l_percep is not None: + # l_total += l_percep + # loss_dict['l_percep'] = l_percep + # if l_style is not None: + # l_total += l_style + # loss_dict['l_style'] = l_style + + l_total.backward() + self.loss_l_total = l_total + self.optimizer_G.step() + + # self.log_dict = self.reduce_loss_dict(loss_dict) + # def get_current_visuals(self): + # out_dict = OrderedDict() + # out_dict['lq'] = self.lq.detach().cpu() + # out_dict['result'] = self.output.detach().cpu() + # if hasattr(self, 'gt'): + # out_dict['gt'] = self.gt.detach().cpu() + # return out_dict + + # def test(self): + # self.net_g.eval() + # with torch.no_grad(): + # self.output = self.net_g(self.lq) + # self.net_g.train() + + # def dist_validation(self, dataloader, current_iter, tb_logger, save_img): + # logger = get_root_logger() + # logger.info('Only support single GPU validation.') + # self.nondist_validation(dataloader, current_iter, tb_logger, save_img) + + # def nondist_validation(self, dataloader, current_iter, tb_logger, + # save_img): + # dataset_name = dataloader.dataset.opt['name'] + # with_metrics = self.opt['val'].get('metrics') is not None + # if with_metrics: + # self.metric_results = { + # metric: 0 + # for metric in self.opt['val']['metrics'].keys() + # } + # pbar = ProgressBar(len(dataloader)) + + # for idx, val_data in enumerate(dataloader): + # img_name = osp.splitext(osp.basename(val_data['lq_path'][0]))[0] + # self.feed_data(val_data) + # self.test() + + # visuals = self.get_current_visuals() + # sr_img = tensor2img([visuals['result']]) + # if 'gt' in visuals: + # gt_img = tensor2img([visuals['gt']]) + # del self.gt + + # # tentative for out of GPU memory + # del self.lq + # del self.output + # torch.cuda.empty_cache() + + # if save_img: + # if self.opt['is_train']: + # save_img_path = osp.join(self.opt['path']['visualization'], + # img_name, + # f'{img_name}_{current_iter}.png') + # else: + # if self.opt['val']['suffix']: + # save_img_path = osp.join( + # self.opt['path']['visualization'], dataset_name, + # f'{img_name}_{self.opt["val"]["suffix"]}.png') + # else: + # save_img_path = osp.join( + # self.opt['path']['visualization'], dataset_name, + # f'{img_name}_{self.opt["name"]}.png') + # mmcv.imwrite(sr_img, save_img_path) + + # if with_metrics: + # # calculate metrics + # opt_metric = deepcopy(self.opt['val']['metrics']) + # for name, opt_ in opt_metric.items(): + # metric_type = opt_.pop('type') + # self.metric_results[name] += getattr( + # metric_module, metric_type)(sr_img, gt_img, **opt_) + # pbar.update(f'Test {img_name}') + + # if with_metrics: + # for metric in self.metric_results.keys(): + # self.metric_results[metric] /= (idx + 1) + + # self._log_validation_metric_values(current_iter, dataset_name, + # tb_logger) + + # def _log_validation_metric_values(self, current_iter, dataset_name, + # tb_logger): + # log_str = f'Validation {dataset_name}\n' + # for metric, value in self.metric_results.items(): + # log_str += f'\t # {metric}: {value:.4f}\n' + # logger = get_root_logger() + # logger.info(log_str) + # if tb_logger: + # for metric, value in 
self.metric_results.items(): + # tb_logger.add_scalar(f'metrics/{metric}', value, current_iter) + + + # def save(self, epoch, current_iter): + # self.save_network(self.net_g, 'net_g', current_iter) + # self.save_training_state(epoch, current_iter) diff --git a/ppgan/models/srgan_model.py b/ppgan/models/srgan_model.py new file mode 100644 index 0000000000000..762e44af89aac --- /dev/null +++ b/ppgan/models/srgan_model.py @@ -0,0 +1,388 @@ +# import logging +from collections import OrderedDict +import paddle +import paddle.nn as nn +# import torch.nn.parallel as P +# from torch.nn.parallel import DataParallel, DistributedDataParallel +# import models.networks as networks +# import models.lr_scheduler as lr_scheduler +from .generators.builder import build_generator +from .base_model import BaseModel +from .losses import GANLoss +from .builder import MODELS +# logger = logging.getLogger('base') + +@MODELS.register() +class SRGANModel(BaseModel): + def __init__(self, cfg): + super(SRGANModel, self).__init__(cfg) + # if opt['dist']: + # self.rank = torch.distributed.get_rank() + # else: + # self.rank = -1 # non dist training + # train_opt = opt['train'] + + # define networks and load pretrained models + self.model_names = ['G'] + + self.netG = build_generator(cfg.model.generator) + self.visual_names = ['LQ', 'GT', 'fake_H'] + # self.netG = networks.define_G(opt).to(self.device) + # if opt['dist']: + # self.netG = DistributedDataParallel(self.netG, device_ids=[torch.cuda.current_device()]) + # else: + # self.netG = DataParallel(self.netG) + + if False:#self.is_train: + self.netD = build_discriminator(cfg.model.discriminator) + # if opt['dist']: + # self.netD = DistributedDataParallel(self.netD, + # device_ids=[torch.cuda.current_device()]) + # else: + # self.netD = DataParallel(self.netD) + + self.netG.train() + self.netD.train() + + # define losses, optimizer and scheduler + # if self.is_train: + # pass + # G pixel loss + # if train_opt['pixel_weight'] > 0: + # l_pix_type = train_opt['pixel_criterion'] + # if l_pix_type == 'l1': + # self.cri_pix = nn.L1Loss().to(self.device) + # elif l_pix_type == 'l2': + # self.cri_pix = nn.MSELoss().to(self.device) + # else: + # raise NotImplementedError('Loss type [{:s}] not recognized.'.format(l_pix_type)) + # self.l_pix_w = train_opt['pixel_weight'] + # else: + # # logger.info('Remove pixel loss.') + # self.cri_pix = None + + # # G feature loss + # if train_opt['feature_weight'] > 0: + # l_fea_type = train_opt['feature_criterion'] + # if l_fea_type == 'l1': + # self.cri_fea = nn.L1Loss().to(self.device) + # elif l_fea_type == 'l2': + # self.cri_fea = nn.MSELoss().to(self.device) + # else: + # raise NotImplementedError('Loss type [{:s}] not recognized.'.format(l_fea_type)) + # self.l_fea_w = train_opt['feature_weight'] + # else: + # logger.info('Remove feature loss.') + # self.cri_fea = None + # if self.cri_fea: # load VGG perceptual loss + # self.netF = networks.define_F(opt, use_bn=False).to(self.device) + # if opt['dist']: + # self.netF = DistributedDataParallel(self.netF, + # device_ids=[torch.cuda.current_device()]) + # else: + # self.netF = DataParallel(self.netF) + + # # GD gan loss + # self.cri_gan = GANLoss(train_opt['gan_type'], 1.0, 0.0).to(self.device) + # self.l_gan_w = train_opt['gan_weight'] + # # D_update_ratio and D_init_iters + # self.D_update_ratio = train_opt['D_update_ratio'] if train_opt['D_update_ratio'] else 1 + # self.D_init_iters = train_opt['D_init_iters'] if train_opt['D_init_iters'] else 0 + + # # optimizers + # # G + # wd_G = 
train_opt['weight_decay_G'] if train_opt['weight_decay_G'] else 0 + # optim_params = [] + # for k, v in self.netG.named_parameters(): # can optimize for a part of the model + # if v.requires_grad: + # optim_params.append(v) + # else: + # if self.rank <= 0: + # logger.warning('Params [{:s}] will not optimize.'.format(k)) + # self.optimizer_G = torch.optim.Adam(optim_params, lr=train_opt['lr_G'], + # weight_decay=wd_G, + # betas=(train_opt['beta1_G'], train_opt['beta2_G'])) + # self.optimizers.append(self.optimizer_G) + # # D + # wd_D = train_opt['weight_decay_D'] if train_opt['weight_decay_D'] else 0 + # self.optimizer_D = torch.optim.Adam(self.netD.parameters(), lr=train_opt['lr_D'], + # weight_decay=wd_D, + # betas=(train_opt['beta1_D'], train_opt['beta2_D'])) + # self.optimizers.append(self.optimizer_D) + + # # schedulers + # if train_opt['lr_scheme'] == 'MultiStepLR': + # for optimizer in self.optimizers: + # self.schedulers.append( + # lr_scheduler.MultiStepLR_Restart(optimizer, train_opt['lr_steps'], + # restarts=train_opt['restarts'], + # weights=train_opt['restart_weights'], + # gamma=train_opt['lr_gamma'], + # clear_state=train_opt['clear_state'])) + # elif train_opt['lr_scheme'] == 'CosineAnnealingLR_Restart': + # for optimizer in self.optimizers: + # self.schedulers.append( + # lr_scheduler.CosineAnnealingLR_Restart( + # optimizer, train_opt['T_period'], eta_min=train_opt['eta_min'], + # restarts=train_opt['restarts'], weights=train_opt['restart_weights'])) + # else: + # raise NotImplementedError('MultiStepLR learning rate scheme is enough.') + + # self.log_dict = OrderedDict() + + # self.print_network() # print network + # self.load() # load G and D if needed + + def set_input(self, input): + """Unpack input data from the dataloader and perform necessary pre-processing steps. + + Parameters: + input (dict): include the data itself and its metadata information. + + The option 'direction' can be used to swap images in domain A and domain B. 
+ """ + + # AtoB = self.opt.dataset.train.direction == 'AtoB' + if 'A' in input: + self.LQ = paddle.to_tensor(input['A']) + if 'B' in input: + self.GT = paddle.to_tensor(input['B']) + if 'A_paths' in input: + self.image_paths = input['A_paths'] + + # def feed_data(self, data, need_GT=True): + # self.var_L = data['LQ'].to(self.device) # LQ + # if need_GT: + # self.var_H = data['GT'].to(self.device) # GT + # input_ref = data['ref'] if 'ref' in data else data['GT'] + # self.var_ref = input_ref.to(self.device) + + def forward(self): + self.fake_H = self.netG(self.LQ) + + def optimize_parameters(self, step): + pass + # # G + # for p in self.netD.parameters(): + # p.requires_grad = False + + # self.optimizer_G.zero_grad() + # self.fake_H = self.netG(self.var_L.detach()) + + # l_g_total = 0 + # if step % self.D_update_ratio == 0 and step > self.D_init_iters: + # if self.cri_pix: # pixel loss + # l_g_pix = self.l_pix_w * self.cri_pix(self.fake_H, self.var_H) + # l_g_total += l_g_pix + # if self.cri_fea: # feature loss + # real_fea = self.netF(self.var_H).detach() + # fake_fea = self.netF(self.fake_H) + # l_g_fea = self.l_fea_w * self.cri_fea(fake_fea, real_fea) + # l_g_total += l_g_fea + + # pred_g_fake = self.netD(self.fake_H) + # if self.opt['train']['gan_type'] == 'gan': + # l_g_gan = self.l_gan_w * self.cri_gan(pred_g_fake, True) + # elif self.opt['train']['gan_type'] == 'ragan': + # pred_d_real = self.netD(self.var_ref).detach() + # l_g_gan = self.l_gan_w * ( + # self.cri_gan(pred_d_real - torch.mean(pred_g_fake), False) + + # self.cri_gan(pred_g_fake - torch.mean(pred_d_real), True)) / 2 + # l_g_total += l_g_gan + + # l_g_total.backward() + # self.optimizer_G.step() + + # # D + # for p in self.netD.parameters(): + # p.requires_grad = True + + # self.optimizer_D.zero_grad() + # l_d_total = 0 + # pred_d_real = self.netD(self.var_ref) + # pred_d_fake = self.netD(self.fake_H.detach()) # detach to avoid BP to G + # if self.opt['train']['gan_type'] == 'gan': + # l_d_real = self.cri_gan(pred_d_real, True) + # l_d_fake = self.cri_gan(pred_d_fake, False) + # l_d_total = l_d_real + l_d_fake + # elif self.opt['train']['gan_type'] == 'ragan': + # l_d_real = self.cri_gan(pred_d_real - torch.mean(pred_d_fake), True) + # l_d_fake = self.cri_gan(pred_d_fake - torch.mean(pred_d_real), False) + # l_d_total = (l_d_real + l_d_fake) / 2 + + # l_d_total.backward() + # self.optimizer_D.step() + + # # set log + # if step % self.D_update_ratio == 0 and step > self.D_init_iters: + # if self.cri_pix: + # self.log_dict['l_g_pix'] = l_g_pix.item() + # # self.log_dict['l_g_mean_color'] = l_g_mean_color.item() + # if self.cri_fea: + # self.log_dict['l_g_fea'] = l_g_fea.item() + # self.log_dict['l_g_gan'] = l_g_gan.item() + + # self.log_dict['l_d_real'] = l_d_real.item() + # self.log_dict['l_d_fake'] = l_d_fake.item() + # self.log_dict['D_real'] = torch.mean(pred_d_real.detach()) + # self.log_dict['D_fake'] = torch.mean(pred_d_fake.detach()) + + # def test(self): + # self.netG.eval() + # with torch.no_grad(): + # self.fake_H = self.netG(self.var_L) + # self.netG.train() + + # def back_projection(self): + # lr_error = self.var_L - torch.nn.functional.interpolate(self.fake_H, + # scale_factor=1/self.opt['scale'], + # mode='bicubic', + # align_corners=False) + # us_error = torch.nn.functional.interpolate(lr_error, + # scale_factor=self.opt['scale'], + # mode='bicubic', + # align_corners=False) + # self.fake_H += self.opt['back_projection_lamda'] * us_error + # torch.clamp(self.fake_H, 0, 1) + + # def test_chop(self): + # 
self.netG.eval() + # with torch.no_grad(): + # self.fake_H = self.forward_chop(self.var_L) + # self.netG.train() + + # def forward_chop(self, *args, shave=10, min_size=160000): + # # scale = 1 if self.input_large else self.scale[self.idx_scale] + # scale = self.opt['scale'] + # n_GPUs = min(torch.cuda.device_count(), 4) + # args = [a.squeeze().unsqueeze(0) for a in args] + + # # height, width + # h, w = args[0].size()[-2:] + # # print('len(args)', len(args)) + # # print('args[0].size()', args[0].size()) + + # top = slice(0, h//2 + shave) + # bottom = slice(h - h//2 - shave, h) + # left = slice(0, w//2 + shave) + # right = slice(w - w//2 - shave, w) + # x_chops = [torch.cat([ + # a[..., top, left], + # a[..., top, right], + # a[..., bottom, left], + # a[..., bottom, right] + # ]) for a in args] + # # print('len(x_chops)', len(x_chops)) + # # print('x_chops[0].size()', x_chops[0].size()) + + # y_chops = [] + # if h * w < 4 * min_size: + # for i in range(0, 4, n_GPUs): + # x = [x_chop[i:(i + n_GPUs)] for x_chop in x_chops] + # # print(len(x)) + # # print(x[0].size()) + # y = P.data_parallel(self.netG, *x, range(n_GPUs)) + # if not isinstance(y, list): y = [y] + # if not y_chops: + # y_chops = [[c for c in _y.chunk(n_GPUs, dim=0)] for _y in y] + # else: + # for y_chop, _y in zip(y_chops, y): + # y_chop.extend(_y.chunk(n_GPUs, dim=0)) + # else: + + # # print(x_chops[0].size()) + # for p in zip(*x_chops): + # # print('len(p)', len(p)) + # # print('p[0].size()', p[0].size()) + # y = self.forward_chop(*p, shave=shave, min_size=min_size) + # if not isinstance(y, list): y = [y] + # if not y_chops: + # y_chops = [[_y] for _y in y] + # else: + # for y_chop, _y in zip(y_chops, y): y_chop.append(_y) + + # h *= scale + # w *= scale + # top = slice(0, h//2) + # bottom = slice(h - h//2, h) + # bottom_r = slice(h//2 - h, None) + # left = slice(0, w//2) + # right = slice(w - w//2, w) + # right_r = slice(w//2 - w, None) + + # # batch size, number of color channels + # b, c = y_chops[0][0].size()[:-2] + # y = [y_chop[0].new(b, c, h, w) for y_chop in y_chops] + # for y_chop, _y in zip(y_chops, y): + # _y[..., top, left] = y_chop[0][..., top, left] + # _y[..., top, right] = y_chop[1][..., top, right_r] + # _y[..., bottom, left] = y_chop[2][..., bottom_r, left] + # _y[..., bottom, right] = y_chop[3][..., bottom_r, right_r] + + # if len(y) == 1: + # y = y[0] + + # return y + + # def get_current_log(self): + # return self.log_dict + + # def get_current_visuals(self, need_GT=True): + # out_dict = OrderedDict() + # out_dict['LQ'] = self.var_L.detach()[0].float().cpu() + # out_dict['SR'] = self.fake_H.detach()[0].float().cpu() + # if need_GT: + # out_dict['GT'] = self.var_H.detach()[0].float().cpu() + # return out_dict + + # def print_network(self): + # # Generator + # s, n = self.get_network_description(self.netG) + # if isinstance(self.netG, nn.DataParallel) or isinstance(self.netG, DistributedDataParallel): + # net_struc_str = '{} - {}'.format(self.netG.__class__.__name__, + # self.netG.module.__class__.__name__) + # else: + # net_struc_str = '{}'.format(self.netG.__class__.__name__) + # if self.rank <= 0: + # logger.info('Network G structure: {}, with parameters: {:,d}'.format(net_struc_str, n)) + # logger.info(s) + # if self.is_train: + # # Discriminator + # s, n = self.get_network_description(self.netD) + # if isinstance(self.netD, nn.DataParallel) or isinstance(self.netD, + # DistributedDataParallel): + # net_struc_str = '{} - {}'.format(self.netD.__class__.__name__, + # self.netD.module.__class__.__name__) + # 
else: + # net_struc_str = '{}'.format(self.netD.__class__.__name__) + # if self.rank <= 0: + # logger.info('Network D structure: {}, with parameters: {:,d}'.format( + # net_struc_str, n)) + # logger.info(s) + + # if self.cri_fea: # F, Perceptual Network + # s, n = self.get_network_description(self.netF) + # if isinstance(self.netF, nn.DataParallel) or isinstance( + # self.netF, DistributedDataParallel): + # net_struc_str = '{} - {}'.format(self.netF.__class__.__name__, + # self.netF.module.__class__.__name__) + # else: + # net_struc_str = '{}'.format(self.netF.__class__.__name__) + # if self.rank <= 0: + # logger.info('Network F structure: {}, with parameters: {:,d}'.format( + # net_struc_str, n)) + # logger.info(s) + + # def load(self): + # load_path_G = self.opt['path']['pretrain_model_G'] + # if load_path_G is not None: + # logger.info('Loading model for G [{:s}] ...'.format(load_path_G)) + # self.load_network(load_path_G, self.netG, self.opt['path']['strict_load']) + # load_path_D = self.opt['path']['pretrain_model_D'] + # if self.opt['is_train'] and load_path_D is not None: + # logger.info('Loading model for D [{:s}] ...'.format(load_path_D)) + # self.load_network(load_path_D, self.netD, self.opt['path']['strict_load']) + + # def save(self, iter_step): + # self.save_network(self.netG, 'G', iter_step) + # self.save_network(self.netD, 'D', iter_step) diff --git a/ppgan/modules/nn.py b/ppgan/modules/nn.py index 9620877a44965..f867b7284efc5 100644 --- a/ppgan/modules/nn.py +++ b/ppgan/modules/nn.py @@ -69,21 +69,59 @@ def __call__(self, x, label): return out -# class BCEWithLogitsLoss(fluid.dygraph.Layer): -# def __init__(self, weight=None, reduction='mean'): -# if reduction not in ['sum', 'mean', 'none']: -# raise ValueError( -# "The value of 'reduction' in bce_loss should be 'sum', 'mean' or 'none', but " -# "received %s, which is not allowed." 
% reduction)
-
-#         super(BCEWithLogitsLoss, self).__init__()
-#         # self.weight = weight
-#         # self.reduction = reduction
-#         self.bce_loss = paddle.nn.BCELoss(weight, reduction)
-
-#     def forward(self, input, label):
-#         input = paddle.nn.functional.sigmoid(input, True)
-#         return self.bce_loss(input, label)
+class _SpectralNorm(paddle.nn.SpectralNorm):
+    def __init__(self,
+                 weight_shape,
+                 dim=0,
+                 power_iters=1,
+                 eps=1e-12,
+                 dtype='float32'):
+        super(_SpectralNorm, self).__init__(weight_shape, dim, power_iters, eps, dtype)
+
+    def forward(self, weight):
+        paddle.fluid.data_feeder.check_variable_and_dtype(weight, "weight", ['float32', 'float64'],
+                                 'SpectralNorm')
+        inputs = {'Weight': weight, 'U': self.weight_u, 'V': self.weight_v}
+        out = self._helper.create_variable_for_type_inference(self._dtype)
+        _power_iters = self._power_iters if self.training else 0
+        self._helper.append_op(
+            type="spectral_norm",
+            inputs=inputs,
+            outputs={"Out": out, },
+            attrs={
+                "dim": self._dim,
+                "power_iters": _power_iters,
+                "eps": self._eps,
+            })
+
+        return out
+
+
+class Spectralnorm(paddle.nn.Layer):
+
+    def __init__(self,
+                 layer,
+                 dim=0,
+                 power_iters=1,
+                 eps=1e-12,
+                 dtype='float32'):
+        super(Spectralnorm, self).__init__()
+        self.spectral_norm = _SpectralNorm(layer.weight.shape, dim, power_iters, eps, dtype)
+        self.dim = dim
+        self.power_iters = power_iters
+        self.eps = eps
+        self.layer = layer
+        weight = layer._parameters['weight']
+        del layer._parameters['weight']
+        self.weight_orig = self.create_parameter(weight.shape, dtype=weight.dtype)
+        self.weight_orig.set_value(weight)
+
+
+    def forward(self, x):
+        weight = self.spectral_norm(self.weight_orig)
+        self.layer.weight = weight
+        out = self.layer(x)
+        return out


 def initial_type(
diff --git a/ppgan/utils/video.py b/ppgan/utils/video.py
new file mode 100644
index 0000000000000..056e547e13bfc
--- /dev/null
+++ b/ppgan/utils/video.py
@@ -0,0 +1,43 @@
+import os
+import sys
+
+def video2frames(video_path, outpath, **kwargs):
+    def _dict2str(kwargs):
+        cmd_str = ''
+        for k, v in kwargs.items():
+            cmd_str += (' ' + str(k) + ' ' + str(v))
+        return cmd_str
+
+    ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
+    vid_name = video_path.split('/')[-1].split('.')[0]
+    out_full_path = os.path.join(outpath, vid_name)
+
+    if not os.path.exists(out_full_path):
+        os.makedirs(out_full_path)
+
+    # output frame pattern, e.g. <out_full_path>/00000000.png
+    outformat = out_full_path + '/%08d.png'
+
+    cmd = ffmpeg + [' -i ', video_path, ' -start_number ', ' 0 ', outformat]
+
+    cmd = ''.join(cmd) + _dict2str(kwargs)
+
+    if os.system(cmd) != 0:
+        raise RuntimeError('ffmpeg process video: {} error'.format(vid_name))
+
+    sys.stdout.flush()
+    return out_full_path
+
+
+def frames2video(frame_path, video_path, r):
+    ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
+    cmd = ffmpeg + [
+        ' -r ', r, ' -f ', ' image2 ', ' -i ', frame_path, ' -vcodec ',
+        ' libx264 ', ' -pix_fmt ', ' yuv420p ', ' -crf ', ' 16 ', video_path
+    ]
+    cmd = ''.join(cmd)
+
+    if os.system(cmd) != 0:
+        raise RuntimeError('ffmpeg process video: {} error'.format(video_path))
+
+    sys.stdout.flush()
\ No newline at end of file
diff --git a/ppgan/utils/visual.py b/ppgan/utils/visual.py
index 56639acb52b68..f6c46e2793ce1 100644
--- a/ppgan/utils/visual.py
+++ b/ppgan/utils/visual.py
@@ -2,7 +2,7 @@
 from PIL import Image
 
 
-def tensor2img(input_image, imtype=np.uint8):
+def tensor2img(input_image, min_max=(-1., 1.), imtype=np.uint8):
    """"Converts a Tensor array into a numpy image array.
Parameters: @@ -15,8 +15,8 @@ def tensor2img(input_image, imtype=np.uint8): image_numpy = image_numpy[0] if image_numpy.shape[0] == 1: # grayscale to RGB image_numpy = np.tile(image_numpy, (3, 1, 1)) - # image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0 # post-processing: tranpose and scaling - image_numpy = image_numpy.clip(0, 1) + image_numpy = image_numpy.clip(min_max[0], min_max[1]) + image_numpy = (image_numpy - min_max[0]) / (min_max[1] - min_max[0]) image_numpy = (np.transpose(image_numpy, (1, 2, 0))) * 255.0 # post-processing: tranpose and scaling else: # if it is a numpy array, do nothing image_numpy = input_image diff --git a/requirments.txt b/requirments.txt new file mode 100644 index 0000000000000..fa9cf06427ae8 --- /dev/null +++ b/requirments.txt @@ -0,0 +1 @@ +tqdm \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000000000..3d5adfc0d90f9 --- /dev/null +++ b/setup.py @@ -0,0 +1,49 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from setuptools import setup +from io import open + +with open('requirments.txt', encoding="utf-8-sig") as f: + requirements = f.readlines() + + +def readme(): + with open('doc/doc_en/whl_en.md', encoding="utf-8-sig") as f: + README = f.read() + return README + + +setup( + name='ppgan', + packages=['ppgan'], + include_package_data=True, + entry_points={"console_scripts": ["paddlegan= paddlegan.paddlegan:main"]}, + version='0.1.0', + install_requires=requirements, + license='Apache License 2.0', + description='Awesome GAN toolkits based on PaddlePaddle', + url='https://github.com/PaddlePaddle/PaddleGAN', + download_url='https://github.com/PaddlePaddle/PaddleGAN.git', + keywords=[ + 'gan paddlegan' + ], + classifiers=[ + 'Intended Audience :: Developers', 'Operating System :: OS Independent', + 'Natural Language :: Chinese (Simplified)', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', 'Topic :: Utilities' + ], ) \ No newline at end of file From a0a56e75e1d6b3a914502b1554c647c7d8386591 Mon Sep 17 00:00:00 2001 From: LielinJiang Date: Wed, 16 Sep 2020 12:05:30 +0000 Subject: [PATCH 3/6] rm unused code --- ppgan/models/srgan_model.py | 347 +----------------------------------- 1 file changed, 7 insertions(+), 340 deletions(-) diff --git a/ppgan/models/srgan_model.py b/ppgan/models/srgan_model.py index 762e44af89aac..ae5a46190597e 100644 --- a/ppgan/models/srgan_model.py +++ b/ppgan/models/srgan_model.py @@ -12,126 +12,24 @@ from .builder import MODELS # logger = logging.getLogger('base') + @MODELS.register() class SRGANModel(BaseModel): def __init__(self, cfg): super(SRGANModel, self).__init__(cfg) - # if opt['dist']: - # self.rank = torch.distributed.get_rank() - # else: - # self.rank = -1 # non dist training - # train_opt = opt['train'] - # define networks and load pretrained models + # define networks 
self.model_names = ['G'] - + self.netG = build_generator(cfg.model.generator) self.visual_names = ['LQ', 'GT', 'fake_H'] - # self.netG = networks.define_G(opt).to(self.device) - # if opt['dist']: - # self.netG = DistributedDataParallel(self.netG, device_ids=[torch.cuda.current_device()]) - # else: - # self.netG = DataParallel(self.netG) - if False:#self.is_train: - self.netD = build_discriminator(cfg.model.discriminator) - # if opt['dist']: - # self.netD = DistributedDataParallel(self.netD, - # device_ids=[torch.cuda.current_device()]) - # else: - # self.netD = DataParallel(self.netD) + # TODO: support srgan train. + if False: + # self.netD = build_discriminator(cfg.model.discriminator) self.netG.train() - self.netD.train() - - # define losses, optimizer and scheduler - # if self.is_train: - # pass - # G pixel loss - # if train_opt['pixel_weight'] > 0: - # l_pix_type = train_opt['pixel_criterion'] - # if l_pix_type == 'l1': - # self.cri_pix = nn.L1Loss().to(self.device) - # elif l_pix_type == 'l2': - # self.cri_pix = nn.MSELoss().to(self.device) - # else: - # raise NotImplementedError('Loss type [{:s}] not recognized.'.format(l_pix_type)) - # self.l_pix_w = train_opt['pixel_weight'] - # else: - # # logger.info('Remove pixel loss.') - # self.cri_pix = None - - # # G feature loss - # if train_opt['feature_weight'] > 0: - # l_fea_type = train_opt['feature_criterion'] - # if l_fea_type == 'l1': - # self.cri_fea = nn.L1Loss().to(self.device) - # elif l_fea_type == 'l2': - # self.cri_fea = nn.MSELoss().to(self.device) - # else: - # raise NotImplementedError('Loss type [{:s}] not recognized.'.format(l_fea_type)) - # self.l_fea_w = train_opt['feature_weight'] - # else: - # logger.info('Remove feature loss.') - # self.cri_fea = None - # if self.cri_fea: # load VGG perceptual loss - # self.netF = networks.define_F(opt, use_bn=False).to(self.device) - # if opt['dist']: - # self.netF = DistributedDataParallel(self.netF, - # device_ids=[torch.cuda.current_device()]) - # else: - # self.netF = DataParallel(self.netF) - - # # GD gan loss - # self.cri_gan = GANLoss(train_opt['gan_type'], 1.0, 0.0).to(self.device) - # self.l_gan_w = train_opt['gan_weight'] - # # D_update_ratio and D_init_iters - # self.D_update_ratio = train_opt['D_update_ratio'] if train_opt['D_update_ratio'] else 1 - # self.D_init_iters = train_opt['D_init_iters'] if train_opt['D_init_iters'] else 0 - - # # optimizers - # # G - # wd_G = train_opt['weight_decay_G'] if train_opt['weight_decay_G'] else 0 - # optim_params = [] - # for k, v in self.netG.named_parameters(): # can optimize for a part of the model - # if v.requires_grad: - # optim_params.append(v) - # else: - # if self.rank <= 0: - # logger.warning('Params [{:s}] will not optimize.'.format(k)) - # self.optimizer_G = torch.optim.Adam(optim_params, lr=train_opt['lr_G'], - # weight_decay=wd_G, - # betas=(train_opt['beta1_G'], train_opt['beta2_G'])) - # self.optimizers.append(self.optimizer_G) - # # D - # wd_D = train_opt['weight_decay_D'] if train_opt['weight_decay_D'] else 0 - # self.optimizer_D = torch.optim.Adam(self.netD.parameters(), lr=train_opt['lr_D'], - # weight_decay=wd_D, - # betas=(train_opt['beta1_D'], train_opt['beta2_D'])) - # self.optimizers.append(self.optimizer_D) - - # # schedulers - # if train_opt['lr_scheme'] == 'MultiStepLR': - # for optimizer in self.optimizers: - # self.schedulers.append( - # lr_scheduler.MultiStepLR_Restart(optimizer, train_opt['lr_steps'], - # restarts=train_opt['restarts'], - # weights=train_opt['restart_weights'], - # 
gamma=train_opt['lr_gamma'], - # clear_state=train_opt['clear_state'])) - # elif train_opt['lr_scheme'] == 'CosineAnnealingLR_Restart': - # for optimizer in self.optimizers: - # self.schedulers.append( - # lr_scheduler.CosineAnnealingLR_Restart( - # optimizer, train_opt['T_period'], eta_min=train_opt['eta_min'], - # restarts=train_opt['restarts'], weights=train_opt['restart_weights'])) - # else: - # raise NotImplementedError('MultiStepLR learning rate scheme is enough.') - - # self.log_dict = OrderedDict() - - # self.print_network() # print network - # self.load() # load G and D if needed + # self.netD.train() def set_input(self, input): """Unpack input data from the dataloader and perform necessary pre-processing steps. @@ -150,239 +48,8 @@ def set_input(self, input): if 'A_paths' in input: self.image_paths = input['A_paths'] - # def feed_data(self, data, need_GT=True): - # self.var_L = data['LQ'].to(self.device) # LQ - # if need_GT: - # self.var_H = data['GT'].to(self.device) # GT - # input_ref = data['ref'] if 'ref' in data else data['GT'] - # self.var_ref = input_ref.to(self.device) - def forward(self): self.fake_H = self.netG(self.LQ) def optimize_parameters(self, step): pass - # # G - # for p in self.netD.parameters(): - # p.requires_grad = False - - # self.optimizer_G.zero_grad() - # self.fake_H = self.netG(self.var_L.detach()) - - # l_g_total = 0 - # if step % self.D_update_ratio == 0 and step > self.D_init_iters: - # if self.cri_pix: # pixel loss - # l_g_pix = self.l_pix_w * self.cri_pix(self.fake_H, self.var_H) - # l_g_total += l_g_pix - # if self.cri_fea: # feature loss - # real_fea = self.netF(self.var_H).detach() - # fake_fea = self.netF(self.fake_H) - # l_g_fea = self.l_fea_w * self.cri_fea(fake_fea, real_fea) - # l_g_total += l_g_fea - - # pred_g_fake = self.netD(self.fake_H) - # if self.opt['train']['gan_type'] == 'gan': - # l_g_gan = self.l_gan_w * self.cri_gan(pred_g_fake, True) - # elif self.opt['train']['gan_type'] == 'ragan': - # pred_d_real = self.netD(self.var_ref).detach() - # l_g_gan = self.l_gan_w * ( - # self.cri_gan(pred_d_real - torch.mean(pred_g_fake), False) + - # self.cri_gan(pred_g_fake - torch.mean(pred_d_real), True)) / 2 - # l_g_total += l_g_gan - - # l_g_total.backward() - # self.optimizer_G.step() - - # # D - # for p in self.netD.parameters(): - # p.requires_grad = True - - # self.optimizer_D.zero_grad() - # l_d_total = 0 - # pred_d_real = self.netD(self.var_ref) - # pred_d_fake = self.netD(self.fake_H.detach()) # detach to avoid BP to G - # if self.opt['train']['gan_type'] == 'gan': - # l_d_real = self.cri_gan(pred_d_real, True) - # l_d_fake = self.cri_gan(pred_d_fake, False) - # l_d_total = l_d_real + l_d_fake - # elif self.opt['train']['gan_type'] == 'ragan': - # l_d_real = self.cri_gan(pred_d_real - torch.mean(pred_d_fake), True) - # l_d_fake = self.cri_gan(pred_d_fake - torch.mean(pred_d_real), False) - # l_d_total = (l_d_real + l_d_fake) / 2 - - # l_d_total.backward() - # self.optimizer_D.step() - - # # set log - # if step % self.D_update_ratio == 0 and step > self.D_init_iters: - # if self.cri_pix: - # self.log_dict['l_g_pix'] = l_g_pix.item() - # # self.log_dict['l_g_mean_color'] = l_g_mean_color.item() - # if self.cri_fea: - # self.log_dict['l_g_fea'] = l_g_fea.item() - # self.log_dict['l_g_gan'] = l_g_gan.item() - - # self.log_dict['l_d_real'] = l_d_real.item() - # self.log_dict['l_d_fake'] = l_d_fake.item() - # self.log_dict['D_real'] = torch.mean(pred_d_real.detach()) - # self.log_dict['D_fake'] = torch.mean(pred_d_fake.detach()) - - # def 
test(self): - # self.netG.eval() - # with torch.no_grad(): - # self.fake_H = self.netG(self.var_L) - # self.netG.train() - - # def back_projection(self): - # lr_error = self.var_L - torch.nn.functional.interpolate(self.fake_H, - # scale_factor=1/self.opt['scale'], - # mode='bicubic', - # align_corners=False) - # us_error = torch.nn.functional.interpolate(lr_error, - # scale_factor=self.opt['scale'], - # mode='bicubic', - # align_corners=False) - # self.fake_H += self.opt['back_projection_lamda'] * us_error - # torch.clamp(self.fake_H, 0, 1) - - # def test_chop(self): - # self.netG.eval() - # with torch.no_grad(): - # self.fake_H = self.forward_chop(self.var_L) - # self.netG.train() - - # def forward_chop(self, *args, shave=10, min_size=160000): - # # scale = 1 if self.input_large else self.scale[self.idx_scale] - # scale = self.opt['scale'] - # n_GPUs = min(torch.cuda.device_count(), 4) - # args = [a.squeeze().unsqueeze(0) for a in args] - - # # height, width - # h, w = args[0].size()[-2:] - # # print('len(args)', len(args)) - # # print('args[0].size()', args[0].size()) - - # top = slice(0, h//2 + shave) - # bottom = slice(h - h//2 - shave, h) - # left = slice(0, w//2 + shave) - # right = slice(w - w//2 - shave, w) - # x_chops = [torch.cat([ - # a[..., top, left], - # a[..., top, right], - # a[..., bottom, left], - # a[..., bottom, right] - # ]) for a in args] - # # print('len(x_chops)', len(x_chops)) - # # print('x_chops[0].size()', x_chops[0].size()) - - # y_chops = [] - # if h * w < 4 * min_size: - # for i in range(0, 4, n_GPUs): - # x = [x_chop[i:(i + n_GPUs)] for x_chop in x_chops] - # # print(len(x)) - # # print(x[0].size()) - # y = P.data_parallel(self.netG, *x, range(n_GPUs)) - # if not isinstance(y, list): y = [y] - # if not y_chops: - # y_chops = [[c for c in _y.chunk(n_GPUs, dim=0)] for _y in y] - # else: - # for y_chop, _y in zip(y_chops, y): - # y_chop.extend(_y.chunk(n_GPUs, dim=0)) - # else: - - # # print(x_chops[0].size()) - # for p in zip(*x_chops): - # # print('len(p)', len(p)) - # # print('p[0].size()', p[0].size()) - # y = self.forward_chop(*p, shave=shave, min_size=min_size) - # if not isinstance(y, list): y = [y] - # if not y_chops: - # y_chops = [[_y] for _y in y] - # else: - # for y_chop, _y in zip(y_chops, y): y_chop.append(_y) - - # h *= scale - # w *= scale - # top = slice(0, h//2) - # bottom = slice(h - h//2, h) - # bottom_r = slice(h//2 - h, None) - # left = slice(0, w//2) - # right = slice(w - w//2, w) - # right_r = slice(w//2 - w, None) - - # # batch size, number of color channels - # b, c = y_chops[0][0].size()[:-2] - # y = [y_chop[0].new(b, c, h, w) for y_chop in y_chops] - # for y_chop, _y in zip(y_chops, y): - # _y[..., top, left] = y_chop[0][..., top, left] - # _y[..., top, right] = y_chop[1][..., top, right_r] - # _y[..., bottom, left] = y_chop[2][..., bottom_r, left] - # _y[..., bottom, right] = y_chop[3][..., bottom_r, right_r] - - # if len(y) == 1: - # y = y[0] - - # return y - - # def get_current_log(self): - # return self.log_dict - - # def get_current_visuals(self, need_GT=True): - # out_dict = OrderedDict() - # out_dict['LQ'] = self.var_L.detach()[0].float().cpu() - # out_dict['SR'] = self.fake_H.detach()[0].float().cpu() - # if need_GT: - # out_dict['GT'] = self.var_H.detach()[0].float().cpu() - # return out_dict - - # def print_network(self): - # # Generator - # s, n = self.get_network_description(self.netG) - # if isinstance(self.netG, nn.DataParallel) or isinstance(self.netG, DistributedDataParallel): - # net_struc_str = '{} - 
{}'.format(self.netG.__class__.__name__, - # self.netG.module.__class__.__name__) - # else: - # net_struc_str = '{}'.format(self.netG.__class__.__name__) - # if self.rank <= 0: - # logger.info('Network G structure: {}, with parameters: {:,d}'.format(net_struc_str, n)) - # logger.info(s) - # if self.is_train: - # # Discriminator - # s, n = self.get_network_description(self.netD) - # if isinstance(self.netD, nn.DataParallel) or isinstance(self.netD, - # DistributedDataParallel): - # net_struc_str = '{} - {}'.format(self.netD.__class__.__name__, - # self.netD.module.__class__.__name__) - # else: - # net_struc_str = '{}'.format(self.netD.__class__.__name__) - # if self.rank <= 0: - # logger.info('Network D structure: {}, with parameters: {:,d}'.format( - # net_struc_str, n)) - # logger.info(s) - - # if self.cri_fea: # F, Perceptual Network - # s, n = self.get_network_description(self.netF) - # if isinstance(self.netF, nn.DataParallel) or isinstance( - # self.netF, DistributedDataParallel): - # net_struc_str = '{} - {}'.format(self.netF.__class__.__name__, - # self.netF.module.__class__.__name__) - # else: - # net_struc_str = '{}'.format(self.netF.__class__.__name__) - # if self.rank <= 0: - # logger.info('Network F structure: {}, with parameters: {:,d}'.format( - # net_struc_str, n)) - # logger.info(s) - - # def load(self): - # load_path_G = self.opt['path']['pretrain_model_G'] - # if load_path_G is not None: - # logger.info('Loading model for G [{:s}] ...'.format(load_path_G)) - # self.load_network(load_path_G, self.netG, self.opt['path']['strict_load']) - # load_path_D = self.opt['path']['pretrain_model_D'] - # if self.opt['is_train'] and load_path_D is not None: - # logger.info('Loading model for D [{:s}] ...'.format(load_path_D)) - # self.load_network(load_path_D, self.netD, self.opt['path']['strict_load']) - - # def save(self, iter_step): - # self.save_network(self.netG, 'G', iter_step) - # self.save_network(self.netD, 'D', iter_step) From 99d092167e5cb3b42747fa64346134ea96172fd6 Mon Sep 17 00:00:00 2001 From: LielinJiang Date: Wed, 16 Sep 2020 12:56:30 +0000 Subject: [PATCH 4/6] rm unused code --- ppgan/models/sr_model.py | 116 +----------------------------------- ppgan/models/srgan_model.py | 8 +-- 2 files changed, 4 insertions(+), 120 deletions(-) diff --git a/ppgan/models/sr_model.py b/ppgan/models/sr_model.py index bd255a7397b66..118f49fe11da1 100644 --- a/ppgan/models/sr_model.py +++ b/ppgan/models/sr_model.py @@ -1,10 +1,7 @@ from collections import OrderedDict import paddle import paddle.nn as nn -# import torch.nn.parallel as P -# from torch.nn.parallel import DataParallel, DistributedDataParallel -# import models.networks as networks -# import models.lr_scheduler as lr_scheduler + from .generators.builder import build_generator from .discriminators.builder import build_discriminator from ..solver import build_optimizer @@ -13,8 +10,6 @@ from .builder import MODELS import importlib -import mmcv -import torch from collections import OrderedDict from copy import deepcopy from os import path as osp @@ -24,12 +19,11 @@ @MODELS.register() class SRModel(BaseModel): """Base SR model for single image super-resolution.""" - def __init__(self, cfg): super(SRModel, self).__init__(cfg) self.model_names = ['G'] - + self.netG = build_generator(cfg.model.generator) self.visual_names = ['lq', 'output', 'gt'] @@ -119,7 +113,7 @@ def set_input(self, input): def forward(self): pass - + def test(self): """Forward function used in test time. 
""" @@ -137,111 +131,7 @@ def optimize_parameters(self): l_pix = self.criterionL1(self.output, self.gt) l_total += l_pix loss_dict['l_pix'] = l_pix - # perceptual loss - # if self.cri_perceptual: - # l_percep, l_style = self.cri_perceptual(self.output, self.gt) - # if l_percep is not None: - # l_total += l_percep - # loss_dict['l_percep'] = l_percep - # if l_style is not None: - # l_total += l_style - # loss_dict['l_style'] = l_style l_total.backward() self.loss_l_total = l_total self.optimizer_G.step() - - # self.log_dict = self.reduce_loss_dict(loss_dict) - # def get_current_visuals(self): - # out_dict = OrderedDict() - # out_dict['lq'] = self.lq.detach().cpu() - # out_dict['result'] = self.output.detach().cpu() - # if hasattr(self, 'gt'): - # out_dict['gt'] = self.gt.detach().cpu() - # return out_dict - - # def test(self): - # self.net_g.eval() - # with torch.no_grad(): - # self.output = self.net_g(self.lq) - # self.net_g.train() - - # def dist_validation(self, dataloader, current_iter, tb_logger, save_img): - # logger = get_root_logger() - # logger.info('Only support single GPU validation.') - # self.nondist_validation(dataloader, current_iter, tb_logger, save_img) - - # def nondist_validation(self, dataloader, current_iter, tb_logger, - # save_img): - # dataset_name = dataloader.dataset.opt['name'] - # with_metrics = self.opt['val'].get('metrics') is not None - # if with_metrics: - # self.metric_results = { - # metric: 0 - # for metric in self.opt['val']['metrics'].keys() - # } - # pbar = ProgressBar(len(dataloader)) - - # for idx, val_data in enumerate(dataloader): - # img_name = osp.splitext(osp.basename(val_data['lq_path'][0]))[0] - # self.feed_data(val_data) - # self.test() - - # visuals = self.get_current_visuals() - # sr_img = tensor2img([visuals['result']]) - # if 'gt' in visuals: - # gt_img = tensor2img([visuals['gt']]) - # del self.gt - - # # tentative for out of GPU memory - # del self.lq - # del self.output - # torch.cuda.empty_cache() - - # if save_img: - # if self.opt['is_train']: - # save_img_path = osp.join(self.opt['path']['visualization'], - # img_name, - # f'{img_name}_{current_iter}.png') - # else: - # if self.opt['val']['suffix']: - # save_img_path = osp.join( - # self.opt['path']['visualization'], dataset_name, - # f'{img_name}_{self.opt["val"]["suffix"]}.png') - # else: - # save_img_path = osp.join( - # self.opt['path']['visualization'], dataset_name, - # f'{img_name}_{self.opt["name"]}.png') - # mmcv.imwrite(sr_img, save_img_path) - - # if with_metrics: - # # calculate metrics - # opt_metric = deepcopy(self.opt['val']['metrics']) - # for name, opt_ in opt_metric.items(): - # metric_type = opt_.pop('type') - # self.metric_results[name] += getattr( - # metric_module, metric_type)(sr_img, gt_img, **opt_) - # pbar.update(f'Test {img_name}') - - # if with_metrics: - # for metric in self.metric_results.keys(): - # self.metric_results[metric] /= (idx + 1) - - # self._log_validation_metric_values(current_iter, dataset_name, - # tb_logger) - - # def _log_validation_metric_values(self, current_iter, dataset_name, - # tb_logger): - # log_str = f'Validation {dataset_name}\n' - # for metric, value in self.metric_results.items(): - # log_str += f'\t # {metric}: {value:.4f}\n' - # logger = get_root_logger() - # logger.info(log_str) - # if tb_logger: - # for metric, value in self.metric_results.items(): - # tb_logger.add_scalar(f'metrics/{metric}', value, current_iter) - - - # def save(self, epoch, current_iter): - # self.save_network(self.net_g, 'net_g', current_iter) - # 
self.save_training_state(epoch, current_iter) diff --git a/ppgan/models/srgan_model.py b/ppgan/models/srgan_model.py index ae5a46190597e..32ca581f8fcaa 100644 --- a/ppgan/models/srgan_model.py +++ b/ppgan/models/srgan_model.py @@ -1,16 +1,11 @@ -# import logging from collections import OrderedDict import paddle import paddle.nn as nn -# import torch.nn.parallel as P -# from torch.nn.parallel import DataParallel, DistributedDataParallel -# import models.networks as networks -# import models.lr_scheduler as lr_scheduler + from .generators.builder import build_generator from .base_model import BaseModel from .losses import GANLoss from .builder import MODELS -# logger = logging.getLogger('base') @MODELS.register() @@ -27,7 +22,6 @@ def __init__(self, cfg): # TODO: support srgan train. if False: # self.netD = build_discriminator(cfg.model.discriminator) - self.netG.train() # self.netD.train() From 93d8fca181abe4d93f7f18fbfc9b871388bde98a Mon Sep 17 00:00:00 2001 From: LielinJiang Date: Wed, 16 Sep 2020 13:00:17 +0000 Subject: [PATCH 5/6] rm unused code --- ppgan/datasets/builder.py | 4 --- ppgan/models/sr_model.py | 65 --------------------------------------- 2 files changed, 69 deletions(-) diff --git a/ppgan/datasets/builder.py b/ppgan/datasets/builder.py index 284c774214371..62b5346795c13 100644 --- a/ppgan/datasets/builder.py +++ b/ppgan/datasets/builder.py @@ -111,8 +111,4 @@ def build_dataloader(cfg, is_train=True): dataloader = DictDataLoader(dataset, batch_size, is_train, num_workers) - # for i, item in enumerate(dataloader): - # print(i, item.keys()) - # # break - # print('dataset build success!') return dataloader diff --git a/ppgan/models/sr_model.py b/ppgan/models/sr_model.py index 118f49fe11da1..a7f912431d3f8 100644 --- a/ppgan/models/sr_model.py +++ b/ppgan/models/sr_model.py @@ -28,16 +28,7 @@ def __init__(self, cfg): self.visual_names = ['lq', 'output', 'gt'] self.loss_names = ['l_total'] - # define network - # self.net_g = networks.define_net_g(deepcopy(opt['network_g'])) - # self.net_g = self.model_to_device(self.net_g) - # self.print_network(self.net_g) - # load pretrained models - # load_path = self.opt['path'].get('pretrain_model_g', None) - # if load_path is not None: - # self.load_network(self.net_g, load_path, - # self.opt['path']['strict_load']) self.optimizers = [] if self.isTrain: self.criterionL1 = paddle.nn.L1Loss() @@ -48,68 +39,12 @@ def __init__(self, cfg): self.lr_scheduler, parameter_list=self.netG.parameters()) self.optimizers.append(self.optimizer_G) - # self.optimizer_D = build_optimizer( - # opt.optimizer, - # self.lr_scheduler, - # parameter_list=self.netD.parameters()) - - # self.init_training_settings() - - # def init_training_settings(self): - # self.net_g.train() - # train_opt = self.opt['train'] - - # # define losses - # if train_opt.get('pixel_opt'): - # pixel_type = train_opt['pixel_opt'].pop('type') - # cri_pix_cls = getattr(loss_module, pixel_type) - # self.cri_pix = cri_pix_cls(**train_opt['pixel_opt']).to( - # self.device) - # else: - # self.cri_pix = None - - # if train_opt.get('perceptual_opt'): - # percep_type = train_opt['perceptual_opt'].pop('type') - # cri_perceptual_cls = getattr(loss_module, percep_type) - # self.cri_perceptual = cri_perceptual_cls( - # **train_opt['perceptual_opt']).to(self.device) - # else: - # self.cri_perceptual = None - - # if self.cri_pix is None and self.cri_perceptual is None: - # raise ValueError('Both pixel and perceptual losses are None.') - - # # set up optimizers and schedulers - # self.setup_optimizers() - 
# self.setup_schedulers() - - # def setup_optimizers(self): - # train_opt = self.opt['train'] - # optim_params = [] - # for k, v in self.net_g.named_parameters(): - # if v.requires_grad: - # optim_params.append(v) - # else: - # logger = get_root_logger() - # logger.warning(f'Params {k} will not be optimized.') - - # optim_type = train_opt['optim_g'].pop('type') - # if optim_type == 'Adam': - # self.optimizer_g = torch.optim.Adam(optim_params, - # **train_opt['optim_g']) - # else: - # raise NotImplementedError( - # f'optimizer {optim_type} is not supperted yet.') - # self.optimizers.append(self.optimizer_g) def set_input(self, input): self.lq = paddle.to_tensor(input['lq']) if 'gt' in input: self.gt = paddle.to_tensor(input['gt']) self.image_paths = input['lq_path'] - # self.lq = data['lq'].to(self.device) - # if 'gt' in data: - # self.gt = data['gt'].to(self.device) def forward(self): pass From d5b9c2ea54f1e747c8c6669e37ef23ccd4563cc7 Mon Sep 17 00:00:00 2001 From: LielinJiang Date: Fri, 18 Sep 2020 15:26:47 +0000 Subject: [PATCH 6/6] fix url name --- applications/DeOldify/predict.py | 8 +++----- applications/DeepRemaster/predict.py | 4 ++-- applications/EDVR/predict.py | 7 +++---- applications/RealSR/predict.py | 7 +++---- 4 files changed, 11 insertions(+), 15 deletions(-) diff --git a/applications/DeOldify/predict.py b/applications/DeOldify/predict.py index 35ca9c9ab5351..fd94970f38ada 100644 --- a/applications/DeOldify/predict.py +++ b/applications/DeOldify/predict.py @@ -18,7 +18,6 @@ from ppgan.utils.video import frames2video, video2frames from ppgan.models.generators.deoldify import build_model - parser = argparse.ArgumentParser(description='DeOldify') parser.add_argument('--input', type=str, default='none', help='Input video') parser.add_argument('--output', type=str, default='output', help='output dir') @@ -31,7 +30,7 @@ default=None, help='Path to the reference image directory') -DeOldify_weight_url = 'https://paddlegan.bj.bcebos.com/applications/DeOldify_stable.pdparams' +DEOLDIFY_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DeOldify_stable.pdparams' class DeOldifyPredictor(): @@ -46,7 +45,7 @@ def __init__(self, self.render_factor = render_factor self.model = build_model() if weight_path is None: - weight_path = get_path_from_url(DeOldify_weight_url, cur_path) + weight_path = get_path_from_url(DEOLDIFY_WEIGHT_URL, cur_path) state_dict, _ = paddle.load(weight_path) self.model.load_dict(state_dict) @@ -127,8 +126,7 @@ def run(self): vid_out_path = os.path.join(output_path, '{}_deoldify_out.mp4'.format(base_name)) - frames2video(frame_pattern_combined, vid_out_path, - str(int(fps))) + frames2video(frame_pattern_combined, vid_out_path, str(int(fps))) return frame_pattern_combined, vid_out_path diff --git a/applications/DeepRemaster/predict.py b/applications/DeepRemaster/predict.py index 8a4777fbf3ca1..baa8b7fd68d46 100644 --- a/applications/DeepRemaster/predict.py +++ b/applications/DeepRemaster/predict.py @@ -17,7 +17,7 @@ from ppgan.models.generators.remaster import NetworkR, NetworkC from paddle.utils.download import get_path_from_url -DeepRemaster_weight_url = 'https://paddlegan.bj.bcebos.com/applications/deep_remaster.pdparams' +DEEPREMASTER_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/deep_remaster.pdparams' parser = argparse.ArgumentParser(description='Remastering') parser.add_argument('--input', type=str, default=None, help='Input video') @@ -51,7 +51,7 @@ def __init__(self, self.mindim = mindim if weight_path is None: - weight_path = 
get_path_from_url(DeepRemaster_weight_url, cur_path) + weight_path = get_path_from_url(DEEPREMASTER_WEIGHT_URL, cur_path) state_dict, _ = paddle.load(weight_path) diff --git a/applications/EDVR/predict.py b/applications/EDVR/predict.py index 8a7f5aa6adc6a..5f95714cea667 100644 --- a/applications/EDVR/predict.py +++ b/applications/EDVR/predict.py @@ -32,7 +32,7 @@ from paddle.utils.download import get_path_from_url from ppgan.utils.video import frames2video, video2frames -EDVR_weight_url = 'https://paddlegan.bj.bcebos.com/applications/edvr_infer_model.tar' +EDVR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/edvr_infer_model.tar' def parse_args(): @@ -82,7 +82,7 @@ def __init__(self, input, output, weight_path=None): self.exe = fluid.Executor(place) if weight_path is None: - weight_path = get_path_from_url(EDVR_weight_url, cur_path) + weight_path = get_path_from_url(EDVR_WEIGHT_URL, cur_path) model_filename = 'EDVR_model.pdmodel' params_filename = 'EDVR_params.pdparams' @@ -141,8 +141,7 @@ def run(self): frame_pattern_combined = os.path.join(pred_frame_path, '%08d.png') vid_out_path = os.path.join(self.output, '{}_edvr_out.mp4'.format(base_name)) - frames2video(frame_pattern_combined, vid_out_path, - str(int(fps))) + frames2video(frame_pattern_combined, vid_out_path, str(int(fps))) return frame_pattern_combined, vid_out_path diff --git a/applications/RealSR/predict.py b/applications/RealSR/predict.py index e74e0ca0b0b16..d032bc2a78029 100644 --- a/applications/RealSR/predict.py +++ b/applications/RealSR/predict.py @@ -26,7 +26,7 @@ default=None, help='Path to the reference image directory') -RealSR_weight_url = 'https://paddlegan.bj.bcebos.com/applications/DF2K_JPEG.pdparams' +REALSR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DF2K_JPEG.pdparams' class RealSRPredictor(): @@ -35,7 +35,7 @@ def __init__(self, input, output, batch_size=1, weight_path=None): self.output = os.path.join(output, 'RealSR') self.model = RRDBNet(3, 3, 64, 23) if weight_path is None: - weight_path = get_path_from_url(RealSR_weight_url, cur_path) + weight_path = get_path_from_url(REALSR_WEIGHT_URL, cur_path) state_dict, _ = paddle.load(weight_path) self.model.load_dict(state_dict) @@ -88,8 +88,7 @@ def run(self): vid_out_path = os.path.join(output_path, '{}_realsr_out.mp4'.format(base_name)) - frames2video(frame_pattern_combined, vid_out_path, - str(int(fps))) + frames2video(frame_pattern_combined, vid_out_path, str(int(fps))) return frame_pattern_combined, vid_out_path