# Repository: topdu/MDiff4STR
# File: mdiff4str_small_ch/config.yml (4.06 kB)
# Commit 6730f86 by topdu: "Upload folder using huggingface_hub"

	Global:
	device: gpu
	epoch_num: 100
	log_smooth_window: 20
	print_batch_step: 10
	output_dir: ./output/rec/ch/svtrv2_llada_test_lr00005_fs_reflect_onedecoder_semiar_withoutmask_sample3_3633_ch/
	save_epoch_step:
	- 10
	- 1
	eval_batch_step:
	- 0
	- 2000
	eval_epoch_step:
	- 0
	- 1
	cal_metric_during_train: false
	pretrained_model: null
	checkpoints: null
	use_tensorboard: false
	infer_img: ./img_44_1.jpg
	character_dict_path: ./tools/utils/ppocr_keys_v1.txt
	max_text_length: 25
	use_space_char: false
	save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_nrtr.txt
	use_amp: true
	grad_clip_val: 20.0
	distributed: true
	Optimizer:
	name: AdamW
	lr: 0.0005
	weight_decay: 0.05
	filter_bias_and_bn: true
	LRScheduler:
	name: OneCycleLR
	warmup_epoch: 5
	cycle_momentum: false
	Architecture:
	model_type: rec
	algorithm: NRTR
	in_channels: 3
	Transform: null
	Encoder:
	name: SVTRv2LNConvTwo33
	use_pos_embed: false
	dims:
	- 128
	- 256
	- 384
	depths:
	- 3
	- 6
	- 3
	num_heads:
	- 4
	- 8
	- 12
	mixer:
	- - Conv
	- Conv
	- Conv
	- Conv
	- Conv
	- Conv
	- - Conv
	- Conv
	- FGlobal
	- Global
	- Global
	- Global
	- - Global
	- Global
	- Global
	- Global
	- Global
	- Global
	local_k:
	- - 5
	- 5
	- - 5
	- 5
	- - -1
	- -1
	sub_k:
	- - 1
	- 1
	- - 2
	- 1
	- - -1
	- -1
	last_stage: false
	feat2d: false
	Decoder:
	name: MDiffDecoder
	num_decoder_layers: 3
	nhead: 6
	max_len: 25
	parallel_decoding: false
	autoregressive_decoding: false
	low_confidence_decoding: false
	random_mask_decoding: false
	semi_autoregressive_decoding: true
	cloze_mask_decoding: false
	sampler_step: 3
	sample_k: 3
	Loss:
	name: NoneLoss
	PostProcess:
	name: ARLabelDecode
	character_dict_path: ./tools/utils/ppocr_keys_v1.txt
	use_space_char: false
	Metric:
	name: RecMetric
	main_indicator: acc
	is_filter: false
	Train:
	dataset:
	name: RatioDataSetTVResize
	ds_width: true
	padding: false
	data_dir_list:
	- ../benchmark_bctr/benchmark_bctr_train/document_train
	- ../benchmark_bctr/benchmark_bctr_train/handwriting_train
	- ../benchmark_bctr/benchmark_bctr_train/scene_train
	- ../benchmark_bctr/benchmark_bctr_train/web_train
	transforms:
	- DecodeImagePIL:
	img_mode: RGB
	- PARSeqAugPIL: null
	- LLaDALabelEncode:
	character_dict_path: ./tools/utils/ppocr_keys_v1.txt
	use_space_char: false
	max_text_length: 25
	train_all_layer: true
	sample_num: 3
	- KeepKeys:
	keep_keys:
	- image
	- label
	- reflect_ids
	- noisy_batch
	- masked_indices
	- p_mask
	- length
	sampler:
	name: RatioSampler
	scales:
	- - 128
	- 32
	first_bs: 128
	fix_bs: false
	divided_factor:
	- 4
	- 16
	is_training: true
	loader:
	shuffle: true
	batch_size_per_card: 128
	drop_last: true
	max_ratio: 8
	num_workers: 4
	Eval:
	dataset:
	name: RatioDataSetTVResize
	ds_width: true
	padding: false
	data_dir_list:
	- ../benchmark_bctr/benchmark_bctr_test/scene_test
	transforms:
	- DecodeImagePIL:
	img_mode: RGB
	- ARLabelEncode:
	character_dict_path: ./tools/utils/ppocr_keys_v1.txt
	use_space_char: false
	max_text_length: 25
	- KeepKeys:
	keep_keys:
	- image
	- label
	- length
	sampler:
	name: RatioSampler
	scales:
	- - 128
	- 32
	first_bs: 128
	fix_bs: false
	divided_factor:
	- 4
	- 16
	is_training: false
	loader:
	shuffle: false
	drop_last: false
	batch_size_per_card: 128
	max_ratio: 8
	num_workers: 4
	filename: svtrv2_llada_fs_reflect_onedecoder_reflectwithoutmask_sample3_ch_3633
	config: configs/rec/llada/svtrv2_llada_fs_reflect_onedecoder_reflectwithoutmask_sample3_ch_3633.yml
	local_rank: '4'
	eval: true