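# Page-header text captured together with this file (apparently from the
# Hugging Face Hub file viewer; it is not part of the configuration):
#   topdu's picture
#   Upload folder using huggingface_hub
#   6730f86 verified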
# Run-wide settings: device, schedule length, logging cadence, checkpoint and
# evaluation cadence, label dictionary, and output locations.
Global:
  device: gpu
  epoch_num: 100
  log_smooth_window: 20
  print_batch_step: 10
  output_dir: ./output/rec/ch/svtrv2_llada_test_lr00005_fs_reflect_onedecoder_semiar_withoutmask_sample3_3633_ch/
  # Two-element lists; the meaning of each element is defined by the trainer
  # (presumably [start, interval]) — TODO confirm.
  save_epoch_step: [10, 1]
  eval_batch_step: [0, 2000]
  eval_epoch_step: [0, 1]
  cal_metric_during_train: false
  pretrained_model: null
  checkpoints: null
  use_tensorboard: false
  infer_img: ./img_44_1.jpg
  # Label-processing settings; the same three values are repeated in the
  # PostProcess section and in the Train/Eval label-encode transforms.
  character_dict_path: ./tools/utils/ppocr_keys_v1.txt
  max_text_length: 25
  use_space_char: false
  # NOTE(review): this path is under rec/u14m_filter while output_dir is under
  # rec/ch — confirm that is intended.
  save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_nrtr.txt
  use_amp: true
  grad_clip_val: 20.0
  distributed: true
# Optimizer selection and its hyper-parameters.
Optimizer:
  name: AdamW
  lr: 0.0005
  weight_decay: 0.05
  filter_bias_and_bn: true
# Learning-rate schedule.
LRScheduler:
  name: OneCycleLR
  warmup_epoch: 5
  cycle_momentum: false
# Model definition: an SVTRv2LNConvTwo33 encoder followed by an MDiffDecoder.
Architecture:
  model_type: rec
  algorithm: NRTR
  in_channels: 3
  Transform: null
  Encoder:
    name: SVTRv2LNConvTwo33
    use_pos_embed: false
    # dims, depths, num_heads, mixer, local_k and sub_k each carry three
    # entries (presumably one per encoder stage — TODO confirm).
    dims: [128, 256, 384]
    depths: [3, 6, 3]
    num_heads: [4, 8, 12]
    # NOTE(review): each mixer row lists six entries while depths is
    # [3, 6, 3]; confirm how the encoder consumes the extra entries.
    mixer:
      - [Conv, Conv, Conv, Conv, Conv, Conv]
      - [Conv, Conv, FGlobal, Global, Global, Global]
      - [Global, Global, Global, Global, Global, Global]
    local_k:
      - [5, 5]
      - [5, 5]
      - [-1, -1]
    sub_k:
      - [1, 1]
      - [2, 1]
      - [-1, -1]
    last_stage: false
    feat2d: false
  Decoder:
    name: MDiffDecoder
    num_decoder_layers: 3
    nhead: 6
    # Same value as Global.max_text_length in this file.
    max_len: 25
    # Decoding-strategy switches; only semi_autoregressive_decoding is enabled.
    parallel_decoding: false
    autoregressive_decoding: false
    low_confidence_decoding: false
    random_mask_decoding: false
    semi_autoregressive_decoding: true
    cloze_mask_decoding: false
    sampler_step: 3
    sample_k: 3
# Loss selection.
Loss:
  name: NoneLoss
# Prediction decoding; dictionary settings repeat the values used in Global.
PostProcess:
  name: ARLabelDecode
  character_dict_path: ./tools/utils/ppocr_keys_v1.txt
  use_space_char: false
# Evaluation metric and the indicator used to rank results.
Metric:
  name: RecMetric
  main_indicator: acc
  is_filter: false
# Training data pipeline: dataset + transforms, batch sampler, and loader.
Train:
  dataset:
    name: RatioDataSetTVResize
    ds_width: true
    padding: false
    data_dir_list:
      - ../benchmark_bctr/benchmark_bctr_train/document_train
      - ../benchmark_bctr/benchmark_bctr_train/handwriting_train
      - ../benchmark_bctr/benchmark_bctr_train/scene_train
      - ../benchmark_bctr/benchmark_bctr_train/web_train
    # Transforms are listed in the order they appear in the pipeline.
    transforms:
      - DecodeImagePIL:
          img_mode: RGB
      - PARSeqAugPIL: null
      - LLaDALabelEncode:
          character_dict_path: ./tools/utils/ppocr_keys_v1.txt
          use_space_char: false
          max_text_length: 25
          train_all_layer: true
          sample_num: 3
      - KeepKeys:
          keep_keys:
            - image
            - label
            - reflect_ids
            - noisy_batch
            - masked_indices
            - p_mask
            - length
  sampler:
    name: RatioSampler
    # presumably [width, height] pairs — TODO confirm against the sampler.
    scales:
      - [128, 32]
    first_bs: 128
    fix_bs: false
    divided_factor: [4, 16]
    is_training: true
  loader:
    shuffle: true
    batch_size_per_card: 128
    drop_last: true
    max_ratio: 8
    num_workers: 4
# Evaluation data pipeline: dataset + transforms, batch sampler, and loader.
Eval:
  dataset:
    name: RatioDataSetTVResize
    ds_width: true
    padding: false
    data_dir_list:
      - ../benchmark_bctr/benchmark_bctr_test/scene_test
    # Transforms are listed in the order they appear in the pipeline.
    transforms:
      - DecodeImagePIL:
          img_mode: RGB
      - ARLabelEncode:
          character_dict_path: ./tools/utils/ppocr_keys_v1.txt
          use_space_char: false
          max_text_length: 25
      - KeepKeys:
          keep_keys:
            - image
            - label
            - length
  sampler:
    name: RatioSampler
    # presumably [width, height] pairs — TODO confirm against the sampler.
    scales:
      - [128, 32]
    first_bs: 128
    fix_bs: false
    divided_factor: [4, 16]
    is_training: false
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 128
    max_ratio: 8
    num_workers: 4
# Top-level run metadata following the Eval section. These look like values
# injected by the launcher/CLI rather than hand-written settings — TODO confirm.
# `filename` matches the base name of the `config` path below.
filename: svtrv2_llada_fs_reflect_onedecoder_reflectwithoutmask_sample3_ch_3633
config: configs/rec/llada/svtrv2_llada_fs_reflect_onedecoder_reflectwithoutmask_sample3_ch_3633.yml
# Quoted, so this is the string '4', not an integer.
local_rank: '4'
eval: true