PaddlePaddle · Svtter · Oct 25, 2024 · Oct 28, 2024 · Oct 28, 2024 · Oct 28, 2024
diff --git a/.gitignore b/.gitignore
@@ -32,3 +32,18 @@ paddleocr.egg-info/
 /deploy/android_demo/app/cache/
 test_tipc/web/models/
 test_tipc/web/node_modules/
+
+
+# Paths that look like local models, datasets and scratch output (judging
+# by their names).
+# NOTE(review): a pattern without "/" matches at every directory level, so
+# `debug`, `core` and `data` also ignore same-named files or directories
+# anywhere in the repository. Confirm that is intended, or anchor them
+# with a leading "/".
+pretrain_models
+train_data
+debug
+core
+data
+ckpt
+
+# A model directory and archive/package file types.
+ch_ppocr_mobile_v2.0_rec_infer
+*.tar
+
+*.tgz
+resnet50
+*.whl
diff --git a/.pdm-python b/.pdm-python
@@ -0,0 +1 @@
+/data2/xiuhao-new/work/meter-project/meterhub-pretrain/src/PaddleOCR/.venv/bin/python
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,16 @@
+# JupyterLab image built on a Paddle GPU base image.
+#
+# Other base images, kept commented out for reference:
+# FROM paddlecloud/paddleocr:2.6-gpu-cuda10.2-cudnn7-latest
+
+# build from paddle
+# FROM paddlepaddle/paddle:2.6.2-gpu-cuda12.0-cudnn8.9-trt8.6
+FROM svtter/paddle:cuda11.2-paddle2.4.2
+
+# Install JupyterLab plus the project requirements. --no-cache-dir keeps the
+# pip download cache out of the image layer.
+COPY requirements.txt .
+RUN pip install --no-cache-dir jupyterlab && pip install --no-cache-dir -r requirements.txt
+
+RUN jupyter lab --generate-config
+
+RUN mkdir -p /root/.jupyter
+EXPOSE 8888
+
+# Nothing is copied into this directory by this Dockerfile; presumably the
+# repository is mounted here at run time -- TODO confirm.
+WORKDIR /opt/PaddleOCR
+# SECURITY: the access token is hard-coded and the server listens on all
+# interfaces as root. Use only on a trusted network, or override CMD with a
+# private token.
+# The token has no inner quotes: exec (JSON-array) form runs without a shell,
+# so quote characters would be passed to Jupyter as part of the argument.
+CMD ["jupyter", "lab", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root", "--NotebookApp.token=abcd"]
diff --git a/Dockerfile.inference b/Dockerfile.inference
@@ -0,0 +1,14 @@
+# JupyterLab image for inference, built on the Paddle 2.6.2 base image.
+# FROM python:3.10-slim
+# Start with a large base image; once everything runs, switch to a small one.
+FROM registry.baidubce.com/paddlepaddle/paddle:2.6.2
+
+
+# RUN python3 -m pip install paddlepaddle==2.4.2 -i https://pypi.tuna.tsinghua.edu.cn/simple && \
+#     python3 -m pip install jupyterlab -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+# --no-cache-dir keeps the pip download cache out of the image layer.
+RUN pip install --no-cache-dir jupyterlab -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+# Document the port JupyterLab listens on (see --port in CMD).
+EXPOSE 8888
+
+WORKDIR /opt/PaddleOCR
+
+CMD ["jupyter", "lab", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root"]
+
diff --git a/atrain_script/.gitignore b/atrain_script/.gitignore
@@ -0,0 +1 @@
+# Ignore tar archives in this directory and its subdirectories.
+*.tar
diff --git a/atrain_script/CHANGELOG.md b/atrain_script/CHANGELOG.md
diff --git a/atrain_script/README.md b/atrain_script/README.md
@@ -0,0 +1,9 @@
+# Train PPOCR
+
+My training script.
+
+Start training with `./atrain_script/train.sh`.
+
+## Why `atrain`?
+
+A name that starts with `a` makes the folder quick to find.
diff --git a/atrain_script/configs/ch_PP-OCRv3_rec_distillation-meter.yml b/atrain_script/configs/ch_PP-OCRv3_rec_distillation-meter.yml
@@ -0,0 +1,213 @@
+# Run-wide settings: device, schedule lengths, output paths, dictionary.
+Global:
+  debug: false
+  use_gpu: true
+  epoch_num: 800
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec_ppocr_v3_distillation_meter
+  save_epoch_step: 3
+  eval_batch_step: 500
+  cal_metric_during_train: true
+  # The next three keys are explicitly unset.
+  pretrained_model: null
+  checkpoints: null
+  save_inference_dir: null
+  use_visualdl: false
+  infer_img: doc/imgs_words/ch/word_1.jpg
+  character_dict_path: atrain_script/meter_dict.txt
+  # NOTE: the "./"-prefixed form below did not seem to work.
+  # character_dict_path: ./atrain_script/meter_dict.txt
+  # Anchored so the SARHead entries under Architecture can alias this value.
+  max_text_length: &max_text_length 8
+  infer_mode: false
+  use_space_char: false
+  distributed: true
+  save_res_path: ./output/rec/predicts_ppocrv3_distillation_meter.txt
+  # Same shape as RecResizeImg.image_shape in the Train and Eval sections.
+  d2s_train_image_shape: [3, 32, 300]
+  use_wandb: true
+
+# wandb logging settings (Global.use_wandb is set to true above).
+wandb:
+  # (optional) this is the wandb project name
+  project: ProjectMeter
+
+# Adam optimizer with a Piecewise learning-rate schedule and L2 regularizer.
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    name: Piecewise
+    # One boundary epoch and two rate values.
+    decay_epochs: [700]
+    values: [0.0005, 0.00005]
+    warmup_epoch: 5
+  regularizer:
+    name: L2
+    factor: 3.0e-05
+
+
+# Distillation model made of a Teacher and a Student entry. Both entries
+# carry the same SVTR_LCNet settings; they are written out separately
+# rather than shared through an alias.
+Architecture:
+  # Anchored so each sub-model can alias the same model type.
+  model_type: &model_type rec
+  name: DistillationModel
+  algorithm: Distillation
+  Models:
+    Teacher:
+      pretrained: null
+      freeze_params: false
+      return_all_feats: true
+      model_type: *model_type
+      algorithm: SVTR_LCNet
+      Transform: null
+      Backbone:
+        name: MobileNetV1Enhance
+        scale: 0.5
+        last_conv_stride: [1, 2]
+        last_pool_type: avg
+        last_pool_kernel_size: [2, 2]
+      Head:
+        name: MultiHead
+        head_list:
+          - CTCHead:
+              Neck:
+                name: svtr
+                dims: 64
+                depth: 2
+                hidden_dims: 120
+                use_guide: true
+              Head:
+                fc_decay: 0.00001
+          - SARHead:
+              enc_dim: 512
+              # Alias of Global.max_text_length.
+              max_text_length: *max_text_length
+    Student:
+      pretrained: null
+      freeze_params: false
+      return_all_feats: true
+      model_type: *model_type
+      algorithm: SVTR_LCNet
+      Transform: null
+      Backbone:
+        name: MobileNetV1Enhance
+        scale: 0.5
+        last_conv_stride: [1, 2]
+        last_pool_type: avg
+        last_pool_kernel_size: [2, 2]
+      Head:
+        name: MultiHead
+        head_list:
+          - CTCHead:
+              Neck:
+                name: svtr
+                dims: 64
+                depth: 2
+                hidden_dims: 120
+                use_guide: true
+              Head:
+                fc_decay: 0.00001
+          - SARHead:
+              enc_dim: 512
+              # Alias of Global.max_text_length.
+              max_text_length: *max_text_length
+# Combined loss: five entries, kept in their original order.
+Loss:
+  name: CombinedLoss
+  loss_config_list:
+    # DML entry on the ctc head.
+    - DistillationDMLLoss:
+        weight: 1.0
+        act: softmax
+        use_log: true
+        model_name_pairs:
+          - [Student, Teacher]
+        key: head_out
+        multi_head: true
+        dis_head: ctc
+        name: dml_ctc
+    # DML entry on the sar head, at half the weight.
+    - DistillationDMLLoss:
+        weight: 0.5
+        act: softmax
+        use_log: true
+        model_name_pairs:
+          - [Student, Teacher]
+        key: head_out
+        multi_head: true
+        dis_head: sar
+        name: dml_sar
+    # Distance entry (mode l2) on backbone_out.
+    - DistillationDistanceLoss:
+        weight: 1.0
+        mode: l2
+        model_name_pairs:
+          - [Student, Teacher]
+        key: backbone_out
+    # CTC and SAR entries, each listing both models.
+    - DistillationCTCLoss:
+        weight: 1.0
+        model_name_list: [Student, Teacher]
+        key: head_out
+        multi_head: true
+    - DistillationSARLoss:
+        weight: 1.0
+        model_name_list: [Student, Teacher]
+        key: head_out
+        multi_head: true
+
+# Label decoding, configured for both models' head_out.
+PostProcess:
+  name: DistillationCTCLabelDecode
+  model_name: [Student, Teacher]
+  key: head_out
+  multi_head: true
+
+# Evaluation metric; `key` names the Student model and the main indicator is acc.
+Metric:
+  name: DistillationMetric
+  base_metric_name: RecMetric
+  main_indicator: acc
+  key: Student
+  ignore_space: false
+
+# Training dataset, transform pipeline and loader settings.
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/
+    # NOTE(review): RecConAug is commented out in the transforms below;
+    # confirm this index is still wanted.
+    ext_op_transform_idx: 1
+    label_file_list:
+      - ./train_data/train_list.txt
+    # The transform entries are a list; their order is kept as written.
+    transforms:
+      - DecodeImage:
+          img_mode: BGR
+          channel_first: false
+      # - RecConAug:
+      #     prob: 0.5
+      #     ext_data_num: 2
+      #     image_shape: [48, 320, 3]
+      #     max_text_length: *max_text_length
+      - RecAug: null
+      - MultiLabelEncode: null
+      - RecResizeImg:
+          image_shape: [3, 32, 300]
+      - KeepKeys:
+          keep_keys: [image, label_ctc, label_sar, length, valid_ratio]
+  loader:
+    shuffle: true
+    batch_size_per_card: 128
+    drop_last: true
+    num_workers: 4
+# Evaluation dataset, transform pipeline and loader settings. Compared
+# with Train there is no RecAug entry and shuffle/drop_last are off.
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data
+    label_file_list:
+      - ./train_data/val_list.txt
+    transforms:
+      - DecodeImage:
+          img_mode: BGR
+          channel_first: false
+      - MultiLabelEncode: null
+      - RecResizeImg:
+          image_shape: [3, 32, 300]
+      - KeepKeys:
+          keep_keys: [image, label_ctc, label_sar, length, valid_ratio]
+  loader:
+    shuffle: false
+    drop_last: false
+    batch_size_per_card: 128
+    num_workers: 4
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		/data2/xiuhao-new/work/meter-project/meterhub-pretrain/src/PaddleOCR/.venv/bin/python