config (4785B)
# Configuration for Urdu->Hindi
#
# Layout of this file, top to bottom:
#   1. word-boundary regexes
#   2. lookup tables (word lists and their inflected-form lists)
#   3. expansions pairing each regular word list with its *_forms table
#   4. regex conversion rules
#   5. replacement groups
#   6. diacritics list
# NOTE(review): directive order is preserved exactly as authored; the tool
# presumably processes rules in file order, so do not reorder without
# confirming against the tool's documentation.

# Word-boundary patterns. All three share one character class: hyphen,
# whitespace, backslash, Urdu full stop, Arabic comma, Arabic question mark,
# "!", em dash, curly single/double quotes, colon, Arabic semicolon, round /
# square / curly brackets and "%". `split` matches a run of one or more such
# characters; `beforeword` and `afterword` match exactly one.
split "[-\s\\۔،؟!—‘’“”:؛()[\]{}%]+"
beforeword "[-\s\\۔،؟!—‘’“”:؛()[\]{}%]"
afterword "[-\s\\۔،؟!—‘’“”:؛()[\]{}%]"

# Ignore list and miscellaneous tables.
# NOTE(review): `nodisplay` presumably hides a table from the tool's
# listing/UI output -- confirm.
ignore "data/ignore.txt"
table misc_beginword "data/misc_beginword.txt" nodisplay
table misc_endword "data/misc_endword.txt" nodisplay
table special "data/special.txt" nodisplay


# Verbs
#
# Each regular verb list has a companion *_forms table (marked nodisplay);
# verbs_irregular has none.

table verbs_irregular "data/verbs/irregular.txt"
table verbs_regular_consonant_ending "data/verbs/regular_consonant_ending.txt"
table verbs_regular_consonant_ending_forms "data/verbs/regular_consonant_ending_forms.txt" nodisplay
table verbs_regular_ending_in_a_o "data/verbs/regular_ending_in_a_o.txt"
table verbs_regular_ending_in_a_o_forms "data/verbs/regular_ending_in_a_o_forms.txt" nodisplay

# Nouns/Adjectives
#
# Word lists first, then the companion *_forms tables (all nodisplay).
# na_irregular has no *_forms table.

table na_ifemshort "data/nouns_adjectives/ifemshort.txt"
table na_adjectiveregular_a_i "data/nouns_adjectives/adjectiveregular_a_i.txt"
table na_irregular "data/nouns_adjectives/irregular.txt"
table na_ahmasc "data/nouns_adjectives/ahmasc.txt"
table na_aimasc "data/nouns_adjectives/aimasc.txt"
table na_amasc "data/nouns_adjectives/amasc.txt"
table na_an "data/nouns_adjectives/an.txt"
table na_cfem "data/nouns_adjectives/cfem.txt"
table na_cmasc "data/nouns_adjectives/cmasc.txt"
table na_ifem "data/nouns_adjectives/ifem.txt"
table na_imasc "data/nouns_adjectives/imasc.txt"
table na_o_a_staysfem "data/nouns_adjectives/o_a_staysfem.txt"
table na_u_staysfem "data/nouns_adjectives/u_staysfem.txt"
table na_o_a_staysmasc "data/nouns_adjectives/o_a_staysmasc.txt"
table na_u_staysmasc "data/nouns_adjectives/u_staysmasc.txt"
table na_ui_oi_ai_mascfem "data/nouns_adjectives/ui_oi_ai_mascfem.txt"

table na_ifemshort_forms "data/nouns_adjectives/ifemshort_forms.txt" nodisplay
table na_adjectiveregular_a_i_forms "data/nouns_adjectives/adjectiveregular_a_i_forms.txt" nodisplay
table na_ahmasc_forms "data/nouns_adjectives/ahmasc_forms.txt" nodisplay
table na_aimasc_forms "data/nouns_adjectives/aimasc_forms.txt" nodisplay
table na_amasc_forms "data/nouns_adjectives/amasc_forms.txt" nodisplay
table na_an_forms "data/nouns_adjectives/an_forms.txt" nodisplay
table na_cfem_forms "data/nouns_adjectives/cfem_forms.txt" nodisplay
table na_cmasc_forms "data/nouns_adjectives/cmasc_forms.txt" nodisplay
table na_ifem_forms "data/nouns_adjectives/ifem_forms.txt" nodisplay
table na_imasc_forms "data/nouns_adjectives/imasc_forms.txt" nodisplay
table na_o_a_staysfem_forms "data/nouns_adjectives/o_a_staysfem_forms.txt" nodisplay
table na_u_staysfem_forms "data/nouns_adjectives/u_staysfem_forms.txt" nodisplay
table na_o_a_staysmasc_forms "data/nouns_adjectives/o_a_staysmasc_forms.txt" nodisplay
table na_u_staysmasc_forms "data/nouns_adjectives/u_staysmasc_forms.txt" nodisplay
table na_ui_oi_ai_mascfem_forms "data/nouns_adjectives/ui_oi_ai_mascfem_forms.txt" nodisplay

# Punctuation

table punctuation "data/punctuation.txt" nodisplay

# Regular verb expansions
# Each `expand` names a word-list table followed by its *_forms table.
expand verbs_regular_consonant_ending verbs_regular_consonant_ending_forms
expand verbs_regular_ending_in_a_o verbs_regular_ending_in_a_o_forms

# Regular noun/adjective expansions
# The `noroot` flag is deliberately absent on na_cfem, na_cmasc,
# na_o_a_staysfem and na_o_a_staysmasc (and on both verb expansions above).
# NOTE(review): presumably `noroot` drops the unexpanded root entry from the
# generated forms -- confirm before adding or removing it anywhere.

expand na_ifemshort na_ifemshort_forms noroot
expand na_adjectiveregular_a_i na_adjectiveregular_a_i_forms noroot
expand na_ahmasc na_ahmasc_forms noroot
expand na_aimasc na_aimasc_forms noroot
expand na_amasc na_amasc_forms noroot
expand na_an na_an_forms noroot
expand na_cfem na_cfem_forms
expand na_cmasc na_cmasc_forms
expand na_ifem na_ifem_forms noroot
expand na_imasc na_imasc_forms noroot
expand na_o_a_staysfem na_o_a_staysfem_forms
expand na_u_staysfem na_u_staysfem_forms noroot
expand na_o_a_staysmasc na_o_a_staysmasc_forms
expand na_u_staysmasc na_u_staysmasc_forms noroot
expand na_ui_oi_ai_mascfem na_ui_oi_ai_mascfem_forms noroot

# Conversion rules
#
# Each `match` takes a regex and a replacement string. The patterns use
# lookbehind assertions, so the preceding character(s) must be present but
# are not part of the matched text.
#   - rules 1 and 4: a space-delimited waw following the given letter
#   - rules 2 and 3: a combining mark following either listed letter,
#     flagged `endword`
#   - rules 5-7: a suffix directly following an ASCII digit
#   - rule 8: a colon NOT preceded by an ASCII digit gains a leading space
# NOTE(review): the "$" inside the replacement of rule 6 presumably separates
# two alternative outputs -- confirm with the tool's documentation.

match "(?<=ی) و " "ओ-"
match "(?<=[ہی])ٔ" "ए-" endword
match "(?<=[ہی])ِ" "ए-" endword
match "(?<=ا) و " "ओ-"
match "(?<=[0123456789])واں" "वाँ"
match "(?<=[0123456789])ویں" "वें$वीं"
match "(?<=[0123456789])ء" " ई."
match "(?<![0123456789]):" " :"

# A word consisting only of digits (pattern flagged both beginword and
# endword).
# NOTE(review): presumably such words are passed through unconverted.
matchignore "\d+" beginword endword

# Replacement groups. The flags after `group` mirror the flags used on the
# rules above (beginword / endword); the third group has no flags.

group beginword
replace misc_beginword
endgroup

group endword
replace misc_endword
endgroup

group
replace special
replace punctuation
endgroup

# Whole-word group (both beginword and endword): noun/adjective tables
# first, then verb tables.
group beginword endword
replace na_ifemshort
replace na_adjectiveregular_a_i
replace na_irregular
replace na_ahmasc
replace na_aimasc
replace na_amasc
replace na_an
replace na_cfem
replace na_cmasc
replace na_ifem
replace na_imasc
replace na_o_a_staysfem
replace na_u_staysfem
replace na_o_a_staysmasc
replace na_u_staysmasc
replace na_ui_oi_ai_mascfem

replace verbs_irregular
replace verbs_regular_consonant_ending
replace verbs_regular_ending_in_a_o
endgroup

# Four Arabic-script combining marks.
# NOTE(review): presumably these are treated as optional when matching table
# entries against input text -- confirm.
diacritics "ُ" "ِ" "ّ" "َ"