config.ur_hi (6316B)
# Configuration for Urdu->Hindi
#
# Layout of this file, top to bottom: word-boundary patterns, data tables,
# table expansions, match rules, replace groups, retry fallbacks.
# NOTE(review): the meaning of each directive and flag is defined by the tool
# that reads this file, not here. Remarks below that start with "presumably"
# are inferred from directive names only -- confirm against that tool.

# The three patterns below share one character class (whitespace, backslash,
# ASCII and Urdu punctuation, quotes, brackets, %, =). Only "split" carries a
# trailing "+", so it matches a run of such characters rather than one.
split "[-.\s\\۔،؟!—‘’“”:؛()[\]{}%―=]+"
beforeword "[-.\s\\۔،؟!—‘’“”:؛()[\]{}%―=]"
afterword "[-.\s\\۔،؟!—‘’“”:؛()[\]{}%―=]"

# General-purpose data files. Each "table" line binds a name to a file path;
# the names are referenced again by the expand/replace directives further down.
# NOTE(review): some tables carry "nodisplay" and some do not; the effect of
# that flag is not visible in this file.
ignore "data/ignore.txt"
table misc_beginword.ur_hi "data/misc_beginword.ur_hi.txt" nodisplay
table misc_endword "data/misc_endword.txt" nodisplay
table special.ur_hi "data/special.ur_hi.txt" nodisplay
table exceptions_beginword_endword.ur_hi "data/exceptions_beginword_endword.ur_hi.txt"
table pairs_middle_e_o "data/pairs_middle_e_o.txt" nodisplay

# Verbs
# The two "regular_*" tables each have a companion "*_forms" table that is
# paired with them by an expand directive below; verbs_irregular has none.

table verbs_irregular "data/verbs/irregular.txt"
table verbs_regular_consonant_ending "data/verbs/regular_consonant_ending.txt"
table verbs_regular_consonant_ending_forms "data/verbs/regular_consonant_ending_forms.txt" nodisplay
table verbs_regular_ending_in_a_o "data/verbs/regular_ending_in_a_o.txt"
table verbs_regular_ending_in_a_o_forms "data/verbs/regular_ending_in_a_o_forms.txt" nodisplay

# Nouns/Adjectives
# First the base tables, then (after the blank line) one "*_forms" table per
# base table. na_irregular is the only base table without a "*_forms" partner.

table na_imascfemshort "data/nouns_adjectives/imascfemshort.txt"
table na_adjectiveregular_a_i "data/nouns_adjectives/adjectiveregular_a_i.txt"
table na_irregular "data/nouns_adjectives/irregular.txt"
table na_ahmasc "data/nouns_adjectives/ahmasc.txt"
table na_yahmasc "data/nouns_adjectives/yahmasc.txt"
table na_aishortmasc "data/nouns_adjectives/aishortmasc.txt"
table na_amasc "data/nouns_adjectives/amasc.txt"
table na_an "data/nouns_adjectives/an.txt"
table na_cfem "data/nouns_adjectives/cfem.txt"
table na_cmasc "data/nouns_adjectives/cmasc.txt"
table na_ifem "data/nouns_adjectives/ifem.txt"
table na_imasc "data/nouns_adjectives/imasc.txt"
table na_o_a_staysfem "data/nouns_adjectives/o_a_staysfem.txt"
table na_u_staysfem "data/nouns_adjectives/u_staysfem.txt"
table na_o_a_staysmasc "data/nouns_adjectives/o_a_staysmasc.txt"
table na_u_staysmasc "data/nouns_adjectives/u_staysmasc.txt"
table na_ui_oi_ai_mascfem "data/nouns_adjectives/ui_oi_ai_mascfem.txt"

table na_imascfemshort_forms "data/nouns_adjectives/imascfemshort_forms.txt" nodisplay
table na_adjectiveregular_a_i_forms "data/nouns_adjectives/adjectiveregular_a_i_forms.txt" nodisplay
table na_ahmasc_forms "data/nouns_adjectives/ahmasc_forms.txt" nodisplay
table na_yahmasc_forms "data/nouns_adjectives/yahmasc_forms.txt" nodisplay
table na_aishortmasc_forms "data/nouns_adjectives/aishortmasc_forms.txt" nodisplay
table na_amasc_forms "data/nouns_adjectives/amasc_forms.txt" nodisplay
table na_an_forms "data/nouns_adjectives/an_forms.txt" nodisplay
table na_cfem_forms "data/nouns_adjectives/cfem_forms.txt" nodisplay
table na_cmasc_forms "data/nouns_adjectives/cmasc_forms.txt" nodisplay
table na_ifem_forms "data/nouns_adjectives/ifem_forms.txt" nodisplay
table na_imasc_forms "data/nouns_adjectives/imasc_forms.txt" nodisplay
table na_o_a_staysfem_forms "data/nouns_adjectives/o_a_staysfem_forms.txt" nodisplay
table na_u_staysfem_forms "data/nouns_adjectives/u_staysfem_forms.txt" nodisplay
table na_o_a_staysmasc_forms "data/nouns_adjectives/o_a_staysmasc_forms.txt" nodisplay
table na_u_staysmasc_forms "data/nouns_adjectives/u_staysmasc_forms.txt" nodisplay
table na_ui_oi_ai_mascfem_forms "data/nouns_adjectives/ui_oi_ai_mascfem_forms.txt" nodisplay

# Punctuation

table punctuation "data/punctuation.txt" nodisplay

# Regular verb expansions
# Each expand line names a base table followed by its "*_forms" table.
# Neither verb expansion uses the "noroot" flag.
expand verbs_regular_consonant_ending verbs_regular_consonant_ending_forms
expand verbs_regular_ending_in_a_o verbs_regular_ending_in_a_o_forms

# Regular noun/adjective expansions
# Most lines carry "noroot"; na_cfem, na_cmasc, na_o_a_staysfem and
# na_o_a_staysmasc do not.
# NOTE(review): presumably that difference is deliberate (the unexpanded root
# being a valid form for those classes) -- confirm before normalizing it.

expand na_imascfemshort na_imascfemshort_forms noroot
expand na_adjectiveregular_a_i na_adjectiveregular_a_i_forms noroot
expand na_ahmasc na_ahmasc_forms noroot
expand na_yahmasc na_yahmasc_forms noroot
expand na_aishortmasc na_aishortmasc_forms noroot
expand na_amasc na_amasc_forms noroot
expand na_an na_an_forms noroot
expand na_cfem na_cfem_forms
expand na_cmasc na_cmasc_forms
expand na_ifem na_ifem_forms noroot
expand na_imasc na_imasc_forms noroot
expand na_o_a_staysfem na_o_a_staysfem_forms
expand na_u_staysfem na_u_staysfem_forms noroot
expand na_o_a_staysmasc na_o_a_staysmasc_forms
expand na_u_staysmasc na_u_staysmasc_forms noroot
expand na_ui_oi_ai_mascfem na_ui_oi_ai_mascfem_forms noroot

# Conversion rules
# Each "match" pairs an Urdu pattern (first string) with Hindi output (second
# string). Fixed phrases come first, then patterns using regex lookbehind
# "(?<=...)" / negative lookbehind "(?<!...)".
# NOTE(review): presumably rules are tried in file order, which is why longer
# and more specific patterns precede shorter ones that would also match the
# same text -- confirm before reordering anything in this section.
# NOTE(review): the "$" inside the output of the "(?<=[0123456789])ویں" rule
# presumably separates two alternative outputs -- confirm.

match "نشو و نما" "नशो-नुमा"
match "مطیع و " "मतीओ-"
match "صحیح و " "सहीओ-"
match "وی سی آر" "वीसीआर"
match "محوِ " "महवे-" #exception to ए rule
match "ابھر کر" "उभरकर"
match "اُبھر کر" "उभरकर"
match "بھر کر" "भरकर"
match "بھر پور" "भरपूर"
match "بھر پُور" "भरपूर"
match "چوں و چرا" "चूँओ-चरा"
matchignore "[a-zA-Z]+" beginword endword
match "(?<=[یٰیاو]) و " "ओ-"
match "(?<=[عوی])ِ " "ए-"
match "ِ والا" "े-वाला"
match "ِ والو" "े-वालो"
match "ِ والے" "े-वाले"
match "ہی والا" "ही वाला"
match "ہی والے" "ही वाले"
match "ہی والوں" "ही वालों"
match "ہی والی" "ही वाली"
match "ِ " "े-"
match "(?<=[ہی])ٔ " "ए-"
match "(?<=[0123456789])واں" "वाँ"
match "(?<=[0123456789])ویں" "वें$वीं"
match "(?<=[0123456789])ء" " ई."
match "(?<![0123456789]):" "~:"

# Replace groups. Each group applies the listed tables; the words after
# "group" (beginword / endword / both / none) differ per group.
# NOTE(review): presumably those words anchor the replacement to the start
# and/or end of a word, and a bare "group" applies anywhere -- confirm.

group beginword
replace misc_beginword.ur_hi
endgroup

group endword
replace misc_endword
endgroup

group
replace special.ur_hi
endgroup

# Whole-word group: all noun/adjective and verb tables, with the exceptions
# table listed last and flagged "override".
# NOTE(review): presumably "override" lets its entries win over the tables
# listed before it -- confirm.
group beginword endword
replace pairs_middle_e_o
replace na_imascfemshort
replace na_adjectiveregular_a_i
replace na_irregular
replace na_ahmasc
replace na_yahmasc
replace na_aishortmasc
replace na_amasc
replace na_an
replace na_cfem
replace na_cmasc
replace na_ifem
replace na_imasc
replace na_o_a_staysfem
replace na_u_staysfem
replace na_o_a_staysmasc
replace na_u_staysmasc
replace na_ui_oi_ai_mascfem

replace verbs_irregular
replace verbs_regular_consonant_ending
replace verbs_regular_ending_in_a_o

replace exceptions_beginword_endword.ur_hi override
endgroup

#because of numbers before Bible books, this needs to come after the tables above
matchignore "[0123456789]+" beginword endword

group
replace punctuation
endgroup

# Retry fallbacks: each line has a label followed by zero or more strings
# (five Arabic-script combining marks, a single space, and none at all).
# NOTE(review): presumably a failed lookup is retried with the listed strings
# removed from the input, in this order -- confirm.
retrywithout "_diacritics" "ُ" "ِ" "ّ" "َ" "ٰ"
retrywithout "spac_e" " "
retrywithout "nothing"
# NOTE(review): presumably declares "#" as the comment marker that the comment
# lines in this file rely on -- confirm.
comment "#"