config.hi_ur (6948B)
1 # Configuration for Hindi->Urdu 2 3 split "[-.?,;।\s\\۔،؟―!—‘’“”:؛()[\]{}%―]+" 4 beforeword "[-.?,;।\s\\۔،؟!—‘’“”:؛()[\]{}%―]" 5 afterword "[-.?,;।\s\\۔،؟!—‘’“”:؛()[\]{}%―]" 6 7 ignore "data/ignore.txt" 8 table misc_endword "data/misc_endword.txt" nodisplay revert 9 table special.hi_ur "data/special.hi_ur.txt" nodisplay revert 10 table exceptions_beginword.hi_ur "data/exceptions_beginword.hi_ur.txt" revert 11 table exceptions_beginword_endword.hi_ur "data/exceptions_beginword_endword.hi_ur.txt" revert 12 table pairs_middle_e_o "data/pairs_middle_e_o.txt" nodisplay revert 13 14 # Verbs 15 16 table verbs_irregular "data/verbs/irregular.txt" revert 17 table verbs_regular_consonant_ending "data/verbs/regular_consonant_ending.txt" revert 18 table verbs_regular_consonant_ending_forms "data/verbs/regular_consonant_ending_forms.txt" nodisplay revert 19 table verbs_regular_ending_in_a_o "data/verbs/regular_ending_in_a_o.txt" revert 20 table verbs_regular_ending_in_a_o_forms "data/verbs/regular_ending_in_a_o_forms.txt" nodisplay revert 21 22 # Nouns/Adjectives 23 24 table na_imascfemshort "data/nouns_adjectives/imascfemshort.txt" revert 25 table na_adjectiveregular_a_i "data/nouns_adjectives/adjectiveregular_a_i.txt" revert 26 table na_irregular "data/nouns_adjectives/irregular.txt" revert 27 table na_ahmasc "data/nouns_adjectives/ahmasc.txt" revert 28 table na_aishortmasc "data/nouns_adjectives/aishortmasc.txt" revert 29 table na_amasc "data/nouns_adjectives/amasc.txt" revert 30 table na_an "data/nouns_adjectives/an.txt" revert 31 table na_cfem "data/nouns_adjectives/cfem.txt" revert 32 table na_cmasc "data/nouns_adjectives/cmasc.txt" revert 33 table na_ifem "data/nouns_adjectives/ifem.txt" revert 34 table na_imasc "data/nouns_adjectives/imasc.txt" revert 35 table na_o_a_staysfem "data/nouns_adjectives/o_a_staysfem.txt" revert 36 table na_u_staysfem "data/nouns_adjectives/u_staysfem.txt" revert 37 table na_o_a_staysmasc "data/nouns_adjectives/o_a_staysmasc.txt" revert 38 
table na_u_staysmasc "data/nouns_adjectives/u_staysmasc.txt" revert 39 table na_ui_oi_ai_mascfem "data/nouns_adjectives/ui_oi_ai_mascfem.txt" revert 40 41 table na_imascfemshort_forms "data/nouns_adjectives/imascfemshort_forms.txt" nodisplay revert 42 table na_adjectiveregular_a_i_forms "data/nouns_adjectives/adjectiveregular_a_i_forms.txt" nodisplay revert 43 table na_ahmasc_forms "data/nouns_adjectives/ahmasc_forms.txt" nodisplay revert 44 table na_aishortmasc_forms "data/nouns_adjectives/aishortmasc_forms.txt" nodisplay revert 45 table na_amasc_forms "data/nouns_adjectives/amasc_forms.txt" nodisplay revert 46 table na_an_forms "data/nouns_adjectives/an_forms.txt" nodisplay revert 47 table na_cfem_forms "data/nouns_adjectives/cfem_forms.txt" nodisplay revert 48 table na_cmasc_forms "data/nouns_adjectives/cmasc_forms.txt" nodisplay revert 49 table na_ifem_forms "data/nouns_adjectives/ifem_forms.txt" nodisplay revert 50 table na_imasc_forms "data/nouns_adjectives/imasc_forms.txt" nodisplay revert 51 table na_o_a_staysfem_forms "data/nouns_adjectives/o_a_staysfem_forms.txt" nodisplay revert 52 table na_u_staysfem_forms "data/nouns_adjectives/u_staysfem_forms.txt" nodisplay revert 53 table na_o_a_staysmasc_forms "data/nouns_adjectives/o_a_staysmasc_forms.txt" nodisplay revert 54 table na_u_staysmasc_forms "data/nouns_adjectives/u_staysmasc_forms.txt" nodisplay revert 55 table na_ui_oi_ai_mascfem_forms "data/nouns_adjectives/ui_oi_ai_mascfem_forms.txt" nodisplay revert 56 57 # Punctuation 58 59 table punctuation "data/punctuation.txt" nodisplay revert 60 61 # Regular verb expansions 62 expand verbs_regular_consonant_ending verbs_regular_consonant_ending_forms 63 expand verbs_regular_ending_in_a_o verbs_regular_ending_in_a_o_forms 64 65 # Regular noun/adjective expansions 66 67 expand na_imascfemshort na_imascfemshort_forms noroot 68 expand na_adjectiveregular_a_i na_adjectiveregular_a_i_forms noroot 69 expand na_ahmasc na_ahmasc_forms noroot 70 expand na_aishortmasc 
na_aishortmasc_forms noroot 71 expand na_amasc na_amasc_forms noroot 72 expand na_an na_an_forms noroot 73 expand na_cfem na_cfem_forms 74 expand na_cmasc na_cmasc_forms 75 expand na_ifem na_ifem_forms noroot 76 expand na_imasc na_imasc_forms noroot 77 expand na_o_a_staysfem na_o_a_staysfem_forms 78 expand na_u_staysfem na_u_staysfem_forms noroot 79 expand na_o_a_staysmasc na_o_a_staysmasc_forms 80 expand na_u_staysmasc na_u_staysmasc_forms noroot 81 expand na_ui_oi_ai_mascfem na_ui_oi_ai_mascfem_forms noroot 82 83 # Conversion rules 84 85 matchignore "[a-zA-Z=]+" beginword endword 86 87 group beginword 88 replace exceptions_beginword.hi_ur 89 endgroup 90 91 match "(?<=[ाीू])ओ-" " و " # the letters ी ा ू 92 match "(?<=ा)ए-" "ٔ " 93 match "(?<=[ीूुअ])ए-" "ِ " # the letters ी ू ु अ 94 match "(?<=[0123456789])वाँ" "واں" endword 95 match "(?<=[0123456789])वें" "ویں" endword 96 match "(?<=[0123456789])वीं" "ویں" endword 97 match "(?<=[0123456789]) ई." "ء" endword 98 match "(?<![0123456789]) :" ":" endword 99 100 #The Persian Genitive े- conflicts with word pairs containing regular inflections and a dash. 
101 group beginword endword 102 replace pairs_middle_e_o 103 endgroup 104 105 group 106 replace special.hi_ur 107 endgroup 108 109 match "बा-" "با " beginword 110 match "ता-" "تا " beginword 111 112 group endword 113 replace misc_endword 114 endgroup 115 116 group beginword endword 117 replace na_imascfemshort 118 replace na_adjectiveregular_a_i 119 replace na_irregular 120 replace na_ahmasc 121 replace na_aishortmasc 122 replace na_amasc 123 replace na_an 124 replace na_cfem 125 replace na_cmasc 126 replace na_ifem 127 replace na_imasc 128 replace na_o_a_staysfem 129 replace na_u_staysfem 130 replace na_o_a_staysmasc 131 replace na_u_staysmasc 132 replace na_ui_oi_ai_mascfem 133 134 replace verbs_irregular 135 replace verbs_regular_consonant_ending 136 replace verbs_regular_ending_in_a_o 137 replace exceptions_beginword_endword.hi_ur override #override multiple choices for common words 138 endgroup 139 140 #The tables above contain words that begin with the same letters as the prefixes below but in which those letters are not a prefix. Therefore the tables are applied first. 141 match "बे" "بے" beginword 142 match "ग़ैर" "غیر" beginword 143 144 #Because of numbers before Bible books, this needs to come after the tables above. 145 matchignore "[0123456789]+" beginword endword 146 147 #After replacing "बे" and "ग़ैर", a second run is needed to replace the rest of the words. 
148 group beginword endword 149 replace na_imascfemshort 150 replace na_adjectiveregular_a_i 151 replace na_irregular 152 replace na_ahmasc 153 replace na_aishortmasc 154 replace na_amasc 155 replace na_an 156 replace na_cfem 157 replace na_cmasc 158 replace na_ifem 159 replace na_imasc 160 replace na_o_a_staysfem 161 replace na_u_staysfem 162 replace na_o_a_staysmasc 163 replace na_u_staysmasc 164 replace na_ui_oi_ai_mascfem 165 166 replace verbs_irregular 167 replace verbs_regular_consonant_ending 168 replace verbs_regular_ending_in_a_o 169 replace exceptions_beginword_endword.hi_ur override #override multiple choices for common words 170 endgroup 171 172 group 173 replace punctuation 174 endgroup 175 176 targetdiacritics "ُ" "ِ" "ّ" "َ" "ٰ" 177 178 retrywithout "_diacritics" "ُ" "ِ" "ّ" "َ" "ٰ" 179 retrywithout "spac_e" " " 180 retrywithout "nothing" 181 comment "#"