transliterate_data

Data for Urdu<->Hindi transliteration
git clone git://lumidify.org/transliterate_data.git (fast, but not encrypted)
git clone https://lumidify.org/transliterate_data.git (encrypted, but very slow)
git clone git://4kcetb7mo7hj6grozzybxtotsub5bempzo4lirzc3437amof2c2impyd.onion/transliterate_data.git (over tor)
Log | Files | Refs | README

config.ur_hi (6316B)


      1 # Configuration for Urdu->Hindi transliteration
      2 
      3 split "[-.\s\\۔،؟!—‘’“”:؛()[\]{}%―=]+"
      4 beforeword "[-.\s\\۔،؟!—‘’“”:؛()[\]{}%―=]"
      5 afterword "[-.\s\\۔،؟!—‘’“”:؛()[\]{}%―=]"
      6 
      7 ignore "data/ignore.txt"
      8 table misc_beginword.ur_hi "data/misc_beginword.ur_hi.txt" nodisplay
      9 table misc_endword "data/misc_endword.txt" nodisplay
     10 table special.ur_hi "data/special.ur_hi.txt" nodisplay
     11 table exceptions_beginword_endword.ur_hi "data/exceptions_beginword_endword.ur_hi.txt"
     12 table pairs_middle_e_o "data/pairs_middle_e_o.txt" nodisplay
     13 
     14 # Verbs
     15 
     16 table verbs_irregular "data/verbs/irregular.txt"
     17 table verbs_regular_consonant_ending "data/verbs/regular_consonant_ending.txt"
     18 table verbs_regular_consonant_ending_forms "data/verbs/regular_consonant_ending_forms.txt" nodisplay
     19 table verbs_regular_ending_in_a_o "data/verbs/regular_ending_in_a_o.txt"
     20 table verbs_regular_ending_in_a_o_forms "data/verbs/regular_ending_in_a_o_forms.txt" nodisplay
     21 
     22 # Nouns/Adjectives
     23 
     24 table na_imascfemshort "data/nouns_adjectives/imascfemshort.txt"
     25 table na_adjectiveregular_a_i "data/nouns_adjectives/adjectiveregular_a_i.txt"
     26 table na_irregular "data/nouns_adjectives/irregular.txt"
     27 table na_ahmasc "data/nouns_adjectives/ahmasc.txt"
     28 table na_yahmasc "data/nouns_adjectives/yahmasc.txt"
     29 table na_aishortmasc "data/nouns_adjectives/aishortmasc.txt"
     30 table na_amasc "data/nouns_adjectives/amasc.txt"
     31 table na_an "data/nouns_adjectives/an.txt"
     32 table na_cfem "data/nouns_adjectives/cfem.txt"
     33 table na_cmasc "data/nouns_adjectives/cmasc.txt"
     34 table na_ifem "data/nouns_adjectives/ifem.txt"
     35 table na_imasc "data/nouns_adjectives/imasc.txt"
     36 table na_o_a_staysfem "data/nouns_adjectives/o_a_staysfem.txt"
     37 table na_u_staysfem "data/nouns_adjectives/u_staysfem.txt"
     38 table na_o_a_staysmasc "data/nouns_adjectives/o_a_staysmasc.txt"
     39 table na_u_staysmasc "data/nouns_adjectives/u_staysmasc.txt"
     40 table na_ui_oi_ai_mascfem "data/nouns_adjectives/ui_oi_ai_mascfem.txt"
     41 
     42 table na_imascfemshort_forms "data/nouns_adjectives/imascfemshort_forms.txt" nodisplay
     43 table na_adjectiveregular_a_i_forms "data/nouns_adjectives/adjectiveregular_a_i_forms.txt" nodisplay
     44 table na_ahmasc_forms "data/nouns_adjectives/ahmasc_forms.txt" nodisplay
     45 table na_yahmasc_forms "data/nouns_adjectives/yahmasc_forms.txt" nodisplay
     46 table na_aishortmasc_forms "data/nouns_adjectives/aishortmasc_forms.txt" nodisplay
     47 table na_amasc_forms "data/nouns_adjectives/amasc_forms.txt" nodisplay
     48 table na_an_forms "data/nouns_adjectives/an_forms.txt" nodisplay
     49 table na_cfem_forms "data/nouns_adjectives/cfem_forms.txt" nodisplay
     50 table na_cmasc_forms "data/nouns_adjectives/cmasc_forms.txt" nodisplay
     51 table na_ifem_forms "data/nouns_adjectives/ifem_forms.txt" nodisplay
     52 table na_imasc_forms "data/nouns_adjectives/imasc_forms.txt" nodisplay
     53 table na_o_a_staysfem_forms "data/nouns_adjectives/o_a_staysfem_forms.txt" nodisplay
     54 table na_u_staysfem_forms "data/nouns_adjectives/u_staysfem_forms.txt" nodisplay
     55 table na_o_a_staysmasc_forms "data/nouns_adjectives/o_a_staysmasc_forms.txt" nodisplay
     56 table na_u_staysmasc_forms "data/nouns_adjectives/u_staysmasc_forms.txt" nodisplay
     57 table na_ui_oi_ai_mascfem_forms "data/nouns_adjectives/ui_oi_ai_mascfem_forms.txt" nodisplay
     58 
     59 # Punctuation
     60 
     61 table punctuation "data/punctuation.txt" nodisplay
     62 
     63 # Regular verb expansions
     64 expand verbs_regular_consonant_ending verbs_regular_consonant_ending_forms
     65 expand verbs_regular_ending_in_a_o verbs_regular_ending_in_a_o_forms
     66 
     67 # Regular noun/adjective expansions
     68 
     69 expand na_imascfemshort na_imascfemshort_forms noroot
     70 expand na_adjectiveregular_a_i na_adjectiveregular_a_i_forms noroot
     71 expand na_ahmasc na_ahmasc_forms noroot
     72 expand na_yahmasc na_yahmasc_forms noroot
     73 expand na_aishortmasc na_aishortmasc_forms noroot
     74 expand na_amasc na_amasc_forms noroot
     75 expand na_an na_an_forms noroot
     76 expand na_cfem na_cfem_forms
     77 expand na_cmasc na_cmasc_forms
     78 expand na_ifem na_ifem_forms noroot
     79 expand na_imasc na_imasc_forms noroot
     80 expand na_o_a_staysfem na_o_a_staysfem_forms
     81 expand na_u_staysfem na_u_staysfem_forms noroot
     82 expand na_o_a_staysmasc na_o_a_staysmasc_forms
     83 expand na_u_staysmasc na_u_staysmasc_forms noroot
     84 expand na_ui_oi_ai_mascfem na_ui_oi_ai_mascfem_forms noroot
     85 
     86 # Conversion rules
     87 
     88 match "نشو و نما" "नशो-नुमा"
     89 match "مطیع و " "मतीओ-"
     90 match "صحیح و " "सहीओ-"
     91 match "وی سی آر" "वीसीआर"
     92 match "محوِ " "महवे-"  # exception to the ए- rule below: ِ after و would otherwise become ए-
     93 match "ابھر کر" "उभरकर"
     94 match "اُبھر کر" "उभरकर"
     95 match "بھر کر" "भरकर"
     96 match "بھر پور" "भरपूर"
     97  match "بھر پُور" "भरपूर"
     98 match "چوں و چرا" "चूँओ-चरा"
     99 matchignore "[a-zA-Z]+" beginword endword
    100 match "(?<=[یٰیاو]) و " "ओ-"
    101 match "(?<=[عوی])ِ " "ए-" 
    102 match "ِ والا" "े-वाला" 
    103 match "ِ والو" "े-वालो"
    104 match "ِ والے" "े-वाले"
    105 match "ہی والا" "ही वाला"
    106 match "ہی والے" "ही वाले"
    107 match "ہی والوں" "ही वालों"
    108 match "ہی والی" "ही वाली"
    109 match "ِ " "े-" 
    110 match "(?<=[ہی])ٔ " "ए-" 
    111 match "(?<=[0123456789])واں" "वाँ"
    112 match "(?<=[0123456789])ویں" "वें$वीं"
    113 match "(?<=[0123456789])ء" " ई."
    114 match "(?<![0123456789]):" "~:"
    115 
    116 group beginword
    117 replace misc_beginword.ur_hi
    118 endgroup
    119 
    120 group endword
    121 replace misc_endword
    122 endgroup
    123 
    124 group 
    125 replace special.ur_hi
    126 endgroup
    127 
    128 group beginword endword
    129 replace pairs_middle_e_o
    130 replace na_imascfemshort
    131 replace na_adjectiveregular_a_i
    132 replace na_irregular
    133 replace na_ahmasc
    134 replace na_yahmasc
    135 replace na_aishortmasc
    136 replace na_amasc
    137 replace na_an
    138 replace na_cfem
    139 replace na_cmasc
    140 replace na_ifem
    141 replace na_imasc
    142 replace na_o_a_staysfem
    143 replace na_u_staysfem
    144 replace na_o_a_staysmasc
    145 replace na_u_staysmasc
    146 replace na_ui_oi_ai_mascfem
    147 
    148 replace verbs_irregular
    149 replace verbs_regular_consonant_ending
    150 replace verbs_regular_ending_in_a_o
    151 
    152 replace exceptions_beginword_endword.ur_hi override
    153 endgroup
    154 
    155 # Numbers can precede Bible book names, so this rule needs to come after the table replacements above.
    156 matchignore "[0123456789]+" beginword endword
    157 
    158 group
    159 replace punctuation
    160 endgroup
    161 
    162 retrywithout "_diacritics" "ُ" "ِ" "ّ" "َ" "ٰ"
    163 retrywithout "spac_e" " "
    164 retrywithout "nothing"
    165 comment "#"