transliterate_data

Data for Urdu<->Hindi transliteration
git clone git://lumidify.org/transliterate_data.git (fast, but not encrypted)
git clone https://lumidify.org/transliterate_data.git (encrypted, but very slow)
git clone git://4kcetb7mo7hj6grozzybxtotsub5bempzo4lirzc3437amof2c2impyd.onion/transliterate_data.git (over tor)
Log | Files | Refs | README

config.hi_ur (6948B)


      1 # Configuration for Hindi->Urdu
      2 
      3 split "[-.?,;।\s\\۔،؟―!—‘’“”:؛()[\]{}%―]+"
      4 beforeword "[-.?,;।\s\\۔،؟!—‘’“”:؛()[\]{}%―]"
      5 afterword "[-.?,;।\s\\۔،؟!—‘’“”:؛()[\]{}%―]"
      6 
      7 ignore "data/ignore.txt"
      8 table misc_endword "data/misc_endword.txt" nodisplay revert
      9 table special.hi_ur "data/special.hi_ur.txt" nodisplay revert
     10 table exceptions_beginword.hi_ur "data/exceptions_beginword.hi_ur.txt" revert
     11 table exceptions_beginword_endword.hi_ur "data/exceptions_beginword_endword.hi_ur.txt" revert
     12 table pairs_middle_e_o "data/pairs_middle_e_o.txt" nodisplay revert
     13 
     14 # Verbs
     15 
     16 table verbs_irregular "data/verbs/irregular.txt" revert
     17 table verbs_regular_consonant_ending "data/verbs/regular_consonant_ending.txt" revert
     18 table verbs_regular_consonant_ending_forms "data/verbs/regular_consonant_ending_forms.txt" nodisplay revert
     19 table verbs_regular_ending_in_a_o "data/verbs/regular_ending_in_a_o.txt" revert
     20 table verbs_regular_ending_in_a_o_forms "data/verbs/regular_ending_in_a_o_forms.txt" nodisplay revert
     21 
     22 # Nouns/Adjectives
     23 
     24 table na_imascfemshort "data/nouns_adjectives/imascfemshort.txt" revert
     25 table na_adjectiveregular_a_i "data/nouns_adjectives/adjectiveregular_a_i.txt" revert
     26 table na_irregular "data/nouns_adjectives/irregular.txt" revert
     27 table na_ahmasc "data/nouns_adjectives/ahmasc.txt" revert
     28 table na_aishortmasc "data/nouns_adjectives/aishortmasc.txt" revert
     29 table na_amasc "data/nouns_adjectives/amasc.txt" revert
     30 table na_an "data/nouns_adjectives/an.txt" revert
     31 table na_cfem "data/nouns_adjectives/cfem.txt" revert
     32 table na_cmasc "data/nouns_adjectives/cmasc.txt" revert
     33 table na_ifem "data/nouns_adjectives/ifem.txt" revert
     34 table na_imasc "data/nouns_adjectives/imasc.txt" revert
     35 table na_o_a_staysfem "data/nouns_adjectives/o_a_staysfem.txt" revert
     36 table na_u_staysfem "data/nouns_adjectives/u_staysfem.txt" revert
     37 table na_o_a_staysmasc "data/nouns_adjectives/o_a_staysmasc.txt" revert
     38 table na_u_staysmasc "data/nouns_adjectives/u_staysmasc.txt" revert
     39 table na_ui_oi_ai_mascfem "data/nouns_adjectives/ui_oi_ai_mascfem.txt" revert
     40 
     41 table na_imascfemshort_forms "data/nouns_adjectives/imascfemshort_forms.txt" nodisplay revert
     42 table na_adjectiveregular_a_i_forms "data/nouns_adjectives/adjectiveregular_a_i_forms.txt" nodisplay revert
     43 table na_ahmasc_forms "data/nouns_adjectives/ahmasc_forms.txt" nodisplay revert
     44 table na_aishortmasc_forms "data/nouns_adjectives/aishortmasc_forms.txt" nodisplay revert
     45 table na_amasc_forms "data/nouns_adjectives/amasc_forms.txt" nodisplay revert
     46 table na_an_forms "data/nouns_adjectives/an_forms.txt" nodisplay revert
     47 table na_cfem_forms "data/nouns_adjectives/cfem_forms.txt" nodisplay revert
     48 table na_cmasc_forms "data/nouns_adjectives/cmasc_forms.txt" nodisplay revert
     49 table na_ifem_forms "data/nouns_adjectives/ifem_forms.txt" nodisplay revert
     50 table na_imasc_forms "data/nouns_adjectives/imasc_forms.txt" nodisplay revert
     51 table na_o_a_staysfem_forms "data/nouns_adjectives/o_a_staysfem_forms.txt" nodisplay revert
     52 table na_u_staysfem_forms "data/nouns_adjectives/u_staysfem_forms.txt" nodisplay revert
     53 table na_o_a_staysmasc_forms "data/nouns_adjectives/o_a_staysmasc_forms.txt" nodisplay revert
     54 table na_u_staysmasc_forms "data/nouns_adjectives/u_staysmasc_forms.txt" nodisplay revert
     55 table na_ui_oi_ai_mascfem_forms "data/nouns_adjectives/ui_oi_ai_mascfem_forms.txt" nodisplay revert
     56 
     57 # Punctuation
     58 
     59 table punctuation "data/punctuation.txt" nodisplay revert
     60 
     61 # Regular verb expansions
     62 expand verbs_regular_consonant_ending verbs_regular_consonant_ending_forms
     63 expand verbs_regular_ending_in_a_o verbs_regular_ending_in_a_o_forms
     64 
     65 # Regular noun/adjective expansions
     66 
     67 expand na_imascfemshort na_imascfemshort_forms noroot
     68 expand na_adjectiveregular_a_i na_adjectiveregular_a_i_forms noroot
     69 expand na_ahmasc na_ahmasc_forms noroot
     70 expand na_aishortmasc na_aishortmasc_forms noroot
     71 expand na_amasc na_amasc_forms noroot
     72 expand na_an na_an_forms noroot
     73 expand na_cfem na_cfem_forms
     74 expand na_cmasc na_cmasc_forms
     75 expand na_ifem na_ifem_forms noroot
     76 expand na_imasc na_imasc_forms noroot
     77 expand na_o_a_staysfem na_o_a_staysfem_forms
     78 expand na_u_staysfem na_u_staysfem_forms noroot
     79 expand na_o_a_staysmasc na_o_a_staysmasc_forms
     80 expand na_u_staysmasc na_u_staysmasc_forms noroot
     81 expand na_ui_oi_ai_mascfem na_ui_oi_ai_mascfem_forms noroot
     82 
     83 # Conversion rules
     84 
     85 matchignore "[a-zA-Z=]+" beginword endword
     86 
     87 group beginword
     88 replace exceptions_beginword.hi_ur
     89 endgroup
     90 
     91 match "(?<=[ाीू])ओ-" " و "  # the letters ी ा ू
     92 match "(?<=ा)ए-" "ٔ "
     93 match "(?<=[ीूुअ])ए-" "ِ " # the letters ी ू ु अ
     94 match "(?<=[0123456789])वाँ" "واں" endword
     95 match "(?<=[0123456789])वें" "ویں" endword
     96 match "(?<=[0123456789])वीं" "ویں" endword
     97 match "(?<=[0123456789]) ई." "ء" endword
     98 match "(?<![0123456789]) :" ":" endword
     99 
    100 # The Persian genitive े- conflicts with word pairs containing regular inflections and a dash.
    101 group beginword endword
    102 replace pairs_middle_e_o
    103 endgroup
    104 
    105 group
    106 replace special.hi_ur
    107 endgroup
    108 
    109 match "बा-" "با " beginword
    110 match "ता-" "تا " beginword
    111 
    112 group endword
    113 replace misc_endword
    114 endgroup
    115 
    116 group beginword endword
    117 replace na_imascfemshort
    118 replace na_adjectiveregular_a_i
    119 replace na_irregular
    120 replace na_ahmasc
    121 replace na_aishortmasc
    122 replace na_amasc
    123 replace na_an
    124 replace na_cfem
    125 replace na_cmasc
    126 replace na_ifem
    127 replace na_imasc
    128 replace na_o_a_staysfem
    129 replace na_u_staysfem
    130 replace na_o_a_staysmasc
    131 replace na_u_staysmasc
    132 replace na_ui_oi_ai_mascfem
    133 
    134 replace verbs_irregular
    135 replace verbs_regular_consonant_ending
    136 replace verbs_regular_ending_in_a_o
    137 replace exceptions_beginword_endword.hi_ur override #override multiple choices for common words
    138 endgroup
    139 
    140 # The tables above contain words that merely begin with the same letters as the prefixes below, without actually using them as prefixes. Therefore those tables are applied first.
    141 match "बे" "بے" beginword
    142 match "ग़ैर" "غیر" beginword
    143 
    144 # Because Bible book names can be preceded by a number, this needs to come after the tables above.
    145 matchignore "[0123456789]+" beginword endword
    146 
    147 #After replacing "बे" and "ग़ैर" a second run is needed to replace the rest of the words.
    148 group beginword endword
    149 replace na_imascfemshort
    150 replace na_adjectiveregular_a_i
    151 replace na_irregular
    152 replace na_ahmasc
    153 replace na_aishortmasc
    154 replace na_amasc
    155 replace na_an
    156 replace na_cfem
    157 replace na_cmasc
    158 replace na_ifem
    159 replace na_imasc
    160 replace na_o_a_staysfem
    161 replace na_u_staysfem
    162 replace na_o_a_staysmasc
    163 replace na_u_staysmasc
    164 replace na_ui_oi_ai_mascfem
    165 
    166 replace verbs_irregular
    167 replace verbs_regular_consonant_ending
    168 replace verbs_regular_ending_in_a_o
    169 replace exceptions_beginword_endword.hi_ur override #override multiple choices for common words
    170 endgroup
    171 
    172 group
    173 replace punctuation
    174 endgroup
    175 
    176 targetdiacritics "ُ" "ِ" "ّ" "َ" "ٰ"
    177 
    178 retrywithout "_diacritics" "ُ" "ِ" "ّ" "َ" "ٰ"
    179 retrywithout "spac_e" " "
    180 retrywithout "nothing"
    181 comment "#"