transliterate_data

Data for Urdu->Hindi transliteration
git clone git://lumidify.org/transliterate_data.git
Log | Files | Refs | README

config (4785B)


      1 # Configuration for Urdu->Hindi transliteration
      2 
      3 split "[-\s\\۔،؟!—‘’“”:؛()[\]{}%]+"
      4 beforeword "[-\s\\۔،؟!—‘’“”:؛()[\]{}%]"
      5 afterword "[-\s\\۔،؟!—‘’“”:؛()[\]{}%]"
      6 
      7 ignore "data/ignore.txt"
      8 table misc_beginword "data/misc_beginword.txt" nodisplay
      9 table misc_endword "data/misc_endword.txt" nodisplay
     10 table special "data/special.txt" nodisplay
     11 
     12 
     13 # Verbs
     14 
     15 table verbs_irregular "data/verbs/irregular.txt"
     16 table verbs_regular_consonant_ending "data/verbs/regular_consonant_ending.txt"
     17 table verbs_regular_consonant_ending_forms "data/verbs/regular_consonant_ending_forms.txt" nodisplay
     18 table verbs_regular_ending_in_a_o "data/verbs/regular_ending_in_a_o.txt"
     19 table verbs_regular_ending_in_a_o_forms "data/verbs/regular_ending_in_a_o_forms.txt" nodisplay
     20 
     21 # Nouns/Adjectives
     22 
     23 table na_ifemshort "data/nouns_adjectives/ifemshort.txt"
     24 table na_adjectiveregular_a_i "data/nouns_adjectives/adjectiveregular_a_i.txt"
     25 table na_irregular "data/nouns_adjectives/irregular.txt"
     26 table na_ahmasc "data/nouns_adjectives/ahmasc.txt"
     27 table na_aimasc "data/nouns_adjectives/aimasc.txt"
     28 table na_amasc "data/nouns_adjectives/amasc.txt"
     29 table na_an "data/nouns_adjectives/an.txt"
     30 table na_cfem "data/nouns_adjectives/cfem.txt"
     31 table na_cmasc "data/nouns_adjectives/cmasc.txt"
     32 table na_ifem "data/nouns_adjectives/ifem.txt"
     33 table na_imasc "data/nouns_adjectives/imasc.txt"
     34 table na_o_a_staysfem "data/nouns_adjectives/o_a_staysfem.txt"
     35 table na_u_staysfem "data/nouns_adjectives/u_staysfem.txt"
     36 table na_o_a_staysmasc "data/nouns_adjectives/o_a_staysmasc.txt"
     37 table na_u_staysmasc "data/nouns_adjectives/u_staysmasc.txt"
     38 table na_ui_oi_ai_mascfem "data/nouns_adjectives/ui_oi_ai_mascfem.txt"
     39 
     40 table na_ifemshort_forms "data/nouns_adjectives/ifemshort_forms.txt" nodisplay
     41 table na_adjectiveregular_a_i_forms "data/nouns_adjectives/adjectiveregular_a_i_forms.txt" nodisplay
     42 table na_ahmasc_forms "data/nouns_adjectives/ahmasc_forms.txt" nodisplay
     43 table na_aimasc_forms "data/nouns_adjectives/aimasc_forms.txt" nodisplay
     44 table na_amasc_forms "data/nouns_adjectives/amasc_forms.txt" nodisplay
     45 table na_an_forms "data/nouns_adjectives/an_forms.txt" nodisplay
     46 table na_cfem_forms "data/nouns_adjectives/cfem_forms.txt" nodisplay
     47 table na_cmasc_forms "data/nouns_adjectives/cmasc_forms.txt" nodisplay
     48 table na_ifem_forms "data/nouns_adjectives/ifem_forms.txt" nodisplay
     49 table na_imasc_forms "data/nouns_adjectives/imasc_forms.txt" nodisplay
     50 table na_o_a_staysfem_forms "data/nouns_adjectives/o_a_staysfem_forms.txt" nodisplay
     51 table na_u_staysfem_forms "data/nouns_adjectives/u_staysfem_forms.txt" nodisplay
     52 table na_o_a_staysmasc_forms "data/nouns_adjectives/o_a_staysmasc_forms.txt" nodisplay
     53 table na_u_staysmasc_forms "data/nouns_adjectives/u_staysmasc_forms.txt" nodisplay
     54 table na_ui_oi_ai_mascfem_forms "data/nouns_adjectives/ui_oi_ai_mascfem_forms.txt" nodisplay
     55 
     56 # Punctuation
     57 
     58 table punctuation "data/punctuation.txt" nodisplay
     59 
     60 # Regular verb expansions
     61 expand verbs_regular_consonant_ending verbs_regular_consonant_ending_forms
     62 expand verbs_regular_ending_in_a_o verbs_regular_ending_in_a_o_forms
     63 
     64 # Regular noun/adjective expansions
     65 
     66 expand na_ifemshort na_ifemshort_forms noroot
     67 expand na_adjectiveregular_a_i na_adjectiveregular_a_i_forms noroot
     68 expand na_ahmasc na_ahmasc_forms noroot
     69 expand na_aimasc na_aimasc_forms noroot
     70 expand na_amasc na_amasc_forms noroot
     71 expand na_an na_an_forms noroot
     72 expand na_cfem na_cfem_forms
     73 expand na_cmasc na_cmasc_forms
     74 expand na_ifem na_ifem_forms noroot
     75 expand na_imasc na_imasc_forms noroot
     76 expand na_o_a_staysfem na_o_a_staysfem_forms
     77 expand na_u_staysfem na_u_staysfem_forms noroot
     78 expand na_o_a_staysmasc na_o_a_staysmasc_forms
     79 expand na_u_staysmasc na_u_staysmasc_forms noroot
     80 expand na_ui_oi_ai_mascfem na_ui_oi_ai_mascfem_forms noroot
     81 
     82 # Conversion rules
     83 
     84 match "(?<=ی) و " "ओ-"
     85 match "(?<=[ہی])ٔ" "ए-" endword
     86 match "(?<=[ہی])ِ" "ए-" endword
     87 match "(?<=ا) و " "ओ-"
     88 match "(?<=[0123456789])واں" "वाँ"
     89 match "(?<=[0123456789])ویں" "वें$वीं"
     90 match "(?<=[0123456789])ء" " ई."
     91 match "(?<![0123456789]):" " :"
     92 
     93 matchignore "\d+" beginword endword
     94 
     95 group beginword
     96 replace misc_beginword
     97 endgroup
     98 
     99 group endword
    100 replace misc_endword
    101 endgroup
    102 
    103 group
    104 replace special
    105 replace punctuation
    106 endgroup
    107 
    108 group beginword endword
    109 replace na_ifemshort
    110 replace na_adjectiveregular_a_i
    111 replace na_irregular
    112 replace na_ahmasc
    113 replace na_aimasc
    114 replace na_amasc
    115 replace na_an
    116 replace na_cfem
    117 replace na_cmasc
    118 replace na_ifem
    119 replace na_imasc
    120 replace na_o_a_staysfem
    121 replace na_u_staysfem
    122 replace na_o_a_staysmasc
    123 replace na_u_staysmasc
    124 replace na_ui_oi_ai_mascfem
    125 
    126 replace verbs_irregular
    127 replace verbs_regular_consonant_ending
    128 replace verbs_regular_ending_in_a_o
    129 endgroup
    130 
    131 diacritics "ُ" "ِ" "ّ" "َ"