# File lib/language_detector.rb, line 65
  def self.train_fm
    # Build an n-gram Profile for every supported language from its UTF-8
    # corpus file and serialize the whole set to model-fm.yml alongside
    # this source file. No return value of interest; the side effect is
    # the written model file.
    #
    # For a full list of ISO 639 language tags visit:
    # http://www.loc.gov/standards/iso639-2/englangn.html
    # http://www.loc.gov/standards/iso639-2/php/English_list.php

    #LARGE profiles follow:

    #NOTE: These profiles taken from the "World War II" node on wikipedia
    #with the 'lang' and ?action=raw URI which results in a UTF8 encoded
    #file.  If we need to get more profile data for a language this is
    #always a good source of data.
    #
    # http://en.wikipedia.org/wiki/World_War_II
    # EU corpus: http://wt.jrc.it/lt/Acquis/
    #

    # Each row: [ iso_639_1_code, corpus_filename, encoding, english_name ].
    # Only the filename and english_name columns are consumed below; the
    # code and encoding columns are documentation for maintainers.
    training_data = [
      [ "ar", "ar-utf8.txt", "utf8", "arabic" ],
      [ "bg", "bg-utf8.txt", "utf8", "bulgarian" ],
      [ "cs", "cs-utf8.txt", "utf8", "czech" ],
      [ "da", "da-utf8.txt", "utf8", "danish" ],
      [ "de", "de-utf8.txt", "utf8", "german" ],
      [ "el", "el-utf8.txt", "utf8", "greek" ],
      [ "en", "en-utf8.txt", "utf8", "english" ],
      [ "et", "et-utf8.txt", "utf8", "estonian" ],
      [ "es", "es-utf8.txt", "utf8", "spanish" ],
      [ "fa", "fa-utf8.txt", "utf8", "farsi" ],
      [ "fi", "fi-utf8.txt", "utf8", "finnish" ],
      [ "fr", "fr-utf8.txt", "utf8", "french" ],
      [ "ga", "ga-utf8.txt", "utf8", "irish" ],
      [ "he", "he-utf8.txt", "utf8", "hebrew" ],
      [ "hi", "hi-utf8.txt", "utf8", "hindi" ],
      [ "hr", "hr-utf8.txt", "utf8", "croatian" ],
      [ "it", "it-utf8.txt", "utf8", "italian" ],
      [ "ja", "ja-utf8.txt", "utf8", "japanese" ],
      [ "ko", "ko-utf8.txt", "utf8", "korean" ],
      [ "hu", "hu-utf8.txt", "utf8", "hungarian" ],
      # NOTE(review): "tk" is the ISO 639-1 code for Turkmen; Turkish is
      # "tr". Confirm which language tk-utf8.txt actually contains and
      # rename the code/file accordingly.
      [ "tk", "tk-utf8.txt", "utf8", "turkish" ],
      [ "nl", "nl-utf8.txt", "utf8", "dutch" ],
      [ "no", "no-utf8.txt", "utf8", "norwegian" ],
      [ "pl", "pl-utf8.txt", "utf8", "polish" ],
      [ "pt", "pt-utf8.txt", "utf8", "portuguese" ],
      [ "ro", "ro-utf8.txt", "utf8", "romanian" ],
      [ "ru", "ru-utf8.txt", "utf8", "russian" ],
      [ "sl", "sl-utf8.txt", "utf8", "slovenian" ],
      [ "sv", "sv-utf8.txt", "utf8", "swedish" ],
      [ "th", "th-utf8.txt", "utf8", "thai" ],
      [ "uk", "uk-utf8.txt", "utf8", "ukrainian" ],  # fixed typo "ukraninan"
      [ "vi", "vi-utf8.txt", "utf8", "vietnamese" ],
      [ "zh", "zh-utf8.txt", "utf8", "chinese" ]
      # id (indonesian)
      # ku (kurdish)
      # lt (lithuanian)
      # lv (latvian)
      # mk (macedonian)
      # ms (malay)
      # sr (serbian)
      # my (burmese)
      # [ "fy", "fy-utf8.txt", "utf8", "frisian" ],
      # [ "io", "io-utf8.txt", "utf8", "ido" ],
      # [ "is", "is-utf8.txt", "utf8", "icelandic" ],
    ]

    # Destructure each row; the code and encoding columns are unused here.
    profiles = training_data.map do |_code, file, _encoding, name|
      LanguageDetector::Profile.new(:name => name, :file => file)
    end

    puts 'saving model...'
    filename = File.expand_path(File.join(File.dirname(__FILE__), "model-fm.yml"))
    File.open(filename, 'w') {|f| YAML.dump(profiles, f)}
  end