Compare commits

...

62 Commits

Author SHA1 Message Date
2949c48ed9 Merge branch 'master' into resizeSvg 2025-04-12 10:57:55 -05:00
6aedbbc942 Styling changes mostly done 2025-04-12 11:55:40 -04:00
977cd1aa62 Add parentheses around arithmetic operation 2025-04-11 10:17:56 -04:00
81fe778212 Works even better now 2025-04-11 10:12:57 -04:00
bf8825294b Mostly working 2025-04-10 11:37:55 -04:00
6e9e11c41c Made some progress 2025-04-09 10:34:52 -04:00
66d88c133b Started working on resizing SVG 2025-04-08 23:53:13 -04:00
f62697d03a Kept changes from production use 2025-04-06 11:24:56 -05:00
ad254d2760 Added separate styling for mobile 2025-04-06 12:01:20 -04:00
49b4820c2d Removed Santali language (since it doesn't have transliteration support) 2025-04-06 12:01:04 -04:00
3d0c20ae48 Added signal handling 2025-03-29 11:59:36 -05:00
3d06971e77 Remove testing code; use minified map JSON; hide spinner after map has loaded 2025-03-28 14:41:51 -05:00
50a476edad Change function name; add code to hide, show and change text for the 'fetching translations/romanizations' text 2025-03-28 14:41:02 -05:00
2814f80fc8 Update translations DB 2025-03-28 14:40:39 -05:00
74f3cb9740 Added minified version of JSON 2025-03-28 14:40:26 -05:00
2f7cdd2ea7 Add loading spinner while map loads; fade in map and fade out spinner; load HTMX locally; show text while translations and romanizations are being fetched 2025-03-28 14:40:10 -05:00
1fbe5a1abf Download HTMX locally 2025-03-28 13:46:31 -05:00
c15bd02c17 Added pointer-events: none for state boundaries, updated translations 2025-03-28 10:02:49 -05:00
bf453f9934 Added romanization server code 2025-03-27 08:47:36 -04:00
a4a2451edd Updated translations DB 2025-03-25 11:50:21 -04:00
56cae83339 Move Tulu language name a little 2025-03-18 15:57:56 -04:00
53cd9b9465 More styling - increase language font size 2025-03-18 15:57:34 -04:00
fe2aaef413 Updated DB 2025-03-18 15:36:17 -04:00
f26a5b8c59 Fix ordering of languages when inserting into DB 2025-03-18 15:36:02 -04:00
2d810bb661 Updated references to classname 2025-03-18 15:34:51 -04:00
cf5b2e40a0 Adjusted positioning of some language translations; added new text element for the language name 2025-03-18 15:34:25 -04:00
0c11a302e3 Added styling for language text 2025-03-18 15:33:34 -04:00
9cc650a467 Fix ordering of languages when inserting into DB 2025-03-18 15:32:23 -04:00
0e26b2345a Show all texts at once; hide them while the results are being fetched; show them after the results have been fetched 2025-03-16 22:15:25 -04:00
38316b0ea9 Cleared romanizations, because of an out-of-order thing 2025-03-16 22:14:52 -04:00
b2055b11d9 Fixed issues with fetching romanizations from DB 2025-03-12 16:06:52 -04:00
f0395763e5 Updated translations DB 2025-03-12 16:06:23 -04:00
776e45686e Updated DB 2025-03-10 06:33:36 -04:00
ab4acd189d Started working on adding romanizations to DB 2025-03-10 06:33:31 -04:00
93ff0d172e Added how-to file 2025-03-08 12:31:10 -05:00
f495d939cb Updated TODO 2025-03-08 12:30:57 -05:00
d8c2b22a46 Changed Rajasthan's language from Marwari to Hindi 2025-03-08 12:30:47 -05:00
f65ea1dd8f Added more transliterations (some languages use the same script as others) 2025-03-08 12:30:18 -05:00
da993bf1aa Removed marwari (Rajasthan mostly speaks Hindi) 2025-03-08 12:29:23 -05:00
7110b0e0e3 Set romanization text relative to translation text; Fill languages instead of the individual districts 2025-03-07 15:46:14 -05:00
d1feca7003 Make districts transparent; dim other languages when one is hovered; change default opacity of language 2025-03-07 15:45:27 -05:00
3aa4135a81 Put languages before districts, so that district boundaries show up even with full opacity 2025-03-07 15:44:39 -05:00
72747015ed Check if romanizations already exist in database; transliterate the whole sentence instead of just one word 2025-03-06 21:21:50 -05:00
d006ee8887 Update translations DB 2025-03-06 21:21:05 -05:00
6cc8069c23 Moved translations DB to root, updated references to it 2025-03-06 09:26:35 -05:00
0add8244e6 Added gitignore 2025-03-06 09:25:29 -05:00
02c47cc218 Updated TODO 2025-03-05 16:47:09 -05:00
f20c17337b Added styling for romanizations (same as translations) 2025-03-05 16:46:22 -05:00
4fece83e43 Show romanizations on map 2025-03-05 16:46:12 -05:00
f861b73846 Updated TODO 2025-03-05 14:45:40 -05:00
0f92b11009 Add more text below the translation, in brackets - will be used for romanization 2025-03-05 14:44:58 -05:00
6899c2ce86 Updated translation DB 2025-03-05 14:44:37 -05:00
5cb766c2b8 Updated DB 2025-03-05 07:07:47 -05:00
c5ff92f941 Added JS to fetch romanization from an endpoint and log it; added a CSS class to the loading bar to show while the data is being fetched 2025-03-05 07:07:40 -05:00
4671ba40e7 Updated translations 2025-03-04 16:43:33 -05:00
dfdd9326be Added modified xlit server file with new endpoint '/romanize' - should go in venv for transliteration server 2025-03-04 16:38:48 -05:00
64f4f5f80b Fixed typo in manipuri language tag; regenerated database 2025-03-04 16:37:01 -05:00
ca7f6ad212 Fixed typo with struct tag; cleared DB because this bug resulted in incomplete translations 2025-03-03 16:53:26 -05:00
7112874a06 General refactoring 2025-03-03 16:52:32 -05:00
a38657ddf1 Updated TODO 2025-03-03 16:40:11 -05:00
6ba1189c6c Define color directly in the 'languages' object, instead of separately 2025-03-03 16:39:41 -05:00
d8bdb048c4 Added sqlite DB to cache translations; retrieve translations from DB if they're cached, otherwise fetch from API and cache results 2025-03-03 16:16:53 -05:00
17 changed files with 17034 additions and 21730 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
ai4bharat-transliteration

View File

@@ -15,6 +15,8 @@ require (
github.com/google/s2a-go v0.1.8 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
github.com/googleapis/gax-go/v2 v2.14.0 // indirect
github.com/jmoiron/sqlx v1.4.0 // indirect
github.com/mattn/go-sqlite3 v1.14.24 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect
go.opentelemetry.io/otel v1.29.0 // indirect

View File

@@ -10,6 +10,7 @@ cloud.google.com/go/longrunning v0.6.2 h1:xjDfh1pQcWPEvnfjZmwjKQEcHnpz6lHjfy7Fo0
cloud.google.com/go/longrunning v0.6.2/go.mod h1:k/vIs83RN4bE3YCswdXC5PFfWVILjm3hpEUlSko4PiI=
cloud.google.com/go/translate v1.12.3 h1:XJ7LipYJi80BCgVk2lx1fwc7DIYM6oV2qx1G4IAGQ5w=
cloud.google.com/go/translate v1.12.3/go.mod h1:qINOVpgmgBnY4YTFHdfVO4nLrSBlpvlIyosqpGEgyEg=
filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
@@ -17,12 +18,19 @@ github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM=
github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA=
github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw=
github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA=
github.com/googleapis/gax-go/v2 v2.14.0 h1:f+jMrjBPl+DL9nI4IQzLUxMq7XrAqFYB7hBPqMNIe8o=
github.com/googleapis/gax-go/v2 v2.14.0/go.mod h1:lhBCnjdLrWRaPvLWhmc8IS24m9mr07qSYnHncrgo+zk=
github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o=
github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY=
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM=
github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 h1:r6I7RJCN86bpD/FQwedZ0vSixDpwuWREjW9oRMsmqDc=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0/go.mod h1:B9yO6b04uB80CzjedvewuqDhxJxi11s7/GtiGa8bAjI=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk=

View File

@@ -2,10 +2,17 @@ package main
import (
"context"
"database/sql"
"encoding/json"
"fmt"
"log"
"net/http"
"os"
"os/signal"
"syscall"
"github.com/jmoiron/sqlx"
_ "github.com/mattn/go-sqlite3"
translate "cloud.google.com/go/translate/apiv3"
"cloud.google.com/go/translate/apiv3/translatepb"
@@ -13,23 +20,47 @@ import (
const project_id string = "india-translate-testing-452100"
// translationStruct holds one piece of text in English together with its
// translation into each supported language, one string field per language.
//
// Each field carries two tags:
//   - `db` is the name sqlx uses to map the field when reading from or
//     writing to the translations table (see getCachedTranslation and
//     addToDatabase).
//   - `json` is the language code used as the key when the struct is
//     marshalled to, or unmarshalled from, the JSON exchanged by handler.
type translationStruct struct {
// Source text (the lookup key used by getCachedTranslation).
En string `db:"english" json:"en"`
Hi string `db:"hindi" json:"hi"`
Bn string `db:"bengali" json:"bn"`
Mr string `db:"marathi" json:"mr"`
Ta string `db:"tamil" json:"ta"`
Te string `db:"telugu" json:"te"`
Kn string `db:"kannada" json:"kn"`
Ml string `db:"malayalam" json:"ml"`
Or string `db:"oriya" json:"or"`
Gu string `db:"gujarati" json:"gu"`
Ur string `db:"urdu" json:"ur"`
Lus string `db:"mizo" json:"lus"`
As string `db:"assamese" json:"as"`
Pa string `db:"punjabi" json:"pa"`
Mai string `db:"maithili" json:"mai"`
Ne string `db:"nepali" json:"ne"`
Gom string `db:"konkani" json:"gom"`
Tcy string `db:"tulu" json:"tcy"`
Bho string `db:"bhojpuri" json:"bho"`
Doi string `db:"dogri" json:"doi"`
// The JSON key contains a hyphen ("mni-Mtei"), which is not valid in a Go
// identifier, hence the underscore in the field name.
Mni_mtei string `db:"manipuri" json:"mni-Mtei"`
Sd string `db:"sindhi" json:"sd"`
Awa string `db:"awadhi" json:"awa"`
}
var lang_codes []string = []string{
"hi", // Hindi
"bn", // Bengali
"mr", // Marathi
"ta", // Tamil
"te", // Telugu
"ml", // Malayalam
"kn", // Kannada
"gu", // Gujarati
"ml", // Malayalam
"or", // Oriya
"gu", // Gujarati
"ur", // Urdu
"lus", // Mizo
"as", // Assamese
"pa", // Punjabi
"mai", // Maithili
"mwr", // Marwari
"sat", // Santali
"ne", // Nepali
"gom", // Konkani
"tcy", // Tulu
@@ -40,6 +71,45 @@ var lang_codes []string = []string{
"awa", // Awadhi
}
var db *sqlx.DB
// cleanup closes the shared database handle and logs that the server is
// shutting down. It is invoked from the signal-handling goroutine in main
// just before the process exits.
func cleanup() {
	// Report a failed close instead of silently discarding the error.
	if err := db.Close(); err != nil {
		log.Printf("Error closing database: %v", err)
	}
	log.Printf("Shutting down...\n")
}
/*
Returns the cached translation from the database for the given English text.

The first return value reports whether a cached translation exists. When it is
false, the second return value is a zero-valued translationStruct. The lookup
on the english column is case-insensitive (COLLATE NOCASE).

Any database error other than "no rows" causes a panic.
*/
func getCachedTranslation(data string) (bool, translationStruct) {
	translations := translationStruct{}

	// db.Get runs the query and scans the single result row in one call, so
	// no prepared statement is left open. Preparing a new statement on every
	// request without closing it leaked one statement per lookup.
	err := db.Get(&translations, "SELECT * from TRANSLATIONS WHERE english = ? COLLATE NOCASE", data)
	if err == sql.ErrNoRows {
		return false, translations
	}
	if err != nil {
		panic(err)
	}
	return true, translations
}
// addToDatabase inserts one complete set of translations into the
// translations table. It panics if the insert fails.
//
// The named parameters (:english, :hindi, ...) are bound from the `db` tags
// on translationStruct. The target columns are listed explicitly so the
// insert does not depend on the physical column order of the table.
// NOTE(review): the column names are assumed to equal the `db` tags, which is
// what the SELECT * scan in getCachedTranslation already relies on — confirm
// against the schema.
func addToDatabase(translation translationStruct) {
	const insertTranslation = `INSERT INTO translations (
		english, hindi, bengali, marathi, tamil, telugu, kannada, malayalam,
		oriya, gujarati, urdu, mizo, assamese, punjabi, maithili, nepali,
		konkani, tulu, bhojpuri, dogri, manipuri, sindhi, awadhi
	) VALUES (
		:english, :hindi, :bengali, :marathi, :tamil, :telugu, :kannada, :malayalam,
		:oriya, :gujarati, :urdu, :mizo, :assamese, :punjabi, :maithili, :nepali,
		:konkani, :tulu, :bhojpuri, :dogri, :manipuri, :sindhi, :awadhi
	)`

	if _, err := db.NamedExec(insertTranslation, &translation); err != nil {
		panic(err)
	}
}
// translateText is a thin wrapper around translateTextHelper that fills in
// the project ID and passes "en-US" as the language argument preceding
// targetLang (presumably the source language — confirm against
// translateTextHelper). It returns the helper's result and error unchanged.
func translateText(text string, targetLang string) (result string, err error) {
	result, err = translateTextHelper(project_id, "en-US", targetLang, text)
	return result, err
}
@@ -80,22 +150,54 @@ func handler(w http.ResponseWriter, r *http.Request) {
queries := r.URL.Query()
toTranslate := queries["query"][0]
langToTranslation := make(map[string]string)
for _, lang_code := range lang_codes {
translation, err := translateText(toTranslate, lang_code)
if ok, translation := getCachedTranslation(toTranslate); ok {
translationJson, _ := json.Marshal(translation)
fmt.Fprintf(w, "%v", string(translationJson))
} else {
langToTranslation := make(map[string]string)
for _, lang_code := range lang_codes {
translation, err := translateText(toTranslate, lang_code)
if err != nil {
panic(err)
}
langToTranslation[lang_code] = translation
}
langToTranslation["en"] = toTranslate
langToTranslationJson, _ := json.Marshal(langToTranslation)
translation := translationStruct{}
err := json.Unmarshal(langToTranslationJson, &translation)
addToDatabase(translation)
if err != nil {
panic(err)
}
langToTranslation[lang_code] = translation
fmt.Fprintf(w, "%v", string(langToTranslationJson))
}
langToTranslationJson, _ := json.Marshal(langToTranslation)
fmt.Fprintf(w, "%v", string(langToTranslationJson))
// fmt.Fprintf(w, "Hi there, I love %s!", r.URL.Path[1:])
}
// main opens the translation cache database, installs a shutdown handler for
// SIGINT and SIGTERM, and serves HTTP requests on port 9090.
func main() {
	log.Printf("Starting server...")

	var err error
	db, err = sqlx.Connect("sqlite3", "../translations.db")
	if err != nil {
		panic(err)
	}

	// Subscribe only to the signals that should stop the server. Calling
	// signal.Notify without listing any signals relays every incoming signal,
	// which is why SIGURG previously had to be special-cased and why
	// unrelated signals were intercepted as well.
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		sig := <-sigs
		log.Printf("Received signal: %s", sig)
		cleanup()
		os.Exit(1)
	}()

	http.HandleFunc("/", handler)

	// ListenAndServe only returns on failure. log.Fatal exits through
	// os.Exit, which skips deferred calls, so the database is closed
	// explicitly before exiting rather than through a defer.
	err = http.ListenAndServe(":9090", nil)
	cleanup()
	log.Fatal(err)
}

View File

@@ -0,0 +1,7 @@
(Very jank)
1. Make changes in the code, e.g. changing the language of a state or district.
2. Uncomment the snippet of code at the bottom of index.js. This is responsible for printing out the language boundaries as JSON.
3. Get the JSON from the browser console, put it in a file, and un-minify it.
4. Copy the resulting JSON's text into 'india_with_districts_with_languages.json'. Put it at the start of the file, replacing the old language boundaries.
5. Fix any remaining errors that pop up (should be nothing major).
a. If Tulu and Sindhi are not loading, it's because, for some reason, the JSON definition for these languages includes the district that they're spoken in (because they're only spoken in 1 district). So the code assumes that it's a _district_, rather than a _language_ definition.

1
htmx.min.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@@ -4,13 +4,13 @@
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Noto+Sans:ital,wght@0,100..900;1,100..900&display=swap" rel="stylesheet">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Noto+Sans:ital,wght@0,100..900;1,100..900&display=swap" rel="stylesheet">
<title>Indian Translate</title>
<script src="https://d3js.org/d3.v7.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/@turf/turf@7/turf.min.js" charset="utf-8"></script>
<script src="https://unpkg.com/htmx.org@2.0.4" integrity="sha384-HGfztofotfshcF7+8n44JQL2oJmowVChPTg48S+jvZoztPfvwD79OC/LTtG6dMp+" crossorigin="anonymous"></script>
<script src="./htmx.min.js"></script>
<style>
body {
background-color: #f4f4f4;
@@ -19,44 +19,70 @@
display: flex;
align-items: flex-start;
justify-content: space-evenly;
gap: 2em 2em;
}
svg {
border: 1px solid;
padding: 2em;
visibility: hidden;
opacity: 0;
transition: opacity 1s, visibility 2s;
}
svg.show {
visibility: visible;
opacity: 1;
}
h1 {
margin-block: 0.67em;
font-size: 2em;
}
.state {
stroke: black;
fill: none;
stroke-width: 0.5;
pointer-events: none;
}
.language {
stroke: red;
/* There has to be a fill, even if it's transparent, to allow
hover events to be recognized on the inside. */
fill: black;
fill-opacity: 0.0;
/* fill: black;
fill-opacity: 0.0; */
fill-opacity: 0.8;
stroke-width: 1;
}
.languageText {
.translationText, .romanizationText {
visibility: hidden;
font-family: "Noto Sans";
font-size:1.25em;
pointer-events: none;
}
.language:hover ~ .languageText {
visibility: visible;
}
.testClass:hover {
fill: red;
cursor: default;
font-size:1.2em;
}
.romanizationText {
pointer-events: none;
}
.languageText {
pointer-events: none;
font-weight: bold;
font-size: 1em;
font-family: sans-serif;
visibility: hidden;
}
.language:hover ~ .languageText {
visibility: visible;
}
.district {
stroke: white;
stroke-width: 0.25;
transition: fill 0.3s;
fill: none;
pointer-events: none;
}
.language:hover {
stroke-width: 2;
}
@@ -71,24 +97,125 @@
.loading-indicator {
display: none;
}
.htmx-request.loading-indicator /* While request is being made */ {
.loading, .htmx-request.loading-indicator /* While request is being made */ {
display: inline;
}
/* Dim all other states */
/* Kinda wild that you can do this in plain CSS */
#indiaMap:has(.language:hover) .language:not(:hover) {
fill-opacity: 0.5;
transition: fill-opacity 0.3s;
}
#indiaMap .language:hover {
fill-opacity: 1;
transition: fill-opacity 0.3s;
}
/* Position map load spinner in the middle of the SVG */
#svgContainer {
position: relative;
display: inline-block;
flex-grow: 1;
max-width: 100%;
}
#svgContainer .mapLoadSpinner {
position: absolute;
top: 40%;
left: 40%;
}
/* Credit to https://lukehaas.me/projects/css-loaders/ */
.mapLoadSpinner,
.mapLoadSpinner:after {
border-radius: 50%;
width: 10em;
height: 10em;
transition: opacity 1s, visibility 2s;
}
.mapLoadSpinner.hide {
opacity: 0;
visibility: hidden;
}
.mapLoadSpinner {
margin: 60px auto;
font-size: 10px;
position: relative;
text-indent: -9999em;
border-top: 1.1em solid rgba(255,158,83, 0.2);
border-right: 1.1em solid rgba(255,158,83, 0.2);
border-bottom: 1.1em solid rgba(255,158,83, 0.2);
border-left: 1.1em solid #ff9e53;
-webkit-transform: translateZ(0);
-ms-transform: translateZ(0);
transform: translateZ(0);
-webkit-animation: load8 1.1s infinite linear;
animation: load8 1.1s infinite linear;
}
@-webkit-keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg);
}
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg);
}
}
@keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg);
}
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg);
}
}
#fetchingText {
visibility: hidden;
}
@media only screen and (max-width: 768px) {
main {
flex-direction: column;
align-items: stretch;
}
.district {
stroke-width: 0.1;
}
.translationText, .romanizationText {
font-size: 0.7em;
}
.languageText {
font-size: 0.6em;
}
}
</style>
</head>
<body>
<main>
<section id="textStuff">
<h1>Indian Translate</h1>
<form hx-get="/submit" hx-swap=none hx-indicator="#loading-screen" hx-on::after-request="updateTranslations(event.detail.xhr.response)" class="translateForm" method="get">
<form hx-get="/submit" hx-swap=none hx-indicator="#loading-screen" hx-on::before-request="hideTranslationsAndShowText(event.detail.xhr.response)" hx-on::after-request="updateTranslations(event.detail.xhr.response)" class="translateForm" method="get">
<label for="query">Enter text to translate:</label>
<input type="text" name="query" id="query" required/>
<input type="submit" value="Translate"/>
</form>
<progress id="loading-screen" class="loading-indicator"></progress>
<h3 id="fetchingText">Fetching translations...</h3>
<noscript>
<h2>This website requires javascript to work.</h2>
</noscript>
</section>
<svg id = "indiaMap" width="1000" height="1000"></svg>
<section id="svgContainer">
<div id="mapLoadSpinner" class="mapLoadSpinner"></div>
</section>
</main>
<script type="text/javascript" src="index.js"></script>

346
index.js
View File

@@ -1,57 +1,28 @@
const svg = d3.select("svg")
const tamilColor = "#75d795" // Tamil
const malayalamColor = "#ff7c7c" // Malayalam
const kannadaColor = "#ffe77c" // Kannada
const teluguColor = "#7c9dff" // Telugu
const marathiColor = "#e0ff7c" // Marathi
const konkaniColor = "#9b7cff" // Konkani
const hindiColor = "#d17cff" // Hindi
const gujaratiColor = "#7cffee" // Gujarati
const marwariColor = "#7bc4c9" // Marwari
const oriyaColor = "#9bcc9f" // Oriya
const bengaliColor = "#bf9a77" // Bengali
const punjabiColor = "#e84a35" // Punjabi
const mizoColor = "#a6a4de" // Mizo
const assameseColor = "#c9535b" // Assamese
const bhojpuriColor = "#b3b876" // Bhojpuri
const manipuriColor = "#c9afad" // Manipuri
const dogriColor = "#9595e6" // Dogri (near Kashmir)
const nepaliColor = "#71998e" // Nepali
const urduColor = "#3fa179" // Urdu
const tuluColor = "#dedc52" // Tulu
const maithaliColor = "#4472a6" // Maithali
const santaliColor = "#96bf60" // Santhali
const sindhiColor = "#e89931" // Sindhi
const awadhiColor = "#847fb5" // Awadhi
const defaultColor = "#555555"
const languages = {
tamil: {name: "Tamil", color: tamilColor, code: "ta", districts: []},
malayalam: {name: "Malayalam", color: malayalamColor, code: "ml", districts: []},
kannada: {name: "Kannada", color: kannadaColor, code: "kn", districts: []},
telugu: {name: "Telugu", color: teluguColor, code: "te", districts: []},
marathi: {name: "Marathi", color: marathiColor, code: "mr", districts: []},
konkani: {name: "Konkani", color: konkaniColor, code: "gom", districts: []},
hindi: {name: "Hindi", color: hindiColor, code: "hi", districts: []},
gujarati: {name: "Gujarati", color: gujaratiColor, code: "gu", districts: []},
marwari: {name: "Marwari", color: marwariColor, code: "mwr", districts: []},
oriya: {name: "Oriya", color: oriyaColor, code: "or", districts: []},
bengali: {name: "Bengali", color: bengaliColor, code: "bn", districts: []},
punjabi: {name: "Punjabi", color: punjabiColor, code: "pa", districts: []},
mizo: {name: "Mizo", color: mizoColor, code: "lus", districts: []},
assamese: {name: "Assamese", color: assameseColor, code: "as", districts: []},
bhojpuri: {name: "Bhojpuri", color: bhojpuriColor, code: "bho", districts: []},
manipuri: {name: "Manipuri", color: manipuriColor, code: "mni-Mtei", districts: []},
dogri: {name: "Dogri", color: dogriColor, code: "doi", districts: []},
nepali: {name: "Nepali", color: nepaliColor, code: "ne", districts: []},
urdu: {name: "Urdu", color: urduColor, code: "ur", districts: []},
tulu: {name: "Tulu", color: tuluColor, code: "tcy", districts: []},
maithali: {name: "Maithali", color: maithaliColor, code: "mai", districts: []},
santali: {name: "Santali", color: santaliColor, code: "sat", districts: []},
sindhi: {name: "Sindhi", color: sindhiColor, code: "sd", districts: []},
awadhi: {name: "Awadhi", color: awadhiColor, code: "awa", districts: []},
tamil: {name: "Tamil", color: "#75d795", code: "ta", districts: []},
malayalam: {name: "Malayalam", color: "#ff7c7c", code: "ml", districts: []},
kannada: {name: "Kannada", color: "#ffe77c", code: "kn", districts: []},
telugu: {name: "Telugu", color: "#7c9dff", code: "te", districts: []},
marathi: {name: "Marathi", color: "#e0ff7c", code: "mr", districts: []},
konkani: {name: "Konkani", color: "#9b7cff", code: "gom", districts: []},
hindi: {name: "Hindi", color: "#d17cff", code: "hi", districts: []},
gujarati: {name: "Gujarati", color: "#7cffee", code: "gu", districts: []},
oriya: {name: "Oriya", color: "#9bcc9f", code: "or", districts: []},
bengali: {name: "Bengali", color: "#bf9a77", code: "bn", districts: []},
punjabi: {name: "Punjabi", color: "#e84a35", code: "pa", districts: []},
mizo: {name: "Mizo", color: "#a6a4de", code: "lus", districts: []},
assamese: {name: "Assamese", color: "#c9535b", code: "as", districts: []},
bhojpuri: {name: "Bhojpuri", color: "#b3b876", code: "bho", districts: []},
manipuri: {name: "Manipuri", color: "#c9afad", code: "mni-Mtei", districts: []},
dogri: {name: "Dogri", color: "#9595e6", code: "doi", districts: []},
nepali: {name: "Nepali", color: "#71998e", code: "ne", districts: []},
urdu: {name: "Urdu", color: "#3fa179", code: "ur", districts: []},
tulu: {name: "Tulu", color: "#dedc52", code: "tcy", districts: []},
maithali: {name: "Maithali", color: "#4472a6", code: "mai", districts: []},
sindhi: {name: "Sindhi", color: "#e89931", code: "sd", districts: []},
awadhi: {name: "Awadhi", color: "#847fb5", code: "awa", districts: []},
};
// Credit: https://www.artcraftblend.com/blogs/colors/shades-of-pastel
@@ -65,7 +36,7 @@ const state2lang = {
"Goa": languages["konkani"],
"Odisha": languages["oriya"],
"Gujarat": languages["gujarati"],
"Rajasthan": languages["marwari"],
"Rajasthan": languages["hindi"],
"Chhattisgarh": languages["hindi"],
"Jharkhand": languages["hindi"], // DEFAULT
"West Bengal": languages["bengali"],
@@ -89,7 +60,6 @@ const state2lang = {
"Lakshadweep": languages["malayalam"],
"Delhi": languages["hindi"],
"Chandigarh": languages["hindi"]
}
const district2lang = { // Should override state colors
@@ -144,17 +114,6 @@ const district2lang = { // Should override state colors
"Kutch": languages["sindhi"],
"Godda": languages["santali"],
"Deoghar": languages["santali"],
"Dumka": languages["santali"],
"Jamtara": languages["santali"],
"Sahibganj": languages["santali"],
"Pakur": languages["santali"],
"East Singhbhum": languages["santali"],
"Jhargram": languages["santali"],
"Bankura": languages["santali"],
"Purulia": languages["santali"],
"Kanpur": languages["awadhi"],
"Lakhimpur Kheri": languages["awadhi"],
"Sitapur": languages["awadhi"],
@@ -166,11 +125,36 @@ const district2lang = { // Should override state colors
"Rae Bareli": languages["awadhi"],
"Amethi": languages["awadhi"],
"Bahraich": languages["awadhi"],
}
/**
 * Makes an SVG scale with the width of its parent element while keeping its
 * original aspect ratio.
 *
 * Intended to be used as `selection.call(responsivefy)`.
 *
 * @param svg d3 selection wrapping the <svg> element to make responsive.
 */
function responsivefy(svg) {
    // Get the container and the SVG's aspect ratio from its initial size.
    const container = d3.select(svg.node().parentNode);
    const width = parseInt(svg.style("width"), 10);
    const height = parseInt(svg.style("height"), 10);
    const aspect = width / height;

    // Add the viewBox and preserveAspectRatio attributes, then call resize so
    // that the SVG is sized correctly on initial page load.
    // (The attribute name was previously misspelled, so it had no effect.)
    svg.attr("viewBox", "0 0 " + width + " " + height)
        .attr("preserveAspectRatio", "xMinYMid")
        .call(resize);

    // To register multiple listeners for the same event type, the event name
    // needs a namespace (e.g. 'click.foo'). This matters if this function is
    // invoked for more than one SVG.
    // API docs: https://github.com/mbostock/d3/wiki/Selections#on
    d3.select(window).on("resize." + container.attr("id"), resize);

    // Read the current width of the container and resize the SVG to fit it.
    function resize() {
        const targetWidth = Math.floor(container.node().getBoundingClientRect().width);
        svg.attr("width", targetWidth);
        svg.attr("height", Math.round(targetWidth / aspect));
    }
}
// Functions for calculating and dealing with language boundaries
function reverseCoordArrays(coords) {
if (!Array.isArray(coords)) {
return coords;
@@ -186,7 +170,6 @@ function reverseCoordArrays(coords) {
return coords.map(reverseCoordArrays);
}
function getOuterBoundaryPolygon(features) {
// Check if we have features to process
if (!features || features.length === 0) {
@@ -205,6 +188,15 @@ function getOuterBoundaryPolygon(features) {
return combined;
}
/**
 * Looks up the language entry for a map feature.
 *
 * A district-level entry in `district2lang` takes precedence; otherwise the
 * entry for the feature's state in `state2lang` is used.
 *
 * @param d Feature whose `properties.district` and `properties.st_nm` are read.
 * @returns The matching language entry, or undefined when neither table has one.
 */
function district2langFunc(d) {
    const { district, st_nm: stateName } = d.properties;

    if (district2lang.hasOwnProperty(district)) {
        return district2lang[district];
    }
    if (state2lang.hasOwnProperty(stateName)) {
        return state2lang[stateName];
    }
    return undefined;
}
function stateOrDistrictOrLanguage(d) {
if (typeof d.properties.district !== 'undefined') {
@@ -218,12 +210,40 @@ function stateOrDistrictOrLanguage(d) {
}
}
// Map dimensions and creation of the <svg> element that the map is drawn into.
// The SVG is created here in script and appended to #svgContainer.

// Earlier approach, kept for reference: read the size from an existing #indiaMap element.
// const mapWidth = document.getElementById("indiaMap").getAttribute("width")
// const mapHeight = document.getElementById("indiaMap").getAttribute("height")

// Width: 85% of the current layout width of #svgContainer.
const mapWidth = /*window.innerWidth - */document.querySelector("#svgContainer").offsetWidth * 0.85;
// const mapHeight = document.querySelector("#svgContainer").offsetHeight;
// Height: the viewport height minus the distance from the top of the viewport
// to the top of #svgContainer.
const mapHeight = (window.innerHeight - document.querySelector("#svgContainer").getBoundingClientRect().top);
// Append the <svg> to the container with the computed size and the id 'indiaMap'.
const svg = d3.select("#svgContainer")
.append('svg')
.attr('width', mapWidth.toString())
.attr('height', (mapHeight).toString())
// Disabled alternatives for scaling the SVG (viewBox / responsivefy):
// .attr('viewbox', '0 0 ' + mapWidth.toString() + ' ' + mapHeight.toString())
// .attr('preserveAspectRatio', "xMidYMin")
.attr('id', 'indiaMap')
// .call(responsivefy);
function drawMap(world) {
const mapWidth = document.getElementById("indiaMap").getAttribute("width")
const mapHeight = document.getElementById("indiaMap").getAttribute("height")
const projection = d3.geoMercator().fitSize([mapWidth, mapHeight], world)
const path = d3.geoPath().projection(projection);
requestAnimationFrame(() => {
const newSvg = d3.select('svg');
const bbox = newSvg.node().getBBox();
const originalWidth = +newSvg.attr("width");
newSvg
// .attr("viewBox", `${bbox.x} ${bbox.y} ${bbox.width} ${bbox.height}`)
.attr("viewBox", `0 ${bbox.y} ${originalWidth} ${bbox.height}`)
// .attr("width", mapHeight.toString())
.attr("height", bbox.height)
// .style("display", "block")
// .style("max-height", "100%"); // optional: keep it scalable in a flexbox
});
const states = svg.selectAll("g")
.data(world.features)
.enter()
@@ -233,39 +253,75 @@ function drawMap(world) {
.attr("d", path)
.attr("class", d => stateOrDistrictOrLanguage(d))
.attr("fill", function(d) {
if (stateOrDistrictOrLanguage(d) === "district") {
if (district2lang.hasOwnProperty(d.properties.district)) {
return district2lang[d.properties.district].color;
} else if (state2lang.hasOwnProperty(d.properties.st_nm)) {
return state2lang[d.properties.st_nm].color;
} else {
return defaultColor;
}
}
if (stateOrDistrictOrLanguage(d) === "language") {
return languages[d.properties.lang_name.toLowerCase()].color;
}
})
.each(function(d) {
if (stateOrDistrictOrLanguage(d) === "district") {
let districtLang;
if (district2lang.hasOwnProperty(d.properties.district)) {
districtLang = district2lang[d.properties.district];
} else if (state2lang.hasOwnProperty(d.properties.st_nm)) {
districtLang = state2lang[d.properties.st_nm];
}
const districtLang = district2langFunc(d);
if (typeof districtLang !== 'undefined') {
districtLang.districts.push(d)
}
}
// Hide map load spinner after map has loaded
document.getElementById("mapLoadSpinner").classList.add("hide");
document.querySelector("svg").classList.add("show");
})
.append("title") // Tooltip
.text(d => d.properties.district);
states.append("text")
.attr("x", d => projection(d3.geoCentroid(d))[0])
.attr("y", d => projection(d3.geoCentroid(d))[1])
.attr("x", function(d) {
if (stateOrDistrictOrLanguage(d) == "language") {
rtv = projection(d3.geoCentroid(d))[0];
if (d.properties.lang_name == "Kannada") {
rtv -= 20;
}
if (d.properties.lang_name == "Tamil") {
rtv += 20;
}
if (d.properties.lang_name == "Maithali") {
rtv += 10;
}
if (d.properties.lang_name == "Konkani") {
rtv -= 15;
}
if (d.properties.lang_name == "Bengali") {
rtv -= 15;
}
return rtv
}
})
.attr("y", function(d) {
if (stateOrDistrictOrLanguage(d) == "language") {
rtv = projection(d3.geoCentroid(d))[1]
if (d.properties.lang_name == "Kannada") {
rtv += 15;
}
if (d.properties.lang_name == "Tamil") {
rtv -= 20;
}
if (d.properties.lang_name == "Gujarati") {
rtv -= 10;
}
if (d.properties.lang_name == "Mizo") {
rtv += 20;
}
if (d.properties.lang_name == "Nepali") {
rtv -= 10;
}
if (d.properties.lang_name == "Bengali") {
rtv += 25;
}
return rtv
}
})
.attr("text-anchor", "middle")
.attr("font-size", "12px")
.attr("fill", "black")
.attr("class", "languageText")
.attr("class", "translationText")
.attr("id", function(d) {
if (stateOrDistrictOrLanguage(d) == "language") {
return d.properties.lang_code+"Text"
@@ -277,45 +333,99 @@ function drawMap(world) {
if (stateOrDistrictOrLanguage(d) == "language") {
return d.properties.lang_name;
} else {
d3.select(this).remove() // Only add text if the element is a language
d3.select(this).remove() // Only add this attribute if the element is a language
}
});
// Romanization
states.append("text")
.attr("x", d => stateOrDistrictOrLanguage(d) == "language" ?
document.getElementById(d.properties.lang_code + "Text").getAttribute("x") :
projection(d3.geoCentroid(d))[0])
.attr("y", d => stateOrDistrictOrLanguage(d) == "language" ?
parseFloat(document.getElementById(d.properties.lang_code + "Text").getAttribute("y")) + parseFloat(getComputedStyle(document.getElementsByClassName('translationText')[0]).getPropertyValue('font-size')) :
projection(d3.geoCentroid(d))[1])
.attr("text-anchor", "middle")
.attr("fill", "black")
.attr("class", "romanizationText")
.attr("id", function(d) {
if (stateOrDistrictOrLanguage(d) == "language") {
return d.properties.lang_code+"Romanization"
} else {
d3.select(this).remove()
}
})
.each(function(d) {
if (!stateOrDistrictOrLanguage(d) == "language") {
d3.select(this).remove() // Only add this attribute if the element is a language
}
});
// Language
states.append("text")
.attr("x", d => stateOrDistrictOrLanguage(d) == "language" ?
document.getElementById(d.properties.lang_code + "Text").getAttribute("x") :
projection(d3.geoCentroid(d))[0])
.attr("y", d => stateOrDistrictOrLanguage(d) == "language" ?
parseFloat(document.getElementById(d.properties.lang_code + "Text").getAttribute("y")) - parseFloat(getComputedStyle(document.getElementsByClassName('translationText')[0]).getPropertyValue('font-size')) :
projection(d3.geoCentroid(d))[1])
.attr("text-anchor", "middle")
.attr("fill", "black")
.attr("class", "languageText")
.attr("id", function(d) {
if (stateOrDistrictOrLanguage(d) == "language") {
return d.properties.lang_code+"Language"
} else {
d3.select(this).remove()
}
})
.each(function(d) {
if (!stateOrDistrictOrLanguage(d) == "language") {
d3.select(this).remove() // Only add this attribute if the element is a language
}
})
.text(function(d) {
if (stateOrDistrictOrLanguage(d) == "language") {
return d.properties.lang_name;
} else {
d3.select(this).remove() // Only add this attribute if the element is a language
}
})
let allLangs = []
const coordinates = [77.69916967457782,23.389970772934166];
const [xCoord, yCoord] = projection(coordinates);
svg.append("text")
.attr("x", xCoord)
.attr("y", yCoord)
.attr("class", "testClass")
.attr("text-anchor", "middle")
.attr("font-size", "12px")
.attr("fill", "black")
.text("Hello, Map!");
// for (const [langId,lang] of Object.entries(languages)) {
// let geojson = {
// "type": "FeatureCollection",
// "features": lang.districts
// };
//
// let outerBound = getOuterBoundaryPolygon(geojson.features)
// outerBound["id"] = "lang" + lang.name
// outerBound.properties["lang_name"]= lang.name
// outerBound.properties["lang_code"]= lang.code
// allLangs.push(outerBound);
// svg.append("text")
// .attr("x", xCoord)
// .attr("y", yCoord)
// .attr("class", "testClass")
// .attr("text-anchor", "middle")
// .attr("font-size", "12px")
// .attr("fill", "black")
// .text("Hello, Map!");
// svg.append("path")
// .datum(outerBound)
// .attr("d", path)
// .attr("fill", "none")
// .attr("stroke", "red")
// .attr("stroke-width", 2)
// }
// console.log(JSON.stringify(allLangs))
// for (const [langId,lang] of Object.entries(languages)) {
// let geojson = {
// "type": "FeatureCollection",
// "features": lang.districts
// };
//
// let outerBound = getOuterBoundaryPolygon(geojson.features)
// outerBound["id"] = "lang" + lang.name
// outerBound.properties["lang_name"]= lang.name
// outerBound.properties["lang_code"]= lang.code
// allLangs.push(outerBound);
//
// svg.append("path")
// .datum(outerBound)
// .attr("d", path)
// .attr("fill", "none")
// .attr("stroke", "red")
// .attr("stroke-width", 2)
// }
// console.log(JSON.stringify(allLangs))
}
d3.json("india_with_districts_with_languages.json").then(drawMap)
d3.json("india_with_districts_with_languages_min.json").then(drawMap)

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -13,7 +13,6 @@ Mizo
Assamese
Punjabi
Maithili
Santali
Nepali
Konkani
Tulu

3
server.py Normal file
View File

@@ -0,0 +1,3 @@
from ai4bharat.transliteration import xlit_server
app, engine = xlit_server.get_app()
app.run(host='0.0.0.0', port=10000)

View File

@@ -1,6 +1,7 @@
1. REFACTOR
2. Cache results in a database instead of fetching them each time, use cached results if possible
3. Figure out positioning of the different translations, show all of them instead of having to hover
4. Show romanizations
5. General beautification
6. TTS?
2. Show language names as well, along with translation and romanization
3. Figure out positioning of the different translations, show all of them instead of having to hover; for smaller languages it's hard to move the mouse around to see the full word.
4. Better translations (some of them are just terrible with Google Translate - use the ai4bharat one instead?)
5. Cache romanizations in DB
6. General beautification
7. TTS?

BIN
translations.db Normal file

Binary file not shown.

View File

@@ -1,6 +1,37 @@
// Hide every translation and romanization label on the map and reveal the
// "fetchingText" status element. The `request` argument is accepted but unused.
function hideTranslationsAndShowText(request) {
    const labels = document.querySelectorAll(".translationText, .romanizationText");
    for (const label of labels) {
        label.style.visibility = 'hidden';
    }
    const statusText = document.getElementById("fetchingText");
    statusText.style.visibility = "visible";
}
function updateTranslations(response) {
const translations = JSON.parse(response);
document.querySelectorAll(".languageText").forEach(element => {
document.querySelectorAll(".translationText").forEach(element => {
element.textContent = translations[element.id.replace("Text", "")];
});
// Send result to romanization server
// Since this is an asynchronous operation, there is no indication that
// it's being performed in the backend. So I add the 'loading' class to the progress
// bar used to track translations, and then remove it in the 'resolved' handler.
const elem = document.getElementById("loading-screen")
elem.classList.toggle("loading")
document.getElementById("fetchingText").textContent = "Fetching romanizations..."
fetch("/romanize", {
method: "POST",
body: response
}).then((newResponse) => newResponse.json()) // It looks like response.json() returns another promise, since the _body_ of the response may not have loaded yet. Do not confuse this json() with the static Response.json() method. Since it returns another promise, I have to call .then() to actually get the result.
.then((data) => {
elem.classList.toggle("loading");
console.log(data);
const romanizations = data;
document.querySelectorAll(".romanizationText").forEach(element => {
if (element.id.replace("Romanization", "") in romanizations) {
element.textContent = "(" + romanizations[element.id.replace("Romanization", "")] + ")";
}
});
// Show elements again
document.querySelectorAll(".translationText, .romanizationText").forEach(element => element.style.visibility = 'visible')
document.getElementById("fetchingText").textContent = "Fetching translations..." // Restore the original text content
document.getElementById("fetchingText").style.visibility = "hidden"
});
}

287
xlit_server.py Normal file
View File

@@ -0,0 +1,287 @@
"""
Expose Transliteration Engine as an HTTP API.
USAGE:
```
from ai4bharat.transliteration import xlit_server
app, engine = xlit_server.get_app()
app.run(host='0.0.0.0', port=8000)
```
Sample URLs:
http://localhost:8000/tl/ta/amma
http://localhost:8000/languages
FORMAT:
Based on the Varnam API standard
https://api.varnamproject.com/tl/hi/bharat
"""
from flask import Flask, jsonify, request, make_response
from uuid import uuid4
from datetime import datetime
import traceback
import enum
import sqlite3
from .utils import LANG_CODE_TO_DISPLAY_NAME, RTL_LANG_CODES, LANG_CODE_TO_SCRIPT_CODE
class XlitError(enum.Enum):
    """Failure categories reported by the transliteration endpoints.

    Each member's value is the human-readable message that the endpoints
    copy into the ``error`` field of their JSON response.
    """

    # The requested language code is not one the engine supports.
    lang_err = "Unsupported langauge ID requested ;( Please check available languages."
    # The input string could not be processed.
    string_err = "String passed is incompatable ;("
    # An exception was raised while transliterating.
    internal_err = "Internal crash ;("
    # Catch-all for failures without a more specific category.
    unknown_err = "Unknown Failure"
    # Model or metadata loading failed.
    loading_err = "Loading failed ;( Check if metadata/paths are correctly configured."
app = Flask(__name__)
# Ask Flask's JSON encoder to keep non-ASCII characters as-is.
# NOTE(review): Flask >= 2.3 ignores this key in favour of
# `app.json.ensure_ascii` -- confirm against the installed Flask version.
app.config['JSON_AS_ASCII'] = False

## ----------------------------- Xlit Engine -------------------------------- ##

from .xlit_src import XlitEngine

# Beam width the engines are built with.
MAX_SUGGESTIONS = 8
# Number of suggestions returned when the caller does not ask for a count.
DEFAULT_NUM_SUGGESTIONS = 5

# Constructor options shared by both transliteration directions.
_ENGINE_OPTIONS = {
    "beam_width": MAX_SUGGESTIONS,
    "rescore": True,
    "model_type": "transformer",
}

# One engine per direction, keyed by "<source>2<target>".
ENGINE = {
    "en2indic": XlitEngine(src_script_type="roman", **_ENGINE_OPTIONS),
    "indic2en": XlitEngine(src_script_type="indic", **_ENGINE_OPTIONS),
}


def _describe_language(lang_code):
    """Build the metadata entry served by `/languages` for one language code."""
    return {
        "LangCode": lang_code,  # ISO-639 code
        "Identifier": lang_code,  # ISO-639 code
        "DisplayName": LANG_CODE_TO_DISPLAY_NAME[lang_code],
        "Author": "AI4Bharat",  # Name of developer / team
        "CompiledDate": "09-April-2022",  # date on which model was trained
        "IsStable": True,  # Set `False` if the model is experimental
        "Direction": "rtl" if lang_code in RTL_LANG_CODES else "ltr",
        "ScriptCode": LANG_CODE_TO_SCRIPT_CODE[lang_code],
    }


# Metadata for every language the en2indic engine supports, sorted by code.
EXPOSED_LANGS = [
    _describe_language(code)
    for code in sorted(ENGINE["en2indic"].all_supported_langs)
]
def get_app():
    """Return the module-level Flask app together with the engine mapping.

    Returns:
        A ``(app, ENGINE)`` tuple, where ``ENGINE`` maps ``"en2indic"`` and
        ``"indic2en"`` to their transliteration engines.
    """
    flask_app, engines = app, ENGINE
    return flask_app, engines
## ---------------------------- API End-points ------------------------------ ##
@app.route('/languages', methods = ['GET', 'POST'])
def supported_languages():
    """Return the list of exposed languages as JSON.

    When the request carries no ``xlit_user_id`` cookie, a fresh random id is
    attached to the response as a one-year, secure, HTTP-only cookie.
    """
    # Format - https://xlit-api.ai4bharat.org/languages
    response = make_response(jsonify(EXPOSED_LANGS))
    if 'xlit_user_id' in request.cookies:
        # Returning visitor: nothing to set.
        return response

    # host = request.environ['HTTP_ORIGIN'].split('://')[1]
    cookie_domain = '.ai4bharat.org'
    one_year_in_seconds = 365*24*60*60
    response.set_cookie(
        'xlit_user_id',
        uuid4().hex,
        max_age=one_year_in_seconds,
        domain=cookie_domain,
        samesite='None',
        secure=True,
        httponly=True,
    )
    return response
@app.route('/tl/<lang_code>/<eng_word>', methods = ['GET', 'POST'])
def xlit_api(lang_code, eng_word):
    """Transliterate a romanized word into the script of `lang_code`.

    Query parameters:
        transliterate_numerals: "true" (case-insensitive) to enable; default off.
        num_suggestions: number of candidates to return
            (default ``DEFAULT_NUM_SUGGESTIONS``).

    Returns:
        A JSON response with the keys ``success``, ``error``, ``at``,
        ``input`` and ``result``. On failure ``success`` stays ``False`` and
        ``error`` carries the message.
    """
    # Format: https://xlit-api.ai4bharat.org/tl/ta/bharat
    response = {
        'success': False,
        'error': '',
        'at': str(datetime.utcnow()) + ' +0000 UTC',
        'input': eng_word.strip(),
        'result': ''
    }

    transliterate_numerals = request.args.get('transliterate_numerals', default=False, type=lambda v: v.lower() == 'true')
    num_suggestions = request.args.get('num_suggestions', default=DEFAULT_NUM_SUGGESTIONS, type=int)

    if lang_code not in ENGINE["en2indic"].all_supported_langs:
        response['error'] = 'Invalid scheme identifier. Supported languages are: '+ str(ENGINE["en2indic"].all_supported_langs)
        return jsonify(response)

    error_trace = None
    try:
        ## Limit char count to --> 70
        xlit_result = ENGINE["en2indic"].translit_word(eng_word[:70], lang_code, topk=num_suggestions, transliterate_numerals=transliterate_numerals)
    except Exception:
        # Format the traceback while the exception is still being handled:
        # once the handler exits, traceback.format_exc() only yields
        # "NoneType: None".
        error_trace = traceback.format_exc()
        xlit_result = XlitError.internal_err

    if isinstance(xlit_result, XlitError):
        response['error'] = xlit_result.value
        # Fall back to the error name when the engine returned an XlitError
        # without raising, so the log line is never empty.
        print("XlitError:", error_trace or xlit_result.name)
    else:
        response['result'] = xlit_result
        response['success'] = True

    return jsonify(response)
@app.route('/rtl/<lang_code>/<word>', methods = ['GET', 'POST'])
def reverse_xlit_api(lang_code, word):
    """Romanize native-script text written in the script of `lang_code`.

    Returns:
        A JSON response with the keys ``success``, ``error``, ``at``,
        ``input`` and ``result``. On failure ``success`` stays ``False``,
        ``result`` stays ``''`` and ``error`` carries the message.
    """
    # Format: https://api.varnamproject.com/rtl/hi/%E0%A4%AD%E0%A4%BE%E0%A4%B0%E0%A4%A4
    response = {
        'success': False,
        'error': '',
        'at': str(datetime.utcnow()) + ' +0000 UTC',
        'input': word.strip(),
        'result': ''
    }

    if lang_code not in ENGINE["indic2en"].all_supported_langs:
        response['error'] = 'Invalid scheme identifier. Supported languages are: '+ str(ENGINE["indic2en"].all_supported_langs)
        return jsonify(response)

    error_trace = None
    try:
        # The whole input is transliterated as a sentence; unlike xlit_api,
        # no character cap is applied here.
        xlit_result = ENGINE["indic2en"].translit_sentence(word, lang_code)
    except Exception:
        # Format the traceback while the exception is still being handled:
        # once the handler exits, traceback.format_exc() only yields
        # "NoneType: None".
        error_trace = traceback.format_exc()
        xlit_result = XlitError.internal_err

    if isinstance(xlit_result, XlitError):
        response['error'] = xlit_result.value
        # Fall back to the error name when the engine returned an XlitError
        # without raising, so the log line is never empty.
        print("XlitError:", error_trace or xlit_result.name)
    else:
        response['result'] = xlit_result
        response['success'] = True

    return jsonify(response)
@app.route('/transliterate', methods=['POST'])
def ulca_api():
    '''
    ULCA-compliant endpoint. See for sample request-response:
    https://github.com/ULCA-IN/ulca/tree/master/specs/examples/model/transliteration-model

    The JSON body must contain `input` (a list of items with a `source`
    field) and `config` (with `language.sourceLanguage` and
    `language.targetLanguage`, and optionally `isSentence` and
    `numSuggestions`). Each input item gets a `target` list added in place.

    Returns 400 when required fields are missing, 501 when the language
    pair is unsupported, and otherwise `{"output": <input items>}` with 200.
    '''
    data = request.get_json(force=True)

    if "input" not in data or "config" not in data:
        return jsonify({
            "status": {
                "statusCode": 400,
                "message": "Ensure `input` and `config` fields are not missing."
            }
        }), 400

    config = data["config"]
    language = config.get("language") if isinstance(config, dict) else None
    if not isinstance(language, dict) or "sourceLanguage" not in language or "targetLanguage" not in language:
        # Report a client error instead of failing with an unhandled KeyError.
        return jsonify({
            "status": {
                "statusCode": 400,
                "message": "Ensure `config.language` specifies `sourceLanguage` and `targetLanguage`."
            }
        }), 400

    source_lang = language["sourceLanguage"]
    target_lang = language["targetLanguage"]

    en_to_indic = source_lang == "en" and target_lang in ENGINE["en2indic"].all_supported_langs
    indic_to_en = target_lang == "en" and source_lang in ENGINE["indic2en"].all_supported_langs
    if not (en_to_indic or indic_to_en):
        return jsonify({
            "status": {
                "statusCode": 501,
                "message": "The mentioned language-pair is not supported yet."
            }
        }), 501

    is_sentence = config.get("isSentence", False)
    # Sentence mode always yields exactly one output per item.
    num_suggestions = 1 if is_sentence else config.get("numSuggestions", DEFAULT_NUM_SUGGESTIONS)

    if target_lang == "en":
        engine = ENGINE["indic2en"]
        lang_code = source_lang
    else:
        engine = ENGINE["en2indic"]
        lang_code = target_lang

    for item in data["input"]:
        if is_sentence:
            item["target"] = [engine.translit_sentence(item["source"], lang_code=lang_code)]
        else:
            # Word mode truncates the source to 32 characters; the truncated
            # value is also what gets echoed back to the caller.
            item["source"] = item["source"][:32]
            item["target"] = engine.translit_word(item["source"], lang_code=lang_code, topk=num_suggestions)

    return {
        "output": data["input"],
        # "status": {
        #     "statusCode": 200,
        #     "message" : "success"
        # }
    }, 200
@app.route('/romanize', methods=['POST'])
def romanizeHandler():
    """Romanize a set of translations, caching the result in SQLite.

    The request body is a JSON object mapping translation language codes to
    text and must include the English source word under ``"en"``. If the
    ``romanizations`` table already has a row for that English word, the row
    is returned. Otherwise every recognised language is romanized through
    ``reverse_xlit_api``, the result is stored, and it is returned as JSON
    keyed by language code.

    Returns 400 when the body is not a JSON object containing ``"en"``.
    """
    # Translation language code -> language code handed to reverse_xlit_api.
    langCodeLookup = {
        "hi": "hi",
        "bn": "bn",
        "mr": "mr",
        "ta": "ta",
        "te": "te",
        "kn": "kn",
        "ml": "ml",
        "or": "or",
        "gu": "gu",
        "ur": "ur",
        "as": "as",
        "pa": "pa",
        "mai": "mai",
        "ne": "ne",
        "gom": "gom",
        "tcy": "kn",  # Tulu uses Kannada script
        "bho": "hi",  # Bhojpuri uses Hindi script
        "doi": "hi",  # Dogri uses Hindi script
        "mni-Mtei": "mni",
        "sd": "sd",
        "awa": "hi",  # Awadhi uses Hindi script
    }
    # Database column name -> translation language code.
    lang2code = {
        "hindi": "hi",
        "bengali": "bn",
        "marathi": "mr",
        "tamil": "ta",
        "telugu": "te",
        "malayalam": "ml",
        "kannada": "kn",
        "oriya": "or",
        "gujarati": "gu",
        "urdu": "ur",
        "assamese": "as",
        "punjabi": "pa",
        "maithili": "mai",
        "nepali": "ne",
        "konkani": "gom",
        "tulu": "tcy",
        "bhojpuri": "bho",
        "dogri": "doi",
        "manipuri": "mni-Mtei",
        "sindhi": "sd",
        "awadhi": "awa",
        "english": "en",
    }
    code2lang = {code: column for column, code in lang2code.items()}

    data = request.get_json(force=True)
    if not isinstance(data, dict) or 'en' not in data:
        return jsonify({"error": "Request body must be a JSON object with an `en` field."}), 400

    englishWord = data['en']
    print(englishWord)
    rtv = {"en": englishWord}

    con = sqlite3.connect("../translations.db")
    try:
        cur = con.cursor()
        # Copy schema from 'translations' table
        cur.execute("CREATE TABLE IF NOT EXISTS romanizations AS SELECT * FROM translations WHERE 0")

        # Check if database contains the romanizations already
        cur.execute('SELECT * FROM romanizations WHERE english = ?', (englishWord,))
        cachedRow = cur.fetchone()
        if cachedRow is not None:
            columnNames = [column[0] for column in cur.description]
            # Pair values with their actual column names instead of relying
            # on a hard-coded column count.
            return jsonify({
                lang2code[name]: value
                for name, value in zip(columnNames, cachedRow)
                if name in lang2code
            })

        # Assuming the romanizations didn't exist before
        # NOTE(review): a failed romanization (empty 'result') is cached
        # as-is -- confirm that this is intended.
        for key, text in data.items():
            if key in langCodeLookup:
                responseJson = reverse_xlit_api(langCodeLookup[key], text).get_json()
                rtv[key] = responseJson['result']

        # Size the column list and the placeholders from what was actually
        # romanized, so a request lacking some languages still inserts.
        columns = ", ".join('"{}"'.format(code2lang[code]) for code in rtv)
        placeholders = ", ".join("?" for _ in rtv)
        cur.execute(
            "INSERT INTO romanizations ({}) VALUES ({})".format(columns, placeholders),
            tuple(rtv.values()),
        )
        con.commit()
        return jsonify(rtv)
    finally:
        # Release the connection on every path, including errors.
        con.close()