Compare commits
62 Commits
a39fcc69a4
...
resizeSvg
| Author | SHA1 | Date | |
|---|---|---|---|
| 2949c48ed9 | |||
| 6aedbbc942 | |||
| 977cd1aa62 | |||
| 81fe778212 | |||
| bf8825294b | |||
| 6e9e11c41c | |||
| 66d88c133b | |||
| f62697d03a | |||
| ad254d2760 | |||
| 49b4820c2d | |||
| 3d0c20ae48 | |||
| 3d06971e77 | |||
| 50a476edad | |||
| 2814f80fc8 | |||
| 74f3cb9740 | |||
| 2f7cdd2ea7 | |||
| 1fbe5a1abf | |||
| c15bd02c17 | |||
| bf453f9934 | |||
| a4a2451edd | |||
| 56cae83339 | |||
| 53cd9b9465 | |||
| fe2aaef413 | |||
| f26a5b8c59 | |||
| 2d810bb661 | |||
| cf5b2e40a0 | |||
| 0c11a302e3 | |||
| 9cc650a467 | |||
| 0e26b2345a | |||
| 38316b0ea9 | |||
| b2055b11d9 | |||
| f0395763e5 | |||
| 776e45686e | |||
| ab4acd189d | |||
| 93ff0d172e | |||
| f495d939cb | |||
| d8c2b22a46 | |||
| f65ea1dd8f | |||
| da993bf1aa | |||
| 7110b0e0e3 | |||
| d1feca7003 | |||
| 3aa4135a81 | |||
| 72747015ed | |||
| d006ee8887 | |||
| 6cc8069c23 | |||
| 0add8244e6 | |||
| 02c47cc218 | |||
| f20c17337b | |||
| 4fece83e43 | |||
| f861b73846 | |||
| 0f92b11009 | |||
| 6899c2ce86 | |||
| 5cb766c2b8 | |||
| c5ff92f941 | |||
| 4671ba40e7 | |||
| dfdd9326be | |||
| 64f4f5f80b | |||
| ca7f6ad212 | |||
| 7112874a06 | |||
| a38657ddf1 | |||
| 6ba1189c6c | |||
| d8bdb048c4 |
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
ai4bharat-transliteration
|
||||
@@ -15,6 +15,8 @@ require (
|
||||
github.com/google/s2a-go v0.1.8 // indirect
|
||||
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
|
||||
github.com/googleapis/gax-go/v2 v2.14.0 // indirect
|
||||
github.com/jmoiron/sqlx v1.4.0 // indirect
|
||||
github.com/mattn/go-sqlite3 v1.14.24 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect
|
||||
go.opentelemetry.io/otel v1.29.0 // indirect
|
||||
|
||||
@@ -10,6 +10,7 @@ cloud.google.com/go/longrunning v0.6.2 h1:xjDfh1pQcWPEvnfjZmwjKQEcHnpz6lHjfy7Fo0
|
||||
cloud.google.com/go/longrunning v0.6.2/go.mod h1:k/vIs83RN4bE3YCswdXC5PFfWVILjm3hpEUlSko4PiI=
|
||||
cloud.google.com/go/translate v1.12.3 h1:XJ7LipYJi80BCgVk2lx1fwc7DIYM6oV2qx1G4IAGQ5w=
|
||||
cloud.google.com/go/translate v1.12.3/go.mod h1:qINOVpgmgBnY4YTFHdfVO4nLrSBlpvlIyosqpGEgyEg=
|
||||
filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
|
||||
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
|
||||
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
|
||||
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
@@ -17,12 +18,19 @@ github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
|
||||
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
|
||||
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
|
||||
github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
|
||||
github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM=
|
||||
github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA=
|
||||
github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw=
|
||||
github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA=
|
||||
github.com/googleapis/gax-go/v2 v2.14.0 h1:f+jMrjBPl+DL9nI4IQzLUxMq7XrAqFYB7hBPqMNIe8o=
|
||||
github.com/googleapis/gax-go/v2 v2.14.0/go.mod h1:lhBCnjdLrWRaPvLWhmc8IS24m9mr07qSYnHncrgo+zk=
|
||||
github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o=
|
||||
github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY=
|
||||
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
|
||||
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM=
|
||||
github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 h1:r6I7RJCN86bpD/FQwedZ0vSixDpwuWREjW9oRMsmqDc=
|
||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0/go.mod h1:B9yO6b04uB80CzjedvewuqDhxJxi11s7/GtiGa8bAjI=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk=
|
||||
|
||||
@@ -2,10 +2,17 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"github.com/jmoiron/sqlx"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
|
||||
translate "cloud.google.com/go/translate/apiv3"
|
||||
"cloud.google.com/go/translate/apiv3/translatepb"
|
||||
@@ -13,23 +20,47 @@ import (
|
||||
|
||||
const project_id string = "india-translate-testing-452100"
|
||||
|
||||
// translationStruct holds one piece of English text together with its
// translation into every supported language.
//
// Each field carries two tags: `db` is the column name used when the struct is
// read from or written to the translations table, and `json` is the language
// code used as the key in the HTTP response.
type translationStruct struct {
	En       string `db:"english" json:"en"`
	Hi       string `db:"hindi" json:"hi"`
	Bn       string `db:"bengali" json:"bn"`
	Mr       string `db:"marathi" json:"mr"`
	Ta       string `db:"tamil" json:"ta"`
	Te       string `db:"telugu" json:"te"`
	Kn       string `db:"kannada" json:"kn"`
	Ml       string `db:"malayalam" json:"ml"`
	Or       string `db:"oriya" json:"or"`
	Gu       string `db:"gujarati" json:"gu"`
	Ur       string `db:"urdu" json:"ur"`
	Lus      string `db:"mizo" json:"lus"`
	As       string `db:"assamese" json:"as"`
	Pa       string `db:"punjabi" json:"pa"`
	Mai      string `db:"maithili" json:"mai"`
	Ne       string `db:"nepali" json:"ne"`
	Gom      string `db:"konkani" json:"gom"`
	Tcy      string `db:"tulu" json:"tcy"`
	Bho      string `db:"bhojpuri" json:"bho"`
	Doi      string `db:"dogri" json:"doi"`
	Mni_mtei string `db:"manipuri" json:"mni-Mtei"`
	Sd       string `db:"sindhi" json:"sd"`
	Awa      string `db:"awadhi" json:"awa"`
}
|
||||
|
||||
var lang_codes []string = []string{
|
||||
"hi", // Hindi
|
||||
"bn", // Bengali
|
||||
"mr", // Marathi
|
||||
"ta", // Tamil
|
||||
"te", // Telugu
|
||||
"ml", // Malayalam
|
||||
"kn", // Kannada
|
||||
"gu", // Gujarati
|
||||
"ml", // Malayalam
|
||||
"or", // Oriya
|
||||
"gu", // Gujarati
|
||||
"ur", // Urdu
|
||||
"lus", // Mizo
|
||||
"as", // Assamese
|
||||
"pa", // Punjabi
|
||||
"mai", // Maithili
|
||||
"mwr", // Marwari
|
||||
"sat", // Santali
|
||||
"ne", // Nepali
|
||||
"gom", // Konkani
|
||||
"tcy", // Tulu
|
||||
@@ -40,6 +71,45 @@ var lang_codes []string = []string{
|
||||
"awa", // Awadhi
|
||||
}
|
||||
|
||||
var db *sqlx.DB
|
||||
|
||||
func cleanup() {
|
||||
db.Close()
|
||||
log.Printf("Shutting down...\n")
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
Returns the cached translation from the database, for the given english text. The first parameter
|
||||
|
||||
indicates whether or not the translation exists.
|
||||
*/
|
||||
func getCachedTranslation(data string) (bool, translationStruct) {
|
||||
prepared, err := db.Preparex("SELECT * from TRANSLATIONS WHERE english = ? COLLATE NOCASE")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
translations := translationStruct{}
|
||||
err = prepared.Get(&translations, data)
|
||||
if err != nil {
|
||||
if err == sql.ErrNoRows {
|
||||
return false, translations
|
||||
} else {
|
||||
panic(err)
|
||||
}
|
||||
} else {
|
||||
return true, translations
|
||||
}
|
||||
}
|
||||
|
||||
func addToDatabase(translation translationStruct) {
|
||||
_, err := db.NamedExec(`INSERT INTO translations VALUES (:english, :hindi, :bengali, :marathi, :tamil, :telugu, :kannada, :malayalam, :oriya, :gujarati, :urdu, :mizo, :assamese, :punjabi, :maithili, :nepali, :konkani, :tulu, :bhojpuri, :dogri, :manipuri, :sindhi, :awadhi)`, &translation)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
func translateText(text string, targetLang string) (result string, err error) {
|
||||
return translateTextHelper(project_id, "en-US", targetLang, text)
|
||||
}
|
||||
@@ -80,6 +150,10 @@ func handler(w http.ResponseWriter, r *http.Request) {
|
||||
queries := r.URL.Query()
|
||||
toTranslate := queries["query"][0]
|
||||
|
||||
if ok, translation := getCachedTranslation(toTranslate); ok {
|
||||
translationJson, _ := json.Marshal(translation)
|
||||
fmt.Fprintf(w, "%v", string(translationJson))
|
||||
} else {
|
||||
langToTranslation := make(map[string]string)
|
||||
for _, lang_code := range lang_codes {
|
||||
translation, err := translateText(toTranslate, lang_code)
|
||||
@@ -88,14 +162,42 @@ func handler(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
langToTranslation[lang_code] = translation
|
||||
}
|
||||
|
||||
langToTranslation["en"] = toTranslate
|
||||
langToTranslationJson, _ := json.Marshal(langToTranslation)
|
||||
translation := translationStruct{}
|
||||
err := json.Unmarshal(langToTranslationJson, &translation)
|
||||
addToDatabase(translation)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
fmt.Fprintf(w, "%v", string(langToTranslationJson))
|
||||
|
||||
// fmt.Fprintf(w, "Hi there, I love %s!", r.URL.Path[1:])
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
log.Printf("Starting server...")
|
||||
var err error
|
||||
db, err = sqlx.Connect("sqlite3", "../translations.db")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
defer db.Close()
|
||||
// Catch signal
|
||||
sigs := make(chan os.Signal, 1)
|
||||
signal.Notify(sigs)
|
||||
go func() {
|
||||
for sig := range sigs {
|
||||
log.Printf("Received signal: %s", sig)
|
||||
switch sig {
|
||||
case syscall.SIGURG:
|
||||
log.Printf("Ignoring sigurg")
|
||||
case syscall.SIGTERM, syscall.SIGINT:
|
||||
cleanup()
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
http.HandleFunc("/", handler)
|
||||
log.Fatal(http.ListenAndServe(":9090", nil))
|
||||
}
|
||||
|
||||
7
how_to_update_language_boundaries.txt
Normal file
7
how_to_update_language_boundaries.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
(Very jank)
|
||||
1. Make changes in the code, e.g., changing the language of a state or district.
|
||||
2. Uncomment the snippet of code at the bottom of index.js. This is responsible for printing out the language boundaries as JSON.
|
||||
3. Get the JSON from the browser console, put it in a file, and un-minify it.
|
||||
4. Copy the resulting JSON's text into 'india_with_districts_with_languages.json'. Put it at the start of the file, replacing the old language boundaries.
|
||||
5. Fix any remaining errors that pop up (should be nothing major).
|
||||
a. If Tulu and Sindhi are not loading, it's because, for some reason, the JSON definition for these languages includes the district that they're spoken in (because they're only spoken in 1 district). So the code assumes that it's a _district_, rather than a _language_ definition.
|
||||
1
htmx.min.js
vendored
Normal file
1
htmx.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
157
index.html
157
index.html
@@ -4,13 +4,13 @@
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<link rel="preconnect" href="https://fonts.googleapis.com">
|
||||
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
||||
<link href="https://fonts.googleapis.com/css2?family=Noto+Sans:ital,wght@0,100..900;1,100..900&display=swap" rel="stylesheet">
|
||||
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
||||
<link href="https://fonts.googleapis.com/css2?family=Noto+Sans:ital,wght@0,100..900;1,100..900&display=swap" rel="stylesheet">
|
||||
|
||||
<title>Indian Translate</title>
|
||||
<script src="https://d3js.org/d3.v7.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/@turf/turf@7/turf.min.js" charset="utf-8"></script>
|
||||
<script src="https://unpkg.com/htmx.org@2.0.4" integrity="sha384-HGfztofotfshcF7+8n44JQL2oJmowVChPTg48S+jvZoztPfvwD79OC/LTtG6dMp+" crossorigin="anonymous"></script>
|
||||
<script src="./htmx.min.js"></script>
|
||||
<style>
|
||||
body {
|
||||
background-color: #f4f4f4;
|
||||
@@ -19,44 +19,70 @@
|
||||
display: flex;
|
||||
align-items: flex-start;
|
||||
justify-content: space-evenly;
|
||||
gap: 2em 2em;
|
||||
}
|
||||
|
||||
svg {
|
||||
border: 1px solid;
|
||||
padding: 2em;
|
||||
visibility: hidden;
|
||||
opacity: 0;
|
||||
transition: opacity 1s, visibility 2s;
|
||||
}
|
||||
|
||||
svg.show {
|
||||
visibility: visible;
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
h1 {
|
||||
margin-block: 0.67em;
|
||||
font-size: 2em;
|
||||
}
|
||||
|
||||
.state {
|
||||
stroke: black;
|
||||
fill: none;
|
||||
stroke-width: 0.5;
|
||||
pointer-events: none;
|
||||
}
|
||||
.language {
|
||||
stroke: red;
|
||||
/* There has to be a fill, even if it's transparent, to allow
|
||||
hover events to be recognized on the inside. */
|
||||
fill: black;
|
||||
fill-opacity: 0.0;
|
||||
/* fill: black;
|
||||
fill-opacity: 0.0; */
|
||||
fill-opacity: 0.8;
|
||||
stroke-width: 1;
|
||||
}
|
||||
.languageText {
|
||||
.translationText, .romanizationText {
|
||||
visibility: hidden;
|
||||
font-family: "Noto Sans";
|
||||
font-size:1.25em;
|
||||
font-size:1.2em;
|
||||
}
|
||||
|
||||
.romanizationText {
|
||||
pointer-events: none;
|
||||
}
|
||||
|
||||
.languageText {
|
||||
pointer-events: none;
|
||||
font-weight: bold;
|
||||
font-size: 1em;
|
||||
font-family: sans-serif;
|
||||
visibility: hidden;
|
||||
}
|
||||
.language:hover ~ .languageText {
|
||||
visibility: visible;
|
||||
}
|
||||
.testClass:hover {
|
||||
fill: red;
|
||||
cursor: default;
|
||||
}
|
||||
|
||||
.district {
|
||||
stroke: white;
|
||||
stroke-width: 0.25;
|
||||
transition: fill 0.3s;
|
||||
fill: none;
|
||||
pointer-events: none;
|
||||
}
|
||||
|
||||
.language:hover {
|
||||
stroke-width: 2;
|
||||
}
|
||||
@@ -71,24 +97,125 @@
|
||||
.loading-indicator {
|
||||
display: none;
|
||||
}
|
||||
.htmx-request.loading-indicator /* While request is being made */ {
|
||||
.loading, .htmx-request.loading-indicator /* While request is being made */ {
|
||||
display: inline;
|
||||
}
|
||||
|
||||
/* Dim all other states */
|
||||
/* Kinda wild that you can do this in plain CSS */
|
||||
#indiaMap:has(.language:hover) .language:not(:hover) {
|
||||
fill-opacity: 0.5;
|
||||
transition: fill-opacity 0.3s;
|
||||
}
|
||||
|
||||
#indiaMap .language:hover {
|
||||
fill-opacity: 1;
|
||||
transition: fill-opacity 0.3s;
|
||||
}
|
||||
|
||||
/* Position map load spinner in the middle of the SVG */
|
||||
#svgContainer {
|
||||
position: relative;
|
||||
display: inline-block;
|
||||
flex-grow: 1;
|
||||
max-width: 100%;
|
||||
}
|
||||
|
||||
#svgContainer .mapLoadSpinner {
|
||||
position: absolute;
|
||||
top: 40%;
|
||||
left: 40%;
|
||||
}
|
||||
|
||||
/* Credit to https://lukehaas.me/projects/css-loaders/ */
|
||||
.mapLoadSpinner,
|
||||
.mapLoadSpinner:after {
|
||||
border-radius: 50%;
|
||||
width: 10em;
|
||||
height: 10em;
|
||||
transition: opacity 1s, visibility 2s;
|
||||
}
|
||||
.mapLoadSpinner.hide {
|
||||
opacity: 0;
|
||||
visibility: hidden;
|
||||
}
|
||||
.mapLoadSpinner {
|
||||
margin: 60px auto;
|
||||
font-size: 10px;
|
||||
position: relative;
|
||||
text-indent: -9999em;
|
||||
border-top: 1.1em solid rgba(255,158,83, 0.2);
|
||||
border-right: 1.1em solid rgba(255,158,83, 0.2);
|
||||
border-bottom: 1.1em solid rgba(255,158,83, 0.2);
|
||||
border-left: 1.1em solid #ff9e53;
|
||||
-webkit-transform: translateZ(0);
|
||||
-ms-transform: translateZ(0);
|
||||
transform: translateZ(0);
|
||||
-webkit-animation: load8 1.1s infinite linear;
|
||||
animation: load8 1.1s infinite linear;
|
||||
}
|
||||
@-webkit-keyframes load8 {
|
||||
0% {
|
||||
-webkit-transform: rotate(0deg);
|
||||
transform: rotate(0deg);
|
||||
}
|
||||
100% {
|
||||
-webkit-transform: rotate(360deg);
|
||||
transform: rotate(360deg);
|
||||
}
|
||||
}
|
||||
@keyframes load8 {
|
||||
0% {
|
||||
-webkit-transform: rotate(0deg);
|
||||
transform: rotate(0deg);
|
||||
}
|
||||
100% {
|
||||
-webkit-transform: rotate(360deg);
|
||||
transform: rotate(360deg);
|
||||
}
|
||||
}
|
||||
|
||||
#fetchingText {
|
||||
visibility: hidden;
|
||||
}
|
||||
@media only screen and (max-width: 768px) {
|
||||
main {
|
||||
flex-direction: column;
|
||||
align-items: stretch;
|
||||
}
|
||||
.district {
|
||||
stroke-width: 0.1;
|
||||
}
|
||||
.translationText, .romanizationText {
|
||||
font-size: 0.7em;
|
||||
}
|
||||
.languageText {
|
||||
font-size: 0.6em;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<main>
|
||||
<section id="textStuff">
|
||||
<h1>Indian Translate</h1>
|
||||
<form hx-get="/submit" hx-swap=none hx-indicator="#loading-screen" hx-on::after-request="updateTranslations(event.detail.xhr.response)" class="translateForm" method="get">
|
||||
<form hx-get="/submit" hx-swap=none hx-indicator="#loading-screen" hx-on::before-request="hideTranslationsAndShowText(event.detail.xhr.response)" hx-on::after-request="updateTranslations(event.detail.xhr.response)" class="translateForm" method="get">
|
||||
<label for="query">Enter text to translate:</label>
|
||||
<input type="text" name="query" id="query" required/>
|
||||
<input type="submit" value="Translate"/>
|
||||
</form>
|
||||
<progress id="loading-screen" class="loading-indicator"></progress>
|
||||
<h3 id="fetchingText">Fetching translations...</h3>
|
||||
<noscript>
|
||||
<h2>This website requires javascript to work.</h2>
|
||||
</noscript>
|
||||
</section>
|
||||
|
||||
<svg id = "indiaMap" width="1000" height="1000"></svg>
|
||||
<section id="svgContainer">
|
||||
<div id="mapLoadSpinner" class="mapLoadSpinner"></div>
|
||||
</section>
|
||||
</main>
|
||||
|
||||
<script type="text/javascript" src="index.js"></script>
|
||||
|
||||
344
index.js
344
index.js
@@ -1,57 +1,28 @@
|
||||
const svg = d3.select("svg")
|
||||
|
||||
const tamilColor = "#75d795" // Tamil
|
||||
const malayalamColor = "#ff7c7c" // Malayalam
|
||||
const kannadaColor = "#ffe77c" // Kannada
|
||||
const teluguColor = "#7c9dff" // Telugu
|
||||
const marathiColor = "#e0ff7c" // Marathi
|
||||
const konkaniColor = "#9b7cff" // Konkani
|
||||
const hindiColor = "#d17cff" // Hindi
|
||||
const gujaratiColor = "#7cffee" // Gujarati
|
||||
const marwariColor = "#7bc4c9" // Marwari
|
||||
const oriyaColor = "#9bcc9f" // Oriya
|
||||
const bengaliColor = "#bf9a77" // Bengali
|
||||
const punjabiColor = "#e84a35" // Punjabi
|
||||
const mizoColor = "#a6a4de" // Mizo
|
||||
const assameseColor = "#c9535b" // Assamese
|
||||
const bhojpuriColor = "#b3b876" // Bhojpuri
|
||||
const manipuriColor = "#c9afad" // Manipuri
|
||||
const dogriColor = "#9595e6" // Dogri (near Kashmir)
|
||||
const nepaliColor = "#71998e" // Nepali
|
||||
const urduColor = "#3fa179" // Urdu
|
||||
const tuluColor = "#dedc52" // Tulu
|
||||
const maithaliColor = "#4472a6" // Maithali
|
||||
const santaliColor = "#96bf60" // Santhali
|
||||
const sindhiColor = "#e89931" // Sindhi
|
||||
const awadhiColor = "#847fb5" // Awadhi
|
||||
|
||||
const defaultColor = "#555555"
|
||||
|
||||
const languages = {
|
||||
tamil: {name: "Tamil", color: tamilColor, code: "ta", districts: []},
|
||||
malayalam: {name: "Malayalam", color: malayalamColor, code: "ml", districts: []},
|
||||
kannada: {name: "Kannada", color: kannadaColor, code: "kn", districts: []},
|
||||
telugu: {name: "Telugu", color: teluguColor, code: "te", districts: []},
|
||||
marathi: {name: "Marathi", color: marathiColor, code: "mr", districts: []},
|
||||
konkani: {name: "Konkani", color: konkaniColor, code: "gom", districts: []},
|
||||
hindi: {name: "Hindi", color: hindiColor, code: "hi", districts: []},
|
||||
gujarati: {name: "Gujarati", color: gujaratiColor, code: "gu", districts: []},
|
||||
marwari: {name: "Marwari", color: marwariColor, code: "mwr", districts: []},
|
||||
oriya: {name: "Oriya", color: oriyaColor, code: "or", districts: []},
|
||||
bengali: {name: "Bengali", color: bengaliColor, code: "bn", districts: []},
|
||||
punjabi: {name: "Punjabi", color: punjabiColor, code: "pa", districts: []},
|
||||
mizo: {name: "Mizo", color: mizoColor, code: "lus", districts: []},
|
||||
assamese: {name: "Assamese", color: assameseColor, code: "as", districts: []},
|
||||
bhojpuri: {name: "Bhojpuri", color: bhojpuriColor, code: "bho", districts: []},
|
||||
manipuri: {name: "Manipuri", color: manipuriColor, code: "mni-Mtei", districts: []},
|
||||
dogri: {name: "Dogri", color: dogriColor, code: "doi", districts: []},
|
||||
nepali: {name: "Nepali", color: nepaliColor, code: "ne", districts: []},
|
||||
urdu: {name: "Urdu", color: urduColor, code: "ur", districts: []},
|
||||
tulu: {name: "Tulu", color: tuluColor, code: "tcy", districts: []},
|
||||
maithali: {name: "Maithali", color: maithaliColor, code: "mai", districts: []},
|
||||
santali: {name: "Santali", color: santaliColor, code: "sat", districts: []},
|
||||
sindhi: {name: "Sindhi", color: sindhiColor, code: "sd", districts: []},
|
||||
awadhi: {name: "Awadhi", color: awadhiColor, code: "awa", districts: []},
|
||||
tamil: {name: "Tamil", color: "#75d795", code: "ta", districts: []},
|
||||
malayalam: {name: "Malayalam", color: "#ff7c7c", code: "ml", districts: []},
|
||||
kannada: {name: "Kannada", color: "#ffe77c", code: "kn", districts: []},
|
||||
telugu: {name: "Telugu", color: "#7c9dff", code: "te", districts: []},
|
||||
marathi: {name: "Marathi", color: "#e0ff7c", code: "mr", districts: []},
|
||||
konkani: {name: "Konkani", color: "#9b7cff", code: "gom", districts: []},
|
||||
hindi: {name: "Hindi", color: "#d17cff", code: "hi", districts: []},
|
||||
gujarati: {name: "Gujarati", color: "#7cffee", code: "gu", districts: []},
|
||||
oriya: {name: "Oriya", color: "#9bcc9f", code: "or", districts: []},
|
||||
bengali: {name: "Bengali", color: "#bf9a77", code: "bn", districts: []},
|
||||
punjabi: {name: "Punjabi", color: "#e84a35", code: "pa", districts: []},
|
||||
mizo: {name: "Mizo", color: "#a6a4de", code: "lus", districts: []},
|
||||
assamese: {name: "Assamese", color: "#c9535b", code: "as", districts: []},
|
||||
bhojpuri: {name: "Bhojpuri", color: "#b3b876", code: "bho", districts: []},
|
||||
manipuri: {name: "Manipuri", color: "#c9afad", code: "mni-Mtei", districts: []},
|
||||
dogri: {name: "Dogri", color: "#9595e6", code: "doi", districts: []},
|
||||
nepali: {name: "Nepali", color: "#71998e", code: "ne", districts: []},
|
||||
urdu: {name: "Urdu", color: "#3fa179", code: "ur", districts: []},
|
||||
tulu: {name: "Tulu", color: "#dedc52", code: "tcy", districts: []},
|
||||
maithali: {name: "Maithali", color: "#4472a6", code: "mai", districts: []},
|
||||
sindhi: {name: "Sindhi", color: "#e89931", code: "sd", districts: []},
|
||||
awadhi: {name: "Awadhi", color: "#847fb5", code: "awa", districts: []},
|
||||
};
|
||||
|
||||
// Credit: https://www.artcraftblend.com/blogs/colors/shades-of-pastel
|
||||
@@ -65,7 +36,7 @@ const state2lang = {
|
||||
"Goa": languages["konkani"],
|
||||
"Odisha": languages["oriya"],
|
||||
"Gujarat": languages["gujarati"],
|
||||
"Rajasthan": languages["marwari"],
|
||||
"Rajasthan": languages["hindi"],
|
||||
"Chhattisgarh": languages["hindi"],
|
||||
"Jharkhand": languages["hindi"], // DEFAULT
|
||||
"West Bengal": languages["bengali"],
|
||||
@@ -89,7 +60,6 @@ const state2lang = {
|
||||
"Lakshadweep": languages["malayalam"],
|
||||
"Delhi": languages["hindi"],
|
||||
"Chandigarh": languages["hindi"]
|
||||
|
||||
}
|
||||
|
||||
const district2lang = { // Should override state colors
|
||||
@@ -144,17 +114,6 @@ const district2lang = { // Should override state colors
|
||||
|
||||
"Kutch": languages["sindhi"],
|
||||
|
||||
"Godda": languages["santali"],
|
||||
"Deoghar": languages["santali"],
|
||||
"Dumka": languages["santali"],
|
||||
"Jamtara": languages["santali"],
|
||||
"Sahibganj": languages["santali"],
|
||||
"Pakur": languages["santali"],
|
||||
"East Singhbhum": languages["santali"],
|
||||
"Jhargram": languages["santali"],
|
||||
"Bankura": languages["santali"],
|
||||
"Purulia": languages["santali"],
|
||||
|
||||
"Kanpur": languages["awadhi"],
|
||||
"Lakhimpur Kheri": languages["awadhi"],
|
||||
"Sitapur": languages["awadhi"],
|
||||
@@ -166,11 +125,36 @@ const district2lang = { // Should override state colors
|
||||
"Rae Bareli": languages["awadhi"],
|
||||
"Amethi": languages["awadhi"],
|
||||
"Bahraich": languages["awadhi"],
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
 * Makes an SVG resize with its parent container while keeping the aspect
 * ratio it had when this function was called.
 *
 * @param svg d3 selection holding the <svg> element; its parent node is used
 *            as the container whose width drives the resize.
 */
function responsivefy(svg) {
    // Get the container and the SVG's current aspect ratio
    var container = d3.select(svg.node().parentNode),
        width = parseInt(svg.style("width"), 10),
        height = parseInt(svg.style("height"), 10),
        aspect = width / height;

    // Add the viewBox and preserveAspectRatio attributes, and call resize so
    // that the SVG is sized correctly on initial page load.
    // (The attribute name must be spelled "preserveAspectRatio"; a misspelled
    // name is ignored by the browser.)
    svg.attr("viewBox", "0 0 " + width + " " + height)
        .attr("preserveAspectRatio", "xMinYMid")
        .call(resize);

    // To register multiple listeners for the same event type, the event name
    // needs a namespace, i.e. 'click.foo'. This is necessary if this function
    // is invoked for multiple SVGs.
    // API docs: https://github.com/mbostock/d3/wiki/Selections#on
    d3.select(window).on("resize." + container.attr("id"), resize);

    // Get the width of the container and resize the SVG to fit it
    function resize() {
        var targetWidth = Math.floor(container.node().getBoundingClientRect().width);
        svg.attr("width", targetWidth);
        svg.attr("height", Math.round(targetWidth / aspect));
    }
}
|
||||
|
||||
// Functions for calculating and dealing with language boundaries
|
||||
function reverseCoordArrays(coords) {
|
||||
if (!Array.isArray(coords)) {
|
||||
return coords;
|
||||
@@ -186,7 +170,6 @@ function reverseCoordArrays(coords) {
|
||||
return coords.map(reverseCoordArrays);
|
||||
}
|
||||
|
||||
|
||||
function getOuterBoundaryPolygon(features) {
|
||||
// Check if we have features to process
|
||||
if (!features || features.length === 0) {
|
||||
@@ -205,6 +188,15 @@ function getOuterBoundaryPolygon(features) {
|
||||
return combined;
|
||||
}
|
||||
|
||||
/**
 * Resolves the language entry for a district feature.
 *
 * A district-specific entry in district2lang takes precedence; otherwise the
 * entry for the feature's state (properties.st_nm) in state2lang is used.
 *
 * @param d feature with `properties.district` and `properties.st_nm`
 * @returns the matching language object, or undefined when neither table
 *          has an entry
 */
function district2langFunc(d) {
    const { district, st_nm: stateName } = d.properties;

    if (district2lang.hasOwnProperty(district)) {
        return district2lang[district];
    }
    if (state2lang.hasOwnProperty(stateName)) {
        return state2lang[stateName];
    }
    return undefined;
}
|
||||
|
||||
function stateOrDistrictOrLanguage(d) {
|
||||
if (typeof d.properties.district !== 'undefined') {
|
||||
@@ -218,12 +210,40 @@ function stateOrDistrictOrLanguage(d) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// const mapWidth = document.getElementById("indiaMap").getAttribute("width")
|
||||
// const mapHeight = document.getElementById("indiaMap").getAttribute("height")
|
||||
const mapWidth = /*window.innerWidth - */document.querySelector("#svgContainer").offsetWidth * 0.85;
|
||||
// const mapHeight = document.querySelector("#svgContainer").offsetHeight;
|
||||
const mapHeight = (window.innerHeight - document.querySelector("#svgContainer").getBoundingClientRect().top);
|
||||
|
||||
const svg = d3.select("#svgContainer")
|
||||
.append('svg')
|
||||
.attr('width', mapWidth.toString())
|
||||
.attr('height', (mapHeight).toString())
|
||||
// .attr('viewbox', '0 0 ' + mapWidth.toString() + ' ' + mapHeight.toString())
|
||||
// .attr('preserveAspectRatio', "xMidYMin")
|
||||
.attr('id', 'indiaMap')
|
||||
// .call(responsivefy);
|
||||
|
||||
function drawMap(world) {
|
||||
const mapWidth = document.getElementById("indiaMap").getAttribute("width")
|
||||
const mapHeight = document.getElementById("indiaMap").getAttribute("height")
|
||||
const projection = d3.geoMercator().fitSize([mapWidth, mapHeight], world)
|
||||
const path = d3.geoPath().projection(projection);
|
||||
|
||||
requestAnimationFrame(() => {
|
||||
const newSvg = d3.select('svg');
|
||||
const bbox = newSvg.node().getBBox();
|
||||
const originalWidth = +newSvg.attr("width");
|
||||
newSvg
|
||||
// .attr("viewBox", `${bbox.x} ${bbox.y} ${bbox.width} ${bbox.height}`)
|
||||
.attr("viewBox", `0 ${bbox.y} ${originalWidth} ${bbox.height}`)
|
||||
// .attr("width", mapHeight.toString())
|
||||
.attr("height", bbox.height)
|
||||
// .style("display", "block")
|
||||
// .style("max-height", "100%"); // optional: keep it scalable in a flexbox
|
||||
});
|
||||
|
||||
|
||||
const states = svg.selectAll("g")
|
||||
.data(world.features)
|
||||
.enter()
|
||||
@@ -233,39 +253,75 @@ function drawMap(world) {
|
||||
.attr("d", path)
|
||||
.attr("class", d => stateOrDistrictOrLanguage(d))
|
||||
.attr("fill", function(d) {
|
||||
if (stateOrDistrictOrLanguage(d) === "district") {
|
||||
if (district2lang.hasOwnProperty(d.properties.district)) {
|
||||
return district2lang[d.properties.district].color;
|
||||
} else if (state2lang.hasOwnProperty(d.properties.st_nm)) {
|
||||
return state2lang[d.properties.st_nm].color;
|
||||
} else {
|
||||
return defaultColor;
|
||||
}
|
||||
if (stateOrDistrictOrLanguage(d) === "language") {
|
||||
return languages[d.properties.lang_name.toLowerCase()].color;
|
||||
}
|
||||
})
|
||||
.each(function(d) {
|
||||
if (stateOrDistrictOrLanguage(d) === "district") {
|
||||
let districtLang;
|
||||
if (district2lang.hasOwnProperty(d.properties.district)) {
|
||||
districtLang = district2lang[d.properties.district];
|
||||
} else if (state2lang.hasOwnProperty(d.properties.st_nm)) {
|
||||
districtLang = state2lang[d.properties.st_nm];
|
||||
}
|
||||
const districtLang = district2langFunc(d);
|
||||
if (typeof districtLang !== 'undefined') {
|
||||
districtLang.districts.push(d)
|
||||
}
|
||||
}
|
||||
// Hide map load spinner after map has loaded
|
||||
document.getElementById("mapLoadSpinner").classList.add("hide");
|
||||
document.querySelector("svg").classList.add("show");
|
||||
|
||||
})
|
||||
.append("title") // Tooltip
|
||||
.text(d => d.properties.district);
|
||||
|
||||
states.append("text")
|
||||
.attr("x", d => projection(d3.geoCentroid(d))[0])
|
||||
.attr("y", d => projection(d3.geoCentroid(d))[1])
|
||||
.attr("x", function(d) {
|
||||
if (stateOrDistrictOrLanguage(d) == "language") {
|
||||
rtv = projection(d3.geoCentroid(d))[0];
|
||||
if (d.properties.lang_name == "Kannada") {
|
||||
rtv -= 20;
|
||||
}
|
||||
if (d.properties.lang_name == "Tamil") {
|
||||
rtv += 20;
|
||||
}
|
||||
if (d.properties.lang_name == "Maithali") {
|
||||
rtv += 10;
|
||||
}
|
||||
if (d.properties.lang_name == "Konkani") {
|
||||
rtv -= 15;
|
||||
}
|
||||
if (d.properties.lang_name == "Bengali") {
|
||||
rtv -= 15;
|
||||
}
|
||||
return rtv
|
||||
}
|
||||
})
|
||||
.attr("y", function(d) {
|
||||
if (stateOrDistrictOrLanguage(d) == "language") {
|
||||
rtv = projection(d3.geoCentroid(d))[1]
|
||||
if (d.properties.lang_name == "Kannada") {
|
||||
rtv += 15;
|
||||
}
|
||||
if (d.properties.lang_name == "Tamil") {
|
||||
rtv -= 20;
|
||||
}
|
||||
if (d.properties.lang_name == "Gujarati") {
|
||||
rtv -= 10;
|
||||
}
|
||||
if (d.properties.lang_name == "Mizo") {
|
||||
rtv += 20;
|
||||
}
|
||||
if (d.properties.lang_name == "Nepali") {
|
||||
rtv -= 10;
|
||||
}
|
||||
if (d.properties.lang_name == "Bengali") {
|
||||
rtv += 25;
|
||||
}
|
||||
|
||||
return rtv
|
||||
}
|
||||
})
|
||||
.attr("text-anchor", "middle")
|
||||
.attr("font-size", "12px")
|
||||
.attr("fill", "black")
|
||||
.attr("class", "languageText")
|
||||
.attr("class", "translationText")
|
||||
.attr("id", function(d) {
|
||||
if (stateOrDistrictOrLanguage(d) == "language") {
|
||||
return d.properties.lang_code+"Text"
|
||||
@@ -277,45 +333,99 @@ function drawMap(world) {
|
||||
if (stateOrDistrictOrLanguage(d) == "language") {
|
||||
return d.properties.lang_name;
|
||||
} else {
|
||||
d3.select(this).remove() // Only add text if the element is a language
|
||||
d3.select(this).remove() // Only add this attribute if the element is a language
|
||||
}
|
||||
});
|
||||
|
||||
// Romanization
|
||||
states.append("text")
|
||||
.attr("x", d => stateOrDistrictOrLanguage(d) == "language" ?
|
||||
document.getElementById(d.properties.lang_code + "Text").getAttribute("x") :
|
||||
projection(d3.geoCentroid(d))[0])
|
||||
.attr("y", d => stateOrDistrictOrLanguage(d) == "language" ?
|
||||
parseFloat(document.getElementById(d.properties.lang_code + "Text").getAttribute("y")) + parseFloat(getComputedStyle(document.getElementsByClassName('translationText')[0]).getPropertyValue('font-size')) :
|
||||
projection(d3.geoCentroid(d))[1])
|
||||
.attr("text-anchor", "middle")
|
||||
.attr("fill", "black")
|
||||
.attr("class", "romanizationText")
|
||||
.attr("id", function(d) {
|
||||
if (stateOrDistrictOrLanguage(d) == "language") {
|
||||
return d.properties.lang_code+"Romanization"
|
||||
} else {
|
||||
d3.select(this).remove()
|
||||
}
|
||||
})
|
||||
.each(function(d) {
|
||||
if (!stateOrDistrictOrLanguage(d) == "language") {
|
||||
d3.select(this).remove() // Only add this attribute if the element is a language
|
||||
}
|
||||
});
|
||||
|
||||
// Language
|
||||
states.append("text")
|
||||
.attr("x", d => stateOrDistrictOrLanguage(d) == "language" ?
|
||||
document.getElementById(d.properties.lang_code + "Text").getAttribute("x") :
|
||||
projection(d3.geoCentroid(d))[0])
|
||||
.attr("y", d => stateOrDistrictOrLanguage(d) == "language" ?
|
||||
parseFloat(document.getElementById(d.properties.lang_code + "Text").getAttribute("y")) - parseFloat(getComputedStyle(document.getElementsByClassName('translationText')[0]).getPropertyValue('font-size')) :
|
||||
projection(d3.geoCentroid(d))[1])
|
||||
.attr("text-anchor", "middle")
|
||||
.attr("fill", "black")
|
||||
.attr("class", "languageText")
|
||||
.attr("id", function(d) {
|
||||
if (stateOrDistrictOrLanguage(d) == "language") {
|
||||
return d.properties.lang_code+"Language"
|
||||
} else {
|
||||
d3.select(this).remove()
|
||||
}
|
||||
})
|
||||
.each(function(d) {
|
||||
if (!stateOrDistrictOrLanguage(d) == "language") {
|
||||
d3.select(this).remove() // Only add this attribute if the element is a language
|
||||
}
|
||||
})
|
||||
.text(function(d) {
|
||||
if (stateOrDistrictOrLanguage(d) == "language") {
|
||||
return d.properties.lang_name;
|
||||
} else {
|
||||
d3.select(this).remove() // Only add this attribute if the element is a language
|
||||
}
|
||||
})
|
||||
let allLangs = []
|
||||
|
||||
const coordinates = [77.69916967457782,23.389970772934166];
|
||||
const [xCoord, yCoord] = projection(coordinates);
|
||||
|
||||
svg.append("text")
|
||||
.attr("x", xCoord)
|
||||
.attr("y", yCoord)
|
||||
.attr("class", "testClass")
|
||||
.attr("text-anchor", "middle")
|
||||
.attr("font-size", "12px")
|
||||
.attr("fill", "black")
|
||||
.text("Hello, Map!");
|
||||
// for (const [langId,lang] of Object.entries(languages)) {
|
||||
// let geojson = {
|
||||
// "type": "FeatureCollection",
|
||||
// "features": lang.districts
|
||||
// };
|
||||
//
|
||||
// let outerBound = getOuterBoundaryPolygon(geojson.features)
|
||||
// outerBound["id"] = "lang" + lang.name
|
||||
// outerBound.properties["lang_name"]= lang.name
|
||||
// outerBound.properties["lang_code"]= lang.code
|
||||
// allLangs.push(outerBound);
|
||||
// svg.append("text")
|
||||
// .attr("x", xCoord)
|
||||
// .attr("y", yCoord)
|
||||
// .attr("class", "testClass")
|
||||
// .attr("text-anchor", "middle")
|
||||
// .attr("font-size", "12px")
|
||||
// .attr("fill", "black")
|
||||
// .text("Hello, Map!");
|
||||
|
||||
// svg.append("path")
|
||||
// .datum(outerBound)
|
||||
// .attr("d", path)
|
||||
// .attr("fill", "none")
|
||||
// .attr("stroke", "red")
|
||||
// .attr("stroke-width", 2)
|
||||
// }
|
||||
// console.log(JSON.stringify(allLangs))
|
||||
// for (const [langId,lang] of Object.entries(languages)) {
|
||||
// let geojson = {
|
||||
// "type": "FeatureCollection",
|
||||
// "features": lang.districts
|
||||
// };
|
||||
//
|
||||
// let outerBound = getOuterBoundaryPolygon(geojson.features)
|
||||
// outerBound["id"] = "lang" + lang.name
|
||||
// outerBound.properties["lang_name"]= lang.name
|
||||
// outerBound.properties["lang_code"]= lang.code
|
||||
// allLangs.push(outerBound);
|
||||
//
|
||||
// svg.append("path")
|
||||
// .datum(outerBound)
|
||||
// .attr("d", path)
|
||||
// .attr("fill", "none")
|
||||
// .attr("stroke", "red")
|
||||
// .attr("stroke-width", 2)
|
||||
// }
|
||||
// console.log(JSON.stringify(allLangs))
|
||||
|
||||
}
|
||||
|
||||
d3.json("india_with_districts_with_languages.json").then(drawMap)
|
||||
|
||||
d3.json("india_with_districts_with_languages_min.json").then(drawMap)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
1
india_with_districts_with_languages_min.json
Normal file
1
india_with_districts_with_languages_min.json
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
@@ -13,7 +13,6 @@ Mizo
|
||||
Assamese
|
||||
Punjabi
|
||||
Maithili
|
||||
Santali
|
||||
Nepali
|
||||
Konkani
|
||||
Tulu
|
||||
|
||||
3
server.py
Normal file
3
server.py
Normal file
@@ -0,0 +1,3 @@
|
||||
"""Entry point that serves the AI4Bharat transliteration Flask app."""
from ai4bharat.transliteration import xlit_server

# get_app() returns the Flask application and the engine mapping; both stay
# module-level names so they remain importable from this module.
app, engine = xlit_server.get_app()

# Only start the built-in server when this file is executed directly, so that
# importing the module (e.g. from a WSGI runner) does not block on app.run().
if __name__ == "__main__":
    app.run(host='0.0.0.0', port=10000)
|
||||
11
todo.txt
11
todo.txt
@@ -1,6 +1,7 @@
|
||||
1. REFACTOR
|
||||
2. Cache results in a database instead of fetching them each time, use cached results if possible
|
||||
3. Figure out positioning of the different translations, show all of them instead of having to hover
|
||||
4. Show romanizations
|
||||
5. General beautification
|
||||
6. TTS?
|
||||
2. Show language names as well, along with translation and romanization
|
||||
3. Figure out positioning of the different translations, show all of them instead of having to hover; for smaller languages it's hard to move the mouse around to see the full word.
|
||||
4. Better translations (some of them are just terrible with Google Translate - use the ai4bharat one instead?)
|
||||
5. Cache romanizations in DB
|
||||
6. General beautification
|
||||
7. TTS?
|
||||
|
||||
BIN
translations.db
Normal file
BIN
translations.db
Normal file
Binary file not shown.
@@ -1,6 +1,37 @@
|
||||
/**
 * Hides every translation and romanization label and reveals the
 * "fetchingText" status element.
 *
 * @param {*} request - Not used by this function.
 */
function hideTranslationsAndShowText(request) {
    const labels = document.querySelectorAll(".translationText, .romanizationText");
    for (const label of labels) {
        label.style.visibility = 'hidden';
    }

    const statusText = document.getElementById("fetchingText");
    statusText.style.visibility = "visible";
}
|
||||
|
||||
/**
 * Writes freshly fetched translations into the map labels, then requests
 * their romanizations from the "/romanize" endpoint and displays those too.
 *
 * @param {string} response - JSON text mapping language codes to translated
 *     strings. It is parsed locally and also forwarded unchanged as the body
 *     of the "/romanize" POST request.
 * @returns {Promise<void>} Settles once the romanization request has finished
 *     (successfully or not) and the UI has been restored.
 */
function updateTranslations(response) {
    const translations = JSON.parse(response);
    document.querySelectorAll(".translationText").forEach(element => {
        element.textContent = translations[element.id.replace("Text", "")];
    });

    // Send the result to the romanization server.
    // Since this is an asynchronous operation, there is no indication that
    // it is being performed in the backend. So the 'loading' class is added to
    // the progress bar used to track translations, and removed again once the
    // request has settled.
    const loadingScreen = document.getElementById("loading-screen");
    const fetchingText = document.getElementById("fetchingText");
    loadingScreen.classList.add("loading");
    fetchingText.textContent = "Fetching romanizations...";

    return fetch("/romanize", {
        method: "POST",
        body: response
    })
        .then((newResponse) => {
            // fetch() only rejects on network failure, so HTTP errors must be
            // detected explicitly.
            if (!newResponse.ok) {
                throw new Error("Romanization request failed with status " + newResponse.status);
            }
            // json() returns another promise because the body of the response
            // may not have loaded yet (this is not the static Response.json()).
            return newResponse.json();
        })
        .then((romanizations) => {
            document.querySelectorAll(".romanizationText").forEach(element => {
                const langCode = element.id.replace("Romanization", "");
                if (langCode in romanizations) {
                    element.textContent = "(" + romanizations[langCode] + ")";
                }
            });
        })
        .catch((error) => {
            console.error("Could not fetch romanizations:", error);
        })
        .finally(() => {
            // Restore the UI whether or not the request succeeded; otherwise a
            // failed request would leave the labels hidden and the progress
            // bar stuck in its loading state.
            loadingScreen.classList.remove("loading");
            document.querySelectorAll(".translationText, .romanizationText")
                .forEach(element => element.style.visibility = 'visible');
            fetchingText.textContent = "Fetching translations..."; // Restore the original text content
            fetchingText.style.visibility = "hidden";
        });
}
|
||||
|
||||
287
xlit_server.py
Normal file
287
xlit_server.py
Normal file
@@ -0,0 +1,287 @@
|
||||
"""
|
||||
Expose Transliteration Engine as an HTTP API.
|
||||
|
||||
USAGE:
|
||||
```
|
||||
from ai4bharat.transliteration import xlit_server
|
||||
app, engine = xlit_server.get_app()
|
||||
app.run(host='0.0.0.0', port=8000)
|
||||
```
|
||||
Sample URLs:
|
||||
http://localhost:8000/tl/ta/amma
|
||||
http://localhost:8000/languages
|
||||
|
||||
FORMAT:
|
||||
Based on the Varnam API standard
|
||||
https://api.varnamproject.com/tl/hi/bharat
|
||||
"""
|
||||
|
||||
from flask import Flask, jsonify, request, make_response
|
||||
from uuid import uuid4
|
||||
from datetime import datetime
|
||||
import traceback
|
||||
import enum
|
||||
import sqlite3
|
||||
|
||||
from .utils import LANG_CODE_TO_DISPLAY_NAME, RTL_LANG_CODES, LANG_CODE_TO_SCRIPT_CODE
|
||||
|
||||
class XlitError(enum.Enum):
    """Failure categories for transliteration requests.

    Each member's value is the human-readable message that the API handlers
    copy into the ``error`` field of their JSON responses.
    """
    # Requested language code is not one the engine supports.
    lang_err = "Unsupported langauge ID requested ;( Please check available languages."
    # Input string could not be processed.
    string_err = "String passed is incompatable ;("
    # The engine raised while transliterating.
    internal_err = "Internal crash ;("
    unknown_err = "Unknown Failure"
    # Model or metadata could not be loaded.
    loading_err = "Loading failed ;( Check if metadata/paths are correctly configured."
|
||||
|
||||
app = Flask(__name__)
|
||||
app.config['JSON_AS_ASCII'] = False
|
||||
|
||||
## ----------------------------- Xlit Engine -------------------------------- ##
|
||||
|
||||
from .xlit_src import XlitEngine
|
||||
|
||||
# Beam width handed to both engines below.
MAX_SUGGESTIONS = 8
# Fallback for the `num_suggestions` query parameter of the word endpoints.
DEFAULT_NUM_SUGGESTIONS = 5

# One engine per direction, both constructed once when the module is imported:
# "en2indic" takes roman-script input, "indic2en" takes indic-script input.
ENGINE = {
    "en2indic": XlitEngine(beam_width=MAX_SUGGESTIONS, rescore=True, model_type="transformer", src_script_type = "roman"),
    "indic2en": XlitEngine(beam_width=MAX_SUGGESTIONS, rescore=True, model_type="transformer", src_script_type = "indic"),
}

# Metadata records served by the /languages endpoint: one entry per language
# supported by the "en2indic" engine, sorted by language code.
EXPOSED_LANGS = [
    {
        "LangCode": lang_code, # ISO-639 code
        "Identifier": lang_code, # ISO-639 code
        "DisplayName": LANG_CODE_TO_DISPLAY_NAME[lang_code],
        "Author": "AI4Bharat", # Name of developer / team
        "CompiledDate": "09-April-2022", # date on which model was trained
        "IsStable": True, # Set `False` if the model is experimental
        "Direction": "rtl" if lang_code in RTL_LANG_CODES else "ltr",
        "ScriptCode": LANG_CODE_TO_SCRIPT_CODE[lang_code],
    } for lang_code in sorted(ENGINE["en2indic"].all_supported_langs)
]
|
||||
|
||||
def get_app():
    """Return the module-level Flask app together with the engine mapping.

    Returns:
        tuple: ``(app, ENGINE)``.
    """
    flask_app, engines = app, ENGINE
    return flask_app, engines
|
||||
|
||||
## ---------------------------- API End-points ------------------------------ ##
|
||||
|
||||
@app.route('/languages', methods = ['GET', 'POST'])
def supported_languages():
    """Serve the list of exposed languages as JSON.

    When the request carries no ``xlit_user_id`` cookie, a fresh random id is
    attached to the response as a one-year cookie for the ``.ai4bharat.org``
    domain.
    """
    # Format - https://xlit-api.ai4bharat.org/languages
    reply = make_response(jsonify(EXPOSED_LANGS))
    if 'xlit_user_id' in request.cookies:
        return reply

    # The cookie domain is fixed rather than derived from the request origin.
    cookie_domain = '.ai4bharat.org'
    one_year_seconds = 365*24*60*60
    reply.set_cookie(
        'xlit_user_id',
        uuid4().hex,
        max_age=one_year_seconds,
        domain=cookie_domain,
        samesite='None',
        secure=True,
        httponly=True,
    )
    return reply
|
||||
|
||||
@app.route('/tl/<lang_code>/<eng_word>', methods = ['GET', 'POST'])
def xlit_api(lang_code, eng_word):
    """Transliterate a roman-script word into the requested language.

    Args:
        lang_code: Target language code; must be supported by the
            "en2indic" engine.
        eng_word: Word to transliterate. Only its first 70 characters are
            passed to the engine.

    Query parameters:
        transliterate_numerals: ``true`` (case-insensitive) enables it;
            anything else, or absence, disables it.
        num_suggestions: Number of suggestions requested from the engine
            (defaults to DEFAULT_NUM_SUGGESTIONS).

    Returns:
        A JSON response with ``success``, ``error``, ``at``, ``input`` and
        ``result`` fields.
    """
    # Format: https://xlit-api.ai4bharat.org/tl/ta/bharat
    response = {
        'success': False,
        'error': '',
        'at': str(datetime.utcnow()) + ' +0000 UTC',
        'input': eng_word.strip(),
        'result': ''
    }

    transliterate_numerals = request.args.get('transliterate_numerals', default=False, type=lambda v: v.lower() == 'true')
    num_suggestions = request.args.get('num_suggestions', default=DEFAULT_NUM_SUGGESTIONS, type=int)

    if lang_code not in ENGINE["en2indic"].all_supported_langs:
        response['error'] = 'Invalid scheme identifier. Supported languages are: '+ str(ENGINE["en2indic"].all_supported_langs)
        return jsonify(response)

    error_trace = None
    try:
        ## Limit char count to --> 70
        xlit_result = ENGINE["en2indic"].translit_word(eng_word[:70], lang_code, topk=num_suggestions, transliterate_numerals=transliterate_numerals)
    except Exception:
        xlit_result = XlitError.internal_err
        # Capture the traceback here: once the except block exits, the active
        # exception is cleared and format_exc() would only yield "NoneType: None".
        error_trace = traceback.format_exc()

    if isinstance(xlit_result, XlitError):
        response['error'] = xlit_result.value
        print("XlitError:", error_trace or xlit_result.name)
    else:
        response['result'] = xlit_result
        response['success'] = True

    return jsonify(response)
|
||||
|
||||
@app.route('/rtl/<lang_code>/<word>', methods = ['GET', 'POST'])
def reverse_xlit_api(lang_code, word):
    """Romanize indic-script text using the "indic2en" engine.

    Args:
        lang_code: Source language code; must be supported by the
            "indic2en" engine.
        word: Text to romanize. It is passed to the engine's sentence
            transliteration untruncated.

    Returns:
        A JSON response with ``success``, ``error``, ``at``, ``input`` and
        ``result`` fields.
    """
    # Format: https://api.varnamproject.com/rtl/hi/%E0%A4%AD%E0%A4%BE%E0%A4%B0%E0%A4%A4
    response = {
        'success': False,
        'error': '',
        'at': str(datetime.utcnow()) + ' +0000 UTC',
        'input': word.strip(),
        'result': ''
    }

    if lang_code not in ENGINE["indic2en"].all_supported_langs:
        response['error'] = 'Invalid scheme identifier. Supported languages are: '+ str(ENGINE["indic2en"].all_supported_langs)
        return jsonify(response)

    error_trace = None
    try:
        xlit_result = ENGINE["indic2en"].translit_sentence(word, lang_code)
    except Exception:
        xlit_result = XlitError.internal_err
        # Capture the traceback here: once the except block exits, the active
        # exception is cleared and format_exc() would only yield "NoneType: None".
        error_trace = traceback.format_exc()

    if isinstance(xlit_result, XlitError):
        response['error'] = xlit_result.value
        print("XlitError:", error_trace or xlit_result.name)
    else:
        response['result'] = xlit_result
        response['success'] = True

    return jsonify(response)
|
||||
|
||||
@app.route('/transliterate', methods=['POST'])
def ulca_api():
    '''
    ULCA-compliant endpoint. See for sample request-response:
    https://github.com/ULCA-IN/ulca/tree/master/specs/examples/model/transliteration-model

    The JSON body must contain `input` (a list of items, each with a `source`
    string) and `config` (with `language.sourceLanguage` and
    `language.targetLanguage`; optional `isSentence` and `numSuggestions`).
    Each input item gets a `target` list added and the items are returned
    under `output`.

    Responds with 400 when required fields are missing and 501 when the
    language pair is not supported by either engine.
    '''
    data = request.get_json(force=True)

    if "input" not in data or "config" not in data:
        return jsonify({
            "status": {
                "statusCode": 400,
                "message": "Ensure `input` and `config` fields are not missing."
            }
        }), 400

    config = data["config"]
    language = config.get("language") if isinstance(config, dict) else None
    if not isinstance(language, dict) or "sourceLanguage" not in language or "targetLanguage" not in language:
        # Report malformed requests as a client error instead of letting the
        # lookups below raise KeyError.
        return jsonify({
            "status": {
                "statusCode": 400,
                "message": "Ensure `config.language` specifies `sourceLanguage` and `targetLanguage`."
            }
        }), 400

    source_lang = language["sourceLanguage"]
    target_lang = language["targetLanguage"]

    en_to_indic = source_lang == "en" and target_lang in ENGINE["en2indic"].all_supported_langs
    indic_to_en = source_lang in ENGINE["indic2en"].all_supported_langs and target_lang == 'en'
    if not (en_to_indic or indic_to_en):
        return jsonify({
            "status": {
                "statusCode": 501,
                "message": "The mentioned language-pair is not supported yet."
            }
        }), 501

    is_sentence = config.get("isSentence", False)
    # Sentence mode always yields a single output per item.
    num_suggestions = 1 if is_sentence else config.get("numSuggestions", 5)

    if target_lang == "en":
        engine = ENGINE["indic2en"]
        lang_code = source_lang
    else:
        engine = ENGINE["en2indic"]
        lang_code = target_lang

    for item in data["input"]:
        if is_sentence:
            item["target"] = [engine.translit_sentence(item["source"], lang_code=lang_code)]
        else:
            # Word mode: the source is truncated to 32 characters, and the
            # truncated form is what gets echoed back.
            item["source"] = item["source"][:32]
            item["target"] = engine.translit_word(item["source"], lang_code=lang_code, topk=num_suggestions)

    return {
        "output": data["input"],
        # "status": {
        #     "statusCode": 200,
        #     "message" : "success"
        # }
    }, 200
|
||||
|
||||
# Maps the language codes used in translation payloads to the language code
# handed to the romanization engine.
_ROMANIZE_ENGINE_CODES = {
    "hi": "hi",
    "bn": "bn",
    "mr": "mr",
    "ta": "ta",
    "te": "te",
    "kn": "kn",
    "ml": "ml",
    "or": "or",
    "gu": "gu",
    "ur": "ur",
    "as": "as",
    "pa": "pa",
    "mai": "mai",
    "ne": "ne",
    "gom": "gom",
    "tcy": "kn", # Tulu uses Kannada script
    "bho": "hi", # Bhojpuri uses Hindi script
    "doi": "hi", # Dogri uses Hindi script
    "mni-Mtei": "mni",
    "sd": "sd",
    "awa": "hi", # Awadhi uses Hindi script
}

# Database column name (language name) -> payload language code.
_LANG_NAME_TO_CODE = {
    "hindi": "hi",
    "bengali": "bn",
    "marathi": "mr",
    "tamil": "ta",
    "telugu": "te",
    "malayalam": "ml",
    "kannada": "kn",
    "oriya": "or",
    "gujarati": "gu",
    "urdu": "ur",
    "assamese": "as",
    "punjabi": "pa",
    "maithili": "mai",
    "nepali": "ne",
    "konkani": "gom",
    "tulu": "tcy",
    "bhojpuri": "bho",
    "dogri": "doi",
    "manipuri": "mni-Mtei",
    "sindhi": "sd",
    "awadhi": "awa",
    "english": "en",
}
_CODE_TO_LANG_NAME = {code: name for name, code in _LANG_NAME_TO_CODE.items()}


@app.route('/romanize', methods=['POST'])
def romanizeHandler():
    """Romanize a set of translations, caching the result in SQLite.

    The JSON body maps language codes to translated text and must include the
    English source word under ``en``. If the ``romanizations`` table already
    has a row for that English word, the row is returned. Otherwise every
    entry whose code is known is romanized via ``reverse_xlit_api``, the
    result is stored as a new row, and then returned.

    Returns:
        A JSON object mapping language codes to romanized text (plus ``en``),
        or a 400 response when the body has no ``en`` key.
    """
    data = request.get_json(force=True)
    if not isinstance(data, dict) or 'en' not in data:
        return jsonify({"error": "Request body must be a JSON object containing an `en` key."}), 400
    englishWord = data['en']

    con = sqlite3.connect("../translations.db")
    try:
        cur = con.cursor()
        cur.execute("CREATE TABLE IF NOT EXISTS romanizations AS SELECT * FROM translations WHERE 0") # Copy schema from 'translations' table

        # Check if database contains the romanizations already
        cur.execute('SELECT * FROM romanizations WHERE english = ?', (englishWord,))
        cachedRow = cur.fetchone()
        if cachedRow is not None:
            columnNames = [column[0] for column in cur.description]
            return jsonify({
                _LANG_NAME_TO_CODE[name]: value
                for name, value in zip(columnNames, cachedRow)
                if name in _LANG_NAME_TO_CODE
            })

        # Not cached yet: romanize every language we know how to handle.
        rtv = {"en": englishWord}
        for key, text in data.items():
            if key in _ROMANIZE_ENGINE_CODES:
                response = reverse_xlit_api(_ROMANIZE_ENGINE_CODES[key], text)
                rtv[key] = response.get_json()['result']

        # Column names come only from the fixed lookup table above, never from
        # the request, and the placeholder count follows the number of values
        # so that payloads covering a subset of languages can be stored too.
        columns = ", ".join('"{}"'.format(_CODE_TO_LANG_NAME[code]) for code in rtv)
        placeholders = ", ".join("?" for _ in rtv)
        cur.execute(
            "INSERT INTO romanizations ({}) VALUES ({})".format(columns, placeholders),
            tuple(rtv.values()),
        )
        con.commit()
        return jsonify(rtv)
    finally:
        # Release the connection on every path, including errors.
        con.close()
|
||||
Reference in New Issue
Block a user