Google Cloud Image Annotation (OCR)
The Vision API can detect and extract text from images. There are two annotation features that support optical character recognition (OCR):
- TEXT_DETECTION: detects and extracts text from any image. For example, a photograph might contain a street sign or traffic sign. The JSON includes the entire extracted string, as well as individual words, and their bounding boxes.
- DOCUMENT_TEXT_DETECTION: also extracts text from an image, but the response is optimized for dense text and documents. The JSON includes page, block, paragraph, word, and break information.
Request
The Image Annotation API consists of a single HTTP POST endpoint. The request body must be multipart/form-data with the following fields.
-
feature is either TEXT_DETECTION or DOCUMENT_TEXT_DETECTION
-
image is the contents of the image file
Example Image
Response
- In case of success (example):
{
"textAnnotations": [
{
"locale": "en",
"description": "WAITING?\nPLEASE\nTURN OFF\nYOUR\nENGINE",
"boundingPoly": {
"vertices": [
{
"x": 310,
"y": 821
},
{
"x": 2225,
"y": 821
},
{
"x": 2225,
"y": 1965
},
{
"x": 310,
"y": 1965
}
]
}
},
{
"description": "WAITING",
"boundingPoly": {
"vertices": [
{
"x": 344,
"y": 821
},
{
"x": 2025,
"y": 879
},
{
"x": 2016,
"y": 1127
},
{
"x": 335,
"y": 1069
}
]
}
},
{
"description": "?",
"boundingPoly": {
"vertices": [
{
"x": 2057,
"y": 881
},
{
"x": 2225,
"y": 887
},
{
"x": 2216,
"y": 1134
},
{
"x": 2048,
"y": 1128
}
]
}
},
{
"description": "PLEASE",
"boundingPoly": {
"vertices": [
{
"x": 1208,
"y": 1230
},
{
"x": 1895,
"y": 1253
},
{
"x": 1891,
"y": 1374
},
{
"x": 1204,
"y": 1351
}
]
}
},
{
"description": "TURN",
"boundingPoly": {
"vertices": [
{
"x": 1217,
"y": 1414
},
{
"x": 1718,
"y": 1434
},
{
"x": 1713,
"y": 1558
},
{
"x": 1212,
"y": 1538
}
]
}
},
{
"description": "OFF",
"boundingPoly": {
"vertices": [
{
"x": 1787,
"y": 1437
},
{
"x": 2133,
"y": 1451
},
{
"x": 2128,
"y": 1575
},
{
"x": 1782,
"y": 1561
}
]
}
},
{
"description": "YOUR",
"boundingPoly": {
"vertices": [
{
"x": 1211,
"y": 1609
},
{
"x": 1741,
"y": 1626
},
{
"x": 1737,
"y": 1747
},
{
"x": 1207,
"y": 1731
}
]
}
},
{
"description": "ENGINE",
"boundingPoly": {
"vertices": [
{
"x": 1213,
"y": 1805
},
{
"x": 1922,
"y": 1819
},
{
"x": 1919,
"y": 1949
},
{
"x": 1210,
"y": 1935
}
]
}
}
],
"fullTextAnnotation": {
"pages": [
{
"property": {
"detectedLanguages": [
{
"languageCode": "en",
"confidence": 0.8673632
}
]
},
"width": 2745,
"height": 2040,
"blocks": [
{
"boundingBox": {
"vertices": [
{
"x": 343,
"y": 821
},
{
"x": 2225,
"y": 879
},
{
"x": 2192,
"y": 1965
},
{
"x": 310,
"y": 1907
}
]
},
"paragraphs": [
{
"boundingBox": {
"vertices": [
{
"x": 344,
"y": 821
},
{
"x": 2225,
"y": 886
},
{
"x": 2216,
"y": 1134
},
{
"x": 335,
"y": 1069
}
]
},
"words": [
{
"property": {
"detectedLanguages": [
{
"languageCode": "en",
"confidence": 1
}
]
},
"boundingBox": {
"vertices": [
{
"x": 344,
"y": 821
},
{
"x": 2025,
"y": 879
},
{
"x": 2016,
"y": 1127
},
{
"x": 335,
"y": 1069
}
]
},
"symbols": [
{
"boundingBox": {
"vertices": [
{
"x": 344,
"y": 822
},
{
"x": 667,
"y": 833
},
{
"x": 658,
"y": 1080
},
{
"x": 335,
"y": 1069
}
]
},
"text": "W"
},
{
"boundingBox": {
"vertices": [
{
"x": 694,
"y": 834
},
{
"x": 977,
"y": 844
},
{
"x": 968,
"y": 1091
},
{
"x": 685,
"y": 1081
}
]
},
"text": "A"
},
{
"boundingBox": {
"vertices": [
{
"x": 982,
"y": 844
},
{
"x": 1101,
"y": 848
},
{
"x": 1092,
"y": 1095
},
{
"x": 973,
"y": 1091
}
]
},
"text": "I"
},
{
"boundingBox": {
"vertices": [
{
"x": 1125,
"y": 849
},
{
"x": 1349,
"y": 857
},
{
"x": 1340,
"y": 1104
},
{
"x": 1116,
"y": 1096
}
]
},
"text": "T"
},
{
"boundingBox": {
"vertices": [
{
"x": 1364,
"y": 857
},
{
"x": 1481,
"y": 861
},
{
"x": 1472,
"y": 1108
},
{
"x": 1355,
"y": 1104
}
]
},
"text": "I"
},
{
"boundingBox": {
"vertices": [
{
"x": 1501,
"y": 862
},
{
"x": 1748,
"y": 871
},
{
"x": 1739,
"y": 1117
},
{
"x": 1492,
"y": 1109
}
]
},
"text": "N"
},
{
"boundingBox": {
"vertices": [
{
"x": 1781,
"y": 871
},
{
"x": 2025,
"y": 879
},
{
"x": 2016,
"y": 1126
},
{
"x": 1772,
"y": 1118
}
]
},
"text": "G"
}
]
},
{
"property": {
"detectedLanguages": [
{
"languageCode": "en",
"confidence": 1
}
]
},
"boundingBox": {
"vertices": [
{
"x": 2057,
"y": 881
},
{
"x": 2225,
"y": 887
},
{
"x": 2216,
"y": 1134
},
{
"x": 2048,
"y": 1128
}
]
},
"symbols": [
{
"property": {
"detectedBreak": {
"type": "LINE_BREAK"
}
},
"boundingBox": {
"vertices": [
{
"x": 2057,
"y": 881
},
{
"x": 2225,
"y": 887
},
{
"x": 2216,
"y": 1134
},
{
"x": 2048,
"y": 1128
}
]
},
"text": "?"
}
]
}
]
},
{
"boundingBox": {
"vertices": [
{
"x": 1208,
"y": 1227
},
{
"x": 2139,
"y": 1261
},
{
"x": 2128,
"y": 1575
},
{
"x": 1196,
"y": 1541
}
]
},
"words": [
{
"property": {
"detectedLanguages": [
{
"languageCode": "en",
"confidence": 1
}
]
},
"boundingBox": {
"vertices": [
{
"x": 1208,
"y": 1230
},
{
"x": 1895,
"y": 1253
},
{
"x": 1891,
"y": 1374
},
{
"x": 1204,
"y": 1351
}
]
},
"symbols": [
{
"boundingBox": {
"vertices": [
{
"x": 1208,
"y": 1231
},
{
"x": 1313,
"y": 1234
},
{
"x": 1309,
"y": 1354
},
{
"x": 1204,
"y": 1351
}
]
},
"text": "P"
},
{
"boundingBox": {
"vertices": [
{
"x": 1321,
"y": 1234
},
{
"x": 1422,
"y": 1237
},
{
"x": 1418,
"y": 1357
},
{
"x": 1317,
"y": 1354
}
]
},
"text": "L"
},
{
"boundingBox": {
"vertices": [
{
"x": 1427,
"y": 1238
},
{
"x": 1536,
"y": 1242
},
{
"x": 1532,
"y": 1362
},
{
"x": 1423,
"y": 1358
}
]
},
"text": "E"
},
{
"boundingBox": {
"vertices": [
{
"x": 1540,
"y": 1242
},
{
"x": 1672,
"y": 1246
},
{
"x": 1668,
"y": 1366
},
{
"x": 1536,
"y": 1362
}
]
},
"text": "A"
},
{
"boundingBox": {
"vertices": [
{
"x": 1666,
"y": 1246
},
{
"x": 1778,
"y": 1250
},
{
"x": 1774,
"y": 1370
},
{
"x": 1662,
"y": 1366
}
]
},
"text": "S"
},
{
"property": {
"detectedBreak": {
"type": "EOL_SURE_SPACE"
}
},
"boundingBox": {
"vertices": [
{
"x": 1790,
"y": 1250
},
{
"x": 1895,
"y": 1253
},
{
"x": 1891,
"y": 1373
},
{
"x": 1786,
"y": 1370
}
]
},
"text": "E"
}
]
},
{
"property": {
"detectedLanguages": [
{
"languageCode": "en",
"confidence": 1
}
]
},
"boundingBox": {
"vertices": [
{
"x": 1217,
"y": 1414
},
{
"x": 1718,
"y": 1434
},
{
"x": 1713,
"y": 1558
},
{
"x": 1212,
"y": 1538
}
]
},
"symbols": [
{
"boundingBox": {
"vertices": [
{
"x": 1217,
"y": 1415
},
{
"x": 1328,
"y": 1420
},
{
"x": 1323,
"y": 1542
},
{
"x": 1212,
"y": 1538
}
]
},
"text": "T"
},
{
"boundingBox": {
"vertices": [
{
"x": 1331,
"y": 1419
},
{
"x": 1454,
"y": 1424
},
{
"x": 1449,
"y": 1547
},
{
"x": 1326,
"y": 1542
}
]
},
"text": "U"
},
{
"boundingBox": {
"vertices": [
{
"x": 1469,
"y": 1425
},
{
"x": 1590,
"y": 1430
},
{
"x": 1585,
"y": 1553
},
{
"x": 1464,
"y": 1548
}
]
},
"text": "R"
},
{
"property": {
"detectedBreak": {
"type": "SPACE"
}
},
"boundingBox": {
"vertices": [
{
"x": 1594,
"y": 1430
},
{
"x": 1718,
"y": 1435
},
{
"x": 1713,
"y": 1558
},
{
"x": 1589,
"y": 1553
}
]
},
"text": "N"
}
]
},
{
"property": {
"detectedLanguages": [
{
"languageCode": "en",
"confidence": 1
}
]
},
"boundingBox": {
"vertices": [
{
"x": 1787,
"y": 1437
},
{
"x": 2133,
"y": 1451
},
{
"x": 2128,
"y": 1575
},
{
"x": 1782,
"y": 1561
}
]
},
"symbols": [
{
"boundingBox": {
"vertices": [
{
"x": 1787,
"y": 1438
},
{
"x": 1914,
"y": 1443
},
{
"x": 1909,
"y": 1566
},
{
"x": 1782,
"y": 1561
}
]
},
"text": "O"
},
{
"boundingBox": {
"vertices": [
{
"x": 1923,
"y": 1443
},
{
"x": 2020,
"y": 1447
},
{
"x": 2015,
"y": 1570
},
{
"x": 1918,
"y": 1566
}
]
},
"text": "F"
},
{
"property": {
"detectedBreak": {
"type": "LINE_BREAK"
}
},
"boundingBox": {
"vertices": [
{
"x": 2031,
"y": 1448
},
{
"x": 2133,
"y": 1452
},
{
"x": 2128,
"y": 1575
},
{
"x": 2026,
"y": 1571
}
]
},
"text": "F"
}
]
}
]
},
{
"boundingBox": {
"vertices": [
{
"x": 1211,
"y": 1609
},
{
"x": 1741,
"y": 1626
},
{
"x": 1737,
"y": 1747
},
{
"x": 1207,
"y": 1731
}
]
},
"words": [
{
"boundingBox": {
"vertices": [
{
"x": 1211,
"y": 1609
},
{
"x": 1741,
"y": 1626
},
{
"x": 1737,
"y": 1747
},
{
"x": 1207,
"y": 1731
}
]
},
"symbols": [
{
"boundingBox": {
"vertices": [
{
"x": 1211,
"y": 1610
},
{
"x": 1344,
"y": 1614
},
{
"x": 1340,
"y": 1735
},
{
"x": 1207,
"y": 1731
}
]
},
"text": "Y"
},
{
"boundingBox": {
"vertices": [
{
"x": 1335,
"y": 1613
},
{
"x": 1463,
"y": 1617
},
{
"x": 1459,
"y": 1738
},
{
"x": 1331,
"y": 1734
}
]
},
"text": "O"
},
{
"boundingBox": {
"vertices": [
{
"x": 1473,
"y": 1618
},
{
"x": 1592,
"y": 1622
},
{
"x": 1588,
"y": 1743
},
{
"x": 1469,
"y": 1739
}
]
},
"text": "U"
},
{
"property": {
"detectedBreak": {
"type": "LINE_BREAK"
}
},
"boundingBox": {
"vertices": [
{
"x": 1617,
"y": 1622
},
{
"x": 1741,
"y": 1626
},
{
"x": 1737,
"y": 1747
},
{
"x": 1613,
"y": 1743
}
]
},
"text": "R"
}
]
}
]
},
{
"boundingBox": {
"vertices": [
{
"x": 1213,
"y": 1805
},
{
"x": 1922,
"y": 1819
},
{
"x": 1919,
"y": 1949
},
{
"x": 1210,
"y": 1935
}
]
},
"words": [
{
"property": {
"detectedLanguages": [
{
"languageCode": "en",
"confidence": 1
}
]
},
"boundingBox": {
"vertices": [
{
"x": 1213,
"y": 1805
},
{
"x": 1922,
"y": 1819
},
{
"x": 1919,
"y": 1949
},
{
"x": 1210,
"y": 1935
}
]
},
"symbols": [
{
"boundingBox": {
"vertices": [
{
"x": 1213,
"y": 1806
},
{
"x": 1318,
"y": 1808
},
{
"x": 1315,
"y": 1937
},
{
"x": 1210,
"y": 1935
}
]
},
"text": "E"
},
{
"boundingBox": {
"vertices": [
{
"x": 1332,
"y": 1808
},
{
"x": 1454,
"y": 1810
},
{
"x": 1451,
"y": 1939
},
{
"x": 1329,
"y": 1937
}
]
},
"text": "N"
},
{
"boundingBox": {
"vertices": [
{
"x": 1475,
"y": 1811
},
{
"x": 1596,
"y": 1813
},
{
"x": 1593,
"y": 1942
},
{
"x": 1472,
"y": 1940
}
]
},
"text": "G"
},
{
"boundingBox": {
"vertices": [
{
"x": 1608,
"y": 1813
},
{
"x": 1659,
"y": 1814
},
{
"x": 1656,
"y": 1943
},
{
"x": 1605,
"y": 1942
}
]
},
"text": "I"
},
{
"boundingBox": {
"vertices": [
{
"x": 1676,
"y": 1815
},
{
"x": 1794,
"y": 1817
},
{
"x": 1791,
"y": 1946
},
{
"x": 1673,
"y": 1944
}
]
},
"text": "N"
},
{
"property": {
"detectedBreak": {
"type": "LINE_BREAK"
}
},
"boundingBox": {
"vertices": [
{
"x": 1818,
"y": 1818
},
{
"x": 1922,
"y": 1820
},
{
"x": 1919,
"y": 1949
},
{
"x": 1815,
"y": 1947
}
]
},
"text": "E"
}
]
}
]
}
],
"blockType": "TEXT"
}
]
}
],
"text": "WAITING?\nPLEASE\nTURN OFF\nYOUR\nENGINE"
}
}
-
In case of failure:
- The response MIME type is application/json. The error type is indicated by the HTTP response status code, and the details are in the JSON body.
Snippets
# Upload image.png as multipart/form-data and request TEXT_DETECTION.
# Replace <YOUR-API-KEY-HERE> with your API key. Every continued line needs a
# space before the trailing backslash so adjacent arguments are not glued together.
curl --location \
--request POST 'https://manta-api.coxwave.app/api/google-cloud/vision/image-annotation' \
--header 'X-MANTA-HUB-API-KEY: <YOUR-API-KEY-HERE>' \
--form 'image=@image.png' \
--form 'feature="TEXT_DETECTION"'
const axios = require('axios');
const FormData = require('form-data');
const fs = require('fs');
// Endpoint that receives the multipart/form-data annotation request.
const ENDPOINT = 'https://manta-api.coxwave.app/api/google-cloud/vision/image-annotation';

// Request body: the image file stream plus the annotation feature to run.
const form = new FormData();
form.append('image', fs.createReadStream('image.png'));
form.append('feature', 'TEXT_DETECTION');

// The API key header comes first; the headers produced by the form are merged in after it.
const headers = {
  'X-MANTA-HUB-API-KEY': '<YOUR-API-KEY-HERE>',
  ...form.getHeaders(),
};

// POST the form; print the response body as JSON on success, or the error on failure.
axios({ method: 'post', url: ENDPOINT, headers, data: form })
  .then(({ data }) => {
    console.log(JSON.stringify(data));
  })
  .catch((err) => {
    console.log(err);
  });
import requests
# Send image.png to the image-annotation endpoint and print the raw response body.
url = "https://manta-api.coxwave.app/api/google-cloud/vision/image-annotation"
payload = {'feature': 'TEXT_DETECTION'}
headers = {
    'X-MANTA-HUB-API-KEY': '<YOUR-API-KEY-HERE>',
}
# Open the image in a context manager so the file handle is closed once the
# upload has finished, instead of leaking it as a bare open() call would.
with open('image.png', 'rb') as image_file:
    files = [('image', ('image.png', image_file, 'image/png'))]
    response = requests.request("POST", url, headers=headers, data=payload, files=files)
print(response.text)