Я тут не так давно для тренировки мозга пытался написать распознаватель текста. Звучит смешно? Конечно, смешно, но всё же... Естественно, в плане универсальности у меня ничего не вышло, но в частных случаях он отлично распознаёт текст. Есть, конечно, куча косяков, например с множеством различных знаков, которые непонятно как отличить друг от друга. Да, есть много недоработок. В общем, руки у меня, естественно, опустились, и начатое я не завершил.
А набор полезных подпрограмм остался, и он мне часто помогает в таких случаях, как с Авито.
Поднастроил основную часть скрипта, добавил масштабирование в браузере, минут двадцать обучения — и номера без проблем и ошибок распознаются кликером без сторонней помощи. С Авито обучение выходит немного своеобразным, так как там дефисы заступают за цифры, цифры заступают на соседние цифры, и, даже изменяя контраст картинки, правильно разделить символы не выходит. Но в итоге всё работает на 100%. Если ещё часок пообучать (изредка всё же вылезают корявые цифры), то можно запускать на часы и сутки, так сказать. Но я пока не в курсе, в чём смысл этих номеров.
видео с кликером в работе на авито: https://www.youtube.com/watch?v=-sNwv0x3vp4
сам код:
#name "text_reader"
////////////////////////////////////////////////////////////////////////////////
/////////разбивает текст на строки//////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// lines: splits the rectangle ($x1_text,$y1_text)-($x2_text,$y2_text) of the
// screen buffer into horizontal text lines.
//
// A row is treated as blank when every pixel in it equals $back_color.
//
// Reads globals : $char_color, $back_color
// Writes globals: $x1_line_arr, $y1_line_arr, $x2_line_arr, $y2_line_arr
//                 (parallel arrays, one entry per detected line; the y2 entry
//                 is the first blank row found after the line)
// Halts the script when text touches the left or right border of the
// rectangle, or when no line is found at all.
SUB(lines,$x1_text,$y1_text,$x2_text,$y2_text)
   UNDEFINE ($x1_line_arr)
   UNDEFINE ($y1_line_arr)
   UNDEFINE ($x2_line_arr)
   UNDEFINE ($y2_line_arr)
   // Number of pixels in one full row of the rectangle.
   $row_width = $x2_text-$x1_text+1
   // Text must not touch the left border column ...
   IF_PIXEL_IN($x1_text,$y1_text, $x1_text,$y2_text, $char_color)
      print("текста не влазит в границы распознавания слева")
      halt
   END_IF
   // ... nor the right border column.
   IF_PIXEL_IN($x2_text,$y1_text, $x2_text,$y2_text, $char_color)
      print("текста не влазит в границы распознавания справа")
      halt
   END_IF
   // If the very first row is not blank, the top line is cut off by the
   // rectangle: skip it. The scan is bounded by $y2_text so it cannot run
   // below the rectangle.
   IF(PXLCOUNT($x1_text,$y1_text,$x2_text,$y1_text,$back_color)<$row_width)
      print("первоя строка обрезана,неучитывается")
      while((PXLCOUNT($x1_text,$y1_text,$x2_text,$y1_text,$back_color)<$row_width)&($y1_text<$y2_text))
         inc($y1_text)
      END_CYC
   END_IF
   while($y1_text<$y2_text)
      // Skip the blank rows in front of the next line.
      while((PXLCOUNT($x1_text,$y1_text,$x2_text,$y1_text,$back_color)=$row_width)&($y1_text<$y2_text))
         inc($y1_text)
      END_CYC
      // Only when a non-blank row was actually found is there a line to
      // measure. Previously the measuring code below also ran when the skip
      // loop simply reached the bottom of the rectangle, which either pushed
      // an orphan x2/y2 entry or popped the last valid line.
      IF($y1_text<$y2_text)
         arrpush($x1_line_arr, $x1_text)
         arrpush($y1_line_arr, $y1_text)
         // Walk down through the line. Blank gaps are stepped over until at
         // least 8 rows have been consumed, so a short gap inside a line does
         // not split it.
         $h_str=0
         while($h_str<8)
            while((PXLCOUNT($x1_text,$y1_text,$x2_text,$y1_text,$back_color)<$row_width)&($y1_text<$y2_text))
               inc($y1_text)
               inc($h_str)
            END_CYC
            IF($h_str<8)
               inc($y1_text)
               inc($h_str)
            END_IF
         END_CYC
         // The line is complete only if it is followed by a blank row;
         // otherwise it is cut off by the rectangle and is discarded.
         IF(PXLCOUNT($x1_text,$y1_text,$x2_text,$y1_text,$back_color)=$row_width)
            arrpush($x2_line_arr, $x2_text)
            arrpush($y2_line_arr, $y1_text)
         else
            print("последняя строка обрезана,неучитывается")
            $del_arr = arrpop($x1_line_arr)
            $del_arr = arrpop($y1_line_arr)
         END_IF
      END_IF
   END_CYC
   print("количество строк - ",arrsize($x1_line_arr))
   // Debug helper: dump the coordinates and a screenshot of every line.
   // FOR($a=0,$a<arrsize($x1_line_arr))
   // print($x1_line_arr[$a]," ",$y1_line_arr[$a]," ",$x2_line_arr[$a]," ",$y2_line_arr[$a])
   // SCREENSHOTEX($x1_line_arr[$a],$y1_line_arr[$a],$x2_line_arr[$a],$y2_line_arr[$a])
   // END_CYC
   IF(arrsize($x1_line_arr)=0)
      print("текста не найденно")
      halt
   END_IF
END_SUB
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
/////////координаты и размер каждого символа в строке///////////////////////////
////////////////////////////////////////////////////////////////////////////////
// pos_size: finds the bounding box of every character inside one text line
// ($x1_line,$y1_line)-($x2_line,$y2_line).
//
// A character is a run of consecutive columns that are not entirely
// $back_color, starting at a column that contains a $char_color pixel.
//
// Reads globals : $char_color, $back_color
// Writes globals: $x1_char_arr, $y1_char_arr, $x2_char_arr, $y2_char_arr
//                 (parallel arrays, one entry per character, left to right)
SUB(pos_size,$x1_line,$y1_line,$x2_line,$y2_line)
   UNDEFINE ($x1_char_arr)
   UNDEFINE ($y1_char_arr)
   UNDEFINE ($x2_char_arr)
   UNDEFINE ($y2_char_arr)
   // Number of pixels in one full column of the line.
   $col_height = $y2_line-$y1_line+1
   // Horizontal extent: scan column by column, top to bottom.
   FOR($x1_line_temp=$x1_line,$x1_line_temp<$x2_line+1)
      FOR($y1_line_temp=$y1_line,$y1_line_temp<$y2_line+1)
         IF(pxl($x1_line_temp,$y1_line_temp)=$char_color)
            // First column of a character.
            arrpush($x1_char_arr,$x1_line_temp)
            // Advance to the first fully blank column. The scan is bounded
            // by the right edge of the line so it can never run outside it.
            while((PXLCOUNT($x1_line_temp,$y1_line,$x1_line_temp,$y2_line,$back_color)<$col_height)&($x1_line_temp<$x2_line+1))
               inc($x1_line_temp)
            END_CYC
            arrpush($x2_char_arr,$x1_line_temp-1)
            // Leave the row loop: the current column is either blank or
            // already outside the line, so there is nothing left to probe.
            $y1_line_temp=$y2_line+1
         END_IF
      END_CYC
   END_CYC
   // Vertical extent: collect the $char_color pixels of each character.
   // NOTE(review): this relies on SCANPXL filling $pix_temp with x,y pairs in
   // top-to-bottom order, so that index 1 is the y of the first hit and the
   // last index is the y of the last hit - confirm against the Clickermann
   // documentation.
   FOR($i_char=0,$i_char<arrsize($x1_char_arr))
      SCANPXL ($pix_temp,$x1_char_arr[$i_char],$y1_line,$x2_char_arr[$i_char],$y2_line,$char_color)
      arrpush($y1_char_arr,$pix_temp[1])
      arrpush($y2_char_arr,$pix_temp[arrsize($pix_temp)-1])
      UNDEFINE ($pix_temp)
   END_CYC
   print("количество символов в строке - ",arrsize($x1_char_arr))
   // Debug helper: save a screenshot of every character box.
   // FOR($a=0,$a<arrsize($x1_char_arr))
   // SCREENSHOTEX($x1_char_arr[$a],$y1_char_arr[$a],$x2_char_arr[$a],$y2_char_arr[$a])
   // END_CYC
END_SUB
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////очертание символов//////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// shape: builds an outline signature for the character whose bounding box is
// ($x1_char,$y1_char)-($x2_char,$y2_char).
//
// From each of the four sides, five probe lines are walked inwards and the
// number of pixels before the first $char_color pixel is counted. Every
// count is normalised by the box size, divided by $accur and reduced to a
// single digit. The 20 digits (left, right, up, down; five each) followed by
// "~" are stored in the global $shape_str.
//
// Probe numbering: for left/right, 1 is the bottom row and 5 the top row;
// for up/down, 1 is the left column and 5 the right column; 2..4 lie at the
// quarter, middle and three-quarter positions in between.
//
// Fix: the middle probes (left_3, right_3, up_3, down_3) used ROUND(size/2)
// as an absolute screen coordinate, i.e. without adding $y1_char / $x1_char,
// so they sampled a pixel outside the character. They now probe the real
// middle row/column. This changes the third digit of every group, so a
// data.txt trained with the old code has to be retrained.
//
// Reads globals : $char_color
// Writes globals: $shape_str (plus the working variables below)
SUB(shape,$x1_char,$y1_char,$x2_char,$y2_char)
   UNDEFINE($left_1)
   UNDEFINE($left_2)
   UNDEFINE($left_3)
   UNDEFINE($left_4)
   UNDEFINE($left_5)
   UNDEFINE($right_1)
   UNDEFINE($right_2)
   UNDEFINE($right_3)
   UNDEFINE($right_4)
   UNDEFINE($right_5)
   UNDEFINE($up_1)
   UNDEFINE($up_2)
   UNDEFINE($up_3)
   UNDEFINE($up_4)
   UNDEFINE($up_5)
   UNDEFINE($down_1)
   UNDEFINE($down_2)
   UNDEFINE($down_3)
   UNDEFINE($down_4)
   UNDEFINE($down_5)
   // Coarseness divisor: the normalised distance is divided by it before
   // being rounded, which limits every digit to a small range.
   // NOTE(review): ROUND(..., -1)*10 presumably rounds to one decimal place
   // and scales it to a single digit - confirm against the ROUND docs.
   $accur = 3
   // Box size in pixels.
   $w_char = $x2_char-$x1_char+1
   $h_char = $y2_char-$y1_char+1
   // Rows used by probes 2..4 of the left/right scans.
   $y_probe_2 = $y2_char-ROUND(($y2_char-$y1_char)/4,0)
   $y_probe_3 = $y1_char+ROUND(($y2_char-$y1_char)/2,0)
   $y_probe_4 = $y1_char+ROUND(($y2_char-$y1_char)/4,0)
   // Columns used by probes 2..4 of the up/down scans.
   $x_probe_2 = $x1_char+ROUND(($x2_char-$x1_char)/4,0)
   $x_probe_3 = $x1_char+ROUND(($x2_char-$x1_char)/2,0)
   $x_probe_4 = $x2_char-ROUND(($x2_char-$x1_char)/4,0)
   // left: walk from the left edge to the right
   FOR($temp=$x1_char,(pxl($temp,$y2_char)!$char_color)&($temp<$x2_char))
      inc($left_1)
   END_CYC
   $left_1 = ROUND($left_1/$w_char/$accur,-1)*10
   FOR($temp=$x1_char,(pxl($temp,$y_probe_2)!$char_color)&($temp<$x2_char))
      inc($left_2)
   END_CYC
   $left_2 = ROUND($left_2/$w_char/$accur,-1)*10
   FOR($temp=$x1_char,(pxl($temp,$y_probe_3)!$char_color)&($temp<$x2_char))
      inc($left_3)
   END_CYC
   $left_3 = ROUND($left_3/$w_char/$accur,-1)*10
   FOR($temp=$x1_char,(pxl($temp,$y_probe_4)!$char_color)&($temp<$x2_char))
      inc($left_4)
   END_CYC
   $left_4 = ROUND($left_4/$w_char/$accur,-1)*10
   FOR($temp=$x1_char,(pxl($temp,$y1_char)!$char_color)&($temp<$x2_char))
      inc($left_5)
   END_CYC
   $left_5 = ROUND($left_5/$w_char/$accur,-1)*10
   // right: walk from the right edge to the left
   FOR($temp=$x2_char,(pxl($temp,$y2_char)!$char_color)&($temp>$x1_char),-1)
      inc($right_1)
   END_CYC
   $right_1 = ROUND($right_1/$w_char/$accur,-1)*10
   FOR($temp=$x2_char,(pxl($temp,$y_probe_2)!$char_color)&($temp>$x1_char),-1)
      inc($right_2)
   END_CYC
   $right_2 = ROUND($right_2/$w_char/$accur,-1)*10
   FOR($temp=$x2_char,(pxl($temp,$y_probe_3)!$char_color)&($temp>$x1_char),-1)
      inc($right_3)
   END_CYC
   $right_3 = ROUND($right_3/$w_char/$accur,-1)*10
   FOR($temp=$x2_char,(pxl($temp,$y_probe_4)!$char_color)&($temp>$x1_char),-1)
      inc($right_4)
   END_CYC
   $right_4 = ROUND($right_4/$w_char/$accur,-1)*10
   FOR($temp=$x2_char,(pxl($temp,$y1_char)!$char_color)&($temp>$x1_char),-1)
      inc($right_5)
   END_CYC
   $right_5 = ROUND($right_5/$w_char/$accur,-1)*10
   // up: walk from the top edge downwards
   FOR($temp=$y1_char,(pxl($x1_char,$temp)!$char_color)&($temp<$y2_char))
      inc($up_1)
   END_CYC
   $up_1 = ROUND($up_1/$h_char/$accur,-1)*10
   FOR($temp=$y1_char,(pxl($x_probe_2,$temp)!$char_color)&($temp<$y2_char))
      inc($up_2)
   END_CYC
   $up_2 = ROUND($up_2/$h_char/$accur,-1)*10
   FOR($temp=$y1_char,(pxl($x_probe_3,$temp)!$char_color)&($temp<$y2_char))
      inc($up_3)
   END_CYC
   $up_3 = ROUND($up_3/$h_char/$accur,-1)*10
   FOR($temp=$y1_char,(pxl($x_probe_4,$temp)!$char_color)&($temp<$y2_char))
      inc($up_4)
   END_CYC
   $up_4 = ROUND($up_4/$h_char/$accur,-1)*10
   FOR($temp=$y1_char,(pxl($x2_char,$temp)!$char_color)&($temp<$y2_char))
      inc($up_5)
   END_CYC
   $up_5 = ROUND($up_5/$h_char/$accur,-1)*10
   // down: walk from the bottom edge upwards
   FOR($temp=$y2_char,(pxl($x1_char,$temp)!$char_color)&($temp>$y1_char),-1)
      inc($down_1)
   END_CYC
   $down_1 = ROUND($down_1/$h_char/$accur,-1)*10
   FOR($temp=$y2_char,(pxl($x_probe_2,$temp)!$char_color)&($temp>$y1_char),-1)
      inc($down_2)
   END_CYC
   $down_2 = ROUND($down_2/$h_char/$accur,-1)*10
   FOR($temp=$y2_char,(pxl($x_probe_3,$temp)!$char_color)&($temp>$y1_char),-1)
      inc($down_3)
   END_CYC
   $down_3 = ROUND($down_3/$h_char/$accur,-1)*10
   FOR($temp=$y2_char,(pxl($x_probe_4,$temp)!$char_color)&($temp>$y1_char),-1)
      inc($down_4)
   END_CYC
   $down_4 = ROUND($down_4/$h_char/$accur,-1)*10
   FOR($temp=$y2_char,(pxl($x2_char,$temp)!$char_color)&($temp>$y1_char),-1)
      inc($down_5)
   END_CYC
   $down_5 = ROUND($down_5/$h_char/$accur,-1)*10
   // Debug helper: print the four groups of the signature.
   // print($left_1," ",$left_2," ",$left_3," ",$left_4," ",$left_5)
   // print($right_1," ",$right_2," ",$right_3," ",$right_4," ",$right_5)
   // print($up_1," ",$up_2," ",$up_3," ",$up_4," ",$up_5)
   // print($down_1," ",$down_2," ",$down_3," ",$down_4," ",$down_5)
   // The trailing "~" terminates the record as it is stored in data.txt.
   $shape_str=STRCONCAT($left_1,$left_2,$left_3,$left_4,$left_5,$right_1,$right_2,$right_3,$right_4,$right_5,$up_1,$up_2,$up_3,$up_4,$up_5,$down_1,$down_2,$down_3,$down_4,$down_5,"~")
   // print($shape_str)
END_SUB
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////контраст////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// kontrast: raises the contrast of the rectangle
// ($x1_contr,$y1_contr)-($x2_contr,$y2_contr) in the screen buffer.
//
// Every pixel whose mean channel value is above $mid gets $k added to each
// channel (but never ends up below $mid); every other pixel gets $k
// subtracted from each channel (but never ends up above $mid). The result is
// limited to 0..255 and written back with PXLREPLACE. Finally the processed
// rectangle is saved with SCREENSHOTEX.
SUB(kontrast,$x1_contr,$y1_contr,$x2_contr,$y2_contr,$mid,$k)
   $y_temp = $y1_contr
   WHILE($y_temp < $y2_contr+1)
      $x_temp = $x1_contr
      WHILE($x_temp < $x2_contr+1)
         // Read the pixel once and split it into channels.
         $col1 = pxl($x_temp,$y_temp)
         $r_in = colorr($col1)
         $g_in = colorg($col1)
         $b_in = colorb($col1)
         IF((($r_in+$g_in+$b_in)/3) > $mid)
            // Bright pixel: push it up, with $mid as the floor.
            $outr = $r_in+$k
            IF($outr < $mid)
               $outr = $mid
            END_IF
            $outg = $g_in+$k
            IF($outg < $mid)
               $outg = $mid
            END_IF
            $outb = $b_in+$k
            IF($outb < $mid)
               $outb = $mid
            END_IF
         ELSE
            // Dark pixel: push it down, with $mid as the ceiling.
            $outr = $r_in-$k
            IF($outr > $mid)
               $outr = $mid
            END_IF
            $outg = $g_in-$k
            IF($outg > $mid)
               $outg = $mid
            END_IF
            $outb = $b_in-$k
            IF($outb > $mid)
               $outb = $mid
            END_IF
         END_IF
         // Keep every channel inside the valid 0..255 range.
         IF($outr < 0)
            $outr = 0
         END_IF
         IF($outr > 255)
            $outr = 255
         END_IF
         IF($outg < 0)
            $outg = 0
         END_IF
         IF($outg > 255)
            $outg = 255
         END_IF
         IF($outb < 0)
            $outb = 0
         END_IF
         IF($outb > 255)
            $outb = 255
         END_IF
         // Replace just this one pixel with the adjusted colour.
         PXLREPLACE($x_temp,$y_temp,$x_temp,$y_temp,$col1,COLORGEN($outr,$outg,$outb))
         inc($x_temp)
      END_CYC
      inc($y_temp)
   END_CYC
   SCREENSHOTEX($x1_contr,$y1_contr,$x2_contr,$y2_contr)
END_SUB
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////распознавание и обучение////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Seed the training file so that it always contains at least one line.
IF(TFCOUNT("data.txt") = 0)
   TFWRITE ("data.txt","а:")
END_IF
// Collects the recognised characters across all reader() calls.
$all_str = ""
// reader: recognises one character from its outline signature $shape, or
// asks the user to name it and stores the answer.
//
// data.txt holds one line per label in the form
//    <label>:<signature>~<signature>~...
// If $shape occurs in a line, the label of that line is appended to the
// global $all_str. Otherwise the mouse is moved under the character, the
// user is asked to type it, and $shape is appended to the line of that label
// (a new line is created when the label is not known yet). An empty answer
// prints "cancel" and halts the script.
//
// Fixes:
//  - the label lookup now requires the line to START with "<label>:";
//    before, any line whose label merely ended with the typed text matched
//    (typing "4" could extend the line "9-4:" instead of "4:");
//  - $check is initialised before each search, so it is never stale when a
//    loop does not execute;
//  - each line is read from the file once per iteration.
//
// NOTE(review): a character learned through the input box is stored in
// data.txt but is not appended to $all_str for the current run - confirm
// that this is intended.
//
// Reads globals : $s, $x1_char_arr, $x2_char_arr, $y2_char_arr
// Writes globals: $all_str
SUB(reader,$shape)
   // 1 = signature not found yet.
   $check=1
   FOR($n_str=1,$n_str<TFCOUNT("data.txt")+1)
      $data_line = TFREAD("data.txt",$n_str)
      IF(STRPOS($data_line, $shape) > 0)
         // Known signature: the label is everything in front of the ":".
         // print(STRCUT($data_line, 1, STRPOS($data_line,":")-1))
         $all_str = STRCONCAT($all_str,STRCUT($data_line, 1, STRPOS($data_line,":")-1))
         // Moving the counter past the end leaves the loop.
         $n_str=TFCOUNT("data.txt")+1
         $check=0
      END_IF
   END_CYC
   IF($check=1)
      // Unknown signature: point at the character and ask the user.
      move(int(($x2_char_arr[$s]-$x1_char_arr[$s])/2)+$x1_char_arr[$s],$y2_char_arr[$s])
      $char_write = INPUTBOX(STRCONCAT("введи цифру под номером ",$s+1), "", 120)
      IF($char_write = "")
         print("cancel")
         halt
      END_IF
      $label = STRCONCAT($char_write,":")
      // 1 = no line with this label found yet.
      $check=1
      FOR($n_str=1,$n_str<TFCOUNT("data.txt")+1)
         $data_line = TFREAD("data.txt",$n_str)
         // Position 1 means the line starts with exactly this label.
         IF(STRPOS($data_line, $label) = 1)
            // Write the extended line, then drop the old one.
            TFWRITE ("data.txt",STRCONCAT($data_line,$shape))
            TFDELETE ("data.txt",$n_str)
            $n_str=TFCOUNT("data.txt")+1
            $check=0
         END_IF
      END_CYC
      IF($check=1)
         // First signature for this label: start a new line.
         TFWRITE ("data.txt",STRCONCAT($char_write,":",$shape))
      END_IF
   END_IF
END_SUB
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
///////основной код/////////////////////////////////////////////////////////////
// Main script: locates the number on the screen, splits it into lines and
// characters, recognises (or learns) every character and prints the result.

// Colours of the text and of the background inside the recognised region.
$char_color = 0
$back_color = 16777215
GETSCREEN
// Look for a pixel of colour 13405953 inside a fixed screen rectangle; the
// recognition region is placed relative to the pixel that was found.
// NOTE(review): the rectangle, the colour and the offsets are hard-coded for
// one particular screen layout and browser zoom.
IF_PIXEL_IN(1346,828,2266,1421, 13405953)
   $x1c = $_return1 + 10
   $y1c = $_return2 - 10
   $x2c = $_return1 + 170
   $y2c = $_return2 + 18
ELSE
   // Without the marker pixel the region is undefined, so stop here instead
   // of calling lines() with undefined coordinates.
   print("маркер номера не найден на экране")
   halt
END_IF
// NOTE(review): presumably reduces the buffer to a mode in which the text is
// exactly $char_color on $back_color - confirm against the COLORMODE docs.
COLORMODE(8)
// Optional contrast boost, currently disabled.
//kontrast($x1c,$y1c,$x2c,$y2c,160,255)
lines($x1c,$y1c,$x2c,$y2c)
// For every line: find the characters, then build and look up the outline
// signature of each one. $s is also read inside reader().
FOR($li=0,$li<arrsize($x1_line_arr))
   pos_size($x1_line_arr[$li],$y1_line_arr[$li],$x2_line_arr[$li],$y2_line_arr[$li])
   FOR($s=0,$s<arrsize($x1_char_arr))
      shape($x1_char_arr[$s],$y1_char_arr[$s],$x2_char_arr[$s],$y2_char_arr[$s])
      reader($shape_str)
   END_CYC
END_CYC
// NOTE(review): presumably prints the result with the "-" characters
// filtered out - confirm against the STRFILTER docs.
print(STRFILTER ($all_str, "-", 0))
halt
То, что у меня записалось в файл "data.txt" за 20 минут обучения:
-:11000110000000020002~00000000000000000000~
9-:02001200121002220011~
2-:02020100120032200311~02020100120002200011~
0-:00001200121002210011~
9-4:01000000001001220011~
6-4:00000000001001210011~
4-4:10011000002002210011~
4:30022000002200211001~20012100012200211001~
4-:20011100112202211011~
5-:02000200211002200011~
-4:30023000002201211011~30023000002202211011~
2-4:01010000000001200011~
5-4:01000000001001203011~
64:10001000001002210011~10001000001001210011~
54:01000000001002200011~01000000001001200011~
34:01011000001002210011~01011000001002200011~
74:11010000000002231011~
94:01000000000002200011~01001000001001220011~
044:00000000001002210011~30023000002200211011~
24:02010000000001200011~
9:13001100011000020001~13001100011000120002~13001100011000120001~13001100010000000001~03001100010000100002~03001100010000100001~13001100011000220001~13001100011010120001~
8:10001100010000000000~10001100010000200001~10001100012000010000~10001100012000210001~10001100010000000001~10001100012010210001~
8-:10001200122002210011~11000000000000020000~
1:32023000001000023000~11011100010000030000~22023000001000023000~22022000000000033000~22022000000000022000~22022000001000023000~22022000011100022000~
2:03031030010000000000~03031030010000100002~03031030011000020000~03031030011000000000~
04:00001000001001210011~20012000002200211001~
3:13031100011000000001~13031100010000000000~13031100010000100001~13031100010000100002~13031100010000200001~13031100011000010001~13031100011000100001~13031100011300010000~
3-:02020200120002200011~00000110000000000002~
6:10001100311000012000~10001100312000211001~10001000301000010000~10001100311000211001~10001000301000011000~10001000302000011000~10001100311000210001~10001000302000021000~10001000301000021000~10001100301000011000~10001100312010211001~10001100311010211001~10001100311000011000~
0:10001100011000110001~10001100011000120001~10001100012000120001~10001100011000010000~10001100010000100001~10001100011000210001~
7:11030220000000030023~11020210100000033023~11020110000000033023~
5:13001100101000000001~13000100201000200031~13000100201000000001~13001100101000010001~13000100101000000031~
6-:10001200321002210011~00001200321002210011~
С этим файлом, естественно, будет работать только при определённом масштабировании в браузере. Какое оно у меня в браузере — в новой версии Оперы я так и не нашёл.
мощно) пригодится
Можно сделать комбайн: сначала скриншоты всех картинок с номерами в нужной категории, а потом уже сбор в текстовый файл.