Lines Matching refs:in_depth

30 // large 'in_depth' and 'out_depth' product. See cost models below for details).
49 int out_tile_rows, int out_tile_cols, int in_depth,
54 input_tile_spatial_size * input_tile_spatial_size * in_depth;
57 const int64 product_cost = input_tile_spatial_size * in_depth * out_depth;
74 static int64 GetDirectConvCost(int filter_rows, int filter_cols, int in_depth,
76 return filter_rows * filter_cols * in_depth * out_depth * out_rows * out_cols;
98 int filter_cols, int in_depth, int out_depth,
117 t.output_shape().cols, in_depth, out_depth, out_rows, out_cols);
119 filter_rows, filter_cols, in_depth, out_depth, out_rows, out_cols);
132 // Copies data from 'filter_in' to 'filter_buf' along 'in_depth' dimension.
135 // [filter_rows, filter_cols, in_depth, out_depth]
138 // [base_filter_rows, base_filter_cols, in_depth]
146 const int64 vectorized_size = args.in_depth / kPacketSize;
147 const int64 scalar_size = args.in_depth % kPacketSize;
174 // in_depth]
177 // [tile_rows, tile_cols, out_depth, shard_rows, shard_cols, in_depth]
183 // [tile_spatial_size, num_filters, shard_rows, shard_cols, in_depth]
205 const int64 in_depth = args.in_depth;
222 const int64 scalar_size = in_depth % kPacketSize;
223 const int64 vectorized_size = in_depth / kPacketSize;
225 const int64 shard_stride = args.in_depth;
241 // Copy vectorized portion of 'in_depth'.
247 // Transform scalar portion of 'in_depth'.
267 // [filter_rows, filter_cols, in_depth, out_depth]
270 // [tile_rows, tile_cols, out_depth, shard_rows, shard_cols, in_depth]
274 // in_depth]
280 // [tile_spatial_size, num_filters, shard_rows, shard_cols, in_depth]
305 const int64 shard_stride = args.in_depth;
309 num_filters * shard_rows * shard_cols * args.in_depth;
314 shard_rows * shard_cols * args.in_depth;
342 (args.in_depth * (f_r * args.filter_cols + f_c)) +
370 // [filter_rows, filter_cols, in_depth, out_depth]
373 // [tile_rows, tile_cols, out_depth, shard_rows, shard_cols, in_depth]
381 const int64 in_depth = args.in_depth;
406 base_filter_spatial_size * in_depth * filter_shards_total;
409 base_filter_spatial_size * filter_shards_total * in_depth;
412 tile_spatial_size * filter_shards_total * in_depth;
436 &num_filters_transform, &in_depth, &out_depth,
441 // [base_filter_rows, base_filter_cols, num_filters_transform, in_depth]
449 filter_shards_col, in_depth}),
454 // [tile_rows, tile_cols, out_depth, shard_rows, shard_cols, in_depth]
461 filter_shards_row, filter_shards_col, in_depth}),
483 const int64 shard_cost = args.filter_rows * args.filter_cols * in_depth *
494 // [out_depth, shard_rows, shard_cols, in_depth].
530 const int64 in_depth = args.in_depth;
535 &tile_spatial_size, &in_depth, &out_depth, &filter_shards_row,
537 const int64 filter_coord_stride = num_filters * in_depth;
539 // Allocate filter buffer [out_depth, shard_rows, shard_cols, in_depth].
543 filter_shards_col, in_depth}),
548 num_filters, in_depth,
555 num_filters * in_depth, shard);
563 // [out_depth, shard_rows, shard_cols, in_depth].
566 // [num_tiles, in_depth]
622 // [in_rows, in_cols, in_depth]
625 // [tile_rows, tile_cols, num_tiles, in_depth]
638 const int64 coord_stride = num_tiles * args.in_depth;
640 // Calculate vectorized and scalar (residual) lengths for 'in_depth'.
642 (args.in_depth / kPacketSize) * kPacketSize;
643 const int64 input_scalar_size = args.in_depth % kPacketSize;
653 auto* in = input + (in_r * args.in_cols + in_c) * args.in_depth;
675 // [in_rows, in_cols, in_depth]
677 // [tile_rows, tile_cols, num_tiles, in_depth]
681 // [tile_rows, tile_cols, num_tiles, in_depth]
702 const int64 coord_stride = num_tiles * args.in_depth;
703 const int64 num_tiles_stride = args.in_depth;
889 const int64 in_depth = args.in_depth;
893 const int64 tile_coord_stride = num_tiles * in_depth;
898 GemmState<T> gemm(num_filters, num_tiles, in_depth, gemm_out_buf_size,
922 // in_depth * out_depth).
942 const int64 in_depth = args.in_depth;
969 filter_shards_row, filter_shards_col, in_depth}),
1004 auto shard = [&ctx, &args, &transform, &packed_filters, &in_depth,
1030 const int64 filter_depth_size = in_depth * out_depth * filter_shard_size;
1040 tile_spatial_size * std::max(in_depth, out_depth * filter_shard_size);
1042 std::max(tile_spatial_size * in_depth,
1044 const int64 packed_tile_per_tile_size = in_depth;
1059 const int64 buffer1_tile_size = tile_spatial_size * num_tiles * in_depth;
1074 tile_spatial_size * num_tiles * in_depth;
1086 // packed tile buffer: [num_tiles, in_depth].
1089 TensorShape({num_tiles, in_depth}),
1113 const int64 input_image_size = args.in_rows * args.in_cols * in_depth;
1147 tile_spatial_size * args.in_depth;